initial commit

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I4a6b498153eccd5407510dd541b7f4816a6a6964
This commit is contained in:
raf 2026-01-30 22:05:46 +03:00
commit 6a73d11c4b
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
124 changed files with 34856 additions and 0 deletions

View file

@ -0,0 +1,39 @@
# Manifest for the pinakes-core library crate: data model, storage
# backends, metadata extraction, import pipeline, and background jobs.
[package]
name = "pinakes-core"
# Shared package metadata is inherited from the workspace manifest.
edition.workspace = true
version.workspace = true
license.workspace = true
[dependencies]
# Async runtime, serialization, and error handling.
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
toml = { workspace = true }
chrono = { workspace = true }
uuid = { workspace = true }
thiserror = { workspace = true }
anyhow = { workspace = true }
tracing = { workspace = true }
# Content hashing and per-format metadata extraction.
blake3 = { workspace = true }
lofty = { workspace = true }
lopdf = { workspace = true }
epub = { workspace = true }
matroska = { workspace = true }
gray_matter = { workspace = true }
# Storage backends (SQLite and PostgreSQL) plus schema migrations.
rusqlite = { workspace = true }
tokio-postgres = { workspace = true }
deadpool-postgres = { workspace = true }
postgres-types = { workspace = true }
refinery = { workspace = true }
# Filesystem walking/watching, parsing, and media-type detection.
walkdir = { workspace = true }
notify = { workspace = true }
winnow = { workspace = true }
mime_guess = { workspace = true }
async-trait = { workspace = true }
kamadak-exif = { workspace = true }
image = { workspace = true }
# "rt" feature gives CancellationToken + task helpers used by the job queue.
tokio-util = { version = "0.7", features = ["rt"] }
reqwest = { workspace = true }
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,21 @@
use uuid::Uuid;
use crate::error::Result;
use crate::model::{AuditAction, AuditEntry, MediaId};
use crate::storage::DynStorageBackend;
/// Append an audit log entry describing `action` to the storage backend.
///
/// `media_id` is optional because some actions are not tied to a single
/// media item; `details` carries free-form context for the entry.
pub async fn record_action(
    storage: &DynStorageBackend,
    media_id: Option<MediaId>,
    action: AuditAction,
    details: Option<String>,
) -> Result<()> {
    // Time-ordered v7 UUIDs keep audit rows naturally sortable by id.
    let entry = AuditEntry {
        id: Uuid::now_v7(),
        timestamp: chrono::Utc::now(),
        media_id,
        action,
        details,
    };
    storage.record_audit(&entry).await
}

View file

@ -0,0 +1,91 @@
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
// A stored value together with its insertion time, used for TTL checks.
struct CacheEntry<V> {
    value: V,
    inserted_at: Instant,
}
/// A simple TTL-based in-memory cache with periodic eviction.
pub struct Cache<K, V> {
    // Shared with the background eviction task spawned in `Cache::new`.
    entries: Arc<RwLock<HashMap<K, CacheEntry<V>>>>,
    // Entries older than this are considered expired on read and evicted.
    ttl: Duration,
}
impl<K, V> Cache<K, V>
where
K: Eq + Hash + Clone + Send + Sync + 'static,
V: Clone + Send + Sync + 'static,
{
pub fn new(ttl: Duration) -> Self {
let cache = Self {
entries: Arc::new(RwLock::new(HashMap::new())),
ttl,
};
// Spawn periodic eviction task
let entries = cache.entries.clone();
let ttl = cache.ttl;
tokio::spawn(async move {
let mut interval = tokio::time::interval(ttl);
loop {
interval.tick().await;
let now = Instant::now();
let mut map = entries.write().await;
map.retain(|_, entry| now.duration_since(entry.inserted_at) < ttl);
}
});
cache
}
pub async fn get(&self, key: &K) -> Option<V> {
let map = self.entries.read().await;
if let Some(entry) = map.get(key) {
if entry.inserted_at.elapsed() < self.ttl {
return Some(entry.value.clone());
}
}
None
}
pub async fn insert(&self, key: K, value: V) {
let mut map = self.entries.write().await;
map.insert(
key,
CacheEntry {
value,
inserted_at: Instant::now(),
},
);
}
pub async fn invalidate(&self, key: &K) {
let mut map = self.entries.write().await;
map.remove(key);
}
pub async fn invalidate_all(&self) {
let mut map = self.entries.write().await;
map.clear();
}
}
/// Application-level cache layer wrapping multiple caches for different data types.
pub struct CacheLayer {
    /// Cache for serialized API responses, keyed by request path + query string.
    pub responses: Cache<String, String>,
}
impl CacheLayer {
    /// Build the cache layer with a single shared TTL (in seconds) for
    /// all contained caches.
    pub fn new(ttl_secs: u64) -> Self {
        let ttl = Duration::from_secs(ttl_secs);
        Self {
            responses: Cache::new(ttl),
        }
    }
}

View file

@ -0,0 +1,78 @@
use uuid::Uuid;
use crate::error::Result;
use crate::model::*;
use crate::storage::DynStorageBackend;
/// Create a new collection; thin delegation to the storage backend.
///
/// `filter_query` is only meaningful for virtual collections, whose
/// membership is evaluated dynamically from the query.
pub async fn create_collection(
    storage: &DynStorageBackend,
    name: &str,
    kind: CollectionKind,
    description: Option<&str>,
    filter_query: Option<&str>,
) -> Result<Collection> {
    let created = storage
        .create_collection(name, kind, description, filter_query)
        .await?;
    Ok(created)
}
/// Add `media_id` to a manual collection at `position`, then record the
/// action in the audit log.
pub async fn add_member(
    storage: &DynStorageBackend,
    collection_id: Uuid,
    media_id: MediaId,
    position: i32,
) -> Result<()> {
    storage
        .add_to_collection(collection_id, media_id, position)
        .await?;
    let details = format!("collection_id={collection_id}");
    crate::audit::record_action(
        storage,
        Some(media_id),
        AuditAction::AddedToCollection,
        Some(details),
    )
    .await
}
/// Remove `media_id` from a manual collection, then record the action in
/// the audit log.
pub async fn remove_member(
    storage: &DynStorageBackend,
    collection_id: Uuid,
    media_id: MediaId,
) -> Result<()> {
    storage
        .remove_from_collection(collection_id, media_id)
        .await?;
    let details = format!("collection_id={collection_id}");
    crate::audit::record_action(
        storage,
        Some(media_id),
        AuditAction::RemovedFromCollection,
        Some(details),
    )
    .await
}
/// Resolve the members of a collection.
///
/// Manual collections read their stored member list; virtual collections
/// run their `filter_query` through the search engine on every call.
pub async fn get_members(
    storage: &DynStorageBackend,
    collection_id: Uuid,
) -> Result<Vec<MediaItem>> {
    let collection = storage.get_collection(collection_id).await?;
    match collection.kind {
        // Explicit member list persisted in storage.
        CollectionKind::Manual => storage.get_collection_members(collection_id).await,
        // Membership computed from the stored search query.
        CollectionKind::Virtual => {
            let Some(ref query_str) = collection.filter_query else {
                // A virtual collection without a query has no members.
                return Ok(Vec::new());
            };
            let query = crate::search::parse_search_query(query_str)?;
            // NOTE(review): results are capped at 10_000 items by this
            // hard-coded page size — confirm the bound is intentional.
            let request = crate::search::SearchRequest {
                query,
                sort: crate::search::SortOrder::DateDesc,
                pagination: Pagination::new(0, 10000, None),
            };
            let results = storage.search(&request).await?;
            Ok(results.items)
        }
    }
}

View file

@ -0,0 +1,437 @@
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
/// Top-level application configuration, deserialized from `pinakes.toml`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Database backend selection and per-backend connection settings.
    pub storage: StorageConfig,
    /// Library root directories to index.
    pub directories: DirectoryConfig,
    /// File scanning behavior (watching, polling, ignore patterns).
    pub scanning: ScanningConfig,
    /// HTTP server bind address and optional API key.
    pub server: ServerConfig,
    /// UI preferences; optional in the TOML file.
    #[serde(default)]
    pub ui: UiConfig,
    /// User accounts; authentication is disabled unless configured.
    #[serde(default)]
    pub accounts: AccountsConfig,
    /// Background job worker and cache tuning.
    #[serde(default)]
    pub jobs: JobsConfig,
    /// Thumbnail generation settings.
    #[serde(default)]
    pub thumbnails: ThumbnailConfig,
    /// Outgoing webhook endpoints notified on events.
    #[serde(default)]
    pub webhooks: Vec<WebhookConfig>,
    /// Recurring scheduled task definitions.
    #[serde(default)]
    pub scheduled_tasks: Vec<ScheduledTaskConfig>,
}
/// One recurring task entry driven by the scheduler.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScheduledTaskConfig {
    /// Stable identifier used to reference the task.
    pub id: String,
    pub enabled: bool,
    /// When the task runs; see `crate::scheduler::Schedule`.
    pub schedule: crate::scheduler::Schedule,
    // Last run timestamp as a string — exact format is set by the
    // scheduler; TODO confirm against crate::scheduler.
    pub last_run: Option<String>,
}
/// Background job system tuning.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobsConfig {
    /// Number of background job workers (default: 2).
    #[serde(default = "default_worker_count")]
    pub worker_count: usize,
    /// TTL in seconds for the application cache layer (default: 60).
    #[serde(default = "default_cache_ttl")]
    pub cache_ttl_secs: u64,
}
// Serde default helpers; keep in sync with the Default impl below.
fn default_worker_count() -> usize {
    2
}
fn default_cache_ttl() -> u64 {
    60
}
impl Default for JobsConfig {
    fn default() -> Self {
        Self {
            worker_count: default_worker_count(),
            cache_ttl_secs: default_cache_ttl(),
        }
    }
}
/// Thumbnail generation settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThumbnailConfig {
    /// Thumbnail edge size in pixels (default: 320).
    #[serde(default = "default_thumb_size")]
    pub size: u32,
    /// Encoding quality, 0-100 scale presumed (default: 80).
    #[serde(default = "default_thumb_quality")]
    pub quality: u8,
    /// Explicit path to ffmpeg; `None` presumably means search `PATH`
    /// — confirm against the thumbnail module.
    #[serde(default)]
    pub ffmpeg_path: Option<String>,
    /// Seek offset into a video before grabbing the frame (default: 2s).
    #[serde(default = "default_video_seek")]
    pub video_seek_secs: u32,
}
// Serde default helpers; keep in sync with the Default impl below.
fn default_thumb_size() -> u32 {
    320
}
fn default_thumb_quality() -> u8 {
    80
}
fn default_video_seek() -> u32 {
    2
}
impl Default for ThumbnailConfig {
    fn default() -> Self {
        Self {
            size: default_thumb_size(),
            quality: default_thumb_quality(),
            ffmpeg_path: None,
            video_seek_secs: default_video_seek(),
        }
    }
}
/// A single outgoing webhook endpoint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebhookConfig {
    /// Destination URL for event POSTs.
    pub url: String,
    /// Event names to deliver; "*" subscribes to everything.
    pub events: Vec<String>,
    /// Optional shared secret for the endpoint.
    #[serde(default)]
    pub secret: Option<String>,
}
/// Frontend presentation preferences.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UiConfig {
    /// Color theme name (default: "dark").
    #[serde(default = "default_theme")]
    pub theme: String,
    /// Initial view shown on load (default: "library").
    #[serde(default = "default_view")]
    pub default_view: String,
    /// Items per page in listings (default: 48).
    #[serde(default = "default_page_size")]
    pub default_page_size: usize,
    /// Layout mode, e.g. grid vs list (default: "grid").
    #[serde(default = "default_view_mode")]
    pub default_view_mode: String,
    #[serde(default)]
    pub auto_play_media: bool,
    #[serde(default = "default_true")]
    pub show_thumbnails: bool,
    #[serde(default)]
    pub sidebar_collapsed: bool,
}
// Serde default helpers; keep in sync with the Default impl below.
fn default_theme() -> String {
    "dark".to_string()
}
fn default_view() -> String {
    "library".to_string()
}
fn default_page_size() -> usize {
    48
}
fn default_view_mode() -> String {
    "grid".to_string()
}
fn default_true() -> bool {
    true
}
impl Default for UiConfig {
    fn default() -> Self {
        Self {
            theme: default_theme(),
            default_view: default_view(),
            default_page_size: default_page_size(),
            default_view_mode: default_view_mode(),
            auto_play_media: false,
            show_thumbnails: true,
            sidebar_collapsed: false,
        }
    }
}
/// Local user accounts; when `enabled` is false, account checks are
/// presumably skipped — confirm against the server's auth middleware.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AccountsConfig {
    #[serde(default)]
    pub enabled: bool,
    #[serde(default)]
    pub users: Vec<UserAccount>,
}
/// A single configured user.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserAccount {
    pub username: String,
    // Pre-hashed password; hash scheme not visible here — confirm
    // against the verification code before documenting further.
    pub password_hash: String,
    /// Authorization level; defaults to the least-privileged role.
    #[serde(default)]
    pub role: UserRole,
}
/// Authorization role, ordered from most to least privileged.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum UserRole {
    Admin,
    Editor,
    #[default]
    Viewer,
}
impl UserRole {
    /// Every role may read.
    pub fn can_read(self) -> bool {
        true
    }
    /// Admins and editors may modify library data.
    pub fn can_write(self) -> bool {
        matches!(self, Self::Admin | Self::Editor)
    }
    /// Only admins may perform administrative operations.
    pub fn can_admin(self) -> bool {
        matches!(self, Self::Admin)
    }
}
impl std::fmt::Display for UserRole {
    // Lowercase labels matching the serde `lowercase` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Admin => write!(f, "admin"),
            Self::Editor => write!(f, "editor"),
            Self::Viewer => write!(f, "viewer"),
        }
    }
}
/// Storage backend selection plus the per-backend settings.
/// Only the section matching `backend` is expected to be populated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageConfig {
    pub backend: StorageBackendType,
    pub sqlite: Option<SqliteConfig>,
    pub postgres: Option<PostgresConfig>,
}
/// Which database engine backs the library.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum StorageBackendType {
    Sqlite,
    Postgres,
}
/// SQLite settings: just the database file location.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SqliteConfig {
    pub path: PathBuf,
}
/// PostgreSQL connection settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PostgresConfig {
    pub host: String,
    pub port: u16,
    pub database: String,
    pub username: String,
    pub password: String,
    /// Connection pool size cap.
    pub max_connections: usize,
}
/// Library root directories that scans and imports operate on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirectoryConfig {
    pub roots: Vec<PathBuf>,
}
/// File scanning behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanningConfig {
    /// Watch the filesystem for changes (vs polling only).
    pub watch: bool,
    /// Seconds between poll-based rescans; must be non-zero (validated).
    pub poll_interval_secs: u64,
    /// Path components to skip during scans (exact match; ".*" = dotfiles).
    pub ignore_patterns: Vec<String>,
    /// Parallel import tasks; validated to be 1..=256 (default: 8).
    #[serde(default = "default_import_concurrency")]
    pub import_concurrency: usize,
}
// Serde default helper for ScanningConfig::import_concurrency.
fn default_import_concurrency() -> usize {
    8
}
/// HTTP server settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerConfig {
    pub host: String,
    pub port: u16,
    /// Optional API key for bearer token authentication.
    /// If set, all requests (except /health) must include `Authorization: Bearer <key>`.
    /// Can also be set via `PINAKES_API_KEY` environment variable.
    pub api_key: Option<String>,
}
impl Config {
pub fn from_file(path: &Path) -> crate::error::Result<Self> {
let content = std::fs::read_to_string(path).map_err(|e| {
crate::error::PinakesError::Config(format!("failed to read config file: {e}"))
})?;
toml::from_str(&content)
.map_err(|e| crate::error::PinakesError::Config(format!("failed to parse config: {e}")))
}
/// Try loading from file, falling back to defaults if the file doesn't exist.
pub fn load_or_default(path: &Path) -> crate::error::Result<Self> {
if path.exists() {
Self::from_file(path)
} else {
let config = Self::default();
// Ensure the data directory exists for the default SQLite database
config.ensure_dirs()?;
Ok(config)
}
}
/// Save the current config to a TOML file.
pub fn save_to_file(&self, path: &Path) -> crate::error::Result<()> {
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
let content = toml::to_string_pretty(self).map_err(|e| {
crate::error::PinakesError::Config(format!("failed to serialize config: {e}"))
})?;
std::fs::write(path, content)?;
Ok(())
}
/// Ensure all directories needed by this config exist and are writable.
pub fn ensure_dirs(&self) -> crate::error::Result<()> {
if let Some(ref sqlite) = self.storage.sqlite
&& let Some(parent) = sqlite.path.parent()
{
std::fs::create_dir_all(parent)?;
let metadata = std::fs::metadata(parent)?;
if metadata.permissions().readonly() {
return Err(crate::error::PinakesError::Config(format!(
"directory is not writable: {}",
parent.display()
)));
}
}
Ok(())
}
/// Returns the default config file path following XDG conventions.
pub fn default_config_path() -> PathBuf {
if let Ok(xdg) = std::env::var("XDG_CONFIG_HOME") {
PathBuf::from(xdg).join("pinakes").join("pinakes.toml")
} else if let Ok(home) = std::env::var("HOME") {
PathBuf::from(home)
.join(".config")
.join("pinakes")
.join("pinakes.toml")
} else {
PathBuf::from("pinakes.toml")
}
}
/// Validate configuration values for correctness.
pub fn validate(&self) -> Result<(), String> {
if self.server.port == 0 {
return Err("server port cannot be 0".into());
}
if self.server.host.is_empty() {
return Err("server host cannot be empty".into());
}
if self.scanning.poll_interval_secs == 0 {
return Err("poll interval cannot be 0".into());
}
if self.scanning.import_concurrency == 0 || self.scanning.import_concurrency > 256 {
return Err("import_concurrency must be between 1 and 256".into());
}
Ok(())
}
/// Returns the default data directory following XDG conventions.
pub fn default_data_dir() -> PathBuf {
if let Ok(xdg) = std::env::var("XDG_DATA_HOME") {
PathBuf::from(xdg).join("pinakes")
} else if let Ok(home) = std::env::var("HOME") {
PathBuf::from(home)
.join(".local")
.join("share")
.join("pinakes")
} else {
PathBuf::from("pinakes-data")
}
}
}
impl Default for Config {
    /// Local-first defaults: SQLite under the XDG data dir, no library
    /// roots, server on 127.0.0.1:3000, no authentication.
    fn default() -> Self {
        let data_dir = Self::default_data_dir();
        let storage = StorageConfig {
            backend: StorageBackendType::Sqlite,
            sqlite: Some(SqliteConfig {
                path: data_dir.join("pinakes.db"),
            }),
            postgres: None,
        };
        // Skip dotfiles and common build/dependency directories.
        let ignore_patterns = [".*", "node_modules", "__pycache__", "target"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        let scanning = ScanningConfig {
            watch: false,
            poll_interval_secs: 300,
            ignore_patterns,
            import_concurrency: default_import_concurrency(),
        };
        let server = ServerConfig {
            host: "127.0.0.1".to_string(),
            port: 3000,
            api_key: None,
        };
        Self {
            storage,
            directories: DirectoryConfig { roots: Vec::new() },
            scanning,
            server,
            ui: UiConfig::default(),
            accounts: AccountsConfig::default(),
            jobs: JobsConfig::default(),
            thumbnails: ThumbnailConfig::default(),
            webhooks: Vec::new(),
            scheduled_tasks: Vec::new(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Build a default config with `scanning.import_concurrency` overridden.
    fn test_config_with_concurrency(concurrency: usize) -> Config {
        let mut config = Config::default();
        config.scanning.import_concurrency = concurrency;
        config
    }
    #[test]
    fn test_validate_import_concurrency_zero() {
        // Validate once and inspect the error, instead of calling twice.
        let err = test_config_with_concurrency(0)
            .validate()
            .expect_err("zero concurrency must be rejected");
        assert!(err.contains("import_concurrency"));
    }
    #[test]
    fn test_validate_import_concurrency_too_high() {
        let err = test_config_with_concurrency(257)
            .validate()
            .expect_err("concurrency above 256 must be rejected");
        assert!(err.contains("import_concurrency"));
    }
    #[test]
    fn test_validate_import_concurrency_valid() {
        assert!(test_config_with_concurrency(8).validate().is_ok());
    }
    #[test]
    fn test_validate_import_concurrency_boundary_low() {
        assert!(test_config_with_concurrency(1).validate().is_ok());
    }
    #[test]
    fn test_validate_import_concurrency_boundary_high() {
        assert!(test_config_with_concurrency(256).validate().is_ok());
    }
}

View file

@ -0,0 +1,59 @@
use std::path::PathBuf;
use thiserror::Error;
/// Unified error type for the pinakes-core crate.
/// Display strings are produced by `thiserror` from the `#[error]` attrs.
#[derive(Debug, Error)]
pub enum PinakesError {
    /// Wrapped filesystem / OS error.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// Backend-agnostic database failure (see the From impls below).
    #[error("database error: {0}")]
    Database(String),
    #[error("migration error: {0}")]
    Migration(String),
    #[error("configuration error: {0}")]
    Config(String),
    /// A media item lookup (by id or similar key) found nothing.
    #[error("media item not found: {0}")]
    NotFound(String),
    /// An item with the same content hash already exists.
    #[error("duplicate content hash: {0}")]
    DuplicateHash(String),
    /// The file extension maps to no known media type.
    #[error("unsupported media type for path: {0}")]
    UnsupportedMediaType(PathBuf),
    #[error("metadata extraction failed: {0}")]
    MetadataExtraction(String),
    /// The user-supplied search query could not be parsed.
    #[error("search query parse error: {0}")]
    SearchParse(String),
    /// A path expected on disk does not exist.
    #[error("file not found at path: {0}")]
    FileNotFound(PathBuf),
    #[error("tag not found: {0}")]
    TagNotFound(String),
    #[error("collection not found: {0}")]
    CollectionNotFound(String),
    /// Catch-all for requests that violate an invariant (e.g. importing
    /// a path outside the configured roots).
    #[error("invalid operation: {0}")]
    InvalidOperation(String),
}
impl From<rusqlite::Error> for PinakesError {
    /// Collapse SQLite driver errors into the generic database variant.
    fn from(e: rusqlite::Error) -> Self {
        Self::Database(e.to_string())
    }
}
impl From<tokio_postgres::Error> for PinakesError {
    /// Collapse Postgres driver errors into the generic database variant.
    fn from(e: tokio_postgres::Error) -> Self {
        Self::Database(e.to_string())
    }
}
/// Crate-wide result alias.
pub type Result<T> = std::result::Result<T, PinakesError>;

View file

@ -0,0 +1,106 @@
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use tokio::sync::broadcast;
use tracing::warn;
use crate::config::WebhookConfig;
/// Events broadcast on the [`EventBus`] and delivered to webhooks as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PinakesEvent {
    /// A new media file was imported.
    MediaImported {
        media_id: String,
    },
    /// An existing media item's record changed.
    MediaUpdated {
        media_id: String,
    },
    /// A media item was removed.
    MediaDeleted {
        media_id: String,
    },
    /// A library scan finished.
    ScanCompleted {
        files_found: usize,
        files_processed: usize,
    },
    /// A file's recomputed hash did not match the stored one.
    IntegrityMismatch {
        media_id: String,
        expected: String,
        actual: String,
    },
}
impl PinakesEvent {
    /// Stable event name used for webhook subscription matching; must
    /// stay in sync with the names users put in `WebhookConfig::events`.
    pub fn event_name(&self) -> &'static str {
        match self {
            Self::MediaImported { .. } => "media_imported",
            Self::MediaUpdated { .. } => "media_updated",
            Self::MediaDeleted { .. } => "media_deleted",
            Self::ScanCompleted { .. } => "scan_completed",
            Self::IntegrityMismatch { .. } => "integrity_mismatch",
        }
    }
}
/// Fan-out broadcast bus for application events; also drives webhook
/// delivery when any webhooks are configured.
pub struct EventBus {
    tx: broadcast::Sender<PinakesEvent>,
}
impl EventBus {
    /// Create the bus and, if `webhooks` is non-empty, spawn a delivery
    /// task that forwards matching events to each configured endpoint.
    /// The task exits once every sender handle is dropped.
    pub fn new(webhooks: Vec<WebhookConfig>) -> Arc<Self> {
        let (tx, _) = broadcast::channel(256);
        if !webhooks.is_empty() {
            let mut rx = tx.subscribe();
            let hooks = Arc::new(webhooks);
            tokio::spawn(async move {
                while let Ok(event) = rx.recv().await {
                    let name = event.event_name();
                    // "*" subscribes a hook to every event.
                    let matching = hooks
                        .iter()
                        .filter(|hook| hook.events.iter().any(|e| e == name || e == "*"));
                    for hook in matching {
                        let url = hook.url.clone();
                        let payload = event.clone();
                        let secret = hook.secret.clone();
                        // Deliver each webhook on its own task so a slow
                        // endpoint does not stall the others.
                        tokio::spawn(async move {
                            deliver_webhook(&url, &payload, secret.as_deref()).await;
                        });
                    }
                }
            });
        }
        Arc::new(Self { tx })
    }
    /// Broadcast `event` to all subscribers.
    pub fn emit(&self, event: PinakesEvent) {
        // Ignore send errors (no receivers)
        let _ = self.tx.send(event);
    }
}
/// POST `event` to `url` as JSON, retrying up to three times with
/// exponential backoff (1s, 2s) between attempts. Gives up silently
/// after the final failure (each failure is logged).
///
/// NOTE(review): `_secret` is accepted but unused — the config carries a
/// webhook secret yet no signature/auth header is computed here. TODO:
/// sign the payload (e.g. HMAC over the body) and send it in a header
/// before relying on webhook authenticity.
async fn deliver_webhook(url: &str, event: &PinakesEvent, _secret: Option<&str>) {
    const MAX_ATTEMPTS: u32 = 3;
    let client = reqwest::Client::new();
    let body = serde_json::to_string(event).unwrap_or_default();
    for attempt in 0..MAX_ATTEMPTS {
        match client
            .post(url)
            .header("Content-Type", "application/json")
            .body(body.clone())
            .send()
            .await
        {
            Ok(resp) if resp.status().is_success() => return,
            Ok(resp) => {
                warn!(url, status = %resp.status(), attempt, "webhook delivery failed");
            }
            Err(e) => {
                warn!(url, error = %e, attempt, "webhook delivery error");
            }
        }
        // Back off only between attempts; previously this also slept 4s
        // after the final failure for no benefit.
        if attempt + 1 < MAX_ATTEMPTS {
            tokio::time::sleep(std::time::Duration::from_secs(1 << attempt)).await;
        }
    }
}

View file

@ -0,0 +1,68 @@
use std::path::Path;
use serde::{Deserialize, Serialize};
use crate::error::Result;
use crate::jobs::ExportFormat;
use crate::storage::DynStorageBackend;
/// Summary returned by [`export_library`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportResult {
    /// Number of media items written to the export file.
    pub items_exported: usize,
    /// Destination path as a (lossily converted) string.
    pub output_path: String,
}
/// Export library data to the specified format.
///
/// Fetches every media item (unbounded pagination), serializes it as
/// JSON or CSV, and writes the result to `destination`.
///
/// # Errors
/// Propagates storage errors, serialization failures (as `Config`
/// errors, matching the existing convention here), and file-write IO
/// errors.
pub async fn export_library(
    storage: &DynStorageBackend,
    format: &ExportFormat,
    destination: &Path,
) -> Result<ExportResult> {
    let pagination = crate::model::Pagination {
        offset: 0,
        limit: u64::MAX,
        sort: None,
    };
    // Was `&&pagination`; the double reference only compiled via deref
    // coercion.
    let items = storage.list_media(&pagination).await?;
    let count = items.len();
    match format {
        ExportFormat::Json => {
            let json = serde_json::to_string_pretty(&items)
                .map_err(|e| crate::error::PinakesError::Config(format!("json serialize: {e}")))?;
            std::fs::write(destination, json)?;
        }
        ExportFormat::Csv => {
            let mut csv = String::new();
            csv.push_str("id,path,file_name,media_type,content_hash,file_size,title,artist,album,genre,year,duration_secs,description,created_at,updated_at\n");
            for item in &items {
                // Free-form text fields are escaped: previously a comma
                // or quote in e.g. a title corrupted the row layout.
                let fields = [
                    item.id.to_string(),
                    csv_escape(&item.path.display().to_string()),
                    csv_escape(&item.file_name),
                    format!("{:?}", item.media_type),
                    item.content_hash.to_string(),
                    item.file_size.to_string(),
                    csv_escape(item.title.as_deref().unwrap_or("")),
                    csv_escape(item.artist.as_deref().unwrap_or("")),
                    csv_escape(item.album.as_deref().unwrap_or("")),
                    csv_escape(item.genre.as_deref().unwrap_or("")),
                    item.year.map(|y| y.to_string()).unwrap_or_default(),
                    item.duration_secs
                        .map(|d| d.to_string())
                        .unwrap_or_default(),
                    csv_escape(item.description.as_deref().unwrap_or("")),
                    item.created_at.to_string(),
                    item.updated_at.to_string(),
                ];
                csv.push_str(&fields.join(","));
                csv.push('\n');
            }
            std::fs::write(destination, csv)?;
        }
    }
    Ok(ExportResult {
        items_exported: count,
        output_path: destination.to_string_lossy().to_string(),
    })
}

/// Quote a CSV field per RFC 4180 when it contains a comma, quote, or
/// line break; embedded quotes are doubled. Plain fields pass through.
fn csv_escape(field: &str) -> String {
    if field.contains(|c| c == ',' || c == '"' || c == '\n' || c == '\r') {
        format!("\"{}\"", field.replace('"', "\"\""))
    } else {
        field.to_string()
    }
}

View file

@ -0,0 +1,31 @@
use std::path::Path;
use crate::error::Result;
use crate::model::ContentHash;
const BUFFER_SIZE: usize = 65536;
pub async fn compute_file_hash(path: &Path) -> Result<ContentHash> {
let path = path.to_path_buf();
let hash = tokio::task::spawn_blocking(move || -> Result<ContentHash> {
let mut hasher = blake3::Hasher::new();
let mut file = std::fs::File::open(&path)?;
let mut buf = vec![0u8; BUFFER_SIZE];
loop {
let n = std::io::Read::read(&mut file, &mut buf)?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(ContentHash::new(hasher.finalize().to_hex().to_string()))
})
.await
.map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))??;
Ok(hash)
}
/// Hash an in-memory byte slice with BLAKE3 (synchronous, no I/O).
pub fn compute_hash_sync(data: &[u8]) -> ContentHash {
    ContentHash::new(blake3::hash(data).to_hex().to_string())
}

View file

@ -0,0 +1,250 @@
use std::path::{Path, PathBuf};
use tracing::info;
use crate::audit;
use crate::error::{PinakesError, Result};
use crate::hash::compute_file_hash;
use crate::media_type::MediaType;
use crate::metadata;
use crate::model::*;
use crate::storage::DynStorageBackend;
use crate::thumbnail;
/// Outcome of importing a single file.
pub struct ImportResult {
    /// Id of the imported item — or of the pre-existing item when the
    /// file was a content-hash duplicate.
    pub media_id: MediaId,
    /// True when an item with identical content already existed.
    pub was_duplicate: bool,
    /// Canonicalized path that was imported.
    pub path: PathBuf,
}
/// Check that a canonicalized path falls under at least one configured root directory.
/// If no roots are configured, all paths are allowed (for ad-hoc imports).
pub async fn validate_path_in_roots(storage: &DynStorageBackend, path: &Path) -> Result<()> {
    let roots = storage.list_root_dirs().await?;
    if roots.is_empty() {
        return Ok(());
    }
    // Roots that fail to canonicalize (e.g. missing) simply don't match.
    let permitted = roots.iter().any(|root| {
        root.canonicalize()
            .map(|canonical| path.starts_with(&canonical))
            .unwrap_or(false)
    });
    if permitted {
        Ok(())
    } else {
        Err(PinakesError::InvalidOperation(format!(
            "path {} is not within any configured root directory",
            path.display()
        )))
    }
}
/// Import a single file into the library.
///
/// Pipeline: existence check → canonicalize → root-dir authorization →
/// media-type detection → content hashing (deduplicated by hash) →
/// metadata + thumbnail extraction on blocking threads → persist item,
/// custom fields, and an audit entry.
///
/// # Errors
/// - [`PinakesError::FileNotFound`] if `path` does not exist.
/// - [`PinakesError::InvalidOperation`] if `path` lies outside all roots.
/// - [`PinakesError::UnsupportedMediaType`] for unknown extensions.
pub async fn import_file(storage: &DynStorageBackend, path: &Path) -> Result<ImportResult> {
    // Check existence BEFORE canonicalizing: canonicalize() itself fails
    // on a missing path, which made the original `!path.exists()` check
    // (placed after it) unreachable and surfaced a raw IO error instead
    // of FileNotFound.
    if !path.exists() {
        return Err(PinakesError::FileNotFound(path.to_path_buf()));
    }
    let path = path.canonicalize()?;
    validate_path_in_roots(storage, &path).await?;
    let media_type = MediaType::from_path(&path)
        .ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?;
    let content_hash = compute_file_hash(&path).await?;
    // Deduplicate: identical content (by hash) resolves to the existing item.
    if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
        return Ok(ImportResult {
            media_id: existing.id,
            was_duplicate: true,
            path: path.clone(),
        });
    }
    let file_meta = std::fs::metadata(&path)?;
    let file_size = file_meta.len();
    // Metadata extraction does synchronous tag/container parsing; run it
    // off the async executor.
    let extracted = {
        let path_clone = path.clone();
        tokio::task::spawn_blocking(move || metadata::extract_metadata(&path_clone, media_type))
            .await
            .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
    };
    let file_name = path
        .file_name()
        .unwrap_or_default()
        .to_string_lossy()
        .to_string();
    let now = chrono::Utc::now();
    let media_id = MediaId::new();
    // Generate thumbnail for image types (blocking image/ffmpeg work).
    let thumb_path = {
        let source = path.clone();
        let thumb_dir = thumbnail::default_thumbnail_dir();
        tokio::task::spawn_blocking(move || {
            thumbnail::generate_thumbnail(media_id, &source, media_type, &thumb_dir)
        })
        .await
        .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
    };
    let item = MediaItem {
        id: media_id,
        path: path.clone(),
        file_name,
        media_type,
        content_hash,
        file_size,
        title: extracted.title,
        artist: extracted.artist,
        album: extracted.album,
        genre: extracted.genre,
        year: extracted.year,
        duration_secs: extracted.duration_secs,
        description: extracted.description,
        thumbnail_path: thumb_path,
        custom_fields: std::collections::HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await?;
    // Store extracted extra metadata as custom fields; failures here are
    // logged but do not fail the import.
    for (key, value) in &extracted.extra {
        let field = CustomField {
            field_type: CustomFieldType::Text,
            value: value.clone(),
        };
        if let Err(e) = storage.set_custom_field(media_id, key, &field).await {
            tracing::warn!(
                media_id = %media_id,
                field = %key,
                error = %e,
                "failed to store extracted metadata as custom field"
            );
        }
    }
    audit::record_action(
        storage,
        Some(media_id),
        AuditAction::Imported,
        Some(format!("path={}", path.display())),
    )
    .await?;
    info!(media_id = %media_id, path = %path.display(), "imported media file");
    Ok(ImportResult {
        media_id,
        was_duplicate: false,
        path,
    })
}
/// Return true if any normal component of `path` matches one of the
/// ignore `patterns`.
///
/// Matching is exact per path component, with one special case: the
/// pattern ".*" matches any dotfile component. The original also had a
/// `pattern starts with '.' && name starts with '.' && pattern == name`
/// branch, which was fully subsumed by the exact-match check and has
/// been removed (dead code; behavior unchanged).
pub(crate) fn should_ignore(path: &std::path::Path, patterns: &[String]) -> bool {
    path.components().any(|component| {
        // Only normal components (file/dir names) are matched; prefixes
        // and root markers never trigger an ignore.
        let std::path::Component::Normal(name) = component else {
            return false;
        };
        let name = name.to_string_lossy();
        patterns.iter().any(|pattern| {
            // ".*" is the only glob supported: it matches any dotfile.
            (pattern == ".*" && name.starts_with('.')) || name == pattern.as_str()
        })
    })
}
/// Default number of concurrent import tasks.
const DEFAULT_IMPORT_CONCURRENCY: usize = 8;
/// Recursively import every supported media file under `dir`, using the
/// default concurrency. See [`import_directory_with_concurrency`].
pub async fn import_directory(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
    import_directory_with_concurrency(storage, dir, ignore_patterns, DEFAULT_IMPORT_CONCURRENCY)
        .await
}
pub async fn import_directory_with_concurrency(
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
concurrency: usize,
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
let concurrency = concurrency.clamp(1, 256);
let dir = dir.to_path_buf();
let patterns = ignore_patterns.to_vec();
let entries: Vec<PathBuf> = {
let dir = dir.clone();
tokio::task::spawn_blocking(move || {
walkdir::WalkDir::new(&dir)
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| MediaType::from_path(e.path()).is_some())
.filter(|e| !should_ignore(e.path(), &patterns))
.map(|e| e.path().to_path_buf())
.collect()
})
.await
.map_err(|e| PinakesError::Io(std::io::Error::other(e)))?
};
let mut results = Vec::with_capacity(entries.len());
let mut join_set = tokio::task::JoinSet::new();
let mut pending_paths: Vec<PathBuf> = Vec::new();
for entry_path in entries {
let storage = storage.clone();
let path = entry_path.clone();
pending_paths.push(entry_path);
join_set.spawn(async move {
let result = import_file(&storage, &path).await;
(path, result)
});
// Limit concurrency by draining when we hit the cap
if join_set.len() >= concurrency
&& let Some(Ok((path, result))) = join_set.join_next().await
{
match result {
Ok(r) => results.push(Ok(r)),
Err(e) => {
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
results.push(Err(e));
}
}
}
}
// Drain remaining tasks
while let Some(Ok((path, result))) = join_set.join_next().await {
match result {
Ok(r) => results.push(Ok(r)),
Err(e) => {
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
results.push(Err(e));
}
}
}
Ok(results)
}

View file

@ -0,0 +1,201 @@
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::error::Result;
use crate::hash::compute_file_hash;
use crate::model::{ContentHash, MediaId};
use crate::storage::DynStorageBackend;
/// Result of an orphan-detection pass.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrphanReport {
    /// Media items whose files no longer exist on disk.
    pub orphaned_ids: Vec<MediaId>,
    /// Files on disk that are not tracked in the database.
    // NOTE(review): never populated by detect_orphans — confirm whether
    // untracked-file detection is implemented elsewhere.
    pub untracked_paths: Vec<PathBuf>,
    /// Files that appear to have moved (same hash, different path).
    // NOTE(review): never populated by detect_orphans — see above.
    pub moved_files: Vec<(MediaId, PathBuf, PathBuf)>,
}
/// How to handle detected orphans in [`resolve_orphans`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OrphanAction {
    /// Remove the orphaned records from the database.
    Delete,
    /// Leave the records untouched.
    Ignore,
}
/// Result of an integrity-verification pass over media files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationReport {
    /// Count of files whose recomputed hash matched the stored one.
    pub verified: usize,
    /// (id, expected hash, actual hash) for mismatching files.
    pub mismatched: Vec<(MediaId, String, String)>,
    /// Items whose file no longer exists on disk.
    pub missing: Vec<MediaId>,
    /// (id, error message) for files that could not be hashed.
    pub errors: Vec<(MediaId, String)>,
}
/// Per-item integrity state; round-trips through `Display`/`FromStr`
/// using the same lowercase labels as the serde representation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntegrityStatus {
    Unverified,
    Verified,
    Mismatch,
    Missing,
}
impl std::fmt::Display for IntegrityStatus {
    /// Lowercase label; inverse of the `FromStr` impl below.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Unverified => "unverified",
            Self::Verified => "verified",
            Self::Mismatch => "mismatch",
            Self::Missing => "missing",
        };
        f.write_str(label)
    }
}
impl std::str::FromStr for IntegrityStatus {
    type Err = String;
    /// Parse the lowercase label produced by `Display`; any other input
    /// yields a descriptive error message.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let status = match s {
            "unverified" => Self::Unverified,
            "verified" => Self::Verified,
            "mismatch" => Self::Mismatch,
            "missing" => Self::Missing,
            other => return Err(format!("unknown integrity status: {other}")),
        };
        Ok(status)
    }
}
/// Detect orphaned media items (files that no longer exist on disk).
pub async fn detect_orphans(storage: &DynStorageBackend) -> Result<OrphanReport> {
let media_paths = storage.list_media_paths().await?;
let mut orphaned_ids = Vec::new();
let moved_files = Vec::new();
for (id, path, _hash) in &media_paths {
if !path.exists() {
orphaned_ids.push(*id);
}
}
info!(
orphaned = orphaned_ids.len(),
total = media_paths.len(),
"orphan detection complete"
);
Ok(OrphanReport {
orphaned_ids,
untracked_paths: Vec::new(),
moved_files,
})
}
/// Resolve orphaned media items by deleting them from the database.
///
/// Returns the number of records deleted (0 for `Ignore`).
pub async fn resolve_orphans(
    storage: &DynStorageBackend,
    action: OrphanAction,
    ids: &[MediaId],
) -> Result<u64> {
    match action {
        OrphanAction::Ignore => {
            info!(count = ids.len(), "orphans ignored");
            Ok(0)
        }
        OrphanAction::Delete => {
            let count = storage.batch_delete_media(ids).await?;
            info!(count, "resolved orphans by deletion");
            Ok(count)
        }
    }
}
/// Verify integrity of media files by recomputing hashes and comparing.
///
/// `media_ids = None` checks the whole library; `Some(ids)` restricts
/// the pass to that subset.
pub async fn verify_integrity(
    storage: &DynStorageBackend,
    media_ids: Option<&[MediaId]>,
) -> Result<VerificationReport> {
    let all_paths = storage.list_media_paths().await?;
    // Restrict to the requested subset, if any.
    let targets: Vec<(MediaId, PathBuf, ContentHash)> = match media_ids {
        Some(ids) => {
            let wanted: std::collections::HashSet<MediaId> = ids.iter().copied().collect();
            all_paths
                .into_iter()
                .filter(|(id, _, _)| wanted.contains(id))
                .collect()
        }
        None => all_paths,
    };
    let mut report = VerificationReport {
        verified: 0,
        mismatched: Vec::new(),
        missing: Vec::new(),
        errors: Vec::new(),
    };
    for (id, path, expected) in targets {
        if !path.exists() {
            report.missing.push(id);
            continue;
        }
        match compute_file_hash(&path).await {
            Ok(actual) if actual.0 == expected.0 => report.verified += 1,
            Ok(actual) => report.mismatched.push((id, expected.0, actual.0)),
            Err(e) => report.errors.push((id, e.to_string())),
        }
    }
    info!(
        verified = report.verified,
        mismatched = report.mismatched.len(),
        missing = report.missing.len(),
        errors = report.errors.len(),
        "integrity verification complete"
    );
    Ok(report)
}
/// Clean up orphaned thumbnail files that don't correspond to any media item.
///
/// Thumbnails are matched to items by file stem == media id string.
/// Returns the number of files removed; individual removal failures are
/// logged and skipped.
pub async fn cleanup_orphaned_thumbnails(
    storage: &DynStorageBackend,
    thumbnail_dir: &Path,
) -> Result<usize> {
    let media_paths = storage.list_media_paths().await?;
    let known_ids: std::collections::HashSet<String> = media_paths
        .iter()
        .map(|(id, _, _)| id.0.to_string())
        .collect();
    let mut removed = 0;
    if thumbnail_dir.exists() {
        for entry in std::fs::read_dir(thumbnail_dir)?.flatten() {
            let path = entry.path();
            // Files without a UTF-8 stem can't be matched; leave them.
            let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
                continue;
            };
            if known_ids.contains(stem) {
                continue;
            }
            match std::fs::remove_file(&path) {
                Ok(()) => removed += 1,
                Err(e) => {
                    warn!(path = %path.display(), error = %e, "failed to remove orphaned thumbnail");
                }
            }
        }
    }
    info!(removed, "orphaned thumbnail cleanup complete");
    Ok(removed)
}

View file

@ -0,0 +1,226 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use tokio::sync::{RwLock, mpsc};
use tokio_util::sync::CancellationToken;
use uuid::Uuid;
use crate::model::MediaId;
/// The kinds of background work the job queue can run.
/// Serialized with a `type` discriminator in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum JobKind {
    /// Scan for media; `None` presumably means the whole library — confirm
    /// against the scan module.
    Scan {
        path: Option<PathBuf>,
    },
    /// Generate thumbnails for the listed items.
    GenerateThumbnails {
        media_ids: Vec<MediaId>,
    },
    /// Recompute and compare content hashes for the listed items.
    VerifyIntegrity {
        media_ids: Vec<MediaId>,
    },
    /// Detect database entries whose files are gone.
    OrphanDetection,
    /// Remove thumbnails that no longer match any media item.
    CleanupThumbnails,
    /// Export the catalog to `destination` in the given format.
    Export {
        format: ExportFormat,
        destination: PathBuf,
    },
}
/// Output formats accepted by the `Export` job.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ExportFormat {
    Json,
    Csv,
}
/// Lifecycle state of a job; serialized with a `state` discriminator.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "state")]
pub enum JobStatus {
    /// Recorded but not yet picked up by a worker.
    Pending,
    /// Being executed; `progress`/`message` are set by the executor
    /// (progress scale is executor-defined — confirm it is 0.0–1.0).
    Running { progress: f32, message: String },
    /// Finished successfully with an executor-defined JSON result.
    Completed { result: Value },
    /// Finished with an error message.
    Failed { error: String },
    /// Cancelled via `JobQueue::cancel`.
    Cancelled,
}
/// A job record: what was requested, its current status, and timestamps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Job {
    pub id: Uuid,
    pub kind: JobKind,
    pub status: JobStatus,
    pub created_at: DateTime<Utc>,
    /// Bumped on every status transition.
    pub updated_at: DateTime<Utc>,
}
/// Internal unit of work handed from `JobQueue::submit` to a worker task.
struct WorkerItem {
    job_id: Uuid,
    kind: JobKind,
    cancel: CancellationToken,
}
/// In-memory async job queue: submitted jobs are fanned out to background
/// worker tasks over a bounded mpsc channel.
pub struct JobQueue {
    // Status of every job ever submitted (entries are never removed here).
    jobs: Arc<RwLock<HashMap<Uuid, Job>>>,
    // Live cancellation tokens; workers remove them once a job finishes.
    cancellations: Arc<RwLock<HashMap<Uuid, CancellationToken>>>,
    // Work channel feeding the workers.
    tx: mpsc::Sender<WorkerItem>,
}
impl JobQueue {
    /// Create a new job queue and spawn `worker_count` background workers.
    ///
    /// The `executor` callback is invoked for each job; it receives the job kind,
    /// a progress-reporting callback, and a cancellation token.
    /// Workers shut down once every `JobQueue` clone (and thus every sender)
    /// is dropped and the channel drains.
    pub fn new<F>(worker_count: usize, executor: F) -> Arc<Self>
    where
        F: Fn(
            Uuid,
            JobKind,
            CancellationToken,
            Arc<RwLock<HashMap<Uuid, Job>>>,
        ) -> tokio::task::JoinHandle<()>
            + Send
            + Sync
            + 'static,
    {
        let (tx, rx) = mpsc::channel::<WorkerItem>(256);
        // Workers share one receiver behind an async mutex so each queued
        // item is taken by exactly one worker.
        let rx = Arc::new(tokio::sync::Mutex::new(rx));
        let jobs: Arc<RwLock<HashMap<Uuid, Job>>> = Arc::new(RwLock::new(HashMap::new()));
        let cancellations: Arc<RwLock<HashMap<Uuid, CancellationToken>>> =
            Arc::new(RwLock::new(HashMap::new()));
        let executor = Arc::new(executor);
        for _ in 0..worker_count {
            let rx = rx.clone();
            let jobs = jobs.clone();
            let cancellations = cancellations.clone();
            let executor = executor.clone();
            tokio::spawn(async move {
                loop {
                    // Hold the receiver lock only while waiting for the next
                    // item; release it before the job runs.
                    let item = {
                        let mut guard = rx.lock().await;
                        guard.recv().await
                    };
                    // `None` means all senders dropped: worker shuts down.
                    let Some(item) = item else { break };
                    // Mark as running
                    {
                        let mut map = jobs.write().await;
                        if let Some(job) = map.get_mut(&item.job_id) {
                            job.status = JobStatus::Running {
                                progress: 0.0,
                                message: "starting".to_string(),
                            };
                            job.updated_at = Utc::now();
                        }
                    }
                    let handle = executor(item.job_id, item.kind, item.cancel, jobs.clone());
                    // Executor outcomes are reported through the jobs map, so
                    // the join result itself is intentionally ignored.
                    let _ = handle.await;
                    // Clean up cancellation token
                    cancellations.write().await.remove(&item.job_id);
                }
            });
        }
        Arc::new(Self {
            jobs,
            cancellations,
            tx,
        })
    }
    /// Submit a new job, returning its ID.
    ///
    /// The job is recorded as `Pending` before being handed to the workers.
    pub async fn submit(&self, kind: JobKind) -> Uuid {
        let id = Uuid::now_v7();
        let now = Utc::now();
        let cancel = CancellationToken::new();
        let job = Job {
            id,
            kind: kind.clone(),
            status: JobStatus::Pending,
            created_at: now,
            updated_at: now,
        };
        self.jobs.write().await.insert(id, job);
        self.cancellations.write().await.insert(id, cancel.clone());
        let item = WorkerItem {
            job_id: id,
            kind,
            cancel,
        };
        // If the channel is full we still record the job — it'll stay Pending
        let _ = self.tx.send(item).await;
        id
    }
    /// Get the status of a job.
    pub async fn status(&self, id: Uuid) -> Option<Job> {
        self.jobs.read().await.get(&id).cloned()
    }
    /// List all jobs, most recent first.
    pub async fn list(&self) -> Vec<Job> {
        let map = self.jobs.read().await;
        let mut jobs: Vec<Job> = map.values().cloned().collect();
        jobs.sort_by(|a, b| b.created_at.cmp(&a.created_at));
        jobs
    }
    /// Cancel a running or pending job.
    ///
    /// Returns `true` when a cancellation token was still registered for the
    /// job (i.e. it had not finished). NOTE(review): the status is flipped to
    /// `Cancelled` immediately, before the executor has necessarily observed
    /// the token — confirm executors check their token promptly.
    pub async fn cancel(&self, id: Uuid) -> bool {
        if let Some(token) = self.cancellations.read().await.get(&id) {
            token.cancel();
            let mut map = self.jobs.write().await;
            if let Some(job) = map.get_mut(&id) {
                job.status = JobStatus::Cancelled;
                job.updated_at = Utc::now();
            }
            true
        } else {
            false
        }
    }
    /// Update a job's progress. Called by executors.
    pub async fn update_progress(
        jobs: &Arc<RwLock<HashMap<Uuid, Job>>>,
        id: Uuid,
        progress: f32,
        message: String,
    ) {
        let mut map = jobs.write().await;
        if let Some(job) = map.get_mut(&id) {
            job.status = JobStatus::Running { progress, message };
            job.updated_at = Utc::now();
        }
    }
    /// Mark a job as completed.
    pub async fn complete(jobs: &Arc<RwLock<HashMap<Uuid, Job>>>, id: Uuid, result: Value) {
        let mut map = jobs.write().await;
        if let Some(job) = map.get_mut(&id) {
            job.status = JobStatus::Completed { result };
            job.updated_at = Utc::now();
        }
    }
    /// Mark a job as failed.
    pub async fn fail(jobs: &Arc<RwLock<HashMap<Uuid, Job>>>, id: Uuid, error: String) {
        let mut map = jobs.write().await;
        if let Some(job) = map.get_mut(&id) {
            job.status = JobStatus::Failed { error };
            job.updated_at = Utc::now();
        }
    }
}

View file

@ -0,0 +1,21 @@
pub mod audit;
pub mod cache;
pub mod collections;
pub mod config;
pub mod error;
pub mod events;
pub mod export;
pub mod hash;
pub mod import;
pub mod integrity;
pub mod jobs;
pub mod media_type;
pub mod metadata;
pub mod model;
pub mod opener;
pub mod scan;
pub mod scheduler;
pub mod search;
pub mod storage;
pub mod tags;
pub mod thumbnail;

View file

@ -0,0 +1,209 @@
use std::path::Path;
use serde::{Deserialize, Serialize};
/// Concrete file formats the library recognizes, grouped below by broad
/// category (see `MediaCategory`). Serialized in lowercase.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MediaType {
    // Audio
    Mp3,
    Flac,
    Ogg,
    Wav,
    Aac,
    Opus,
    // Video
    Mp4,
    Mkv,
    Avi,
    Webm,
    // Documents
    Pdf,
    Epub,
    Djvu,
    // Text
    Markdown,
    PlainText,
    // Images
    Jpeg,
    Png,
    Gif,
    Webp,
    Svg,
    Avif,
    Tiff,
    Bmp,
    // RAW Images
    Cr2,
    Nef,
    Arw,
    Dng,
    Orf,
    Rw2,
    // HEIC/HEIF
    Heic,
}
/// Broad grouping of media types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MediaCategory {
    Audio,
    Video,
    Document,
    Text,
    Image,
}
impl MediaType {
    /// Every supported media type. Extension lookup iterates this list and
    /// consults `extensions()`, so there is a single extension table that
    /// `from_extension` and `extensions` cannot drift apart from.
    const ALL: &'static [MediaType] = &[
        Self::Mp3,
        Self::Flac,
        Self::Ogg,
        Self::Wav,
        Self::Aac,
        Self::Opus,
        Self::Mp4,
        Self::Mkv,
        Self::Avi,
        Self::Webm,
        Self::Pdf,
        Self::Epub,
        Self::Djvu,
        Self::Markdown,
        Self::PlainText,
        Self::Jpeg,
        Self::Png,
        Self::Gif,
        Self::Webp,
        Self::Svg,
        Self::Avif,
        Self::Tiff,
        Self::Bmp,
        Self::Cr2,
        Self::Nef,
        Self::Arw,
        Self::Dng,
        Self::Orf,
        Self::Rw2,
        Self::Heic,
    ];

    /// Resolve a media type from a file extension (case-insensitive, without
    /// the leading dot). Returns `None` for unknown extensions.
    pub fn from_extension(ext: &str) -> Option<Self> {
        let ext = ext.to_ascii_lowercase();
        Self::ALL
            .iter()
            .copied()
            .find(|t| t.extensions().contains(&ext.as_str()))
    }

    /// Resolve a media type from a path's extension, if it has one.
    pub fn from_path(path: &Path) -> Option<Self> {
        path.extension()
            .and_then(|e| e.to_str())
            .and_then(Self::from_extension)
    }

    /// The MIME type advertised for this media type.
    pub fn mime_type(&self) -> &'static str {
        match self {
            Self::Mp3 => "audio/mpeg",
            Self::Flac => "audio/flac",
            Self::Ogg => "audio/ogg",
            Self::Wav => "audio/wav",
            Self::Aac => "audio/aac",
            Self::Opus => "audio/opus",
            Self::Mp4 => "video/mp4",
            Self::Mkv => "video/x-matroska",
            Self::Avi => "video/x-msvideo",
            Self::Webm => "video/webm",
            Self::Pdf => "application/pdf",
            Self::Epub => "application/epub+zip",
            Self::Djvu => "image/vnd.djvu",
            Self::Markdown => "text/markdown",
            Self::PlainText => "text/plain",
            Self::Jpeg => "image/jpeg",
            Self::Png => "image/png",
            Self::Gif => "image/gif",
            Self::Webp => "image/webp",
            Self::Svg => "image/svg+xml",
            Self::Avif => "image/avif",
            Self::Tiff => "image/tiff",
            Self::Bmp => "image/bmp",
            Self::Cr2 => "image/x-canon-cr2",
            Self::Nef => "image/x-nikon-nef",
            Self::Arw => "image/x-sony-arw",
            Self::Dng => "image/x-adobe-dng",
            Self::Orf => "image/x-olympus-orf",
            Self::Rw2 => "image/x-panasonic-rw2",
            Self::Heic => "image/heic",
        }
    }

    /// The broad category this type belongs to.
    pub fn category(&self) -> MediaCategory {
        match self {
            Self::Mp3 | Self::Flac | Self::Ogg | Self::Wav | Self::Aac | Self::Opus => {
                MediaCategory::Audio
            }
            Self::Mp4 | Self::Mkv | Self::Avi | Self::Webm => MediaCategory::Video,
            Self::Pdf | Self::Epub | Self::Djvu => MediaCategory::Document,
            Self::Markdown | Self::PlainText => MediaCategory::Text,
            Self::Jpeg
            | Self::Png
            | Self::Gif
            | Self::Webp
            | Self::Svg
            | Self::Avif
            | Self::Tiff
            | Self::Bmp
            | Self::Cr2
            | Self::Nef
            | Self::Arw
            | Self::Dng
            | Self::Orf
            | Self::Rw2
            | Self::Heic => MediaCategory::Image,
        }
    }

    /// The lowercase file extensions recognized for this type.
    pub fn extensions(&self) -> &'static [&'static str] {
        match self {
            Self::Mp3 => &["mp3"],
            Self::Flac => &["flac"],
            Self::Ogg => &["ogg", "oga"],
            Self::Wav => &["wav"],
            Self::Aac => &["aac", "m4a"],
            Self::Opus => &["opus"],
            Self::Mp4 => &["mp4", "m4v"],
            Self::Mkv => &["mkv"],
            Self::Avi => &["avi"],
            Self::Webm => &["webm"],
            Self::Pdf => &["pdf"],
            Self::Epub => &["epub"],
            Self::Djvu => &["djvu"],
            Self::Markdown => &["md", "markdown"],
            Self::PlainText => &["txt", "text"],
            Self::Jpeg => &["jpg", "jpeg"],
            Self::Png => &["png"],
            Self::Gif => &["gif"],
            Self::Webp => &["webp"],
            Self::Svg => &["svg"],
            Self::Avif => &["avif"],
            Self::Tiff => &["tiff", "tif"],
            Self::Bmp => &["bmp"],
            Self::Cr2 => &["cr2"],
            Self::Nef => &["nef"],
            Self::Arw => &["arw"],
            Self::Dng => &["dng"],
            Self::Orf => &["orf"],
            Self::Rw2 => &["rw2"],
            Self::Heic => &["heic", "heif"],
        }
    }

    /// Returns true if this is a RAW image format.
    pub fn is_raw(&self) -> bool {
        matches!(
            self,
            Self::Cr2 | Self::Nef | Self::Arw | Self::Dng | Self::Orf | Self::Rw2
        )
    }
}

View file

@ -0,0 +1,81 @@
use std::path::Path;
use lofty::file::{AudioFile, TaggedFileExt};
use lofty::tag::Accessor;
use crate::error::{PinakesError, Result};
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Extracts tag and stream metadata from audio files via `lofty`.
pub struct AudioExtractor;

impl MetadataExtractor for AudioExtractor {
    /// Read tags (title/artist/album/genre/year, plus track/disc/comment
    /// extras) and stream properties (duration/bitrate/sample rate/channels).
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let tagged_file = lofty::read_from_path(path)
            .map_err(|e| PinakesError::MetadataExtraction(format!("audio metadata: {e}")))?;
        let mut meta = ExtractedMetadata::default();
        // Prefer the primary tag; fall back to whichever tag exists. One
        // lookup is shared by all tag reads below.
        if let Some(tag) = tagged_file
            .primary_tag()
            .or_else(|| tagged_file.first_tag())
        {
            meta.title = tag.title().map(|s| s.to_string());
            meta.artist = tag.artist().map(|s| s.to_string());
            meta.album = tag.album().map(|s| s.to_string());
            meta.genre = tag.genre().map(|s| s.to_string());
            meta.year = tag.year().map(|y| y as i32);
            if let Some(track) = tag.track() {
                meta.extra
                    .insert("track_number".to_string(), track.to_string());
            }
            if let Some(disc) = tag.disk() {
                meta.extra
                    .insert("disc_number".to_string(), disc.to_string());
            }
            if let Some(comment) = tag.comment() {
                meta.extra
                    .insert("comment".to_string(), comment.to_string());
            }
        }
        let properties = tagged_file.properties();
        let duration = properties.duration();
        // A zero duration means lofty could not determine it; omit instead.
        if !duration.is_zero() {
            meta.duration_secs = Some(duration.as_secs_f64());
        }
        if let Some(bitrate) = properties.audio_bitrate() {
            meta.extra
                .insert("bitrate".to_string(), format!("{bitrate} kbps"));
        }
        if let Some(sample_rate) = properties.sample_rate() {
            meta.extra
                .insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
        }
        if let Some(channels) = properties.channels() {
            meta.extra
                .insert("channels".to_string(), channels.to_string());
        }
        Ok(meta)
    }

    fn supported_types(&self) -> &[MediaType] {
        &[
            MediaType::Mp3,
            MediaType::Flac,
            MediaType::Ogg,
            MediaType::Wav,
            MediaType::Aac,
            MediaType::Opus,
        ]
    }
}

View file

@ -0,0 +1,192 @@
use std::path::Path;
use crate::error::{PinakesError, Result};
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Dispatches document metadata extraction to a per-format parser.
pub struct DocumentExtractor;

impl MetadataExtractor for DocumentExtractor {
    /// Route to the PDF/EPUB/DjVu parser based on the path's extension;
    /// anything else yields empty metadata.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let detected = MediaType::from_path(path);
        match detected {
            Some(MediaType::Pdf) => extract_pdf(path),
            Some(MediaType::Epub) => extract_epub(path),
            Some(MediaType::Djvu) => extract_djvu(path),
            _ => Ok(ExtractedMetadata::default()),
        }
    }

    fn supported_types(&self) -> &[MediaType] {
        &[MediaType::Pdf, MediaType::Epub, MediaType::Djvu]
    }
}
/// Extract title/author/subject (and creator/producer/page-count extras)
/// from a PDF's document information dictionary.
fn extract_pdf(path: &Path) -> Result<ExtractedMetadata> {
    let doc = lopdf::Document::load(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("PDF load: {e}")))?;
    let mut meta = ExtractedMetadata::default();
    // Find the Info dictionary via the trailer. The trailer entry may be an
    // indirect reference that must be resolved, or an inline dictionary.
    if let Ok(info_ref) = doc.trailer.get(b"Info") {
        let info_obj = if let Ok(reference) = info_ref.as_reference() {
            doc.get_object(reference).ok()
        } else {
            Some(info_ref)
        };
        if let Some(obj) = info_obj
            && let Ok(dict) = obj.as_dict()
        {
            // Missing keys or non-string values are simply skipped.
            if let Ok(title) = dict.get(b"Title") {
                meta.title = pdf_object_to_string(title);
            }
            // PDF "Author" maps onto the generic `artist` field.
            if let Ok(author) = dict.get(b"Author") {
                meta.artist = pdf_object_to_string(author);
            }
            if let Ok(subject) = dict.get(b"Subject") {
                meta.description = pdf_object_to_string(subject);
            }
            if let Ok(creator) = dict.get(b"Creator") {
                meta.extra.insert(
                    "creator".to_string(),
                    pdf_object_to_string(creator).unwrap_or_default(),
                );
            }
            if let Ok(producer) = dict.get(b"Producer") {
                meta.extra.insert(
                    "producer".to_string(),
                    pdf_object_to_string(producer).unwrap_or_default(),
                );
            }
        }
    }
    // Page count
    let page_count = doc.get_pages().len();
    if page_count > 0 {
        meta.extra
            .insert("page_count".to_string(), page_count.to_string());
    }
    Ok(meta)
}
/// Best-effort conversion of a PDF object to text: string and name objects
/// are decoded lossily as UTF-8; anything else yields `None`.
fn pdf_object_to_string(obj: &lopdf::Object) -> Option<String> {
    let bytes = match obj {
        lopdf::Object::String(bytes, _) => bytes.as_slice(),
        lopdf::Object::Name(name) => name.as_slice(),
        _ => return None,
    };
    Some(String::from_utf8_lossy(bytes).into_owned())
}
/// Pull core fields (title/creator/description) plus a few secondary ones
/// out of an EPUB's package metadata.
fn extract_epub(path: &Path) -> Result<ExtractedMetadata> {
    let doc = epub::doc::EpubDoc::new(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("EPUB parse: {e}")))?;
    let mut meta = ExtractedMetadata {
        title: doc.mdata("title").map(|item| item.value.clone()),
        artist: doc.mdata("creator").map(|item| item.value.clone()),
        description: doc.mdata("description").map(|item| item.value.clone()),
        ..Default::default()
    };
    // Secondary fields land in the free-form extras map under their own key.
    for key in ["language", "publisher", "date"] {
        if let Some(item) = doc.mdata(key) {
            meta.extra.insert(key.to_string(), item.value.clone());
        }
    }
    Ok(meta)
}
/// Best-effort DjVu metadata extraction by scanning the raw file bytes for
/// annotation S-expressions; there is no full DjVu container parser here,
/// so this is heuristic by design.
fn extract_djvu(path: &Path) -> Result<ExtractedMetadata> {
    // DjVu files contain metadata in SEXPR (S-expression) format within
    // ANTa/ANTz chunks, or in the DIRM chunk. We parse the raw bytes to
    // extract any metadata fields we can find.
    let data = std::fs::read(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("DjVu read: {e}")))?;
    let mut meta = ExtractedMetadata::default();
    // DjVu files start with "AT&T" magic followed by FORM:DJVU or FORM:DJVM
    if data.len() < 16 {
        // Too small to hold even a header; return empty metadata.
        return Ok(meta);
    }
    // Search for metadata annotations in the file. DjVu metadata is stored
    // as S-expressions like (metadata (key "value") ...) within ANTa chunks.
    // Lossy decoding is fine — only ASCII landmarks are searched for.
    let content = String::from_utf8_lossy(&data);
    // Look for (metadata ...) blocks
    if let Some(meta_start) = content.find("(metadata") {
        let remainder = &content[meta_start..];
        // Extract key-value pairs like (title "Some Title")
        extract_djvu_field(remainder, "title", &mut meta.title);
        extract_djvu_field(remainder, "author", &mut meta.artist);
        // Prefer "subject", fall back to "description".
        let mut desc = None;
        extract_djvu_field(remainder, "subject", &mut desc);
        if desc.is_none() {
            extract_djvu_field(remainder, "description", &mut desc);
        }
        meta.description = desc;
        let mut year_str = None;
        extract_djvu_field(remainder, "year", &mut year_str);
        if let Some(ref y) = year_str {
            // Non-numeric year strings are silently dropped.
            meta.year = y.parse().ok();
        }
        let mut creator = None;
        extract_djvu_field(remainder, "creator", &mut creator);
        if let Some(c) = creator {
            meta.extra.insert("creator".to_string(), c);
        }
    }
    // Also check for booklet-style metadata that some DjVu encoders write
    // outside the metadata SEXPR
    if meta.title.is_none()
        && let Some(title_start) = content.find("(bookmarks")
    {
        let remainder = &content[title_start..];
        // First bookmark title is often the document title
        if let Some(q1) = remainder.find('"') {
            let after_q1 = &remainder[q1 + 1..];
            if let Some(q2) = after_q1.find('"') {
                let val = &after_q1[..q2];
                if !val.is_empty() {
                    meta.title = Some(val.to_string());
                }
            }
        }
    }
    Ok(meta)
}
/// Scan `sexpr` for an S-expression entry like `(key "value")` and, when a
/// non-empty quoted value is found, store it in `out`.
///
/// The key must be followed by a non-alphanumeric character, so searching
/// for `title` does not match `(titlepage ...)`; false prefix matches are
/// skipped and the scan continues.
fn extract_djvu_field(sexpr: &str, key: &str, out: &mut Option<String>) {
    let pattern = format!("({key}");
    let mut search_from = 0;
    while let Some(found) = sexpr[search_from..].find(&pattern) {
        let after_key = &sexpr[search_from + found + pattern.len()..];
        // A longer key sharing this prefix: skip past it and keep looking.
        if after_key
            .chars()
            .next()
            .is_some_and(|c| c.is_ascii_alphanumeric())
        {
            search_from += found + pattern.len();
            continue;
        }
        // Take the first quoted span after the key as the value; empty
        // values leave `out` untouched.
        if let Some(q1) = after_key.find('"') {
            let after_q1 = &after_key[q1 + 1..];
            if let Some(q2) = after_q1.find('"') {
                let val = &after_q1[..q2];
                if !val.is_empty() {
                    *out = Some(val.to_string());
                }
            }
        }
        return;
    }
}

View file

@ -0,0 +1,213 @@
use std::path::Path;
use crate::error::Result;
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Extracts EXIF metadata (dimensions, camera, exposure, GPS, ...) from
/// image files via `kamadak-exif`.
pub struct ImageExtractor;

/// Read a tag's display value from the primary IFD, returning `None` when
/// the tag is absent or renders as an empty string.
fn display_field(exif_data: &exif::Exif, tag: exif::Tag) -> Option<String> {
    let field = exif_data.get_field(tag, exif::In::PRIMARY)?;
    let val = field.display_value().to_string();
    if val.is_empty() { None } else { Some(val) }
}

/// Like `display_field`, but for string-typed tags whose display value is
/// quoted: rejects empty quoted values (`""`) and strips surrounding quotes.
fn quoted_field(exif_data: &exif::Exif, tag: exif::Tag) -> Option<String> {
    let val = display_field(exif_data, tag)?;
    if val == "\"\"" {
        None
    } else {
        Some(val.trim_matches('"').to_string())
    }
}

impl MetadataExtractor for ImageExtractor {
    /// Extract EXIF data from the image at `path`. Images without readable
    /// EXIF are not an error; they simply yield empty metadata.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let mut meta = ExtractedMetadata::default();
        let file = std::fs::File::open(path)?;
        let mut buf_reader = std::io::BufReader::new(&file);
        let exif_data = match exif::Reader::new().read_from_container(&mut buf_reader) {
            Ok(exif) => exif,
            Err(_) => return Ok(meta),
        };
        // Dimensions: prefer EXIF pixel dimensions, fall back to the TIFF
        // image width/length tags.
        if let Some(width) = exif_data
            .get_field(exif::Tag::PixelXDimension, exif::In::PRIMARY)
            .or_else(|| exif_data.get_field(exif::Tag::ImageWidth, exif::In::PRIMARY))
            && let Some(w) = field_to_u32(width)
        {
            meta.extra.insert("width".to_string(), w.to_string());
        }
        if let Some(height) = exif_data
            .get_field(exif::Tag::PixelYDimension, exif::In::PRIMARY)
            .or_else(|| exif_data.get_field(exif::Tag::ImageLength, exif::In::PRIMARY))
            && let Some(h) = field_to_u32(height)
        {
            meta.extra.insert("height".to_string(), h.to_string());
        }
        // Date taken: DateTimeOriginal with DateTime as fallback.
        if let Some(date) = exif_data
            .get_field(exif::Tag::DateTimeOriginal, exif::In::PRIMARY)
            .or_else(|| exif_data.get_field(exif::Tag::DateTime, exif::In::PRIMARY))
        {
            let val = date.display_value().to_string();
            if !val.is_empty() {
                meta.extra.insert("date_taken".to_string(), val);
            }
        }
        // GPS coordinates: require value + hemisphere reference on both axes.
        if let (Some(lat), Some(lat_ref), Some(lon), Some(lon_ref)) = (
            exif_data.get_field(exif::Tag::GPSLatitude, exif::In::PRIMARY),
            exif_data.get_field(exif::Tag::GPSLatitudeRef, exif::In::PRIMARY),
            exif_data.get_field(exif::Tag::GPSLongitude, exif::In::PRIMARY),
            exif_data.get_field(exif::Tag::GPSLongitudeRef, exif::In::PRIMARY),
        ) && let (Some(lat_val), Some(lon_val)) =
            (dms_to_decimal(lat, lat_ref), dms_to_decimal(lon, lon_ref))
        {
            meta.extra
                .insert("gps_latitude".to_string(), format!("{lat_val:.6}"));
            meta.extra
                .insert("gps_longitude".to_string(), format!("{lon_val:.6}"));
        }
        // Plain display-value tags, all handled identically: present and
        // non-empty -> stored under the given extras key.
        let simple_fields = [
            (exif::Tag::Make, "camera_make"),
            (exif::Tag::Model, "camera_model"),
            (exif::Tag::PhotographicSensitivity, "iso"),
            (exif::Tag::ExposureTime, "exposure_time"),
            (exif::Tag::FNumber, "f_number"),
            (exif::Tag::FocalLength, "focal_length"),
            (exif::Tag::Flash, "flash"),
            (exif::Tag::Orientation, "orientation"),
            (exif::Tag::Software, "software"),
        ];
        for (tag, key) in simple_fields {
            if let Some(val) = display_field(&exif_data, tag) {
                meta.extra.insert(key.to_string(), val);
            }
        }
        // String-typed tags (rendered quoted by the exif crate).
        if let Some(lens) = quoted_field(&exif_data, exif::Tag::LensModel) {
            meta.extra.insert("lens_model".to_string(), lens);
        }
        // ImageDescription doubles as the title, Artist as the artist, and
        // Copyright as the description.
        meta.title = quoted_field(&exif_data, exif::Tag::ImageDescription);
        meta.artist = quoted_field(&exif_data, exif::Tag::Artist);
        meta.description = quoted_field(&exif_data, exif::Tag::Copyright);
        Ok(meta)
    }

    fn supported_types(&self) -> &[MediaType] {
        &[
            MediaType::Jpeg,
            MediaType::Png,
            MediaType::Gif,
            MediaType::Webp,
            MediaType::Avif,
            MediaType::Tiff,
            MediaType::Bmp,
            // RAW formats (TIFF-based, kamadak-exif handles these)
            MediaType::Cr2,
            MediaType::Nef,
            MediaType::Arw,
            MediaType::Dng,
            MediaType::Orf,
            MediaType::Rw2,
            // HEIC
            MediaType::Heic,
        ]
    }
}
/// Interpret a numeric EXIF field as `u32`, accepting the first LONG or
/// SHORT value; any other value type yields `None`.
fn field_to_u32(field: &exif::Field) -> Option<u32> {
    match &field.value {
        exif::Value::Long(values) => values.first().copied(),
        exif::Value::Short(values) => values.first().map(|&v| u32::from(v)),
        _ => None,
    }
}
fn dms_to_decimal(dms_field: &exif::Field, ref_field: &exif::Field) -> Option<f64> {
if let exif::Value::Rational(ref rationals) = dms_field.value
&& rationals.len() >= 3
{
let degrees = rationals[0].to_f64();
let minutes = rationals[1].to_f64();
let seconds = rationals[2].to_f64();
let mut decimal = degrees + minutes / 60.0 + seconds / 3600.0;
let ref_str = ref_field.display_value().to_string();
if ref_str.contains('S') || ref_str.contains('W') {
decimal = -decimal;
}
return Some(decimal);
}
None
}

View file

@ -0,0 +1,40 @@
use std::path::Path;
use crate::error::Result;
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Reads YAML front matter from Markdown and plain-text files.
pub struct MarkdownExtractor;

impl MetadataExtractor for MarkdownExtractor {
    /// Parse the file's YAML front matter and map the well-known keys
    /// (`title`, `author`, `description`, `date`) onto metadata fields.
    /// Files without front matter yield empty metadata.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let content = std::fs::read_to_string(path)?;
        let parsed = gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(&content);
        let mut meta = ExtractedMetadata::default();
        let front_matter = parsed.ok().and_then(|p| p.data);
        if let Some(gray_matter::Pod::Hash(map)) = front_matter {
            // Only string-valued entries are taken; other Pod types are ignored.
            let get_str = |key: &str| match map.get(key) {
                Some(gray_matter::Pod::String(s)) => Some(s.clone()),
                _ => None,
            };
            meta.title = get_str("title");
            meta.artist = get_str("author");
            meta.description = get_str("description");
            if let Some(date) = get_str("date") {
                meta.extra.insert("date".to_string(), date);
            }
        }
        Ok(meta)
    }

    fn supported_types(&self) -> &[MediaType] {
        &[MediaType::Markdown, MediaType::PlainText]
    }
}

View file

@ -0,0 +1,46 @@
pub mod audio;
pub mod document;
pub mod image;
pub mod markdown;
pub mod video;
use std::collections::HashMap;
use std::path::Path;
use crate::error::Result;
use crate::media_type::MediaType;
/// Normalized metadata pulled from a media file; fields that don't apply to
/// a given format stay `None`/empty.
#[derive(Debug, Clone, Default)]
pub struct ExtractedMetadata {
    pub title: Option<String>,
    /// Artist for audio, author/creator for documents, EXIF Artist for images.
    pub artist: Option<String>,
    pub album: Option<String>,
    pub genre: Option<String>,
    pub year: Option<i32>,
    pub duration_secs: Option<f64>,
    pub description: Option<String>,
    /// Format-specific key/value pairs (bitrate, resolution, EXIF data, ...).
    pub extra: HashMap<String, String>,
}
/// A format-family metadata extractor.
pub trait MetadataExtractor: Send + Sync {
    /// Parse metadata from the file at `path`.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata>;
    /// The media types this extractor should be used for.
    fn supported_types(&self) -> &[MediaType];
}
/// The extractor registry, in dispatch priority order. All extractors are
/// stateless unit structs, so static references suffice — no per-call heap
/// allocation of boxed trait objects.
static EXTRACTORS: &[&dyn MetadataExtractor] = &[
    &audio::AudioExtractor,
    &document::DocumentExtractor,
    &video::VideoExtractor,
    &markdown::MarkdownExtractor,
    &image::ImageExtractor,
];

/// Extract metadata from `path` using the first registered extractor that
/// supports `media_type`; unsupported types yield empty metadata rather
/// than an error.
pub fn extract_metadata(path: &Path, media_type: MediaType) -> Result<ExtractedMetadata> {
    match EXTRACTORS
        .iter()
        .find(|e| e.supported_types().contains(&media_type))
    {
        Some(extractor) => extractor.extract(path),
        None => Ok(ExtractedMetadata::default()),
    }
}

View file

@ -0,0 +1,120 @@
use std::path::Path;
use crate::error::{PinakesError, Result};
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Extracts container-level metadata from video files.
pub struct VideoExtractor;

impl MetadataExtractor for VideoExtractor {
    /// Dispatch by container: Matroska and MP4 have dedicated parsers; the
    /// other supported containers currently yield empty metadata.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let detected = MediaType::from_path(path);
        if detected == Some(MediaType::Mkv) {
            extract_mkv(path)
        } else if detected == Some(MediaType::Mp4) {
            extract_mp4(path)
        } else {
            Ok(ExtractedMetadata::default())
        }
    }

    fn supported_types(&self) -> &[MediaType] {
        &[
            MediaType::Mp4,
            MediaType::Mkv,
            MediaType::Avi,
            MediaType::Webm,
        ]
    }
}
/// Read title, duration, and per-track technical details from a Matroska
/// container.
fn extract_mkv(path: &Path) -> Result<ExtractedMetadata> {
    let file = std::fs::File::open(path)?;
    let mkv = matroska::Matroska::open(file)
        .map_err(|e| PinakesError::MetadataExtraction(format!("MKV parse: {e}")))?;
    let mut meta = ExtractedMetadata {
        title: mkv.info.title.clone(),
        duration_secs: mkv.info.duration.map(|dur| dur.as_secs_f64()),
        ..Default::default()
    };
    // Record resolution and codec info from every video/audio track;
    // subtitle and other track kinds are skipped.
    for track in &mkv.tracks {
        let codec = (!track.codec_id.is_empty()).then(|| track.codec_id.clone());
        match &track.settings {
            matroska::Settings::Video(video) => {
                let resolution = format!("{}x{}", video.pixel_width, video.pixel_height);
                meta.extra.insert("resolution".to_string(), resolution);
                if let Some(codec) = codec {
                    meta.extra.insert("video_codec".to_string(), codec);
                }
            }
            matroska::Settings::Audio(audio) => {
                meta.extra.insert(
                    "sample_rate".to_string(),
                    format!("{} Hz", audio.sample_rate as u32),
                );
                meta.extra
                    .insert("channels".to_string(), audio.channels.to_string());
                if let Some(codec) = codec {
                    meta.extra.insert("audio_codec".to_string(), codec);
                }
            }
            _ => {}
        }
    }
    Ok(meta)
}
/// Read MP4 metadata through `lofty`: tag fields plus stream properties.
fn extract_mp4(path: &Path) -> Result<ExtractedMetadata> {
    use lofty::file::{AudioFile, TaggedFileExt};
    use lofty::tag::Accessor;
    let tagged_file = lofty::read_from_path(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("MP4 metadata: {e}")))?;
    let mut meta = ExtractedMetadata::default();
    // Prefer the primary tag, falling back to whichever tag exists.
    if let Some(tag) = tagged_file
        .primary_tag()
        .or_else(|| tagged_file.first_tag())
    {
        meta.title = tag.title().map(|s| s.to_string());
        meta.artist = tag.artist().map(|s| s.to_string());
        meta.album = tag.album().map(|s| s.to_string());
        meta.genre = tag.genre().map(|s| s.to_string());
        meta.year = tag.year().map(|y| y as i32);
    }
    let properties = tagged_file.properties();
    let duration = properties.duration();
    // A zero duration means lofty could not determine it; omit instead.
    if !duration.is_zero() {
        meta.duration_secs = Some(duration.as_secs_f64());
    }
    if let Some(bitrate) = properties.audio_bitrate() {
        meta.extra
            .insert("audio_bitrate".to_string(), format!("{bitrate} kbps"));
    }
    if let Some(sample_rate) = properties.sample_rate() {
        meta.extra
            .insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
    }
    if let Some(channels) = properties.channels() {
        meta.extra
            .insert("channels".to_string(), channels.to_string());
    }
    Ok(meta)
}

View file

@ -0,0 +1,191 @@
use std::collections::HashMap;
use std::fmt;
use std::path::PathBuf;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::media_type::MediaType;
/// Unique identifier for a media item (UUIDv7, so ids sort roughly by
/// creation time).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct MediaId(pub Uuid);
impl MediaId {
    /// Generate a fresh id.
    pub fn new() -> Self {
        Self(Uuid::now_v7())
    }
}
impl fmt::Display for MediaId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}
impl Default for MediaId {
    fn default() -> Self {
        Self::new()
    }
}
/// Hex-encoded digest of a file's contents.
/// NOTE(review): the algorithm is defined by the hashing module (the crate
/// depends on `blake3`) — confirm there before relying on a specific format.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ContentHash(pub String);
impl ContentHash {
    /// Wrap an already-computed hex digest.
    pub fn new(hex: String) -> Self {
        Self(hex)
    }
}
impl fmt::Display for ContentHash {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}
/// A single cataloged media file together with its extracted metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MediaItem {
    pub id: MediaId,
    /// Location on disk — whether absolute or library-relative is decided by
    /// the scanner; confirm there.
    pub path: PathBuf,
    pub file_name: String,
    pub media_type: MediaType,
    /// Digest of the file contents at import/scan time.
    pub content_hash: ContentHash,
    pub file_size: u64,
    pub title: Option<String>,
    pub artist: Option<String>,
    pub album: Option<String>,
    pub genre: Option<String>,
    pub year: Option<i32>,
    pub duration_secs: Option<f64>,
    pub description: Option<String>,
    pub thumbnail_path: Option<PathBuf>,
    /// User-defined fields keyed by field name.
    pub custom_fields: HashMap<String, CustomField>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}
/// A user-defined metadata value attached to a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomField {
    pub field_type: CustomFieldType,
    /// Stored as a string regardless of `field_type`; interpretation is up
    /// to the consumer.
    pub value: String,
}
/// Declared type of a custom field's value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CustomFieldType {
    Text,
    Number,
    Date,
    Boolean,
}
/// A tag; `parent_id` allows hierarchical tag trees.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tag {
    pub id: Uuid,
    pub name: String,
    pub parent_id: Option<Uuid>,
    pub created_at: DateTime<Utc>,
}
/// A named grouping of media items.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collection {
    pub id: Uuid,
    pub name: String,
    pub description: Option<String>,
    pub kind: CollectionKind,
    /// Presumably only meaningful for `Virtual` collections, evaluated by
    /// the search layer — confirm against the search module.
    pub filter_query: Option<String>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}
/// How a collection's membership is determined.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CollectionKind {
    /// Items are added and removed explicitly.
    Manual,
    /// Membership is derived from `filter_query`.
    Virtual,
}
/// Membership record linking a media item to a collection, with an explicit
/// ordering position.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionMember {
    pub collection_id: Uuid,
    pub media_id: MediaId,
    pub position: i32,
    pub added_at: DateTime<Utc>,
}
/// One audit-log row; `media_id` is `None` for events not tied to a
/// specific item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEntry {
    pub id: Uuid,
    pub media_id: Option<MediaId>,
    pub action: AuditAction,
    pub details: Option<String>,
    pub timestamp: DateTime<Utc>,
}
/// The set of auditable events. Serialized in `snake_case`, matching the
/// `Display` impl in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditAction {
    Imported,
    Updated,
    Deleted,
    Tagged,
    Untagged,
    AddedToCollection,
    RemovedFromCollection,
    Opened,
    Scanned,
}
impl fmt::Display for AuditAction {
    /// Render the action using the same `snake_case` spelling that serde
    /// produces for serialization.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Imported => "imported",
            Self::Updated => "updated",
            Self::Deleted => "deleted",
            Self::Tagged => "tagged",
            Self::Untagged => "untagged",
            Self::AddedToCollection => "added_to_collection",
            Self::RemovedFromCollection => "removed_from_collection",
            Self::Opened => "opened",
            Self::Scanned => "scanned",
        })
    }
}
/// Offset/limit paging parameters with an optional sort specification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Pagination {
    pub offset: u64,
    pub limit: u64,
    /// Sort key; the accepted format is interpreted by the storage layer —
    /// confirm the valid values there.
    pub sort: Option<String>,
}
impl Pagination {
    /// Construct explicit paging parameters.
    pub fn new(offset: u64, limit: u64, sort: Option<String>) -> Self {
        Self {
            offset,
            limit,
            sort,
        }
    }
}
impl Default for Pagination {
    // First page of 50, unsorted.
    fn default() -> Self {
        Self {
            offset: 0,
            limit: 50,
            sort: None,
        }
    }
}
/// A named, persisted search query the user can re-run later.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SavedSearch {
    pub id: Uuid,
    pub name: String,
    /// Raw query string in the search DSL.
    pub query: String,
    pub sort_order: Option<String>,
    pub created_at: DateTime<Utc>,
}

View file

@ -0,0 +1,79 @@
use std::path::Path;
use std::process::Command;
use crate::error::{PinakesError, Result};
/// Opens a file with the platform's default application.
pub trait Opener: Send + Sync {
    /// Launches the platform handler for `path`; errors if the command
    /// cannot be spawned or exits unsuccessfully.
    fn open(&self, path: &Path) -> Result<()>;
}
/// Linux opener using xdg-open
pub struct XdgOpener;

impl Opener for XdgOpener {
    fn open(&self, path: &Path) -> Result<()> {
        let status = Command::new("xdg-open")
            .arg(path)
            .status()
            .map_err(|e| PinakesError::InvalidOperation(format!("failed to run xdg-open: {e}")))?;
        // Guard clause: any non-zero exit is surfaced as an error.
        if !status.success() {
            return Err(PinakesError::InvalidOperation(format!(
                "xdg-open exited with status {status}"
            )));
        }
        Ok(())
    }
}
/// macOS opener using the `open` command
pub struct MacOpener;

impl Opener for MacOpener {
    fn open(&self, path: &Path) -> Result<()> {
        let status = Command::new("open")
            .arg(path)
            .status()
            .map_err(|e| PinakesError::InvalidOperation(format!("failed to run open: {e}")))?;
        // Map a successful exit to Ok(()), anything else to an error.
        status.success().then_some(()).ok_or_else(|| {
            PinakesError::InvalidOperation(format!("open exited with status {status}"))
        })
    }
}
/// Windows opener using `cmd /c start`
pub struct WindowsOpener;

impl Opener for WindowsOpener {
    fn open(&self, path: &Path) -> Result<()> {
        // The empty string is the window-title argument `start` would
        // otherwise consume; the path follows as its own argument.
        let status = Command::new("cmd")
            .args(["/C", "start", ""])
            .arg(path)
            .status()
            .map_err(|e| {
                PinakesError::InvalidOperation(format!("failed to run cmd /c start: {e}"))
            })?;
        match status.success() {
            true => Ok(()),
            false => Err(PinakesError::InvalidOperation(format!(
                "cmd /c start exited with status {status}"
            ))),
        }
    }
}
/// Returns the platform-appropriate opener.
pub fn default_opener() -> Box<dyn Opener> {
    // The cfg! branches are mutually exclusive, so the check order does
    // not affect the result; Linux/other falls through to xdg-open.
    if cfg!(target_os = "windows") {
        return Box::new(WindowsOpener);
    }
    if cfg!(target_os = "macos") {
        return Box::new(MacOpener);
    }
    Box::new(XdgOpener)
}

View file

@ -0,0 +1,283 @@
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use notify::{PollWatcher, RecursiveMode, Watcher};
use tokio::sync::mpsc;
use tracing::{info, warn};
use crate::error::Result;
use crate::import;
use crate::storage::DynStorageBackend;
/// Result summary of one directory scan.
pub struct ScanStatus {
    /// Whether a scan is still in progress (always `false` for a finished
    /// scan's return value).
    pub scanning: bool,
    /// Total files the importer returned results for (successes + failures).
    pub files_found: usize,
    /// Files imported successfully.
    pub files_processed: usize,
    pub errors: Vec<String>,
}
/// Shared scan progress that can be read by the status endpoint while a scan runs.
#[derive(Clone)]
pub struct ScanProgress {
    pub is_scanning: Arc<AtomicBool>,
    pub files_found: Arc<AtomicUsize>,
    pub files_processed: Arc<AtomicUsize>,
    /// Total error count — keeps counting past `MAX_STORED_ERRORS`.
    pub error_count: Arc<AtomicUsize>,
    /// At most `MAX_STORED_ERRORS` messages are retained here.
    pub error_messages: Arc<Mutex<Vec<String>>>,
}
/// Cap on stored error messages; `error_count` is not capped.
const MAX_STORED_ERRORS: usize = 100;
impl ScanProgress {
pub fn new() -> Self {
Self {
is_scanning: Arc::new(AtomicBool::new(false)),
files_found: Arc::new(AtomicUsize::new(0)),
files_processed: Arc::new(AtomicUsize::new(0)),
error_count: Arc::new(AtomicUsize::new(0)),
error_messages: Arc::new(Mutex::new(Vec::new())),
}
}
pub fn snapshot(&self) -> ScanStatus {
let errors = self
.error_messages
.lock()
.map(|v| v.clone())
.unwrap_or_default();
ScanStatus {
scanning: self.is_scanning.load(Ordering::Acquire),
files_found: self.files_found.load(Ordering::Acquire),
files_processed: self.files_processed.load(Ordering::Acquire),
errors,
}
}
fn begin(&self) {
self.is_scanning.store(true, Ordering::Release);
self.files_found.store(0, Ordering::Release);
self.files_processed.store(0, Ordering::Release);
self.error_count.store(0, Ordering::Release);
if let Ok(mut msgs) = self.error_messages.lock() {
msgs.clear();
}
}
fn record_error(&self, message: String) {
self.error_count.fetch_add(1, Ordering::Release);
if let Ok(mut msgs) = self.error_messages.lock()
&& msgs.len() < MAX_STORED_ERRORS
{
msgs.push(message);
}
}
fn finish(&self) {
self.is_scanning.store(false, Ordering::Release);
}
}
impl Default for ScanProgress {
    /// Equivalent to [`ScanProgress::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Scans `dir` and imports all recognized files, without progress
/// reporting. Convenience wrapper over [`scan_directory_with_progress`].
pub async fn scan_directory(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
) -> Result<ScanStatus> {
    scan_directory_with_progress(storage, dir, ignore_patterns, None).await
}
/// Imports every recognized file under `dir`, optionally publishing
/// counters and error messages into `progress` for the status endpoint.
pub async fn scan_directory_with_progress(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
    progress: Option<&ScanProgress>,
) -> Result<ScanStatus> {
    info!(dir = %dir.display(), "starting directory scan");
    if let Some(p) = progress {
        p.begin();
    }
    // Note: for configurable concurrency, use import_directory_with_concurrency directly
    let results = import::import_directory(storage, dir, ignore_patterns).await?;
    let mut processed = 0usize;
    let mut errors = Vec::new();
    for outcome in &results {
        if let Err(e) = outcome {
            let message = e.to_string();
            if let Some(p) = progress {
                p.record_error(message.clone());
            }
            errors.push(message);
        } else {
            processed += 1;
        }
    }
    if let Some(p) = progress {
        p.files_found.store(results.len(), Ordering::Release);
        p.files_processed.store(processed, Ordering::Release);
        p.finish();
    }
    Ok(ScanStatus {
        scanning: false,
        files_found: results.len(),
        files_processed: processed,
        errors,
    })
}
/// Scans every registered root directory without progress reporting.
/// Convenience wrapper over [`scan_all_roots_with_progress`].
pub async fn scan_all_roots(
    storage: &DynStorageBackend,
    ignore_patterns: &[String],
) -> Result<Vec<ScanStatus>> {
    scan_all_roots_with_progress(storage, ignore_patterns, None).await
}
/// Scans every registered root directory in sequence, returning one
/// [`ScanStatus`] per root. A failing root is logged and reported as a
/// status with a single error rather than aborting the remaining roots.
pub async fn scan_all_roots_with_progress(
    storage: &DynStorageBackend,
    ignore_patterns: &[String],
    progress: Option<&ScanProgress>,
) -> Result<Vec<ScanStatus>> {
    let roots = storage.list_root_dirs().await?;
    let mut statuses = Vec::with_capacity(roots.len());
    for root in roots {
        let status = match scan_directory_with_progress(storage, &root, ignore_patterns, progress)
            .await
        {
            Ok(status) => status,
            Err(e) => {
                warn!(root = %root.display(), error = %e, "failed to scan root directory");
                ScanStatus {
                    scanning: false,
                    files_found: 0,
                    files_processed: 0,
                    errors: vec![e.to_string()],
                }
            }
        };
        statuses.push(status);
    }
    Ok(statuses)
}
/// Watches directories for filesystem changes and exposes changed paths
/// through an async channel.
pub struct FileWatcher {
    // Held only to keep the underlying watcher alive; dropping it stops
    // event delivery.
    _watcher: Box<dyn Watcher + Send>,
    rx: mpsc::Receiver<PathBuf>,
}
impl FileWatcher {
    /// Creates a watcher over `dirs`, preferring the platform-native
    /// backend and falling back to a 5-second polling watcher when the
    /// native one cannot be created.
    pub fn new(dirs: &[PathBuf]) -> Result<Self> {
        // Bounded channel: if the consumer falls behind, delivery blocks in
        // the watcher callback instead of growing without limit.
        let (tx, rx) = mpsc::channel(1024);
        // Try the recommended (native) watcher first, fall back to polling
        let watcher: Box<dyn Watcher + Send> = match Self::try_native_watcher(dirs, tx.clone()) {
            Ok(w) => {
                info!("using native filesystem watcher");
                w
            }
            Err(native_err) => {
                warn!(error = %native_err, "native watcher failed, falling back to polling");
                Self::polling_watcher(dirs, tx)?
            }
        };
        Ok(Self {
            _watcher: watcher,
            rx,
        })
    }

    /// Builds the event-forwarding callback shared by both backends: every
    /// path in a filesystem event is sent into `tx`. Extracted so the
    /// native and polling constructors stay in sync (previously the same
    /// closure was duplicated in both, each with a redundant `tx.clone()`).
    fn event_forwarder(
        tx: mpsc::Sender<PathBuf>,
    ) -> impl Fn(notify::Result<notify::Event>) + Send + 'static {
        move |res| {
            if let Ok(event) = res {
                for path in event.paths {
                    // blocking_send is appropriate here: notify invokes the
                    // callback on its own (non-async) thread.
                    if tx.blocking_send(path).is_err() {
                        tracing::warn!("filesystem watcher channel closed, stopping");
                        break;
                    }
                }
            }
        }
    }

    /// Attempts to create the OS-native watcher and register every dir.
    fn try_native_watcher(
        dirs: &[PathBuf],
        tx: mpsc::Sender<PathBuf>,
    ) -> std::result::Result<Box<dyn Watcher + Send>, notify::Error> {
        let mut watcher = notify::recommended_watcher(Self::event_forwarder(tx))?;
        for dir in dirs {
            watcher.watch(dir, RecursiveMode::Recursive)?;
        }
        Ok(Box::new(watcher))
    }

    /// Polling fallback used when the native watcher cannot be created.
    fn polling_watcher(
        dirs: &[PathBuf],
        tx: mpsc::Sender<PathBuf>,
    ) -> Result<Box<dyn Watcher + Send>> {
        let poll_interval = std::time::Duration::from_secs(5);
        let config = notify::Config::default().with_poll_interval(poll_interval);
        let mut watcher = PollWatcher::new(Self::event_forwarder(tx), config)
            .map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))?;
        for dir in dirs {
            watcher
                .watch(dir, RecursiveMode::Recursive)
                .map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))?;
        }
        Ok(Box::new(watcher))
    }

    /// Awaits the next changed path; `None` once the watcher side of the
    /// channel has shut down.
    pub async fn next_change(&mut self) -> Option<PathBuf> {
        self.rx.recv().await
    }
}
/// Runs forever (until the watcher channel closes): watches `dirs` and
/// re-imports any changed file whose type is recognized and which is not
/// matched by `ignore_patterns`.
pub async fn watch_and_import(
    storage: DynStorageBackend,
    dirs: Vec<PathBuf>,
    ignore_patterns: Vec<String>,
) -> Result<()> {
    let mut watcher = FileWatcher::new(&dirs)?;
    info!("filesystem watcher started");
    while let Some(path) = watcher.next_change().await {
        // Guard clauses, in the same short-circuit order as before:
        // regular file, recognized media type, not ignored.
        if !path.is_file() {
            continue;
        }
        if crate::media_type::MediaType::from_path(&path).is_none() {
            continue;
        }
        if crate::import::should_ignore(&path, &ignore_patterns) {
            continue;
        }
        info!(path = %path.display(), "detected file change, importing");
        if let Err(e) = import::import_file(&storage, &path).await {
            warn!(path = %path.display(), error = %e, "failed to import changed file");
        }
    }
    Ok(())
}

View file

@ -0,0 +1,517 @@
use std::path::PathBuf;
use std::sync::Arc;
use chrono::{DateTime, Datelike, Utc};
use serde::{Deserialize, Serialize};
use tokio::sync::RwLock;
use tokio_util::sync::CancellationToken;
use uuid::Uuid;
use crate::config::Config;
use crate::jobs::{JobKind, JobQueue};
/// When a scheduled task should run. All times are UTC.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum Schedule {
    /// Every `secs` seconds.
    Interval { secs: u64 },
    /// Once a day at `hour:minute`.
    Daily { hour: u32, minute: u32 },
    /// Once a week; `day` counts from Monday (0 = Monday … 6 = Sunday).
    Weekly { day: u32, hour: u32, minute: u32 },
}
impl Schedule {
    /// Returns the first run time strictly after `from`.
    ///
    /// Out-of-range `hour`/`minute` values are clamped to 23/59. The
    /// previous `and_hms_opt(..).unwrap_or_default()` silently fell back to
    /// the epoch for invalid times, which produced a next-run far in the
    /// past and made the task permanently "due".
    pub fn next_run(&self, from: DateTime<Utc>) -> DateTime<Utc> {
        match self {
            Schedule::Interval { secs } => from + chrono::Duration::seconds(*secs as i64),
            Schedule::Daily { hour, minute } => {
                let (hour, minute) = clamp_time(*hour, *minute);
                let today_utc = from
                    .date_naive()
                    .and_hms_opt(hour, minute, 0)
                    .expect("clamped time is always valid")
                    .and_utc();
                // Run later today if the time is still ahead, else tomorrow.
                if today_utc > from {
                    today_utc
                } else {
                    today_utc + chrono::Duration::days(1)
                }
            }
            Schedule::Weekly { day, hour, minute } => {
                let (hour, minute) = clamp_time(*hour, *minute);
                let current_day = from.weekday().num_days_from_monday();
                let target_day = *day;
                let days_ahead = if target_day > current_day {
                    target_day - current_day
                } else if target_day < current_day {
                    7 - (current_day - target_day)
                } else {
                    // Same weekday: run today if the time is still ahead,
                    // otherwise wait a full week.
                    let today = from
                        .date_naive()
                        .and_hms_opt(hour, minute, 0)
                        .expect("clamped time is always valid")
                        .and_utc();
                    if today > from {
                        return today;
                    }
                    7
                };
                let target_date = from.date_naive() + chrono::Duration::days(days_ahead as i64);
                target_date
                    .and_hms_opt(hour, minute, 0)
                    .expect("clamped time is always valid")
                    .and_utc()
            }
        }
    }

    /// Human-readable summary, e.g. "Every 1h", "Daily 03:00", "Mon 03:00".
    pub fn display_string(&self) -> String {
        match self {
            Schedule::Interval { secs } => {
                if *secs >= 3600 {
                    format!("Every {}h", secs / 3600)
                } else if *secs >= 60 {
                    format!("Every {}m", secs / 60)
                } else {
                    format!("Every {}s", secs)
                }
            }
            Schedule::Daily { hour, minute } => format!("Daily {hour:02}:{minute:02}"),
            Schedule::Weekly { day, hour, minute } => {
                // Days count from Monday; anything past Saturday renders as Sunday.
                let day_name = match day {
                    0 => "Mon",
                    1 => "Tue",
                    2 => "Wed",
                    3 => "Thu",
                    4 => "Fri",
                    5 => "Sat",
                    _ => "Sun",
                };
                format!("{day_name} {hour:02}:{minute:02}")
            }
        }
    }
}

/// Clamps an hour/minute pair into the valid 0-23 / 0-59 range.
fn clamp_time(hour: u32, minute: u32) -> (u32, u32) {
    (hour.min(23), minute.min(59))
}
/// One recurring background task tracked by the [`TaskScheduler`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScheduledTask {
    /// Stable identifier (e.g. "periodic_scan") used for persistence lookup.
    pub id: String,
    /// Human-readable display name.
    pub name: String,
    /// The job submitted to the queue when the task fires.
    pub kind: JobKind,
    pub schedule: Schedule,
    pub enabled: bool,
    pub last_run: Option<DateTime<Utc>>,
    /// `None` while the task is disabled.
    pub next_run: Option<DateTime<Utc>>,
    /// Last observed outcome: "running", "completed", "cancelled", or "failed: …".
    pub last_status: Option<String>,
    /// Whether a job for this task is currently running. Skipped during serialization.
    #[serde(default, skip_serializing)]
    pub running: bool,
    /// The job ID of the last submitted job. Skipped during serialization/deserialization.
    #[serde(skip)]
    pub last_job_id: Option<Uuid>,
}
/// Periodic task runner: keeps the task table, submits due jobs to the
/// [`JobQueue`], and persists task settings into the shared [`Config`].
pub struct TaskScheduler {
    tasks: Arc<RwLock<Vec<ScheduledTask>>>,
    job_queue: Arc<JobQueue>,
    /// Triggering this token stops the `run` loop.
    cancel: CancellationToken,
    config: Arc<RwLock<Config>>,
    /// Where to save config state; `None` disables on-disk persistence.
    config_path: Option<PathBuf>,
}
impl TaskScheduler {
    /// Creates a scheduler seeded with the built-in task set. Only
    /// `periodic_scan` starts enabled; call [`Self::restore_state`] once
    /// afterwards to layer persisted settings over these defaults.
    pub fn new(
        job_queue: Arc<JobQueue>,
        cancel: CancellationToken,
        config: Arc<RwLock<Config>>,
        config_path: Option<PathBuf>,
    ) -> Self {
        let now = Utc::now();
        let default_tasks = vec![
            ScheduledTask {
                id: "periodic_scan".to_string(),
                name: "Periodic Scan".to_string(),
                kind: JobKind::Scan { path: None },
                schedule: Schedule::Interval { secs: 3600 },
                enabled: true,
                last_run: None,
                next_run: Some(now + chrono::Duration::seconds(3600)),
                last_status: None,
                running: false,
                last_job_id: None,
            },
            ScheduledTask {
                id: "integrity_check".to_string(),
                name: "Integrity Check".to_string(),
                kind: JobKind::VerifyIntegrity { media_ids: vec![] },
                schedule: Schedule::Weekly {
                    day: 0,
                    hour: 3,
                    minute: 0,
                },
                enabled: false,
                last_run: None,
                next_run: None,
                last_status: None,
                running: false,
                last_job_id: None,
            },
            ScheduledTask {
                id: "orphan_detection".to_string(),
                name: "Orphan Detection".to_string(),
                kind: JobKind::OrphanDetection,
                schedule: Schedule::Daily { hour: 2, minute: 0 },
                enabled: false,
                last_run: None,
                next_run: None,
                last_status: None,
                running: false,
                last_job_id: None,
            },
            ScheduledTask {
                id: "thumbnail_cleanup".to_string(),
                name: "Thumbnail Cleanup".to_string(),
                kind: JobKind::CleanupThumbnails,
                schedule: Schedule::Weekly {
                    day: 6,
                    hour: 4,
                    minute: 0,
                },
                enabled: false,
                last_run: None,
                next_run: None,
                last_status: None,
                running: false,
                last_job_id: None,
            },
        ];
        Self {
            tasks: Arc::new(RwLock::new(default_tasks)),
            job_queue,
            cancel,
            config,
            config_path,
        }
    }
    /// Restore saved task state from config. Should be called once after construction.
    pub async fn restore_state(&self) {
        let saved = self.config.read().await.scheduled_tasks.clone();
        if saved.is_empty() {
            return;
        }
        let mut tasks = self.tasks.write().await;
        for saved_task in &saved {
            // Only known task ids are restored; stale config entries are ignored.
            if let Some(task) = tasks.iter_mut().find(|t| t.id == saved_task.id) {
                task.enabled = saved_task.enabled;
                task.schedule = saved_task.schedule.clone();
                // last_run is persisted as an RFC 3339 string; unparseable
                // values are silently dropped.
                if let Some(Ok(dt)) = saved_task
                    .last_run
                    .as_ref()
                    .map(|s| DateTime::parse_from_rfc3339(s))
                {
                    task.last_run = Some(dt.with_timezone(&Utc));
                }
                if task.enabled {
                    let from = task.last_run.unwrap_or_else(Utc::now);
                    task.next_run = Some(task.schedule.next_run(from));
                } else {
                    task.next_run = None;
                }
            }
        }
    }
    /// Persist current task state to config file.
    async fn persist_task_state(&self) {
        let tasks = self.tasks.read().await;
        let task_configs: Vec<crate::config::ScheduledTaskConfig> = tasks
            .iter()
            .map(|t| crate::config::ScheduledTaskConfig {
                id: t.id.clone(),
                enabled: t.enabled,
                schedule: t.schedule.clone(),
                last_run: t.last_run.map(|dt| dt.to_rfc3339()),
            })
            .collect();
        drop(tasks);
        {
            let mut config = self.config.write().await;
            config.scheduled_tasks = task_configs;
        }
        // Saving is best-effort: a write failure is logged, not propagated.
        if let Some(ref path) = self.config_path {
            let config = self.config.read().await;
            if let Err(e) = config.save_to_file(path) {
                tracing::warn!(error = %e, "failed to persist scheduler state to config file");
            }
        }
    }
    /// Returns a snapshot of all scheduled tasks.
    pub async fn list_tasks(&self) -> Vec<ScheduledTask> {
        self.tasks.read().await.clone()
    }
    /// Flips the enabled flag of the task with this `id` and persists the
    /// change. Returns the new state, or `None` if no task matches.
    pub async fn toggle_task(&self, id: &str) -> Option<bool> {
        let result = {
            let mut tasks = self.tasks.write().await;
            if let Some(task) = tasks.iter_mut().find(|t| t.id == id) {
                task.enabled = !task.enabled;
                if task.enabled {
                    task.next_run = Some(task.schedule.next_run(Utc::now()));
                } else {
                    task.next_run = None;
                }
                Some(task.enabled)
            } else {
                None
            }
        };
        if result.is_some() {
            self.persist_task_state().await;
        }
        result
    }
    /// Run a task immediately. Uses a single write lock to avoid TOCTOU races.
    pub async fn run_now(&self, id: &str) -> Option<String> {
        let result = {
            let mut tasks = self.tasks.write().await;
            let task = tasks.iter_mut().find(|t| t.id == id)?;
            // Submit the job (cheap: sends to mpsc channel)
            let job_id = self.job_queue.submit(task.kind.clone()).await;
            task.last_run = Some(Utc::now());
            task.last_status = Some("running".to_string());
            task.running = true;
            task.last_job_id = Some(job_id);
            if task.enabled {
                task.next_run = Some(task.schedule.next_run(Utc::now()));
            }
            Some(job_id.to_string())
        };
        if result.is_some() {
            self.persist_task_state().await;
        }
        result
    }
    /// Main scheduler loop. Uses a two-phase approach per tick to avoid
    /// holding the write lock across await points. Returns when the
    /// cancellation token is triggered.
    pub async fn run(&self) {
        // 30-second tick granularity: a due task starts at most one tick late.
        let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
        loop {
            tokio::select! {
                _ = interval.tick() => {}
                _ = self.cancel.cancelled() => {
                    tracing::info!("scheduler shutting down");
                    return;
                }
            }
            // Phase 1: Check completed jobs and update running status
            // NOTE(review): this phase does await `job_queue.status` while
            // holding the write lock (safe with tokio's RwLock, but contra
            // the doc comment above) — consider collecting job ids first.
            {
                use crate::jobs::JobStatus;
                let mut tasks = self.tasks.write().await;
                for task in tasks.iter_mut() {
                    if !task.running {
                        continue;
                    }
                    let Some(job_id) = task.last_job_id else {
                        continue;
                    };
                    let Some(job) = self.job_queue.status(job_id).await else {
                        continue;
                    };
                    match &job.status {
                        JobStatus::Completed { .. } => {
                            task.running = false;
                            task.last_status = Some("completed".to_string());
                        }
                        JobStatus::Failed { error } => {
                            task.running = false;
                            task.last_status = Some(format!("failed: {error}"));
                        }
                        JobStatus::Cancelled => {
                            task.running = false;
                            task.last_status = Some("cancelled".to_string());
                        }
                        _ => {} // still pending or running
                    }
                }
            }
            // Phase 2: Collect due tasks and submit jobs
            let now = Utc::now();
            let mut to_submit: Vec<(usize, JobKind)> = Vec::new();
            {
                let mut tasks = self.tasks.write().await;
                for (i, task) in tasks.iter_mut().enumerate() {
                    if !task.enabled || task.running {
                        continue;
                    }
                    let due = task.next_run.is_some_and(|next| now >= next);
                    if due {
                        to_submit.push((i, task.kind.clone()));
                        task.last_run = Some(now);
                        task.last_status = Some("running".to_string());
                        task.running = true;
                        task.next_run = Some(task.schedule.next_run(now));
                    }
                }
            }
            // Submit jobs without holding the lock
            for (idx, kind) in to_submit {
                let job_id = self.job_queue.submit(kind).await;
                let mut tasks = self.tasks.write().await;
                if let Some(task) = tasks.get_mut(idx) {
                    task.last_job_id = Some(job_id);
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `Schedule::next_run`, `ScheduledTask` serde
    //! behavior, and `Schedule::display_string`.
    use super::*;
    use chrono::TimeZone;
    #[test]
    fn test_interval_next_run() {
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 12, 0, 0).unwrap();
        let schedule = Schedule::Interval { secs: 3600 };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 13, 0, 0).unwrap());
    }
    #[test]
    fn test_daily_next_run_future_today() {
        // 10:00 UTC, schedule is 14:00 => same day
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
        let schedule = Schedule::Daily {
            hour: 14,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 14, 0, 0).unwrap());
    }
    #[test]
    fn test_daily_next_run_past_today() {
        // 16:00 UTC, schedule is 14:00 => next day
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 16, 0, 0).unwrap();
        let schedule = Schedule::Daily {
            hour: 14,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 16, 14, 0, 0).unwrap());
    }
    #[test]
    fn test_weekly_next_run() {
        // 2025-06-15 is a Sunday (day 6). Target is Monday (day 0) at 03:00.
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 12, 0, 0).unwrap();
        let schedule = Schedule::Weekly {
            day: 0,
            hour: 3,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 16, 3, 0, 0).unwrap());
    }
    #[test]
    fn test_weekly_same_day_future() {
        // 2025-06-15 is Sunday (day 6). Schedule is Sunday 14:00, current is 10:00 => today.
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
        let schedule = Schedule::Weekly {
            day: 6,
            hour: 14,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 14, 0, 0).unwrap());
    }
    #[test]
    fn test_weekly_same_day_past() {
        // 2025-06-15 is Sunday (day 6). Schedule is Sunday 08:00, current is 10:00 => next week.
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
        let schedule = Schedule::Weekly {
            day: 6,
            hour: 8,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 22, 8, 0, 0).unwrap());
    }
    #[test]
    fn test_serde_roundtrip() {
        // Verifies that skip/skip_serializing fields reset on round-trip.
        let task = ScheduledTask {
            id: "test".to_string(),
            name: "Test Task".to_string(),
            kind: JobKind::Scan { path: None },
            schedule: Schedule::Interval { secs: 3600 },
            enabled: true,
            last_run: Some(Utc::now()),
            next_run: Some(Utc::now()),
            last_status: Some("completed".to_string()),
            running: true,
            last_job_id: Some(Uuid::now_v7()),
        };
        let json = serde_json::to_string(&task).unwrap();
        let deserialized: ScheduledTask = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.id, "test");
        assert_eq!(deserialized.enabled, true);
        // running defaults to false on deserialization (skip_serializing)
        assert!(!deserialized.running);
        // last_job_id is skipped entirely
        assert!(deserialized.last_job_id.is_none());
    }
    #[test]
    fn test_display_string() {
        assert_eq!(
            Schedule::Interval { secs: 3600 }.display_string(),
            "Every 1h"
        );
        assert_eq!(
            Schedule::Interval { secs: 300 }.display_string(),
            "Every 5m"
        );
        assert_eq!(
            Schedule::Interval { secs: 30 }.display_string(),
            "Every 30s"
        );
        assert_eq!(
            Schedule::Daily { hour: 3, minute: 0 }.display_string(),
            "Daily 03:00"
        );
        assert_eq!(
            Schedule::Weekly {
                day: 0,
                hour: 3,
                minute: 0
            }
            .display_string(),
            "Mon 03:00"
        );
        assert_eq!(
            Schedule::Weekly {
                day: 6,
                hour: 14,
                minute: 30
            }
            .display_string(),
            "Sun 14:30"
        );
    }
}

View file

@ -0,0 +1,256 @@
use serde::{Deserialize, Serialize};
use winnow::combinator::{alt, delimited, preceded, repeat};
use winnow::token::{take_till, take_while};
use winnow::{ModalResult, Parser};
/// AST of a parsed search query (see `parse_search_query`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SearchQuery {
    /// A plain term, matched against full-text content.
    FullText(String),
    /// `field:value` match against a named metadata field.
    FieldMatch { field: String, value: String },
    /// All sub-queries must match (implicit when terms are juxtaposed).
    And(Vec<SearchQuery>),
    /// Any sub-query may match (explicit `OR` keyword).
    Or(Vec<SearchQuery>),
    /// Negation, written `-term`.
    Not(Box<SearchQuery>),
    /// Prefix match, written `term*`.
    Prefix(String),
    /// Fuzzy match, written `term~`.
    Fuzzy(String),
    /// Media-type filter, written `type:value`.
    TypeFilter(String),
    /// Tag filter, written `tag:value`.
    TagFilter(String),
}
/// A complete search: query AST plus sort order and pagination window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchRequest {
    pub query: SearchQuery,
    pub sort: SortOrder,
    pub pagination: crate::model::Pagination,
}
/// One page of results plus the total match count across all pages.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResults {
    pub items: Vec<crate::model::MediaItem>,
    pub total_count: u64,
}
/// Sort order for search results; defaults to relevance ranking.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[derive(Default)]
pub enum SortOrder {
    #[default]
    Relevance,
    DateAsc,
    DateDesc,
    NameAsc,
    NameDesc,
    SizeAsc,
    SizeDesc,
}
/// Consumes zero or more whitespace characters.
///
/// Previously only ASCII spaces matched, so a tab or newline between terms
/// ended the parse early (and the remainder was silently dropped by
/// `parse_search_query`). Matching any whitespace keeps the separator rule
/// consistent with `bare_word`, which already stops at any whitespace.
fn ws<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
    take_while(0.., |c: char| c.is_whitespace()).parse_next(input)
}
/// Parses a double-quoted string, returning its contents.
/// NOTE(review): no escape handling — an embedded `\"` cannot be expressed.
fn quoted_string(input: &mut &str) -> ModalResult<String> {
    delimited('"', take_till(0.., '"'), '"')
        .map(|s: &str| s.to_string())
        .parse_next(input)
}
/// Parses a run of non-whitespace, non-parenthesis characters.
fn bare_word(input: &mut &str) -> ModalResult<String> {
    take_while(1.., |c: char| !c.is_whitespace() && c != ')' && c != '(')
        .map(|s: &str| s.to_string())
        .parse_next(input)
}
/// A quoted string or, failing that, a bare word.
fn word_or_quoted(input: &mut &str) -> ModalResult<String> {
    alt((quoted_string, bare_word)).parse_next(input)
}
/// Negation: `-` followed by any atom.
fn not_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    preceded(('-', ws), atom)
        .map(|q| SearchQuery::Not(Box::new(q)))
        .parse_next(input)
}
/// `field:value`. The fields `type` and `tag` are special-cased into
/// dedicated filter variants; everything else becomes a generic FieldMatch.
fn field_match(input: &mut &str) -> ModalResult<SearchQuery> {
    let field_name =
        take_while(1.., |c: char| c.is_alphanumeric() || c == '_').map(|s: &str| s.to_string());
    (field_name, ':', word_or_quoted)
        .map(|(field, _, value)| match field.as_str() {
            "type" => SearchQuery::TypeFilter(value),
            "tag" => SearchQuery::TagFilter(value),
            _ => SearchQuery::FieldMatch { field, value },
        })
        .parse_next(input)
}
/// Prefix query: a word immediately followed by `*`.
fn prefix_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    let word = take_while(1.., |c: char| {
        !c.is_whitespace() && c != ')' && c != '(' && c != '*'
    })
    .map(|s: &str| s.to_string());
    (word, '*')
        .map(|(w, _)| SearchQuery::Prefix(w))
        .parse_next(input)
}
/// Fuzzy query: a word immediately followed by `~`.
fn fuzzy_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    let word = take_while(1.., |c: char| {
        !c.is_whitespace() && c != ')' && c != '(' && c != '~'
    })
    .map(|s: &str| s.to_string());
    (word, '~')
        .map(|(w, _)| SearchQuery::Fuzzy(w))
        .parse_next(input)
}
/// A parenthesized sub-expression, restarting at OR precedence.
fn paren_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    delimited(('(', ws), or_expr, (ws, ')')).parse_next(input)
}
/// Guard that fails (without consuming input) when the input starts with
/// the standalone keyword `OR`, so `full_text` never swallows the operator.
fn not_or_keyword(input: &mut &str) -> ModalResult<()> {
    if let Some(rest) = input.strip_prefix("OR")
        && (rest.is_empty() || rest.starts_with(' ') || rest.starts_with(')'))
    {
        return Err(winnow::error::ErrMode::Backtrack(
            winnow::error::ContextError::new(),
        ));
    }
    Ok(())
}
/// A plain full-text term (any word or quoted string that is not `OR`).
fn full_text(input: &mut &str) -> ModalResult<SearchQuery> {
    not_or_keyword.parse_next(input)?;
    word_or_quoted.map(SearchQuery::FullText).parse_next(input)
}
/// One query atom. Alternative order is significant: more specific forms
/// (parens, negation, field:value, prefix, fuzzy) are tried before the
/// catch-all full-text term.
fn atom(input: &mut &str) -> ModalResult<SearchQuery> {
    alt((
        paren_expr,
        not_expr,
        field_match,
        prefix_expr,
        fuzzy_expr,
        full_text,
    ))
    .parse_next(input)
}
/// One or more atoms separated by whitespace; two or more collapse into
/// an implicit `And`.
fn and_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    let head = atom.parse_next(input)?;
    let tail: Vec<SearchQuery> = repeat(0.., preceded(ws, atom)).parse_next(input)?;
    if tail.is_empty() {
        return Ok(head);
    }
    let mut terms = Vec::with_capacity(tail.len() + 1);
    terms.push(head);
    terms.extend(tail);
    Ok(SearchQuery::And(terms))
}
/// One or more `and_expr`s joined by the `OR` keyword; a lone operand is
/// returned unwrapped.
fn or_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    let head = and_expr.parse_next(input)?;
    let tail: Vec<SearchQuery> =
        repeat(0.., preceded((ws, "OR", ws), and_expr)).parse_next(input)?;
    if tail.is_empty() {
        return Ok(head);
    }
    let mut terms = Vec::with_capacity(tail.len() + 1);
    terms.push(head);
    terms.extend(tail);
    Ok(SearchQuery::Or(terms))
}
/// Parses a user-supplied query string into a [`SearchQuery`] AST.
///
/// Empty (or all-whitespace) input yields an empty full-text query.
///
/// NOTE(review): `parse_next` does not require the whole input to be
/// consumed, so trailing text the grammar cannot parse is silently
/// ignored rather than reported — confirm this leniency is intended.
pub fn parse_search_query(input: &str) -> crate::error::Result<SearchQuery> {
    let trimmed = input.trim();
    if trimmed.is_empty() {
        return Ok(SearchQuery::FullText(String::new()));
    }
    let mut input = trimmed;
    or_expr
        .parse_next(&mut input)
        .map_err(|e| crate::error::PinakesError::SearchParse(format!("{e}")))
}
#[cfg(test)]
mod tests {
    //! One test per grammar production of the search query language.
    use super::*;
    #[test]
    fn test_simple_text() {
        let q = parse_search_query("hello").unwrap();
        assert_eq!(q, SearchQuery::FullText("hello".into()));
    }
    #[test]
    fn test_field_match() {
        let q = parse_search_query("artist:Beatles").unwrap();
        assert_eq!(
            q,
            SearchQuery::FieldMatch {
                field: "artist".into(),
                value: "Beatles".into()
            }
        );
    }
    #[test]
    fn test_type_filter() {
        let q = parse_search_query("type:pdf").unwrap();
        assert_eq!(q, SearchQuery::TypeFilter("pdf".into()));
    }
    #[test]
    fn test_tag_filter() {
        let q = parse_search_query("tag:music").unwrap();
        assert_eq!(q, SearchQuery::TagFilter("music".into()));
    }
    #[test]
    fn test_and_implicit() {
        // Adjacent terms combine into an implicit And.
        let q = parse_search_query("hello world").unwrap();
        assert_eq!(
            q,
            SearchQuery::And(vec![
                SearchQuery::FullText("hello".into()),
                SearchQuery::FullText("world".into()),
            ])
        );
    }
    #[test]
    fn test_or() {
        let q = parse_search_query("hello OR world").unwrap();
        assert_eq!(
            q,
            SearchQuery::Or(vec![
                SearchQuery::FullText("hello".into()),
                SearchQuery::FullText("world".into()),
            ])
        );
    }
    #[test]
    fn test_not() {
        let q = parse_search_query("-excluded").unwrap();
        assert_eq!(
            q,
            SearchQuery::Not(Box::new(SearchQuery::FullText("excluded".into())))
        );
    }
    #[test]
    fn test_prefix() {
        let q = parse_search_query("hel*").unwrap();
        assert_eq!(q, SearchQuery::Prefix("hel".into()));
    }
    #[test]
    fn test_fuzzy() {
        let q = parse_search_query("hello~").unwrap();
        assert_eq!(q, SearchQuery::Fuzzy("hello".into()));
    }
    #[test]
    fn test_quoted() {
        // Quotes allow whitespace inside a single full-text term.
        let q = parse_search_query("\"hello world\"").unwrap();
        assert_eq!(q, SearchQuery::FullText("hello world".into()));
    }
}

View file

@ -0,0 +1,26 @@
use crate::error::{PinakesError, Result};
// SQLite migration scripts embedded into the binary by refinery at
// compile time.
mod sqlite_migrations {
    use refinery::embed_migrations;
    embed_migrations!("../../migrations/sqlite");
}
// Postgres migration scripts, likewise embedded at compile time.
mod postgres_migrations {
    use refinery::embed_migrations;
    embed_migrations!("../../migrations/postgres");
}
/// Applies all pending embedded SQLite migrations on `conn`, mapping any
/// refinery failure into `PinakesError::Migration`.
pub fn run_sqlite_migrations(conn: &mut rusqlite::Connection) -> Result<()> {
    sqlite_migrations::migrations::runner()
        .run(conn)
        .map(|_report| ())
        .map_err(|e| PinakesError::Migration(e.to_string()))
}
/// Applies all pending embedded Postgres migrations through `client`,
/// mapping any refinery failure into `PinakesError::Migration`.
pub async fn run_postgres_migrations(client: &mut tokio_postgres::Client) -> Result<()> {
    let outcome = postgres_migrations::migrations::runner()
        .run_async(client)
        .await;
    match outcome {
        Ok(_report) => Ok(()),
        Err(e) => Err(PinakesError::Migration(e.to_string())),
    }
}

View file

@ -0,0 +1,209 @@
pub mod migrations;
pub mod postgres;
pub mod sqlite;
use std::path::PathBuf;
use std::sync::Arc;
use uuid::Uuid;
use crate::error::Result;
use crate::model::*;
use crate::search::{SearchRequest, SearchResults};
/// Statistics about the database.
#[derive(Debug, Clone, Default)]
pub struct DatabaseStats {
    pub media_count: u64,
    pub tag_count: u64,
    pub collection_count: u64,
    pub audit_count: u64,
    pub database_size_bytes: u64,
    /// Identifies which backend produced these stats (e.g. SQLite/Postgres).
    pub backend_name: String,
}
/// Abstraction over the persistence layer, implemented by each database
/// backend. Methods with default bodies are naive per-item loops that a
/// backend may override with transactional equivalents.
#[async_trait::async_trait]
pub trait StorageBackend: Send + Sync + 'static {
    // Migrations
    /// Applies all pending schema migrations.
    async fn run_migrations(&self) -> Result<()>;
    // Root directories
    async fn add_root_dir(&self, path: PathBuf) -> Result<()>;
    async fn list_root_dirs(&self) -> Result<Vec<PathBuf>>;
    async fn remove_root_dir(&self, path: &std::path::Path) -> Result<()>;
    // Media CRUD
    async fn insert_media(&self, item: &MediaItem) -> Result<()>;
    async fn get_media(&self, id: MediaId) -> Result<MediaItem>;
    async fn count_media(&self) -> Result<u64>;
    async fn get_media_by_hash(&self, hash: &ContentHash) -> Result<Option<MediaItem>>;
    async fn list_media(&self, pagination: &Pagination) -> Result<Vec<MediaItem>>;
    async fn update_media(&self, item: &MediaItem) -> Result<()>;
    async fn delete_media(&self, id: MediaId) -> Result<()>;
    /// Deletes every media row; returns how many were removed.
    async fn delete_all_media(&self) -> Result<u64>;
    // Tags
    async fn create_tag(&self, name: &str, parent_id: Option<Uuid>) -> Result<Tag>;
    async fn get_tag(&self, id: Uuid) -> Result<Tag>;
    async fn list_tags(&self) -> Result<Vec<Tag>>;
    async fn delete_tag(&self, id: Uuid) -> Result<()>;
    async fn tag_media(&self, media_id: MediaId, tag_id: Uuid) -> Result<()>;
    async fn untag_media(&self, media_id: MediaId, tag_id: Uuid) -> Result<()>;
    async fn get_media_tags(&self, media_id: MediaId) -> Result<Vec<Tag>>;
    /// All tags nested (transitively) under `tag_id`.
    async fn get_tag_descendants(&self, tag_id: Uuid) -> Result<Vec<Tag>>;
    // Collections
    async fn create_collection(
        &self,
        name: &str,
        kind: CollectionKind,
        description: Option<&str>,
        filter_query: Option<&str>,
    ) -> Result<Collection>;
    async fn get_collection(&self, id: Uuid) -> Result<Collection>;
    async fn list_collections(&self) -> Result<Vec<Collection>>;
    async fn delete_collection(&self, id: Uuid) -> Result<()>;
    async fn add_to_collection(
        &self,
        collection_id: Uuid,
        media_id: MediaId,
        position: i32,
    ) -> Result<()>;
    async fn remove_from_collection(&self, collection_id: Uuid, media_id: MediaId) -> Result<()>;
    async fn get_collection_members(&self, collection_id: Uuid) -> Result<Vec<MediaItem>>;
    // Search
    async fn search(&self, request: &SearchRequest) -> Result<SearchResults>;
    // Audit
    async fn record_audit(&self, entry: &AuditEntry) -> Result<()>;
    async fn list_audit_entries(
        &self,
        media_id: Option<MediaId>,
        pagination: &Pagination,
    ) -> Result<Vec<AuditEntry>>;
    // Custom fields
    async fn set_custom_field(
        &self,
        media_id: MediaId,
        name: &str,
        field: &CustomField,
    ) -> Result<()>;
    async fn get_custom_fields(
        &self,
        media_id: MediaId,
    ) -> Result<std::collections::HashMap<String, CustomField>>;
    async fn delete_custom_field(&self, media_id: MediaId, name: &str) -> Result<()>;
    // Batch operations (transactional where supported)
    /// Deletes each id in turn; returns the delete count. This default is
    /// NOT atomic — an error mid-way leaves earlier deletions in place.
    async fn batch_delete_media(&self, ids: &[MediaId]) -> Result<u64> {
        let mut count = 0u64;
        for id in ids {
            self.delete_media(*id).await?;
            count += 1;
        }
        Ok(count)
    }
    /// Applies every tag to every media item (cartesian product); returns
    /// the number of (media, tag) pairs written.
    async fn batch_tag_media(&self, media_ids: &[MediaId], tag_ids: &[Uuid]) -> Result<u64> {
        let mut count = 0u64;
        for media_id in media_ids {
            for tag_id in tag_ids {
                self.tag_media(*media_id, *tag_id).await?;
                count += 1;
            }
        }
        Ok(count)
    }
    // Integrity
    async fn list_media_paths(&self) -> Result<Vec<(MediaId, std::path::PathBuf, ContentHash)>>;
    // Batch metadata update
    /// Overwrites the given metadata fields on every item in `ids`; `None`
    /// arguments leave the corresponding field untouched. Bumps
    /// `updated_at` on each item and returns how many were updated.
    async fn batch_update_media(
        &self,
        ids: &[MediaId],
        title: Option<&str>,
        artist: Option<&str>,
        album: Option<&str>,
        genre: Option<&str>,
        year: Option<i32>,
        description: Option<&str>,
    ) -> Result<u64> {
        let mut count = 0u64;
        for id in ids {
            let mut item = self.get_media(*id).await?;
            if let Some(v) = title {
                item.title = Some(v.to_string());
            }
            if let Some(v) = artist {
                item.artist = Some(v.to_string());
            }
            if let Some(v) = album {
                item.album = Some(v.to_string());
            }
            if let Some(v) = genre {
                item.genre = Some(v.to_string());
            }
            // Option<i32> is Copy — match it by value instead of through a
            // reference as before.
            if let Some(v) = year {
                item.year = Some(v);
            }
            if let Some(v) = description {
                item.description = Some(v.to_string());
            }
            item.updated_at = chrono::Utc::now();
            self.update_media(&item).await?;
            count += 1;
        }
        Ok(count)
    }
    // Saved searches
    async fn save_search(
        &self,
        id: uuid::Uuid,
        name: &str,
        query: &str,
        sort_order: Option<&str>,
    ) -> Result<()>;
    async fn list_saved_searches(&self) -> Result<Vec<crate::model::SavedSearch>>;
    async fn delete_saved_search(&self, id: uuid::Uuid) -> Result<()>;
    // Duplicates
    /// Groups of duplicate items; presumably grouped by content hash —
    /// confirm against the backend implementations.
    async fn find_duplicates(&self) -> Result<Vec<Vec<MediaItem>>>;
    // Database management
    async fn database_stats(&self) -> Result<DatabaseStats>;
    async fn vacuum(&self) -> Result<()>;
    async fn clear_all_data(&self) -> Result<()>;
    // Thumbnail helpers
    /// List all media IDs, optionally filtering to those missing thumbnails.
    async fn list_media_ids_for_thumbnails(
        &self,
        only_missing: bool,
    ) -> Result<Vec<crate::model::MediaId>>;
    // Library statistics
    async fn library_statistics(&self) -> Result<LibraryStatistics>;
}
/// Comprehensive library statistics.
#[derive(Debug, Clone, Default)]
pub struct LibraryStatistics {
    /// Total number of media items in the library.
    pub total_media: u64,
    /// Combined size of all media files, in bytes.
    pub total_size_bytes: u64,
    /// Mean file size across all items, in bytes.
    pub avg_file_size_bytes: u64,
    /// Per-media-type item counts as `(type name, count)` pairs.
    pub media_by_type: Vec<(String, u64)>,
    /// Per-media-type storage usage as `(type name, bytes)` pairs.
    pub storage_by_type: Vec<(String, u64)>,
    /// Newest item (as a display string), if the library is non-empty.
    pub newest_item: Option<String>,
    /// Oldest item (as a display string), if the library is non-empty.
    pub oldest_item: Option<String>,
    /// Most-used tags as `(tag name, count)` pairs.
    pub top_tags: Vec<(String, u64)>,
    /// Largest collections as `(collection name, count)` pairs.
    pub top_collections: Vec<(String, u64)>,
    /// Total number of tags defined.
    pub total_tags: u64,
    /// Total number of collections defined.
    pub total_collections: u64,
    /// Number of duplicate items detected.
    pub total_duplicates: u64,
}
/// Shared, dynamically-dispatched handle to a [`StorageBackend`] implementation.
pub type DynStorageBackend = Arc<dyn StorageBackend>;

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,43 @@
use uuid::Uuid;
use crate::error::Result;
use crate::model::{AuditAction, MediaId, Tag};
use crate::storage::DynStorageBackend;
/// Create a new tag named `name`, optionally nested under `parent_id`.
pub async fn create_tag(
    storage: &DynStorageBackend,
    name: &str,
    parent_id: Option<Uuid>,
) -> Result<Tag> {
    let tag = storage.create_tag(name, parent_id).await?;
    Ok(tag)
}
/// Attach `tag_id` to `media_id`, then record the action in the audit log.
pub async fn tag_media(storage: &DynStorageBackend, media_id: MediaId, tag_id: Uuid) -> Result<()> {
    storage.tag_media(media_id, tag_id).await?;
    let details = format!("tag_id={tag_id}");
    crate::audit::record_action(storage, Some(media_id), AuditAction::Tagged, Some(details)).await
}
/// Detach `tag_id` from `media_id`, then record the action in the audit log.
pub async fn untag_media(
    storage: &DynStorageBackend,
    media_id: MediaId,
    tag_id: Uuid,
) -> Result<()> {
    storage.untag_media(media_id, tag_id).await?;
    let details = format!("tag_id={tag_id}");
    crate::audit::record_action(storage, Some(media_id), AuditAction::Untagged, Some(details)).await
}
/// Return every descendant of `tag_id` in the tag hierarchy.
pub async fn get_tag_tree(storage: &DynStorageBackend, tag_id: Uuid) -> Result<Vec<Tag>> {
    let descendants = storage.get_tag_descendants(tag_id).await?;
    Ok(descendants)
}

View file

@ -0,0 +1,278 @@
use std::path::{Path, PathBuf};
use std::process::Command;
use tracing::{info, warn};
use crate::config::ThumbnailConfig;
use crate::error::{PinakesError, Result};
use crate::media_type::{MediaCategory, MediaType};
use crate::model::MediaId;
/// Generate a thumbnail for a media file and return the path to the thumbnail.
///
/// Supports images (via `image` crate), videos (via ffmpeg), PDFs (via pdftoppm),
/// and EPUBs (via cover image extraction).
pub fn generate_thumbnail(
    media_id: MediaId,
    source_path: &Path,
    media_type: MediaType,
    thumbnail_dir: &Path,
) -> Result<Option<PathBuf>> {
    // Delegate to the configurable variant using default settings.
    let config = ThumbnailConfig::default();
    generate_thumbnail_with_config(media_id, source_path, media_type, thumbnail_dir, &config)
}
/// Generate a thumbnail for `source_path` using the given [`ThumbnailConfig`].
///
/// The output is written as `<media_id>.jpg` inside `thumbnail_dir`, which is
/// created if it does not exist. Returns:
/// - `Ok(Some(path))` when a thumbnail was produced;
/// - `Ok(None)` for unsupported media categories *and* for generation
///   failures (failures are logged with `warn!`, not propagated);
/// - `Err(..)` only when the thumbnail directory cannot be created.
pub fn generate_thumbnail_with_config(
    media_id: MediaId,
    source_path: &Path,
    media_type: MediaType,
    thumbnail_dir: &Path,
    config: &ThumbnailConfig,
) -> Result<Option<PathBuf>> {
    std::fs::create_dir_all(thumbnail_dir)?;
    let thumb_path = thumbnail_dir.join(format!("{}.jpg", media_id));
    // Dispatch on media category; RAW and HEIC images go through external tools.
    let result = match media_type.category() {
        MediaCategory::Image => {
            if media_type.is_raw() {
                generate_raw_thumbnail(source_path, &thumb_path, config)
            } else if media_type == MediaType::Heic {
                generate_heic_thumbnail(source_path, &thumb_path, config)
            } else {
                generate_image_thumbnail(source_path, &thumb_path, config)
            }
        }
        MediaCategory::Video => generate_video_thumbnail(source_path, &thumb_path, config),
        MediaCategory::Document => match media_type {
            MediaType::Pdf => generate_pdf_thumbnail(source_path, &thumb_path, config),
            MediaType::Epub => generate_epub_thumbnail(source_path, &thumb_path, config),
            _ => return Ok(None),
        },
        _ => return Ok(None),
    };
    // Generation failure is non-fatal: log it and report "no thumbnail".
    match result {
        Ok(()) => {
            info!(media_id = %media_id, category = ?media_type.category(), "generated thumbnail");
            Ok(Some(thumb_path))
        }
        Err(e) => {
            warn!(media_id = %media_id, error = %e, "failed to generate thumbnail");
            Ok(None)
        }
    }
}
/// Decode an image file and write a JPEG thumbnail of it to `dest`.
fn generate_image_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    let decoded = image::open(source)
        .map_err(|e| PinakesError::MetadataExtraction(format!("image open: {e}")))?;
    // `thumbnail` fits the image inside a size x size box, preserving aspect ratio.
    let scaled = decoded.thumbnail(config.size, config.size);
    let mut file = std::fs::File::create(dest)?;
    scaled
        .write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality(
            &mut file,
            config.quality,
        ))
        .map_err(|e| PinakesError::MetadataExtraction(format!("thumbnail encode: {e}")))?;
    Ok(())
}
/// Extract a single video frame with `ffmpeg` and write it to `dest`.
///
/// Requires an `ffmpeg` binary (configurable via `config.ffmpeg_path`).
fn generate_video_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    let ffmpeg = config.ffmpeg_path.as_deref().unwrap_or("ffmpeg");
    let seek = config.video_seek_secs.to_string();
    // NOTE(review): `scale=W:H` forces exact output dimensions and may distort
    // the aspect ratio — confirm whether `scale=W:-1` was intended.
    let scale_filter = format!("scale={}:{}", config.size, config.size);
    let status = Command::new(ffmpeg)
        .args(["-ss", &seek, "-i"])
        .arg(source)
        .args(["-vframes", "1", "-vf", &scale_filter, "-y"])
        .arg(dest)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status()
        .map_err(|e| {
            PinakesError::MetadataExtraction(format!("ffmpeg not found or failed to execute: {e}"))
        })?;
    if status.success() {
        Ok(())
    } else {
        Err(PinakesError::MetadataExtraction(format!(
            "ffmpeg exited with status {status}"
        )))
    }
}
fn generate_pdf_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
// Use pdftoppm to render first page, then resize with image crate
let temp_prefix = dest.with_extension("tmp");
let status = Command::new("pdftoppm")
.args(["-jpeg", "-f", "1", "-l", "1", "-singlefile"])
.arg(source)
.arg(&temp_prefix)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"pdftoppm not found or failed to execute: {e}"
))
})?;
if !status.success() {
return Err(PinakesError::MetadataExtraction(format!(
"pdftoppm exited with status {}",
status
)));
}
// pdftoppm outputs <prefix>.jpg
let rendered = temp_prefix.with_extension("jpg");
if rendered.exists() {
// Resize to thumbnail size
let img = image::open(&rendered)
.map_err(|e| PinakesError::MetadataExtraction(format!("pdf thumbnail open: {e}")))?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder =
image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, config.quality);
thumb
.write_with_encoder(encoder)
.map_err(|e| PinakesError::MetadataExtraction(format!("pdf thumbnail encode: {e}")))?;
let _ = std::fs::remove_file(&rendered);
Ok(())
} else {
Err(PinakesError::MetadataExtraction(
"pdftoppm did not produce output".to_string(),
))
}
}
/// Extract the EPUB cover image (if any) and write a JPEG thumbnail to `dest`.
fn generate_epub_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    let mut doc = epub::doc::EpubDoc::new(source)
        .map_err(|e| PinakesError::MetadataExtraction(format!("epub open: {e}")))?;
    // Prefer the declared cover; fall back to conventionally named resources.
    let mut cover = doc.get_cover().map(|(bytes, _mime)| bytes);
    if cover.is_none() {
        cover = doc.get_resource("cover-image").map(|(bytes, _)| bytes);
    }
    if cover.is_none() {
        cover = doc.get_resource("cover").map(|(bytes, _)| bytes);
    }
    let bytes = match cover {
        Some(bytes) => bytes,
        None => {
            return Err(PinakesError::MetadataExtraction(
                "no cover image found in epub".to_string(),
            ));
        }
    };
    let img = image::load_from_memory(&bytes)
        .map_err(|e| PinakesError::MetadataExtraction(format!("epub cover decode: {e}")))?;
    let thumb = img.thumbnail(config.size, config.size);
    let mut output = std::fs::File::create(dest)?;
    thumb
        .write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality(
            &mut output,
            config.quality,
        ))
        .map_err(|e| PinakesError::MetadataExtraction(format!("epub thumbnail encode: {e}")))?;
    Ok(())
}
/// Extract the embedded preview from a camera RAW file via `dcraw` and write a
/// JPEG thumbnail to `dest`.
///
/// Requires the `dcraw` binary on `PATH`.
fn generate_raw_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    // Try dcraw to extract embedded JPEG preview, then resize.
    // `-e` extracts the embedded thumbnail, `-c` writes it to stdout, which is
    // redirected into the temp file via the child's stdout handle.
    let temp_ppm = dest.with_extension("ppm");
    let status = Command::new("dcraw")
        .args(["-e", "-c"])
        .arg(source)
        .stdout(std::fs::File::create(&temp_ppm).map_err(|e| {
            PinakesError::MetadataExtraction(format!("failed to create temp file: {e}"))
        })?)
        .stderr(std::process::Stdio::null())
        .status()
        .map_err(|e| PinakesError::MetadataExtraction(format!("dcraw not found or failed: {e}")))?;
    if !status.success() {
        let _ = std::fs::remove_file(&temp_ppm);
        return Err(PinakesError::MetadataExtraction(format!(
            "dcraw exited with status {}",
            status
        )));
    }
    // The extracted preview is typically a JPEG — try loading it.
    // NOTE(review): the temp file carries a ".ppm" extension but usually holds
    // JPEG bytes; this relies on `image::open` falling back to content-based
    // format detection — confirm the pinned `image` version guarantees this.
    if temp_ppm.exists() {
        let result = image::open(&temp_ppm);
        let _ = std::fs::remove_file(&temp_ppm);
        let img = result
            .map_err(|e| PinakesError::MetadataExtraction(format!("raw preview decode: {e}")))?;
        let thumb = img.thumbnail(config.size, config.size);
        let mut output = std::fs::File::create(dest)?;
        let encoder =
            image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, config.quality);
        thumb
            .write_with_encoder(encoder)
            .map_err(|e| PinakesError::MetadataExtraction(format!("raw thumbnail encode: {e}")))?;
        Ok(())
    } else {
        Err(PinakesError::MetadataExtraction(
            "dcraw did not produce output".to_string(),
        ))
    }
}
/// Convert a HEIC/HEIF file to JPEG via `heif-convert`, then resize it to a
/// thumbnail at `dest`.
///
/// Requires the `heif-convert` binary (libheif tools) on `PATH`.
fn generate_heic_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    let temp_jpg = dest.with_extension("tmp.jpg");
    let status = Command::new("heif-convert")
        .arg(source)
        .arg(&temp_jpg)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status()
        .map_err(|e| {
            PinakesError::MetadataExtraction(format!("heif-convert not found or failed: {e}"))
        })?;
    if !status.success() {
        let _ = std::fs::remove_file(&temp_jpg);
        return Err(PinakesError::MetadataExtraction(format!(
            "heif-convert exited with status {status}"
        )));
    }
    if !temp_jpg.exists() {
        return Err(PinakesError::MetadataExtraction(
            "heif-convert did not produce output".to_string(),
        ));
    }
    // Load the converted JPEG, then delete the temp file regardless of outcome.
    let loaded = image::open(&temp_jpg);
    let _ = std::fs::remove_file(&temp_jpg);
    let img = loaded.map_err(|e| PinakesError::MetadataExtraction(format!("heic decode: {e}")))?;
    let thumb = img.thumbnail(config.size, config.size);
    let mut output = std::fs::File::create(dest)?;
    thumb
        .write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality(
            &mut output,
            config.quality,
        ))
        .map_err(|e| PinakesError::MetadataExtraction(format!("heic thumbnail encode: {e}")))?;
    Ok(())
}
/// Returns the default thumbnail directory under the data dir.
pub fn default_thumbnail_dir() -> PathBuf {
    let data_dir = crate::config::Config::default_data_dir();
    data_dir.join("thumbnails")
}

View file

@ -0,0 +1,414 @@
use std::collections::HashMap;
use std::sync::Arc;
use pinakes_core::model::*;
use pinakes_core::storage::StorageBackend;
use pinakes_core::storage::sqlite::SqliteBackend;
/// Build a fresh in-memory SQLite backend with all migrations applied.
async fn setup() -> Arc<SqliteBackend> {
    let backend = Arc::new(SqliteBackend::in_memory().expect("in-memory SQLite"));
    backend.run_migrations().await.expect("migrations");
    backend
}
#[tokio::test]
/// Full insert → get → get-by-hash → update → list → delete cycle for media items.
async fn test_media_crud() {
    let storage = setup().await;
    let now = chrono::Utc::now();
    let id = MediaId::new();
    // Minimal plain-text fixture used throughout this test.
    let item = MediaItem {
        id,
        path: "/tmp/test.txt".into(),
        file_name: "test.txt".to_string(),
        media_type: pinakes_core::media_type::MediaType::PlainText,
        content_hash: ContentHash::new("abc123".to_string()),
        file_size: 100,
        title: Some("Test Title".to_string()),
        artist: None,
        album: None,
        genre: None,
        year: Some(2024),
        duration_secs: None,
        description: Some("A test file".to_string()),
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    // Insert
    storage.insert_media(&item).await.unwrap();
    // Get: round-trip must preserve the stored fields.
    let fetched = storage.get_media(id).await.unwrap();
    assert_eq!(fetched.id, id);
    assert_eq!(fetched.title.as_deref(), Some("Test Title"));
    assert_eq!(fetched.file_size, 100);
    // Get by hash: lookup by content hash must resolve to the same item.
    let by_hash = storage
        .get_media_by_hash(&ContentHash::new("abc123".into()))
        .await
        .unwrap();
    assert!(by_hash.is_some());
    assert_eq!(by_hash.unwrap().id, id);
    // Update: changed title must survive a re-fetch.
    let mut updated = fetched;
    updated.title = Some("Updated Title".to_string());
    storage.update_media(&updated).await.unwrap();
    let re_fetched = storage.get_media(id).await.unwrap();
    assert_eq!(re_fetched.title.as_deref(), Some("Updated Title"));
    // List
    let list = storage.list_media(&Pagination::default()).await.unwrap();
    assert_eq!(list.len(), 1);
    // Delete: subsequent lookups must fail.
    storage.delete_media(id).await.unwrap();
    let result = storage.get_media(id).await;
    assert!(result.is_err());
}
#[tokio::test]
/// Tag hierarchy: create, list, descendants, attach/detach to media, delete.
async fn test_tags() {
    let storage = setup().await;
    // Create tags: "Rock" is nested under "Music".
    let parent = storage.create_tag("Music", None).await.unwrap();
    let child = storage.create_tag("Rock", Some(parent.id)).await.unwrap();
    assert_eq!(parent.name, "Music");
    assert_eq!(child.parent_id, Some(parent.id));
    // List tags
    let tags = storage.list_tags().await.unwrap();
    assert_eq!(tags.len(), 2);
    // Get descendants: the child must appear under the parent.
    let descendants = storage.get_tag_descendants(parent.id).await.unwrap();
    assert!(descendants.iter().any(|t| t.name == "Rock"));
    // Tag media: attach the parent tag to a fixture item.
    let now = chrono::Utc::now();
    let id = MediaId::new();
    let item = MediaItem {
        id,
        path: "/tmp/song.mp3".into(),
        file_name: "song.mp3".to_string(),
        media_type: pinakes_core::media_type::MediaType::Mp3,
        content_hash: ContentHash::new("hash1".to_string()),
        file_size: 5000,
        title: Some("Test Song".to_string()),
        artist: Some("Test Artist".to_string()),
        album: None,
        genre: None,
        year: None,
        duration_secs: Some(180.0),
        description: None,
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await.unwrap();
    storage.tag_media(id, parent.id).await.unwrap();
    let media_tags = storage.get_media_tags(id).await.unwrap();
    assert_eq!(media_tags.len(), 1);
    assert_eq!(media_tags[0].name, "Music");
    // Untag
    storage.untag_media(id, parent.id).await.unwrap();
    let media_tags = storage.get_media_tags(id).await.unwrap();
    assert_eq!(media_tags.len(), 0);
    // Delete tag: only the parent should remain afterwards.
    storage.delete_tag(child.id).await.unwrap();
    let tags = storage.list_tags().await.unwrap();
    assert_eq!(tags.len(), 1);
}
#[tokio::test]
/// Collections: create, add/remove members, list, delete.
async fn test_collections() {
    let storage = setup().await;
    let col = storage
        .create_collection("Favorites", CollectionKind::Manual, Some("My faves"), None)
        .await
        .unwrap();
    assert_eq!(col.name, "Favorites");
    assert_eq!(col.kind, CollectionKind::Manual);
    // Fixture item to place into the collection.
    let now = chrono::Utc::now();
    let id = MediaId::new();
    let item = MediaItem {
        id,
        path: "/tmp/doc.pdf".into(),
        file_name: "doc.pdf".to_string(),
        media_type: pinakes_core::media_type::MediaType::Pdf,
        content_hash: ContentHash::new("pdfhash".to_string()),
        file_size: 10000,
        title: None,
        artist: None,
        album: None,
        genre: None,
        year: None,
        duration_secs: None,
        description: None,
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await.unwrap();
    // Membership: add at position 0, verify, then remove and verify.
    storage.add_to_collection(col.id, id, 0).await.unwrap();
    let members = storage.get_collection_members(col.id).await.unwrap();
    assert_eq!(members.len(), 1);
    assert_eq!(members[0].id, id);
    storage.remove_from_collection(col.id, id).await.unwrap();
    let members = storage.get_collection_members(col.id).await.unwrap();
    assert_eq!(members.len(), 0);
    // List collections
    let cols = storage.list_collections().await.unwrap();
    assert_eq!(cols.len(), 1);
    storage.delete_collection(col.id).await.unwrap();
    let cols = storage.list_collections().await.unwrap();
    assert_eq!(cols.len(), 0);
}
#[tokio::test]
/// Custom fields: set, read back (directly and via `get_media`), delete.
async fn test_custom_fields() {
    let storage = setup().await;
    let now = chrono::Utc::now();
    let id = MediaId::new();
    // Fixture item the custom field is attached to.
    let item = MediaItem {
        id,
        path: "/tmp/test.md".into(),
        file_name: "test.md".to_string(),
        media_type: pinakes_core::media_type::MediaType::Markdown,
        content_hash: ContentHash::new("mdhash".to_string()),
        file_size: 500,
        title: None,
        artist: None,
        album: None,
        genre: None,
        year: None,
        duration_secs: None,
        description: None,
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await.unwrap();
    // Set custom field
    let field = CustomField {
        field_type: CustomFieldType::Text,
        value: "important".to_string(),
    };
    storage
        .set_custom_field(id, "priority", &field)
        .await
        .unwrap();
    // Get custom fields
    let fields = storage.get_custom_fields(id).await.unwrap();
    assert_eq!(fields.len(), 1);
    assert_eq!(fields["priority"].value, "important");
    // Verify custom fields are loaded with get_media
    let media = storage.get_media(id).await.unwrap();
    assert_eq!(media.custom_fields.len(), 1);
    assert_eq!(media.custom_fields["priority"].value, "important");
    // Delete custom field
    storage.delete_custom_field(id, "priority").await.unwrap();
    let fields = storage.get_custom_fields(id).await.unwrap();
    assert_eq!(fields.len(), 0);
}
#[tokio::test]
/// Search: full-text title match and `type:` filter over a small fixture set.
async fn test_search() {
    let storage = setup().await;
    let now = chrono::Utc::now();
    // Insert a few items; an empty artist string maps to `None`.
    for (i, (name, title, artist)) in [
        ("song1.mp3", "Bohemian Rhapsody", "Queen"),
        ("song2.mp3", "Stairway to Heaven", "Led Zeppelin"),
        ("doc.pdf", "Rust Programming", ""),
    ]
    .iter()
    .enumerate()
    {
        let item = MediaItem {
            id: MediaId::new(),
            path: format!("/tmp/{name}").into(),
            file_name: name.to_string(),
            media_type: pinakes_core::media_type::MediaType::from_path(std::path::Path::new(name))
                .unwrap(),
            content_hash: ContentHash::new(format!("hash{i}")),
            file_size: 1000 * (i as u64 + 1),
            title: Some(title.to_string()),
            artist: if artist.is_empty() {
                None
            } else {
                Some(artist.to_string())
            },
            album: None,
            genre: None,
            year: None,
            duration_secs: None,
            description: None,
            thumbnail_path: None,
            custom_fields: HashMap::new(),
            created_at: now,
            updated_at: now,
        };
        storage.insert_media(&item).await.unwrap();
    }
    // Full-text search: a title word should match exactly one item.
    let request = pinakes_core::search::SearchRequest {
        query: pinakes_core::search::parse_search_query("Bohemian").unwrap(),
        sort: pinakes_core::search::SortOrder::Relevance,
        pagination: Pagination::new(0, 50, None),
    };
    let results = storage.search(&request).await.unwrap();
    assert_eq!(results.total_count, 1);
    assert_eq!(results.items[0].title.as_deref(), Some("Bohemian Rhapsody"));
    // Type filter: `type:pdf` should match only the PDF fixture.
    let request = pinakes_core::search::SearchRequest {
        query: pinakes_core::search::parse_search_query("type:pdf").unwrap(),
        sort: pinakes_core::search::SortOrder::Relevance,
        pagination: Pagination::new(0, 50, None),
    };
    let results = storage.search(&request).await.unwrap();
    assert_eq!(results.total_count, 1);
    assert_eq!(results.items[0].file_name, "doc.pdf");
}
#[tokio::test]
/// A recorded audit entry must be returned by a paginated listing.
async fn test_audit_log() {
    let storage = setup().await;
    let entry = AuditEntry {
        id: uuid::Uuid::now_v7(),
        media_id: None,
        action: AuditAction::Scanned,
        details: Some("test scan".to_string()),
        timestamp: chrono::Utc::now(),
    };
    storage.record_audit(&entry).await.unwrap();
    let page = Pagination::new(0, 10, None);
    let entries = storage.list_audit_entries(None, &page).await.unwrap();
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].action, AuditAction::Scanned);
}
#[tokio::test]
/// Importing the same file twice must dedupe: the second import returns the
/// original media id and is flagged as a duplicate.
async fn test_import_with_dedup() {
    let storage = setup().await as pinakes_core::storage::DynStorageBackend;
    // Write a small file to import.
    let dir = tempfile::tempdir().unwrap();
    let file_path = dir.path().join("test.txt");
    std::fs::write(&file_path, "hello world").unwrap();
    let first = pinakes_core::import::import_file(&storage, &file_path)
        .await
        .unwrap();
    let second = pinakes_core::import::import_file(&storage, &file_path)
        .await
        .unwrap();
    assert!(!first.was_duplicate);
    assert!(second.was_duplicate);
    assert_eq!(first.media_id, second.media_id);
}
#[tokio::test]
/// Root directories can be added, listed, and removed individually.
async fn test_root_dirs() {
    let storage = setup().await;
    for dir in ["/tmp/music", "/tmp/docs"] {
        storage.add_root_dir(dir.into()).await.unwrap();
    }
    assert_eq!(storage.list_root_dirs().await.unwrap().len(), 2);
    storage
        .remove_root_dir(std::path::Path::new("/tmp/music"))
        .await
        .unwrap();
    let remaining = storage.list_root_dirs().await.unwrap();
    assert_eq!(remaining.len(), 1);
    assert_eq!(remaining[0], std::path::PathBuf::from("/tmp/docs"));
}
#[tokio::test]
/// A brand-new library must report all-zero / all-empty statistics.
async fn test_library_statistics_empty() {
    let storage = setup().await;
    let stats = storage.library_statistics().await.unwrap();
    assert_eq!(stats.total_media, 0);
    assert_eq!(stats.total_size_bytes, 0);
    assert_eq!(stats.avg_file_size_bytes, 0);
    assert_eq!(stats.total_tags, 0);
    assert_eq!(stats.total_collections, 0);
    assert_eq!(stats.total_duplicates, 0);
    assert!(stats.media_by_type.is_empty());
    assert!(stats.storage_by_type.is_empty());
    assert!(stats.top_tags.is_empty());
    assert!(stats.top_collections.is_empty());
    assert!(stats.newest_item.is_none());
    assert!(stats.oldest_item.is_none());
}
#[tokio::test]
/// After inserting one item, statistics must reflect its count and size.
async fn test_library_statistics_with_data() {
    let storage = setup().await;
    let now = chrono::Utc::now();
    // Single MP3 fixture; size/avg checks below depend on `file_size: 5000`.
    let item = MediaItem {
        id: MediaId::new(),
        path: "/tmp/stats_test.mp3".into(),
        file_name: "stats_test.mp3".to_string(),
        media_type: pinakes_core::media_type::MediaType::Mp3,
        content_hash: ContentHash::new("stats_hash".to_string()),
        file_size: 5000,
        title: Some("Stats Song".to_string()),
        artist: None,
        album: None,
        genre: None,
        year: None,
        duration_secs: Some(120.0),
        description: None,
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await.unwrap();
    let stats = storage.library_statistics().await.unwrap();
    assert_eq!(stats.total_media, 1);
    assert_eq!(stats.total_size_bytes, 5000);
    assert_eq!(stats.avg_file_size_bytes, 5000);
    assert!(!stats.media_by_type.is_empty());
    assert!(stats.newest_item.is_some());
    assert!(stats.oldest_item.is_some());
}