initial commit

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I4a6b498153eccd5407510dd541b7f4816a6a6964
This commit is contained in:
raf 2026-01-30 22:05:46 +03:00
commit 6a73d11c4b
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
124 changed files with 34856 additions and 0 deletions

View file

@ -0,0 +1,39 @@
# Manifest for the pinakes-core library crate: data model, storage
# backends, metadata extraction, import pipeline, and background jobs.
[package]
name = "pinakes-core"
# Shared package metadata is inherited from the workspace manifest.
edition.workspace = true
version.workspace = true
license.workspace = true
[dependencies]
# Async runtime, serialization, and error handling.
tokio = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
toml = { workspace = true }
chrono = { workspace = true }
uuid = { workspace = true }
thiserror = { workspace = true }
anyhow = { workspace = true }
tracing = { workspace = true }
# Content hashing and per-format metadata extraction.
blake3 = { workspace = true }
lofty = { workspace = true }
lopdf = { workspace = true }
epub = { workspace = true }
matroska = { workspace = true }
gray_matter = { workspace = true }
# Storage backends (SQLite and PostgreSQL) plus schema migrations.
rusqlite = { workspace = true }
tokio-postgres = { workspace = true }
deadpool-postgres = { workspace = true }
postgres-types = { workspace = true }
refinery = { workspace = true }
# Filesystem walking/watching, parsing, and media-type detection.
walkdir = { workspace = true }
notify = { workspace = true }
winnow = { workspace = true }
mime_guess = { workspace = true }
async-trait = { workspace = true }
kamadak-exif = { workspace = true }
image = { workspace = true }
# "rt" feature gives CancellationToken + task helpers used by the job queue.
tokio-util = { version = "0.7", features = ["rt"] }
reqwest = { workspace = true }
[dev-dependencies]
tempfile = "3"

View file

@ -0,0 +1,21 @@
use uuid::Uuid;
use crate::error::Result;
use crate::model::{AuditAction, AuditEntry, MediaId};
use crate::storage::DynStorageBackend;
/// Append an audit log entry describing `action` to the storage backend.
///
/// `media_id` is optional because some actions are not tied to a single
/// media item; `details` carries free-form context for the entry.
pub async fn record_action(
    storage: &DynStorageBackend,
    media_id: Option<MediaId>,
    action: AuditAction,
    details: Option<String>,
) -> Result<()> {
    // Time-ordered v7 UUIDs keep audit rows naturally sortable by id.
    let entry = AuditEntry {
        id: Uuid::now_v7(),
        timestamp: chrono::Utc::now(),
        media_id,
        action,
        details,
    };
    storage.record_audit(&entry).await
}

View file

@ -0,0 +1,91 @@
use std::collections::HashMap;
use std::hash::Hash;
use std::sync::Arc;
use std::time::{Duration, Instant};
use tokio::sync::RwLock;
// A stored value together with its insertion time, used for TTL checks.
struct CacheEntry<V> {
    value: V,
    inserted_at: Instant,
}
/// A simple TTL-based in-memory cache with periodic eviction.
pub struct Cache<K, V> {
    // Shared with the background eviction task spawned in `Cache::new`.
    entries: Arc<RwLock<HashMap<K, CacheEntry<V>>>>,
    // Entries older than this are considered expired on read and evicted.
    ttl: Duration,
}
impl<K, V> Cache<K, V>
where
K: Eq + Hash + Clone + Send + Sync + 'static,
V: Clone + Send + Sync + 'static,
{
pub fn new(ttl: Duration) -> Self {
let cache = Self {
entries: Arc::new(RwLock::new(HashMap::new())),
ttl,
};
// Spawn periodic eviction task
let entries = cache.entries.clone();
let ttl = cache.ttl;
tokio::spawn(async move {
let mut interval = tokio::time::interval(ttl);
loop {
interval.tick().await;
let now = Instant::now();
let mut map = entries.write().await;
map.retain(|_, entry| now.duration_since(entry.inserted_at) < ttl);
}
});
cache
}
pub async fn get(&self, key: &K) -> Option<V> {
let map = self.entries.read().await;
if let Some(entry) = map.get(key) {
if entry.inserted_at.elapsed() < self.ttl {
return Some(entry.value.clone());
}
}
None
}
pub async fn insert(&self, key: K, value: V) {
let mut map = self.entries.write().await;
map.insert(
key,
CacheEntry {
value,
inserted_at: Instant::now(),
},
);
}
pub async fn invalidate(&self, key: &K) {
let mut map = self.entries.write().await;
map.remove(key);
}
pub async fn invalidate_all(&self) {
let mut map = self.entries.write().await;
map.clear();
}
}
/// Application-level cache layer wrapping multiple caches for different data types.
pub struct CacheLayer {
    /// Cache for serialized API responses, keyed by request path + query string.
    pub responses: Cache<String, String>,
}
impl CacheLayer {
    /// Build the cache layer with a single shared TTL (in seconds) for
    /// all contained caches.
    pub fn new(ttl_secs: u64) -> Self {
        let ttl = Duration::from_secs(ttl_secs);
        Self {
            responses: Cache::new(ttl),
        }
    }
}

View file

@ -0,0 +1,78 @@
use uuid::Uuid;
use crate::error::Result;
use crate::model::*;
use crate::storage::DynStorageBackend;
/// Create a new collection; thin delegation to the storage backend.
///
/// `filter_query` is only meaningful for virtual collections, whose
/// membership is evaluated dynamically from the query.
pub async fn create_collection(
    storage: &DynStorageBackend,
    name: &str,
    kind: CollectionKind,
    description: Option<&str>,
    filter_query: Option<&str>,
) -> Result<Collection> {
    let created = storage
        .create_collection(name, kind, description, filter_query)
        .await?;
    Ok(created)
}
/// Add `media_id` to a manual collection at `position`, then record the
/// action in the audit log.
pub async fn add_member(
    storage: &DynStorageBackend,
    collection_id: Uuid,
    media_id: MediaId,
    position: i32,
) -> Result<()> {
    storage
        .add_to_collection(collection_id, media_id, position)
        .await?;
    let details = format!("collection_id={collection_id}");
    crate::audit::record_action(
        storage,
        Some(media_id),
        AuditAction::AddedToCollection,
        Some(details),
    )
    .await
}
/// Remove `media_id` from a manual collection, then record the action in
/// the audit log.
pub async fn remove_member(
    storage: &DynStorageBackend,
    collection_id: Uuid,
    media_id: MediaId,
) -> Result<()> {
    storage
        .remove_from_collection(collection_id, media_id)
        .await?;
    let details = format!("collection_id={collection_id}");
    crate::audit::record_action(
        storage,
        Some(media_id),
        AuditAction::RemovedFromCollection,
        Some(details),
    )
    .await
}
/// Resolve the members of a collection.
///
/// Manual collections read their stored member list; virtual collections
/// run their `filter_query` through the search engine on every call.
pub async fn get_members(
    storage: &DynStorageBackend,
    collection_id: Uuid,
) -> Result<Vec<MediaItem>> {
    let collection = storage.get_collection(collection_id).await?;
    match collection.kind {
        // Explicit member list persisted in storage.
        CollectionKind::Manual => storage.get_collection_members(collection_id).await,
        // Membership computed from the stored search query.
        CollectionKind::Virtual => {
            let Some(ref query_str) = collection.filter_query else {
                // A virtual collection without a query has no members.
                return Ok(Vec::new());
            };
            let query = crate::search::parse_search_query(query_str)?;
            // NOTE(review): results are capped at 10_000 items by this
            // hard-coded page size — confirm the bound is intentional.
            let request = crate::search::SearchRequest {
                query,
                sort: crate::search::SortOrder::DateDesc,
                pagination: Pagination::new(0, 10000, None),
            };
            let results = storage.search(&request).await?;
            Ok(results.items)
        }
    }
}

View file

@ -0,0 +1,437 @@
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
/// Top-level application configuration, deserialized from `pinakes.toml`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Config {
    /// Database backend selection and per-backend connection settings.
    pub storage: StorageConfig,
    /// Library root directories to index.
    pub directories: DirectoryConfig,
    /// File scanning behavior (watching, polling, ignore patterns).
    pub scanning: ScanningConfig,
    /// HTTP server bind address and optional API key.
    pub server: ServerConfig,
    /// UI preferences; optional in the TOML file.
    #[serde(default)]
    pub ui: UiConfig,
    /// User accounts; authentication is disabled unless configured.
    #[serde(default)]
    pub accounts: AccountsConfig,
    /// Background job worker and cache tuning.
    #[serde(default)]
    pub jobs: JobsConfig,
    /// Thumbnail generation settings.
    #[serde(default)]
    pub thumbnails: ThumbnailConfig,
    /// Outgoing webhook endpoints notified on events.
    #[serde(default)]
    pub webhooks: Vec<WebhookConfig>,
    /// Recurring scheduled task definitions.
    #[serde(default)]
    pub scheduled_tasks: Vec<ScheduledTaskConfig>,
}
/// One recurring task entry driven by the scheduler.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScheduledTaskConfig {
    /// Stable identifier used to reference the task.
    pub id: String,
    pub enabled: bool,
    /// When the task runs; see `crate::scheduler::Schedule`.
    pub schedule: crate::scheduler::Schedule,
    // Last run timestamp as a string — exact format is set by the
    // scheduler; TODO confirm against crate::scheduler.
    pub last_run: Option<String>,
}
/// Background job system tuning.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct JobsConfig {
    /// Number of background job workers (default: 2).
    #[serde(default = "default_worker_count")]
    pub worker_count: usize,
    /// TTL in seconds for the application cache layer (default: 60).
    #[serde(default = "default_cache_ttl")]
    pub cache_ttl_secs: u64,
}
// Serde default helpers; keep in sync with the Default impl below.
fn default_worker_count() -> usize {
    2
}
fn default_cache_ttl() -> u64 {
    60
}
impl Default for JobsConfig {
    fn default() -> Self {
        Self {
            worker_count: default_worker_count(),
            cache_ttl_secs: default_cache_ttl(),
        }
    }
}
/// Thumbnail generation settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ThumbnailConfig {
    /// Thumbnail edge size in pixels (default: 320).
    #[serde(default = "default_thumb_size")]
    pub size: u32,
    /// Encoding quality, 0-100 scale presumed (default: 80).
    #[serde(default = "default_thumb_quality")]
    pub quality: u8,
    /// Explicit path to ffmpeg; `None` presumably means search `PATH`
    /// — confirm against the thumbnail module.
    #[serde(default)]
    pub ffmpeg_path: Option<String>,
    /// Seek offset into a video before grabbing the frame (default: 2s).
    #[serde(default = "default_video_seek")]
    pub video_seek_secs: u32,
}
// Serde default helpers; keep in sync with the Default impl below.
fn default_thumb_size() -> u32 {
    320
}
fn default_thumb_quality() -> u8 {
    80
}
fn default_video_seek() -> u32 {
    2
}
impl Default for ThumbnailConfig {
    fn default() -> Self {
        Self {
            size: default_thumb_size(),
            quality: default_thumb_quality(),
            ffmpeg_path: None,
            video_seek_secs: default_video_seek(),
        }
    }
}
/// A single outgoing webhook endpoint.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct WebhookConfig {
    /// Destination URL for event POSTs.
    pub url: String,
    /// Event names to deliver; "*" subscribes to everything.
    pub events: Vec<String>,
    /// Optional shared secret for the endpoint.
    #[serde(default)]
    pub secret: Option<String>,
}
/// Frontend presentation preferences.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UiConfig {
    /// Color theme name (default: "dark").
    #[serde(default = "default_theme")]
    pub theme: String,
    /// Initial view shown on load (default: "library").
    #[serde(default = "default_view")]
    pub default_view: String,
    /// Items per page in listings (default: 48).
    #[serde(default = "default_page_size")]
    pub default_page_size: usize,
    /// Layout mode, e.g. grid vs list (default: "grid").
    #[serde(default = "default_view_mode")]
    pub default_view_mode: String,
    #[serde(default)]
    pub auto_play_media: bool,
    #[serde(default = "default_true")]
    pub show_thumbnails: bool,
    #[serde(default)]
    pub sidebar_collapsed: bool,
}
// Serde default helpers; keep in sync with the Default impl below.
fn default_theme() -> String {
    "dark".to_string()
}
fn default_view() -> String {
    "library".to_string()
}
fn default_page_size() -> usize {
    48
}
fn default_view_mode() -> String {
    "grid".to_string()
}
fn default_true() -> bool {
    true
}
impl Default for UiConfig {
    fn default() -> Self {
        Self {
            theme: default_theme(),
            default_view: default_view(),
            default_page_size: default_page_size(),
            default_view_mode: default_view_mode(),
            auto_play_media: false,
            show_thumbnails: true,
            sidebar_collapsed: false,
        }
    }
}
/// Local user accounts; when `enabled` is false, account checks are
/// presumably skipped — confirm against the server's auth middleware.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct AccountsConfig {
    #[serde(default)]
    pub enabled: bool,
    #[serde(default)]
    pub users: Vec<UserAccount>,
}
/// A single configured user.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UserAccount {
    pub username: String,
    // Pre-hashed password; hash scheme not visible here — confirm
    // against the verification code before documenting further.
    pub password_hash: String,
    /// Authorization level; defaults to the least-privileged role.
    #[serde(default)]
    pub role: UserRole,
}
/// Authorization role, ordered from most to least privileged.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum UserRole {
    Admin,
    Editor,
    #[default]
    Viewer,
}
impl UserRole {
    /// Every role may read.
    pub fn can_read(self) -> bool {
        true
    }
    /// Admins and editors may modify library data.
    pub fn can_write(self) -> bool {
        matches!(self, Self::Admin | Self::Editor)
    }
    /// Only admins may perform administrative operations.
    pub fn can_admin(self) -> bool {
        matches!(self, Self::Admin)
    }
}
impl std::fmt::Display for UserRole {
    // Lowercase labels matching the serde `lowercase` representation.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Admin => write!(f, "admin"),
            Self::Editor => write!(f, "editor"),
            Self::Viewer => write!(f, "viewer"),
        }
    }
}
/// Storage backend selection plus the per-backend settings.
/// Only the section matching `backend` is expected to be populated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StorageConfig {
    pub backend: StorageBackendType,
    pub sqlite: Option<SqliteConfig>,
    pub postgres: Option<PostgresConfig>,
}
/// Which database engine backs the library.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum StorageBackendType {
    Sqlite,
    Postgres,
}
/// SQLite settings: just the database file location.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SqliteConfig {
    pub path: PathBuf,
}
/// PostgreSQL connection settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PostgresConfig {
    pub host: String,
    pub port: u16,
    pub database: String,
    pub username: String,
    pub password: String,
    /// Connection pool size cap.
    pub max_connections: usize,
}
/// Library root directories that scans and imports operate on.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DirectoryConfig {
    pub roots: Vec<PathBuf>,
}
/// File scanning behavior.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScanningConfig {
    /// Watch the filesystem for changes (vs polling only).
    pub watch: bool,
    /// Seconds between poll-based rescans; must be non-zero (validated).
    pub poll_interval_secs: u64,
    /// Path components to skip during scans (exact match; ".*" = dotfiles).
    pub ignore_patterns: Vec<String>,
    /// Parallel import tasks; validated to be 1..=256 (default: 8).
    #[serde(default = "default_import_concurrency")]
    pub import_concurrency: usize,
}
// Serde default helper for ScanningConfig::import_concurrency.
fn default_import_concurrency() -> usize {
    8
}
/// HTTP server settings.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerConfig {
    pub host: String,
    pub port: u16,
    /// Optional API key for bearer token authentication.
    /// If set, all requests (except /health) must include `Authorization: Bearer <key>`.
    /// Can also be set via `PINAKES_API_KEY` environment variable.
    pub api_key: Option<String>,
}
impl Config {
pub fn from_file(path: &Path) -> crate::error::Result<Self> {
let content = std::fs::read_to_string(path).map_err(|e| {
crate::error::PinakesError::Config(format!("failed to read config file: {e}"))
})?;
toml::from_str(&content)
.map_err(|e| crate::error::PinakesError::Config(format!("failed to parse config: {e}")))
}
/// Try loading from file, falling back to defaults if the file doesn't exist.
pub fn load_or_default(path: &Path) -> crate::error::Result<Self> {
if path.exists() {
Self::from_file(path)
} else {
let config = Self::default();
// Ensure the data directory exists for the default SQLite database
config.ensure_dirs()?;
Ok(config)
}
}
/// Save the current config to a TOML file.
pub fn save_to_file(&self, path: &Path) -> crate::error::Result<()> {
if let Some(parent) = path.parent() {
std::fs::create_dir_all(parent)?;
}
let content = toml::to_string_pretty(self).map_err(|e| {
crate::error::PinakesError::Config(format!("failed to serialize config: {e}"))
})?;
std::fs::write(path, content)?;
Ok(())
}
/// Ensure all directories needed by this config exist and are writable.
pub fn ensure_dirs(&self) -> crate::error::Result<()> {
if let Some(ref sqlite) = self.storage.sqlite
&& let Some(parent) = sqlite.path.parent()
{
std::fs::create_dir_all(parent)?;
let metadata = std::fs::metadata(parent)?;
if metadata.permissions().readonly() {
return Err(crate::error::PinakesError::Config(format!(
"directory is not writable: {}",
parent.display()
)));
}
}
Ok(())
}
/// Returns the default config file path following XDG conventions.
pub fn default_config_path() -> PathBuf {
if let Ok(xdg) = std::env::var("XDG_CONFIG_HOME") {
PathBuf::from(xdg).join("pinakes").join("pinakes.toml")
} else if let Ok(home) = std::env::var("HOME") {
PathBuf::from(home)
.join(".config")
.join("pinakes")
.join("pinakes.toml")
} else {
PathBuf::from("pinakes.toml")
}
}
/// Validate configuration values for correctness.
pub fn validate(&self) -> Result<(), String> {
if self.server.port == 0 {
return Err("server port cannot be 0".into());
}
if self.server.host.is_empty() {
return Err("server host cannot be empty".into());
}
if self.scanning.poll_interval_secs == 0 {
return Err("poll interval cannot be 0".into());
}
if self.scanning.import_concurrency == 0 || self.scanning.import_concurrency > 256 {
return Err("import_concurrency must be between 1 and 256".into());
}
Ok(())
}
/// Returns the default data directory following XDG conventions.
pub fn default_data_dir() -> PathBuf {
if let Ok(xdg) = std::env::var("XDG_DATA_HOME") {
PathBuf::from(xdg).join("pinakes")
} else if let Ok(home) = std::env::var("HOME") {
PathBuf::from(home)
.join(".local")
.join("share")
.join("pinakes")
} else {
PathBuf::from("pinakes-data")
}
}
}
impl Default for Config {
    /// Local-first defaults: SQLite under the XDG data dir, no library
    /// roots, server on 127.0.0.1:3000, no authentication.
    fn default() -> Self {
        let data_dir = Self::default_data_dir();
        let storage = StorageConfig {
            backend: StorageBackendType::Sqlite,
            sqlite: Some(SqliteConfig {
                path: data_dir.join("pinakes.db"),
            }),
            postgres: None,
        };
        // Skip dotfiles and common build/dependency directories.
        let ignore_patterns = [".*", "node_modules", "__pycache__", "target"]
            .iter()
            .map(|s| s.to_string())
            .collect();
        let scanning = ScanningConfig {
            watch: false,
            poll_interval_secs: 300,
            ignore_patterns,
            import_concurrency: default_import_concurrency(),
        };
        let server = ServerConfig {
            host: "127.0.0.1".to_string(),
            port: 3000,
            api_key: None,
        };
        Self {
            storage,
            directories: DirectoryConfig { roots: Vec::new() },
            scanning,
            server,
            ui: UiConfig::default(),
            accounts: AccountsConfig::default(),
            jobs: JobsConfig::default(),
            thumbnails: ThumbnailConfig::default(),
            webhooks: Vec::new(),
            scheduled_tasks: Vec::new(),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    /// Build a default config with `scanning.import_concurrency` overridden.
    fn test_config_with_concurrency(concurrency: usize) -> Config {
        let mut config = Config::default();
        config.scanning.import_concurrency = concurrency;
        config
    }
    #[test]
    fn test_validate_import_concurrency_zero() {
        // Validate once and inspect the error, instead of calling twice.
        let err = test_config_with_concurrency(0)
            .validate()
            .expect_err("zero concurrency must be rejected");
        assert!(err.contains("import_concurrency"));
    }
    #[test]
    fn test_validate_import_concurrency_too_high() {
        let err = test_config_with_concurrency(257)
            .validate()
            .expect_err("concurrency above 256 must be rejected");
        assert!(err.contains("import_concurrency"));
    }
    #[test]
    fn test_validate_import_concurrency_valid() {
        assert!(test_config_with_concurrency(8).validate().is_ok());
    }
    #[test]
    fn test_validate_import_concurrency_boundary_low() {
        assert!(test_config_with_concurrency(1).validate().is_ok());
    }
    #[test]
    fn test_validate_import_concurrency_boundary_high() {
        assert!(test_config_with_concurrency(256).validate().is_ok());
    }
}

View file

@ -0,0 +1,59 @@
use std::path::PathBuf;
use thiserror::Error;
/// Unified error type for the pinakes-core crate.
/// Display strings are produced by `thiserror` from the `#[error]` attrs.
#[derive(Debug, Error)]
pub enum PinakesError {
    /// Wrapped filesystem / OS error.
    #[error("IO error: {0}")]
    Io(#[from] std::io::Error),
    /// Backend-agnostic database failure (see the From impls below).
    #[error("database error: {0}")]
    Database(String),
    #[error("migration error: {0}")]
    Migration(String),
    #[error("configuration error: {0}")]
    Config(String),
    /// A media item lookup (by id or similar key) found nothing.
    #[error("media item not found: {0}")]
    NotFound(String),
    /// An item with the same content hash already exists.
    #[error("duplicate content hash: {0}")]
    DuplicateHash(String),
    /// The file extension maps to no known media type.
    #[error("unsupported media type for path: {0}")]
    UnsupportedMediaType(PathBuf),
    #[error("metadata extraction failed: {0}")]
    MetadataExtraction(String),
    /// The user-supplied search query could not be parsed.
    #[error("search query parse error: {0}")]
    SearchParse(String),
    /// A path expected on disk does not exist.
    #[error("file not found at path: {0}")]
    FileNotFound(PathBuf),
    #[error("tag not found: {0}")]
    TagNotFound(String),
    #[error("collection not found: {0}")]
    CollectionNotFound(String),
    /// Catch-all for requests that violate an invariant (e.g. importing
    /// a path outside the configured roots).
    #[error("invalid operation: {0}")]
    InvalidOperation(String),
}
impl From<rusqlite::Error> for PinakesError {
    /// Collapse SQLite driver errors into the generic database variant.
    fn from(e: rusqlite::Error) -> Self {
        Self::Database(e.to_string())
    }
}
impl From<tokio_postgres::Error> for PinakesError {
    /// Collapse Postgres driver errors into the generic database variant.
    fn from(e: tokio_postgres::Error) -> Self {
        Self::Database(e.to_string())
    }
}
/// Crate-wide result alias.
pub type Result<T> = std::result::Result<T, PinakesError>;

View file

@ -0,0 +1,106 @@
use std::sync::Arc;
use serde::{Deserialize, Serialize};
use tokio::sync::broadcast;
use tracing::warn;
use crate::config::WebhookConfig;
/// Events broadcast on the [`EventBus`] and delivered to webhooks as JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum PinakesEvent {
    /// A new media file was imported.
    MediaImported {
        media_id: String,
    },
    /// An existing media item's record changed.
    MediaUpdated {
        media_id: String,
    },
    /// A media item was removed.
    MediaDeleted {
        media_id: String,
    },
    /// A library scan finished.
    ScanCompleted {
        files_found: usize,
        files_processed: usize,
    },
    /// A file's recomputed hash did not match the stored one.
    IntegrityMismatch {
        media_id: String,
        expected: String,
        actual: String,
    },
}
impl PinakesEvent {
    /// Stable event name used for webhook subscription matching; must
    /// stay in sync with the names users put in `WebhookConfig::events`.
    pub fn event_name(&self) -> &'static str {
        match self {
            Self::MediaImported { .. } => "media_imported",
            Self::MediaUpdated { .. } => "media_updated",
            Self::MediaDeleted { .. } => "media_deleted",
            Self::ScanCompleted { .. } => "scan_completed",
            Self::IntegrityMismatch { .. } => "integrity_mismatch",
        }
    }
}
/// Fan-out broadcast bus for application events; also drives webhook
/// delivery when any webhooks are configured.
pub struct EventBus {
    tx: broadcast::Sender<PinakesEvent>,
}
impl EventBus {
    /// Create the bus and, if `webhooks` is non-empty, spawn a delivery
    /// task that forwards matching events to each configured endpoint.
    /// The task exits once every sender handle is dropped.
    pub fn new(webhooks: Vec<WebhookConfig>) -> Arc<Self> {
        let (tx, _) = broadcast::channel(256);
        if !webhooks.is_empty() {
            let mut rx = tx.subscribe();
            let hooks = Arc::new(webhooks);
            tokio::spawn(async move {
                while let Ok(event) = rx.recv().await {
                    let name = event.event_name();
                    // "*" subscribes a hook to every event.
                    let matching = hooks
                        .iter()
                        .filter(|hook| hook.events.iter().any(|e| e == name || e == "*"));
                    for hook in matching {
                        let url = hook.url.clone();
                        let payload = event.clone();
                        let secret = hook.secret.clone();
                        // Deliver each webhook on its own task so a slow
                        // endpoint does not stall the others.
                        tokio::spawn(async move {
                            deliver_webhook(&url, &payload, secret.as_deref()).await;
                        });
                    }
                }
            });
        }
        Arc::new(Self { tx })
    }
    /// Broadcast `event` to all subscribers.
    pub fn emit(&self, event: PinakesEvent) {
        // Ignore send errors (no receivers)
        let _ = self.tx.send(event);
    }
}
/// POST `event` to `url` as JSON, retrying up to three times with
/// exponential backoff (1s, 2s) between attempts. Gives up silently
/// after the final failure (each failure is logged).
///
/// NOTE(review): `_secret` is accepted but unused — the config carries a
/// webhook secret yet no signature/auth header is computed here. TODO:
/// sign the payload (e.g. HMAC over the body) and send it in a header
/// before relying on webhook authenticity.
async fn deliver_webhook(url: &str, event: &PinakesEvent, _secret: Option<&str>) {
    const MAX_ATTEMPTS: u32 = 3;
    let client = reqwest::Client::new();
    let body = serde_json::to_string(event).unwrap_or_default();
    for attempt in 0..MAX_ATTEMPTS {
        match client
            .post(url)
            .header("Content-Type", "application/json")
            .body(body.clone())
            .send()
            .await
        {
            Ok(resp) if resp.status().is_success() => return,
            Ok(resp) => {
                warn!(url, status = %resp.status(), attempt, "webhook delivery failed");
            }
            Err(e) => {
                warn!(url, error = %e, attempt, "webhook delivery error");
            }
        }
        // Back off only between attempts; previously this also slept 4s
        // after the final failure for no benefit.
        if attempt + 1 < MAX_ATTEMPTS {
            tokio::time::sleep(std::time::Duration::from_secs(1 << attempt)).await;
        }
    }
}

View file

@ -0,0 +1,68 @@
use std::path::Path;
use serde::{Deserialize, Serialize};
use crate::error::Result;
use crate::jobs::ExportFormat;
use crate::storage::DynStorageBackend;
/// Summary returned by [`export_library`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExportResult {
    /// Number of media items written to the export file.
    pub items_exported: usize,
    /// Destination path as a (lossily converted) string.
    pub output_path: String,
}
/// Export library data to the specified format.
///
/// Fetches every media item (unbounded pagination), serializes it as
/// JSON or CSV, and writes the result to `destination`.
///
/// # Errors
/// Propagates storage errors, serialization failures (as `Config`
/// errors, matching the existing convention here), and file-write IO
/// errors.
pub async fn export_library(
    storage: &DynStorageBackend,
    format: &ExportFormat,
    destination: &Path,
) -> Result<ExportResult> {
    let pagination = crate::model::Pagination {
        offset: 0,
        limit: u64::MAX,
        sort: None,
    };
    // Was `&&pagination`; the double reference only compiled via deref
    // coercion.
    let items = storage.list_media(&pagination).await?;
    let count = items.len();
    match format {
        ExportFormat::Json => {
            let json = serde_json::to_string_pretty(&items)
                .map_err(|e| crate::error::PinakesError::Config(format!("json serialize: {e}")))?;
            std::fs::write(destination, json)?;
        }
        ExportFormat::Csv => {
            let mut csv = String::new();
            csv.push_str("id,path,file_name,media_type,content_hash,file_size,title,artist,album,genre,year,duration_secs,description,created_at,updated_at\n");
            for item in &items {
                // Free-form text fields are escaped: previously a comma
                // or quote in e.g. a title corrupted the row layout.
                let fields = [
                    item.id.to_string(),
                    csv_escape(&item.path.display().to_string()),
                    csv_escape(&item.file_name),
                    format!("{:?}", item.media_type),
                    item.content_hash.to_string(),
                    item.file_size.to_string(),
                    csv_escape(item.title.as_deref().unwrap_or("")),
                    csv_escape(item.artist.as_deref().unwrap_or("")),
                    csv_escape(item.album.as_deref().unwrap_or("")),
                    csv_escape(item.genre.as_deref().unwrap_or("")),
                    item.year.map(|y| y.to_string()).unwrap_or_default(),
                    item.duration_secs
                        .map(|d| d.to_string())
                        .unwrap_or_default(),
                    csv_escape(item.description.as_deref().unwrap_or("")),
                    item.created_at.to_string(),
                    item.updated_at.to_string(),
                ];
                csv.push_str(&fields.join(","));
                csv.push('\n');
            }
            std::fs::write(destination, csv)?;
        }
    }
    Ok(ExportResult {
        items_exported: count,
        output_path: destination.to_string_lossy().to_string(),
    })
}

/// Quote a CSV field per RFC 4180 when it contains a comma, quote, or
/// line break; embedded quotes are doubled. Plain fields pass through.
fn csv_escape(field: &str) -> String {
    if field.contains(|c| c == ',' || c == '"' || c == '\n' || c == '\r') {
        format!("\"{}\"", field.replace('"', "\"\""))
    } else {
        field.to_string()
    }
}

View file

@ -0,0 +1,31 @@
use std::path::Path;
use crate::error::Result;
use crate::model::ContentHash;
const BUFFER_SIZE: usize = 65536;
pub async fn compute_file_hash(path: &Path) -> Result<ContentHash> {
let path = path.to_path_buf();
let hash = tokio::task::spawn_blocking(move || -> Result<ContentHash> {
let mut hasher = blake3::Hasher::new();
let mut file = std::fs::File::open(&path)?;
let mut buf = vec![0u8; BUFFER_SIZE];
loop {
let n = std::io::Read::read(&mut file, &mut buf)?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(ContentHash::new(hasher.finalize().to_hex().to_string()))
})
.await
.map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))??;
Ok(hash)
}
/// Hash an in-memory byte slice with BLAKE3 (synchronous, no I/O).
pub fn compute_hash_sync(data: &[u8]) -> ContentHash {
    ContentHash::new(blake3::hash(data).to_hex().to_string())
}

View file

@ -0,0 +1,250 @@
use std::path::{Path, PathBuf};
use tracing::info;
use crate::audit;
use crate::error::{PinakesError, Result};
use crate::hash::compute_file_hash;
use crate::media_type::MediaType;
use crate::metadata;
use crate::model::*;
use crate::storage::DynStorageBackend;
use crate::thumbnail;
/// Outcome of importing a single file.
pub struct ImportResult {
    /// Id of the imported item — or of the pre-existing item when the
    /// file was a content-hash duplicate.
    pub media_id: MediaId,
    /// True when an item with identical content already existed.
    pub was_duplicate: bool,
    /// Canonicalized path that was imported.
    pub path: PathBuf,
}
/// Check that a canonicalized path falls under at least one configured root directory.
/// If no roots are configured, all paths are allowed (for ad-hoc imports).
pub async fn validate_path_in_roots(storage: &DynStorageBackend, path: &Path) -> Result<()> {
    let roots = storage.list_root_dirs().await?;
    if roots.is_empty() {
        return Ok(());
    }
    // Roots that fail to canonicalize (e.g. missing) simply don't match.
    let permitted = roots.iter().any(|root| {
        root.canonicalize()
            .map(|canonical| path.starts_with(&canonical))
            .unwrap_or(false)
    });
    if permitted {
        Ok(())
    } else {
        Err(PinakesError::InvalidOperation(format!(
            "path {} is not within any configured root directory",
            path.display()
        )))
    }
}
/// Import a single file into the library.
///
/// Pipeline: existence check → canonicalize → root-dir authorization →
/// media-type detection → content hashing (deduplicated by hash) →
/// metadata + thumbnail extraction on blocking threads → persist item,
/// custom fields, and an audit entry.
///
/// # Errors
/// - [`PinakesError::FileNotFound`] if `path` does not exist.
/// - [`PinakesError::InvalidOperation`] if `path` lies outside all roots.
/// - [`PinakesError::UnsupportedMediaType`] for unknown extensions.
pub async fn import_file(storage: &DynStorageBackend, path: &Path) -> Result<ImportResult> {
    // Check existence BEFORE canonicalizing: canonicalize() itself fails
    // on a missing path, which made the original `!path.exists()` check
    // (placed after it) unreachable and surfaced a raw IO error instead
    // of FileNotFound.
    if !path.exists() {
        return Err(PinakesError::FileNotFound(path.to_path_buf()));
    }
    let path = path.canonicalize()?;
    validate_path_in_roots(storage, &path).await?;
    let media_type = MediaType::from_path(&path)
        .ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?;
    let content_hash = compute_file_hash(&path).await?;
    // Deduplicate: identical content (by hash) resolves to the existing item.
    if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
        return Ok(ImportResult {
            media_id: existing.id,
            was_duplicate: true,
            path: path.clone(),
        });
    }
    let file_meta = std::fs::metadata(&path)?;
    let file_size = file_meta.len();
    // Metadata extraction does synchronous tag/container parsing; run it
    // off the async executor.
    let extracted = {
        let path_clone = path.clone();
        tokio::task::spawn_blocking(move || metadata::extract_metadata(&path_clone, media_type))
            .await
            .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
    };
    let file_name = path
        .file_name()
        .unwrap_or_default()
        .to_string_lossy()
        .to_string();
    let now = chrono::Utc::now();
    let media_id = MediaId::new();
    // Generate thumbnail for image types (blocking image/ffmpeg work).
    let thumb_path = {
        let source = path.clone();
        let thumb_dir = thumbnail::default_thumbnail_dir();
        tokio::task::spawn_blocking(move || {
            thumbnail::generate_thumbnail(media_id, &source, media_type, &thumb_dir)
        })
        .await
        .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
    };
    let item = MediaItem {
        id: media_id,
        path: path.clone(),
        file_name,
        media_type,
        content_hash,
        file_size,
        title: extracted.title,
        artist: extracted.artist,
        album: extracted.album,
        genre: extracted.genre,
        year: extracted.year,
        duration_secs: extracted.duration_secs,
        description: extracted.description,
        thumbnail_path: thumb_path,
        custom_fields: std::collections::HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await?;
    // Store extracted extra metadata as custom fields; failures here are
    // logged but do not fail the import.
    for (key, value) in &extracted.extra {
        let field = CustomField {
            field_type: CustomFieldType::Text,
            value: value.clone(),
        };
        if let Err(e) = storage.set_custom_field(media_id, key, &field).await {
            tracing::warn!(
                media_id = %media_id,
                field = %key,
                error = %e,
                "failed to store extracted metadata as custom field"
            );
        }
    }
    audit::record_action(
        storage,
        Some(media_id),
        AuditAction::Imported,
        Some(format!("path={}", path.display())),
    )
    .await?;
    info!(media_id = %media_id, path = %path.display(), "imported media file");
    Ok(ImportResult {
        media_id,
        was_duplicate: false,
        path,
    })
}
/// Return true if any normal component of `path` matches one of the
/// ignore `patterns`.
///
/// Matching is exact per path component, with one special case: the
/// pattern ".*" matches any dotfile component. The original also had a
/// `pattern starts with '.' && name starts with '.' && pattern == name`
/// branch, which was fully subsumed by the exact-match check and has
/// been removed (dead code; behavior unchanged).
pub(crate) fn should_ignore(path: &std::path::Path, patterns: &[String]) -> bool {
    path.components().any(|component| {
        // Only normal components (file/dir names) are matched; prefixes
        // and root markers never trigger an ignore.
        let std::path::Component::Normal(name) = component else {
            return false;
        };
        let name = name.to_string_lossy();
        patterns.iter().any(|pattern| {
            // ".*" is the only glob supported: it matches any dotfile.
            (pattern == ".*" && name.starts_with('.')) || name == pattern.as_str()
        })
    })
}
/// Default number of concurrent import tasks.
const DEFAULT_IMPORT_CONCURRENCY: usize = 8;
/// Recursively import every supported media file under `dir`, using the
/// default concurrency. See [`import_directory_with_concurrency`].
pub async fn import_directory(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
    import_directory_with_concurrency(storage, dir, ignore_patterns, DEFAULT_IMPORT_CONCURRENCY)
        .await
}
pub async fn import_directory_with_concurrency(
storage: &DynStorageBackend,
dir: &Path,
ignore_patterns: &[String],
concurrency: usize,
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
let concurrency = concurrency.clamp(1, 256);
let dir = dir.to_path_buf();
let patterns = ignore_patterns.to_vec();
let entries: Vec<PathBuf> = {
let dir = dir.clone();
tokio::task::spawn_blocking(move || {
walkdir::WalkDir::new(&dir)
.follow_links(true)
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| e.file_type().is_file())
.filter(|e| MediaType::from_path(e.path()).is_some())
.filter(|e| !should_ignore(e.path(), &patterns))
.map(|e| e.path().to_path_buf())
.collect()
})
.await
.map_err(|e| PinakesError::Io(std::io::Error::other(e)))?
};
let mut results = Vec::with_capacity(entries.len());
let mut join_set = tokio::task::JoinSet::new();
let mut pending_paths: Vec<PathBuf> = Vec::new();
for entry_path in entries {
let storage = storage.clone();
let path = entry_path.clone();
pending_paths.push(entry_path);
join_set.spawn(async move {
let result = import_file(&storage, &path).await;
(path, result)
});
// Limit concurrency by draining when we hit the cap
if join_set.len() >= concurrency
&& let Some(Ok((path, result))) = join_set.join_next().await
{
match result {
Ok(r) => results.push(Ok(r)),
Err(e) => {
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
results.push(Err(e));
}
}
}
}
// Drain remaining tasks
while let Some(Ok((path, result))) = join_set.join_next().await {
match result {
Ok(r) => results.push(Ok(r)),
Err(e) => {
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
results.push(Err(e));
}
}
}
Ok(results)
}

View file

@ -0,0 +1,201 @@
use std::path::{Path, PathBuf};
use serde::{Deserialize, Serialize};
use tracing::{info, warn};
use crate::error::Result;
use crate::hash::compute_file_hash;
use crate::model::{ContentHash, MediaId};
use crate::storage::DynStorageBackend;
/// Result of an orphan-detection pass.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OrphanReport {
    /// Media items whose files no longer exist on disk.
    pub orphaned_ids: Vec<MediaId>,
    /// Files on disk that are not tracked in the database.
    // NOTE(review): never populated by detect_orphans — confirm whether
    // untracked-file detection is implemented elsewhere.
    pub untracked_paths: Vec<PathBuf>,
    /// Files that appear to have moved (same hash, different path).
    // NOTE(review): never populated by detect_orphans — see above.
    pub moved_files: Vec<(MediaId, PathBuf, PathBuf)>,
}
/// How to handle detected orphans in [`resolve_orphans`].
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum OrphanAction {
    /// Remove the orphaned records from the database.
    Delete,
    /// Leave the records untouched.
    Ignore,
}
/// Result of an integrity-verification pass over media files.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VerificationReport {
    /// Count of files whose recomputed hash matched the stored one.
    pub verified: usize,
    /// (id, expected hash, actual hash) for mismatching files.
    pub mismatched: Vec<(MediaId, String, String)>,
    /// Items whose file no longer exists on disk.
    pub missing: Vec<MediaId>,
    /// (id, error message) for files that could not be hashed.
    pub errors: Vec<(MediaId, String)>,
}
/// Per-item integrity state; round-trips through `Display`/`FromStr`
/// using the same lowercase labels as the serde representation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum IntegrityStatus {
    Unverified,
    Verified,
    Mismatch,
    Missing,
}
impl std::fmt::Display for IntegrityStatus {
    /// Lowercase label; inverse of the `FromStr` impl below.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Unverified => "unverified",
            Self::Verified => "verified",
            Self::Mismatch => "mismatch",
            Self::Missing => "missing",
        };
        f.write_str(label)
    }
}
impl std::str::FromStr for IntegrityStatus {
    type Err = String;
    /// Parse the lowercase label produced by `Display`; any other input
    /// yields a descriptive error message.
    fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
        let status = match s {
            "unverified" => Self::Unverified,
            "verified" => Self::Verified,
            "mismatch" => Self::Mismatch,
            "missing" => Self::Missing,
            other => return Err(format!("unknown integrity status: {other}")),
        };
        Ok(status)
    }
}
/// Detect orphaned media items (files that no longer exist on disk).
pub async fn detect_orphans(storage: &DynStorageBackend) -> Result<OrphanReport> {
let media_paths = storage.list_media_paths().await?;
let mut orphaned_ids = Vec::new();
let moved_files = Vec::new();
for (id, path, _hash) in &media_paths {
if !path.exists() {
orphaned_ids.push(*id);
}
}
info!(
orphaned = orphaned_ids.len(),
total = media_paths.len(),
"orphan detection complete"
);
Ok(OrphanReport {
orphaned_ids,
untracked_paths: Vec::new(),
moved_files,
})
}
/// Resolve orphaned media items by deleting them from the database.
///
/// Returns the number of records deleted (0 for `Ignore`).
pub async fn resolve_orphans(
    storage: &DynStorageBackend,
    action: OrphanAction,
    ids: &[MediaId],
) -> Result<u64> {
    match action {
        OrphanAction::Ignore => {
            info!(count = ids.len(), "orphans ignored");
            Ok(0)
        }
        OrphanAction::Delete => {
            let count = storage.batch_delete_media(ids).await?;
            info!(count, "resolved orphans by deletion");
            Ok(count)
        }
    }
}
/// Verify integrity of media files by recomputing hashes and comparing.
///
/// `media_ids = None` checks the whole library; `Some(ids)` restricts
/// the pass to that subset.
pub async fn verify_integrity(
    storage: &DynStorageBackend,
    media_ids: Option<&[MediaId]>,
) -> Result<VerificationReport> {
    let all_paths = storage.list_media_paths().await?;
    // Restrict to the requested subset, if any.
    let targets: Vec<(MediaId, PathBuf, ContentHash)> = match media_ids {
        Some(ids) => {
            let wanted: std::collections::HashSet<MediaId> = ids.iter().copied().collect();
            all_paths
                .into_iter()
                .filter(|(id, _, _)| wanted.contains(id))
                .collect()
        }
        None => all_paths,
    };
    let mut report = VerificationReport {
        verified: 0,
        mismatched: Vec::new(),
        missing: Vec::new(),
        errors: Vec::new(),
    };
    for (id, path, expected) in targets {
        if !path.exists() {
            report.missing.push(id);
            continue;
        }
        match compute_file_hash(&path).await {
            Ok(actual) if actual.0 == expected.0 => report.verified += 1,
            Ok(actual) => report.mismatched.push((id, expected.0, actual.0)),
            Err(e) => report.errors.push((id, e.to_string())),
        }
    }
    info!(
        verified = report.verified,
        mismatched = report.mismatched.len(),
        missing = report.missing.len(),
        errors = report.errors.len(),
        "integrity verification complete"
    );
    Ok(report)
}
/// Clean up orphaned thumbnail files that don't correspond to any media item.
///
/// Thumbnails are matched to items by file stem == media id string.
/// Returns the number of files removed; individual removal failures are
/// logged and skipped.
pub async fn cleanup_orphaned_thumbnails(
    storage: &DynStorageBackend,
    thumbnail_dir: &Path,
) -> Result<usize> {
    let media_paths = storage.list_media_paths().await?;
    let known_ids: std::collections::HashSet<String> = media_paths
        .iter()
        .map(|(id, _, _)| id.0.to_string())
        .collect();
    let mut removed = 0;
    if thumbnail_dir.exists() {
        for entry in std::fs::read_dir(thumbnail_dir)?.flatten() {
            let path = entry.path();
            // Files without a UTF-8 stem can't be matched; leave them.
            let Some(stem) = path.file_stem().and_then(|s| s.to_str()) else {
                continue;
            };
            if known_ids.contains(stem) {
                continue;
            }
            match std::fs::remove_file(&path) {
                Ok(()) => removed += 1,
                Err(e) => {
                    warn!(path = %path.display(), error = %e, "failed to remove orphaned thumbnail");
                }
            }
        }
    }
    info!(removed, "orphaned thumbnail cleanup complete");
    Ok(removed)
}

View file

@ -0,0 +1,226 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_json::Value;
use tokio::sync::{RwLock, mpsc};
use tokio_util::sync::CancellationToken;
use uuid::Uuid;
use crate::model::MediaId;
/// The kinds of background work the job queue can run.
/// Serialized with a `type` discriminator in `snake_case`.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum JobKind {
    /// Scan for media; `None` presumably means the whole library — confirm
    /// against the scan module.
    Scan {
        path: Option<PathBuf>,
    },
    /// Generate thumbnails for the listed items.
    GenerateThumbnails {
        media_ids: Vec<MediaId>,
    },
    /// Recompute and compare content hashes for the listed items.
    VerifyIntegrity {
        media_ids: Vec<MediaId>,
    },
    /// Detect database entries whose files are gone.
    OrphanDetection,
    /// Remove thumbnails that no longer match any media item.
    CleanupThumbnails,
    /// Export the catalog to `destination` in the given format.
    Export {
        format: ExportFormat,
        destination: PathBuf,
    },
}
/// Output formats accepted by the `Export` job.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ExportFormat {
    Json,
    Csv,
}
/// Lifecycle state of a job; serialized with a `state` discriminator.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "state")]
pub enum JobStatus {
    /// Recorded but not yet picked up by a worker.
    Pending,
    /// Being executed; `progress`/`message` are set by the executor
    /// (progress scale is executor-defined — confirm it is 0.0–1.0).
    Running { progress: f32, message: String },
    /// Finished successfully with an executor-defined JSON result.
    Completed { result: Value },
    /// Finished with an error message.
    Failed { error: String },
    /// Cancelled via `JobQueue::cancel`.
    Cancelled,
}
/// A job record: what was requested, its current status, and timestamps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Job {
    pub id: Uuid,
    pub kind: JobKind,
    pub status: JobStatus,
    pub created_at: DateTime<Utc>,
    /// Bumped on every status transition.
    pub updated_at: DateTime<Utc>,
}
/// Internal unit of work handed from `JobQueue::submit` to a worker task.
struct WorkerItem {
    job_id: Uuid,
    kind: JobKind,
    cancel: CancellationToken,
}
/// In-memory async job queue: submitted jobs are fanned out to background
/// worker tasks over a bounded mpsc channel.
pub struct JobQueue {
    // Status of every job ever submitted (entries are never removed here).
    jobs: Arc<RwLock<HashMap<Uuid, Job>>>,
    // Live cancellation tokens; workers remove them once a job finishes.
    cancellations: Arc<RwLock<HashMap<Uuid, CancellationToken>>>,
    // Work channel feeding the workers.
    tx: mpsc::Sender<WorkerItem>,
}
impl JobQueue {
    /// Create a new job queue and spawn `worker_count` background workers.
    ///
    /// The `executor` callback is invoked for each job; it receives the job kind,
    /// a progress-reporting callback, and a cancellation token.
    /// Workers shut down once every `JobQueue` clone (and thus every sender)
    /// is dropped and the channel drains.
    pub fn new<F>(worker_count: usize, executor: F) -> Arc<Self>
    where
        F: Fn(
            Uuid,
            JobKind,
            CancellationToken,
            Arc<RwLock<HashMap<Uuid, Job>>>,
        ) -> tokio::task::JoinHandle<()>
            + Send
            + Sync
            + 'static,
    {
        let (tx, rx) = mpsc::channel::<WorkerItem>(256);
        // Workers share one receiver behind an async mutex so each queued
        // item is taken by exactly one worker.
        let rx = Arc::new(tokio::sync::Mutex::new(rx));
        let jobs: Arc<RwLock<HashMap<Uuid, Job>>> = Arc::new(RwLock::new(HashMap::new()));
        let cancellations: Arc<RwLock<HashMap<Uuid, CancellationToken>>> =
            Arc::new(RwLock::new(HashMap::new()));
        let executor = Arc::new(executor);
        for _ in 0..worker_count {
            let rx = rx.clone();
            let jobs = jobs.clone();
            let cancellations = cancellations.clone();
            let executor = executor.clone();
            tokio::spawn(async move {
                loop {
                    // Hold the receiver lock only while waiting for the next
                    // item; release it before the job runs.
                    let item = {
                        let mut guard = rx.lock().await;
                        guard.recv().await
                    };
                    // `None` means all senders dropped: worker shuts down.
                    let Some(item) = item else { break };
                    // Mark as running
                    {
                        let mut map = jobs.write().await;
                        if let Some(job) = map.get_mut(&item.job_id) {
                            job.status = JobStatus::Running {
                                progress: 0.0,
                                message: "starting".to_string(),
                            };
                            job.updated_at = Utc::now();
                        }
                    }
                    let handle = executor(item.job_id, item.kind, item.cancel, jobs.clone());
                    // Executor outcomes are reported through the jobs map, so
                    // the join result itself is intentionally ignored.
                    let _ = handle.await;
                    // Clean up cancellation token
                    cancellations.write().await.remove(&item.job_id);
                }
            });
        }
        Arc::new(Self {
            jobs,
            cancellations,
            tx,
        })
    }
    /// Submit a new job, returning its ID.
    ///
    /// The job is recorded as `Pending` before being handed to the workers.
    pub async fn submit(&self, kind: JobKind) -> Uuid {
        let id = Uuid::now_v7();
        let now = Utc::now();
        let cancel = CancellationToken::new();
        let job = Job {
            id,
            kind: kind.clone(),
            status: JobStatus::Pending,
            created_at: now,
            updated_at: now,
        };
        self.jobs.write().await.insert(id, job);
        self.cancellations.write().await.insert(id, cancel.clone());
        let item = WorkerItem {
            job_id: id,
            kind,
            cancel,
        };
        // If the channel is full we still record the job — it'll stay Pending
        let _ = self.tx.send(item).await;
        id
    }
    /// Get the status of a job.
    pub async fn status(&self, id: Uuid) -> Option<Job> {
        self.jobs.read().await.get(&id).cloned()
    }
    /// List all jobs, most recent first.
    pub async fn list(&self) -> Vec<Job> {
        let map = self.jobs.read().await;
        let mut jobs: Vec<Job> = map.values().cloned().collect();
        jobs.sort_by(|a, b| b.created_at.cmp(&a.created_at));
        jobs
    }
    /// Cancel a running or pending job.
    ///
    /// Returns `true` when a cancellation token was still registered for the
    /// job (i.e. it had not finished). NOTE(review): the status is flipped to
    /// `Cancelled` immediately, before the executor has necessarily observed
    /// the token — confirm executors check their token promptly.
    pub async fn cancel(&self, id: Uuid) -> bool {
        if let Some(token) = self.cancellations.read().await.get(&id) {
            token.cancel();
            let mut map = self.jobs.write().await;
            if let Some(job) = map.get_mut(&id) {
                job.status = JobStatus::Cancelled;
                job.updated_at = Utc::now();
            }
            true
        } else {
            false
        }
    }
    /// Update a job's progress. Called by executors.
    pub async fn update_progress(
        jobs: &Arc<RwLock<HashMap<Uuid, Job>>>,
        id: Uuid,
        progress: f32,
        message: String,
    ) {
        let mut map = jobs.write().await;
        if let Some(job) = map.get_mut(&id) {
            job.status = JobStatus::Running { progress, message };
            job.updated_at = Utc::now();
        }
    }
    /// Mark a job as completed.
    pub async fn complete(jobs: &Arc<RwLock<HashMap<Uuid, Job>>>, id: Uuid, result: Value) {
        let mut map = jobs.write().await;
        if let Some(job) = map.get_mut(&id) {
            job.status = JobStatus::Completed { result };
            job.updated_at = Utc::now();
        }
    }
    /// Mark a job as failed.
    pub async fn fail(jobs: &Arc<RwLock<HashMap<Uuid, Job>>>, id: Uuid, error: String) {
        let mut map = jobs.write().await;
        if let Some(job) = map.get_mut(&id) {
            job.status = JobStatus::Failed { error };
            job.updated_at = Utc::now();
        }
    }
}

View file

@ -0,0 +1,21 @@
pub mod audit;
pub mod cache;
pub mod collections;
pub mod config;
pub mod error;
pub mod events;
pub mod export;
pub mod hash;
pub mod import;
pub mod integrity;
pub mod jobs;
pub mod media_type;
pub mod metadata;
pub mod model;
pub mod opener;
pub mod scan;
pub mod scheduler;
pub mod search;
pub mod storage;
pub mod tags;
pub mod thumbnail;

View file

@ -0,0 +1,209 @@
use std::path::Path;
use serde::{Deserialize, Serialize};
/// Concrete file formats the library recognizes, grouped below by broad
/// category (see `MediaCategory`). Serialized in lowercase.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MediaType {
    // Audio
    Mp3,
    Flac,
    Ogg,
    Wav,
    Aac,
    Opus,
    // Video
    Mp4,
    Mkv,
    Avi,
    Webm,
    // Documents
    Pdf,
    Epub,
    Djvu,
    // Text
    Markdown,
    PlainText,
    // Images
    Jpeg,
    Png,
    Gif,
    Webp,
    Svg,
    Avif,
    Tiff,
    Bmp,
    // RAW Images
    Cr2,
    Nef,
    Arw,
    Dng,
    Orf,
    Rw2,
    // HEIC/HEIF
    Heic,
}
/// Broad grouping of media types.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MediaCategory {
    Audio,
    Video,
    Document,
    Text,
    Image,
}
impl MediaType {
    /// Every supported media type. Extension lookup iterates this list and
    /// consults `extensions()`, so there is a single extension table that
    /// `from_extension` and `extensions` cannot drift apart from.
    const ALL: &'static [MediaType] = &[
        Self::Mp3,
        Self::Flac,
        Self::Ogg,
        Self::Wav,
        Self::Aac,
        Self::Opus,
        Self::Mp4,
        Self::Mkv,
        Self::Avi,
        Self::Webm,
        Self::Pdf,
        Self::Epub,
        Self::Djvu,
        Self::Markdown,
        Self::PlainText,
        Self::Jpeg,
        Self::Png,
        Self::Gif,
        Self::Webp,
        Self::Svg,
        Self::Avif,
        Self::Tiff,
        Self::Bmp,
        Self::Cr2,
        Self::Nef,
        Self::Arw,
        Self::Dng,
        Self::Orf,
        Self::Rw2,
        Self::Heic,
    ];

    /// Resolve a media type from a file extension (case-insensitive, without
    /// the leading dot). Returns `None` for unknown extensions.
    pub fn from_extension(ext: &str) -> Option<Self> {
        let ext = ext.to_ascii_lowercase();
        Self::ALL
            .iter()
            .copied()
            .find(|t| t.extensions().contains(&ext.as_str()))
    }

    /// Resolve a media type from a path's extension, if it has one.
    pub fn from_path(path: &Path) -> Option<Self> {
        path.extension()
            .and_then(|e| e.to_str())
            .and_then(Self::from_extension)
    }

    /// The MIME type advertised for this media type.
    pub fn mime_type(&self) -> &'static str {
        match self {
            Self::Mp3 => "audio/mpeg",
            Self::Flac => "audio/flac",
            Self::Ogg => "audio/ogg",
            Self::Wav => "audio/wav",
            Self::Aac => "audio/aac",
            Self::Opus => "audio/opus",
            Self::Mp4 => "video/mp4",
            Self::Mkv => "video/x-matroska",
            Self::Avi => "video/x-msvideo",
            Self::Webm => "video/webm",
            Self::Pdf => "application/pdf",
            Self::Epub => "application/epub+zip",
            Self::Djvu => "image/vnd.djvu",
            Self::Markdown => "text/markdown",
            Self::PlainText => "text/plain",
            Self::Jpeg => "image/jpeg",
            Self::Png => "image/png",
            Self::Gif => "image/gif",
            Self::Webp => "image/webp",
            Self::Svg => "image/svg+xml",
            Self::Avif => "image/avif",
            Self::Tiff => "image/tiff",
            Self::Bmp => "image/bmp",
            Self::Cr2 => "image/x-canon-cr2",
            Self::Nef => "image/x-nikon-nef",
            Self::Arw => "image/x-sony-arw",
            Self::Dng => "image/x-adobe-dng",
            Self::Orf => "image/x-olympus-orf",
            Self::Rw2 => "image/x-panasonic-rw2",
            Self::Heic => "image/heic",
        }
    }

    /// The broad category this type belongs to.
    pub fn category(&self) -> MediaCategory {
        match self {
            Self::Mp3 | Self::Flac | Self::Ogg | Self::Wav | Self::Aac | Self::Opus => {
                MediaCategory::Audio
            }
            Self::Mp4 | Self::Mkv | Self::Avi | Self::Webm => MediaCategory::Video,
            Self::Pdf | Self::Epub | Self::Djvu => MediaCategory::Document,
            Self::Markdown | Self::PlainText => MediaCategory::Text,
            Self::Jpeg
            | Self::Png
            | Self::Gif
            | Self::Webp
            | Self::Svg
            | Self::Avif
            | Self::Tiff
            | Self::Bmp
            | Self::Cr2
            | Self::Nef
            | Self::Arw
            | Self::Dng
            | Self::Orf
            | Self::Rw2
            | Self::Heic => MediaCategory::Image,
        }
    }

    /// The lowercase file extensions recognized for this type.
    pub fn extensions(&self) -> &'static [&'static str] {
        match self {
            Self::Mp3 => &["mp3"],
            Self::Flac => &["flac"],
            Self::Ogg => &["ogg", "oga"],
            Self::Wav => &["wav"],
            Self::Aac => &["aac", "m4a"],
            Self::Opus => &["opus"],
            Self::Mp4 => &["mp4", "m4v"],
            Self::Mkv => &["mkv"],
            Self::Avi => &["avi"],
            Self::Webm => &["webm"],
            Self::Pdf => &["pdf"],
            Self::Epub => &["epub"],
            Self::Djvu => &["djvu"],
            Self::Markdown => &["md", "markdown"],
            Self::PlainText => &["txt", "text"],
            Self::Jpeg => &["jpg", "jpeg"],
            Self::Png => &["png"],
            Self::Gif => &["gif"],
            Self::Webp => &["webp"],
            Self::Svg => &["svg"],
            Self::Avif => &["avif"],
            Self::Tiff => &["tiff", "tif"],
            Self::Bmp => &["bmp"],
            Self::Cr2 => &["cr2"],
            Self::Nef => &["nef"],
            Self::Arw => &["arw"],
            Self::Dng => &["dng"],
            Self::Orf => &["orf"],
            Self::Rw2 => &["rw2"],
            Self::Heic => &["heic", "heif"],
        }
    }

    /// Returns true if this is a RAW image format.
    pub fn is_raw(&self) -> bool {
        matches!(
            self,
            Self::Cr2 | Self::Nef | Self::Arw | Self::Dng | Self::Orf | Self::Rw2
        )
    }
}

View file

@ -0,0 +1,81 @@
use std::path::Path;
use lofty::file::{AudioFile, TaggedFileExt};
use lofty::tag::Accessor;
use crate::error::{PinakesError, Result};
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Extracts tag and stream metadata from audio files via `lofty`.
pub struct AudioExtractor;

impl MetadataExtractor for AudioExtractor {
    /// Read tags (title/artist/album/genre/year, plus track/disc/comment
    /// extras) and stream properties (duration/bitrate/sample rate/channels).
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let tagged_file = lofty::read_from_path(path)
            .map_err(|e| PinakesError::MetadataExtraction(format!("audio metadata: {e}")))?;
        let mut meta = ExtractedMetadata::default();
        // Prefer the primary tag; fall back to whichever tag exists. One
        // lookup is shared by all tag reads below.
        if let Some(tag) = tagged_file
            .primary_tag()
            .or_else(|| tagged_file.first_tag())
        {
            meta.title = tag.title().map(|s| s.to_string());
            meta.artist = tag.artist().map(|s| s.to_string());
            meta.album = tag.album().map(|s| s.to_string());
            meta.genre = tag.genre().map(|s| s.to_string());
            meta.year = tag.year().map(|y| y as i32);
            if let Some(track) = tag.track() {
                meta.extra
                    .insert("track_number".to_string(), track.to_string());
            }
            if let Some(disc) = tag.disk() {
                meta.extra
                    .insert("disc_number".to_string(), disc.to_string());
            }
            if let Some(comment) = tag.comment() {
                meta.extra
                    .insert("comment".to_string(), comment.to_string());
            }
        }
        let properties = tagged_file.properties();
        let duration = properties.duration();
        // A zero duration means lofty could not determine it; omit instead.
        if !duration.is_zero() {
            meta.duration_secs = Some(duration.as_secs_f64());
        }
        if let Some(bitrate) = properties.audio_bitrate() {
            meta.extra
                .insert("bitrate".to_string(), format!("{bitrate} kbps"));
        }
        if let Some(sample_rate) = properties.sample_rate() {
            meta.extra
                .insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
        }
        if let Some(channels) = properties.channels() {
            meta.extra
                .insert("channels".to_string(), channels.to_string());
        }
        Ok(meta)
    }

    fn supported_types(&self) -> &[MediaType] {
        &[
            MediaType::Mp3,
            MediaType::Flac,
            MediaType::Ogg,
            MediaType::Wav,
            MediaType::Aac,
            MediaType::Opus,
        ]
    }
}

View file

@ -0,0 +1,192 @@
use std::path::Path;
use crate::error::{PinakesError, Result};
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Dispatches document metadata extraction to a per-format parser.
pub struct DocumentExtractor;

impl MetadataExtractor for DocumentExtractor {
    /// Route to the PDF/EPUB/DjVu parser based on the path's extension;
    /// anything else yields empty metadata.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let detected = MediaType::from_path(path);
        match detected {
            Some(MediaType::Pdf) => extract_pdf(path),
            Some(MediaType::Epub) => extract_epub(path),
            Some(MediaType::Djvu) => extract_djvu(path),
            _ => Ok(ExtractedMetadata::default()),
        }
    }

    fn supported_types(&self) -> &[MediaType] {
        &[MediaType::Pdf, MediaType::Epub, MediaType::Djvu]
    }
}
/// Extract title/author/subject (and creator/producer/page-count extras)
/// from a PDF's document information dictionary.
fn extract_pdf(path: &Path) -> Result<ExtractedMetadata> {
    let doc = lopdf::Document::load(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("PDF load: {e}")))?;
    let mut meta = ExtractedMetadata::default();
    // Find the Info dictionary via the trailer. The trailer entry may be an
    // indirect reference that must be resolved, or an inline dictionary.
    if let Ok(info_ref) = doc.trailer.get(b"Info") {
        let info_obj = if let Ok(reference) = info_ref.as_reference() {
            doc.get_object(reference).ok()
        } else {
            Some(info_ref)
        };
        if let Some(obj) = info_obj
            && let Ok(dict) = obj.as_dict()
        {
            // Missing keys or non-string values are simply skipped.
            if let Ok(title) = dict.get(b"Title") {
                meta.title = pdf_object_to_string(title);
            }
            // PDF "Author" maps onto the generic `artist` field.
            if let Ok(author) = dict.get(b"Author") {
                meta.artist = pdf_object_to_string(author);
            }
            if let Ok(subject) = dict.get(b"Subject") {
                meta.description = pdf_object_to_string(subject);
            }
            if let Ok(creator) = dict.get(b"Creator") {
                meta.extra.insert(
                    "creator".to_string(),
                    pdf_object_to_string(creator).unwrap_or_default(),
                );
            }
            if let Ok(producer) = dict.get(b"Producer") {
                meta.extra.insert(
                    "producer".to_string(),
                    pdf_object_to_string(producer).unwrap_or_default(),
                );
            }
        }
    }
    // Page count
    let page_count = doc.get_pages().len();
    if page_count > 0 {
        meta.extra
            .insert("page_count".to_string(), page_count.to_string());
    }
    Ok(meta)
}
/// Best-effort conversion of a PDF object to text: string and name objects
/// are decoded lossily as UTF-8; anything else yields `None`.
fn pdf_object_to_string(obj: &lopdf::Object) -> Option<String> {
    let bytes = match obj {
        lopdf::Object::String(bytes, _) => bytes.as_slice(),
        lopdf::Object::Name(name) => name.as_slice(),
        _ => return None,
    };
    Some(String::from_utf8_lossy(bytes).into_owned())
}
/// Pull core fields (title/creator/description) plus a few secondary ones
/// out of an EPUB's package metadata.
fn extract_epub(path: &Path) -> Result<ExtractedMetadata> {
    let doc = epub::doc::EpubDoc::new(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("EPUB parse: {e}")))?;
    let mut meta = ExtractedMetadata {
        title: doc.mdata("title").map(|item| item.value.clone()),
        artist: doc.mdata("creator").map(|item| item.value.clone()),
        description: doc.mdata("description").map(|item| item.value.clone()),
        ..Default::default()
    };
    // Secondary fields land in the free-form extras map under their own key.
    for key in ["language", "publisher", "date"] {
        if let Some(item) = doc.mdata(key) {
            meta.extra.insert(key.to_string(), item.value.clone());
        }
    }
    Ok(meta)
}
/// Best-effort DjVu metadata extraction by scanning the raw file bytes for
/// annotation S-expressions; there is no full DjVu container parser here,
/// so this is heuristic by design.
fn extract_djvu(path: &Path) -> Result<ExtractedMetadata> {
    // DjVu files contain metadata in SEXPR (S-expression) format within
    // ANTa/ANTz chunks, or in the DIRM chunk. We parse the raw bytes to
    // extract any metadata fields we can find.
    let data = std::fs::read(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("DjVu read: {e}")))?;
    let mut meta = ExtractedMetadata::default();
    // DjVu files start with "AT&T" magic followed by FORM:DJVU or FORM:DJVM
    if data.len() < 16 {
        // Too small to hold even a header; return empty metadata.
        return Ok(meta);
    }
    // Search for metadata annotations in the file. DjVu metadata is stored
    // as S-expressions like (metadata (key "value") ...) within ANTa chunks.
    // Lossy decoding is fine — only ASCII landmarks are searched for.
    let content = String::from_utf8_lossy(&data);
    // Look for (metadata ...) blocks
    if let Some(meta_start) = content.find("(metadata") {
        let remainder = &content[meta_start..];
        // Extract key-value pairs like (title "Some Title")
        extract_djvu_field(remainder, "title", &mut meta.title);
        extract_djvu_field(remainder, "author", &mut meta.artist);
        // Prefer "subject", fall back to "description".
        let mut desc = None;
        extract_djvu_field(remainder, "subject", &mut desc);
        if desc.is_none() {
            extract_djvu_field(remainder, "description", &mut desc);
        }
        meta.description = desc;
        let mut year_str = None;
        extract_djvu_field(remainder, "year", &mut year_str);
        if let Some(ref y) = year_str {
            // Non-numeric year strings are silently dropped.
            meta.year = y.parse().ok();
        }
        let mut creator = None;
        extract_djvu_field(remainder, "creator", &mut creator);
        if let Some(c) = creator {
            meta.extra.insert("creator".to_string(), c);
        }
    }
    // Also check for booklet-style metadata that some DjVu encoders write
    // outside the metadata SEXPR
    if meta.title.is_none()
        && let Some(title_start) = content.find("(bookmarks")
    {
        let remainder = &content[title_start..];
        // First bookmark title is often the document title
        if let Some(q1) = remainder.find('"') {
            let after_q1 = &remainder[q1 + 1..];
            if let Some(q2) = after_q1.find('"') {
                let val = &after_q1[..q2];
                if !val.is_empty() {
                    meta.title = Some(val.to_string());
                }
            }
        }
    }
    Ok(meta)
}
/// Scan `sexpr` for an S-expression entry like `(key "value")` and, when a
/// non-empty quoted value is found, store it in `out`.
///
/// The key must be followed by a non-alphanumeric character, so searching
/// for `title` does not match `(titlepage ...)`; false prefix matches are
/// skipped and the scan continues.
fn extract_djvu_field(sexpr: &str, key: &str, out: &mut Option<String>) {
    let pattern = format!("({key}");
    let mut search_from = 0;
    while let Some(found) = sexpr[search_from..].find(&pattern) {
        let after_key = &sexpr[search_from + found + pattern.len()..];
        // A longer key sharing this prefix: skip past it and keep looking.
        if after_key
            .chars()
            .next()
            .is_some_and(|c| c.is_ascii_alphanumeric())
        {
            search_from += found + pattern.len();
            continue;
        }
        // Take the first quoted span after the key as the value; empty
        // values leave `out` untouched.
        if let Some(q1) = after_key.find('"') {
            let after_q1 = &after_key[q1 + 1..];
            if let Some(q2) = after_q1.find('"') {
                let val = &after_q1[..q2];
                if !val.is_empty() {
                    *out = Some(val.to_string());
                }
            }
        }
        return;
    }
}

View file

@ -0,0 +1,213 @@
use std::path::Path;
use crate::error::Result;
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Extracts EXIF metadata (dimensions, camera, exposure, GPS, ...) from
/// image files via `kamadak-exif`.
pub struct ImageExtractor;

/// Read a tag's display value from the primary IFD, returning `None` when
/// the tag is absent or renders as an empty string.
fn display_field(exif_data: &exif::Exif, tag: exif::Tag) -> Option<String> {
    let field = exif_data.get_field(tag, exif::In::PRIMARY)?;
    let val = field.display_value().to_string();
    if val.is_empty() { None } else { Some(val) }
}

/// Like `display_field`, but for string-typed tags whose display value is
/// quoted: rejects empty quoted values (`""`) and strips surrounding quotes.
fn quoted_field(exif_data: &exif::Exif, tag: exif::Tag) -> Option<String> {
    let val = display_field(exif_data, tag)?;
    if val == "\"\"" {
        None
    } else {
        Some(val.trim_matches('"').to_string())
    }
}

impl MetadataExtractor for ImageExtractor {
    /// Extract EXIF data from the image at `path`. Images without readable
    /// EXIF are not an error; they simply yield empty metadata.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let mut meta = ExtractedMetadata::default();
        let file = std::fs::File::open(path)?;
        let mut buf_reader = std::io::BufReader::new(&file);
        let exif_data = match exif::Reader::new().read_from_container(&mut buf_reader) {
            Ok(exif) => exif,
            Err(_) => return Ok(meta),
        };
        // Dimensions: prefer EXIF pixel dimensions, fall back to the TIFF
        // image width/length tags.
        if let Some(width) = exif_data
            .get_field(exif::Tag::PixelXDimension, exif::In::PRIMARY)
            .or_else(|| exif_data.get_field(exif::Tag::ImageWidth, exif::In::PRIMARY))
            && let Some(w) = field_to_u32(width)
        {
            meta.extra.insert("width".to_string(), w.to_string());
        }
        if let Some(height) = exif_data
            .get_field(exif::Tag::PixelYDimension, exif::In::PRIMARY)
            .or_else(|| exif_data.get_field(exif::Tag::ImageLength, exif::In::PRIMARY))
            && let Some(h) = field_to_u32(height)
        {
            meta.extra.insert("height".to_string(), h.to_string());
        }
        // Date taken: DateTimeOriginal with DateTime as fallback.
        if let Some(date) = exif_data
            .get_field(exif::Tag::DateTimeOriginal, exif::In::PRIMARY)
            .or_else(|| exif_data.get_field(exif::Tag::DateTime, exif::In::PRIMARY))
        {
            let val = date.display_value().to_string();
            if !val.is_empty() {
                meta.extra.insert("date_taken".to_string(), val);
            }
        }
        // GPS coordinates: require value + hemisphere reference on both axes.
        if let (Some(lat), Some(lat_ref), Some(lon), Some(lon_ref)) = (
            exif_data.get_field(exif::Tag::GPSLatitude, exif::In::PRIMARY),
            exif_data.get_field(exif::Tag::GPSLatitudeRef, exif::In::PRIMARY),
            exif_data.get_field(exif::Tag::GPSLongitude, exif::In::PRIMARY),
            exif_data.get_field(exif::Tag::GPSLongitudeRef, exif::In::PRIMARY),
        ) && let (Some(lat_val), Some(lon_val)) =
            (dms_to_decimal(lat, lat_ref), dms_to_decimal(lon, lon_ref))
        {
            meta.extra
                .insert("gps_latitude".to_string(), format!("{lat_val:.6}"));
            meta.extra
                .insert("gps_longitude".to_string(), format!("{lon_val:.6}"));
        }
        // Plain display-value tags, all handled identically: present and
        // non-empty -> stored under the given extras key.
        let simple_fields = [
            (exif::Tag::Make, "camera_make"),
            (exif::Tag::Model, "camera_model"),
            (exif::Tag::PhotographicSensitivity, "iso"),
            (exif::Tag::ExposureTime, "exposure_time"),
            (exif::Tag::FNumber, "f_number"),
            (exif::Tag::FocalLength, "focal_length"),
            (exif::Tag::Flash, "flash"),
            (exif::Tag::Orientation, "orientation"),
            (exif::Tag::Software, "software"),
        ];
        for (tag, key) in simple_fields {
            if let Some(val) = display_field(&exif_data, tag) {
                meta.extra.insert(key.to_string(), val);
            }
        }
        // String-typed tags (rendered quoted by the exif crate).
        if let Some(lens) = quoted_field(&exif_data, exif::Tag::LensModel) {
            meta.extra.insert("lens_model".to_string(), lens);
        }
        // ImageDescription doubles as the title, Artist as the artist, and
        // Copyright as the description.
        meta.title = quoted_field(&exif_data, exif::Tag::ImageDescription);
        meta.artist = quoted_field(&exif_data, exif::Tag::Artist);
        meta.description = quoted_field(&exif_data, exif::Tag::Copyright);
        Ok(meta)
    }

    fn supported_types(&self) -> &[MediaType] {
        &[
            MediaType::Jpeg,
            MediaType::Png,
            MediaType::Gif,
            MediaType::Webp,
            MediaType::Avif,
            MediaType::Tiff,
            MediaType::Bmp,
            // RAW formats (TIFF-based, kamadak-exif handles these)
            MediaType::Cr2,
            MediaType::Nef,
            MediaType::Arw,
            MediaType::Dng,
            MediaType::Orf,
            MediaType::Rw2,
            // HEIC
            MediaType::Heic,
        ]
    }
}
/// Interpret a numeric EXIF field as `u32`, accepting the first LONG or
/// SHORT value; any other value type yields `None`.
fn field_to_u32(field: &exif::Field) -> Option<u32> {
    match &field.value {
        exif::Value::Long(values) => values.first().copied(),
        exif::Value::Short(values) => values.first().map(|&v| u32::from(v)),
        _ => None,
    }
}
fn dms_to_decimal(dms_field: &exif::Field, ref_field: &exif::Field) -> Option<f64> {
if let exif::Value::Rational(ref rationals) = dms_field.value
&& rationals.len() >= 3
{
let degrees = rationals[0].to_f64();
let minutes = rationals[1].to_f64();
let seconds = rationals[2].to_f64();
let mut decimal = degrees + minutes / 60.0 + seconds / 3600.0;
let ref_str = ref_field.display_value().to_string();
if ref_str.contains('S') || ref_str.contains('W') {
decimal = -decimal;
}
return Some(decimal);
}
None
}

View file

@ -0,0 +1,40 @@
use std::path::Path;
use crate::error::Result;
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Reads YAML front matter from Markdown and plain-text files.
pub struct MarkdownExtractor;

impl MetadataExtractor for MarkdownExtractor {
    /// Parse the file's YAML front matter and map the well-known keys
    /// (`title`, `author`, `description`, `date`) onto metadata fields.
    /// Files without front matter yield empty metadata.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let content = std::fs::read_to_string(path)?;
        let parsed = gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(&content);
        let mut meta = ExtractedMetadata::default();
        let front_matter = parsed.ok().and_then(|p| p.data);
        if let Some(gray_matter::Pod::Hash(map)) = front_matter {
            // Only string-valued entries are taken; other Pod types are ignored.
            let get_str = |key: &str| match map.get(key) {
                Some(gray_matter::Pod::String(s)) => Some(s.clone()),
                _ => None,
            };
            meta.title = get_str("title");
            meta.artist = get_str("author");
            meta.description = get_str("description");
            if let Some(date) = get_str("date") {
                meta.extra.insert("date".to_string(), date);
            }
        }
        Ok(meta)
    }

    fn supported_types(&self) -> &[MediaType] {
        &[MediaType::Markdown, MediaType::PlainText]
    }
}

View file

@ -0,0 +1,46 @@
pub mod audio;
pub mod document;
pub mod image;
pub mod markdown;
pub mod video;
use std::collections::HashMap;
use std::path::Path;
use crate::error::Result;
use crate::media_type::MediaType;
/// Normalized metadata pulled from a media file; fields that don't apply to
/// a given format stay `None`/empty.
#[derive(Debug, Clone, Default)]
pub struct ExtractedMetadata {
    pub title: Option<String>,
    /// Artist for audio, author/creator for documents, EXIF Artist for images.
    pub artist: Option<String>,
    pub album: Option<String>,
    pub genre: Option<String>,
    pub year: Option<i32>,
    pub duration_secs: Option<f64>,
    pub description: Option<String>,
    /// Format-specific key/value pairs (bitrate, resolution, EXIF data, ...).
    pub extra: HashMap<String, String>,
}
/// A format-family metadata extractor.
pub trait MetadataExtractor: Send + Sync {
    /// Parse metadata from the file at `path`.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata>;
    /// The media types this extractor should be used for.
    fn supported_types(&self) -> &[MediaType];
}
/// The extractor registry, in dispatch priority order. All extractors are
/// stateless unit structs, so static references suffice — no per-call heap
/// allocation of boxed trait objects.
static EXTRACTORS: &[&dyn MetadataExtractor] = &[
    &audio::AudioExtractor,
    &document::DocumentExtractor,
    &video::VideoExtractor,
    &markdown::MarkdownExtractor,
    &image::ImageExtractor,
];

/// Extract metadata from `path` using the first registered extractor that
/// supports `media_type`; unsupported types yield empty metadata rather
/// than an error.
pub fn extract_metadata(path: &Path, media_type: MediaType) -> Result<ExtractedMetadata> {
    match EXTRACTORS
        .iter()
        .find(|e| e.supported_types().contains(&media_type))
    {
        Some(extractor) => extractor.extract(path),
        None => Ok(ExtractedMetadata::default()),
    }
}

View file

@ -0,0 +1,120 @@
use std::path::Path;
use crate::error::{PinakesError, Result};
use crate::media_type::MediaType;
use super::{ExtractedMetadata, MetadataExtractor};
/// Extracts container-level metadata from video files.
pub struct VideoExtractor;

impl MetadataExtractor for VideoExtractor {
    /// Dispatch by container: Matroska and MP4 have dedicated parsers; the
    /// other supported containers currently yield empty metadata.
    fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
        let detected = MediaType::from_path(path);
        if detected == Some(MediaType::Mkv) {
            extract_mkv(path)
        } else if detected == Some(MediaType::Mp4) {
            extract_mp4(path)
        } else {
            Ok(ExtractedMetadata::default())
        }
    }

    fn supported_types(&self) -> &[MediaType] {
        &[
            MediaType::Mp4,
            MediaType::Mkv,
            MediaType::Avi,
            MediaType::Webm,
        ]
    }
}
/// Read title, duration, and per-track technical details from a Matroska
/// container.
fn extract_mkv(path: &Path) -> Result<ExtractedMetadata> {
    let file = std::fs::File::open(path)?;
    let mkv = matroska::Matroska::open(file)
        .map_err(|e| PinakesError::MetadataExtraction(format!("MKV parse: {e}")))?;
    let mut meta = ExtractedMetadata {
        title: mkv.info.title.clone(),
        duration_secs: mkv.info.duration.map(|dur| dur.as_secs_f64()),
        ..Default::default()
    };
    // Record resolution and codec info from every video/audio track;
    // subtitle and other track kinds are skipped.
    for track in &mkv.tracks {
        let codec = (!track.codec_id.is_empty()).then(|| track.codec_id.clone());
        match &track.settings {
            matroska::Settings::Video(video) => {
                let resolution = format!("{}x{}", video.pixel_width, video.pixel_height);
                meta.extra.insert("resolution".to_string(), resolution);
                if let Some(codec) = codec {
                    meta.extra.insert("video_codec".to_string(), codec);
                }
            }
            matroska::Settings::Audio(audio) => {
                meta.extra.insert(
                    "sample_rate".to_string(),
                    format!("{} Hz", audio.sample_rate as u32),
                );
                meta.extra
                    .insert("channels".to_string(), audio.channels.to_string());
                if let Some(codec) = codec {
                    meta.extra.insert("audio_codec".to_string(), codec);
                }
            }
            _ => {}
        }
    }
    Ok(meta)
}
/// Read MP4 metadata through `lofty`: tag fields plus stream properties.
fn extract_mp4(path: &Path) -> Result<ExtractedMetadata> {
    use lofty::file::{AudioFile, TaggedFileExt};
    use lofty::tag::Accessor;
    let tagged_file = lofty::read_from_path(path)
        .map_err(|e| PinakesError::MetadataExtraction(format!("MP4 metadata: {e}")))?;
    let mut meta = ExtractedMetadata::default();
    // Prefer the primary tag, falling back to whichever tag exists.
    if let Some(tag) = tagged_file
        .primary_tag()
        .or_else(|| tagged_file.first_tag())
    {
        meta.title = tag.title().map(|s| s.to_string());
        meta.artist = tag.artist().map(|s| s.to_string());
        meta.album = tag.album().map(|s| s.to_string());
        meta.genre = tag.genre().map(|s| s.to_string());
        meta.year = tag.year().map(|y| y as i32);
    }
    let properties = tagged_file.properties();
    let duration = properties.duration();
    // A zero duration means lofty could not determine it; omit instead.
    if !duration.is_zero() {
        meta.duration_secs = Some(duration.as_secs_f64());
    }
    if let Some(bitrate) = properties.audio_bitrate() {
        meta.extra
            .insert("audio_bitrate".to_string(), format!("{bitrate} kbps"));
    }
    if let Some(sample_rate) = properties.sample_rate() {
        meta.extra
            .insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
    }
    if let Some(channels) = properties.channels() {
        meta.extra
            .insert("channels".to_string(), channels.to_string());
    }
    Ok(meta)
}

View file

@ -0,0 +1,191 @@
use std::collections::HashMap;
use std::fmt;
use std::path::PathBuf;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::media_type::MediaType;
/// Unique identifier for a media item (UUIDv7, so ids sort roughly by
/// creation time).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct MediaId(pub Uuid);
impl MediaId {
    /// Generate a fresh id.
    pub fn new() -> Self {
        Self(Uuid::now_v7())
    }
}
impl fmt::Display for MediaId {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}
impl Default for MediaId {
    fn default() -> Self {
        Self::new()
    }
}
/// Hex-encoded digest of a file's contents.
/// NOTE(review): the algorithm is defined by the hashing module (the crate
/// depends on `blake3`) — confirm there before relying on a specific format.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ContentHash(pub String);
impl ContentHash {
    /// Wrap an already-computed hex digest.
    pub fn new(hex: String) -> Self {
        Self(hex)
    }
}
impl fmt::Display for ContentHash {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}
/// A single cataloged media file together with its extracted metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MediaItem {
    pub id: MediaId,
    /// Location on disk — whether absolute or library-relative is decided by
    /// the scanner; confirm there.
    pub path: PathBuf,
    pub file_name: String,
    pub media_type: MediaType,
    /// Digest of the file contents at import/scan time.
    pub content_hash: ContentHash,
    pub file_size: u64,
    pub title: Option<String>,
    pub artist: Option<String>,
    pub album: Option<String>,
    pub genre: Option<String>,
    pub year: Option<i32>,
    pub duration_secs: Option<f64>,
    pub description: Option<String>,
    pub thumbnail_path: Option<PathBuf>,
    /// User-defined fields keyed by field name.
    pub custom_fields: HashMap<String, CustomField>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}
/// A user-defined metadata value attached to a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomField {
    pub field_type: CustomFieldType,
    /// Stored as a string regardless of `field_type`; interpretation is up
    /// to the consumer.
    pub value: String,
}
/// Declared type of a custom field's value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CustomFieldType {
    Text,
    Number,
    Date,
    Boolean,
}
/// A tag; `parent_id` allows hierarchical tag trees.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tag {
    pub id: Uuid,
    pub name: String,
    pub parent_id: Option<Uuid>,
    pub created_at: DateTime<Utc>,
}
/// A named grouping of media items.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collection {
    pub id: Uuid,
    pub name: String,
    pub description: Option<String>,
    pub kind: CollectionKind,
    /// Presumably only meaningful for `Virtual` collections, evaluated by
    /// the search layer — confirm against the search module.
    pub filter_query: Option<String>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}
/// How a collection's membership is determined.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CollectionKind {
    /// Items are added and removed explicitly.
    Manual,
    /// Membership is derived from `filter_query`.
    Virtual,
}
/// Membership record linking a media item to a collection, with an explicit
/// ordering position.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionMember {
    pub collection_id: Uuid,
    pub media_id: MediaId,
    pub position: i32,
    pub added_at: DateTime<Utc>,
}
/// One audit-log row; `media_id` is `None` for events not tied to a
/// specific item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEntry {
    pub id: Uuid,
    pub media_id: Option<MediaId>,
    pub action: AuditAction,
    pub details: Option<String>,
    pub timestamp: DateTime<Utc>,
}
/// The set of auditable events. Serialized in `snake_case`, matching the
/// `Display` impl in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditAction {
    Imported,
    Updated,
    Deleted,
    Tagged,
    Untagged,
    AddedToCollection,
    RemovedFromCollection,
    Opened,
    Scanned,
}
impl fmt::Display for AuditAction {
    /// Render the action using the same `snake_case` spelling that serde
    /// produces for serialization.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Imported => "imported",
            Self::Updated => "updated",
            Self::Deleted => "deleted",
            Self::Tagged => "tagged",
            Self::Untagged => "untagged",
            Self::AddedToCollection => "added_to_collection",
            Self::RemovedFromCollection => "removed_from_collection",
            Self::Opened => "opened",
            Self::Scanned => "scanned",
        })
    }
}
/// Offset/limit paging parameters with an optional sort specification.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Pagination {
    pub offset: u64,
    pub limit: u64,
    /// Sort key; the accepted format is interpreted by the storage layer —
    /// confirm the valid values there.
    pub sort: Option<String>,
}
impl Pagination {
    /// Construct explicit paging parameters.
    pub fn new(offset: u64, limit: u64, sort: Option<String>) -> Self {
        Self {
            offset,
            limit,
            sort,
        }
    }
}
impl Default for Pagination {
    // First page of 50, unsorted.
    fn default() -> Self {
        Self {
            offset: 0,
            limit: 50,
            sort: None,
        }
    }
}
/// A named, persisted search query the user can re-run later.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SavedSearch {
    pub id: Uuid,
    pub name: String,
    /// Raw query string in the search DSL.
    pub query: String,
    pub sort_order: Option<String>,
    pub created_at: DateTime<Utc>,
}

View file

@ -0,0 +1,79 @@
use std::path::Path;
use std::process::Command;
use crate::error::{PinakesError, Result};
/// Opens a file with the platform's default application.
pub trait Opener: Send + Sync {
    /// Launches the platform handler for `path`; errors if the command
    /// cannot be spawned or exits unsuccessfully.
    fn open(&self, path: &Path) -> Result<()>;
}
/// Linux opener using xdg-open
pub struct XdgOpener;

impl Opener for XdgOpener {
    fn open(&self, path: &Path) -> Result<()> {
        let status = Command::new("xdg-open")
            .arg(path)
            .status()
            .map_err(|e| PinakesError::InvalidOperation(format!("failed to run xdg-open: {e}")))?;
        // Guard clause: any non-zero exit is surfaced as an error.
        if !status.success() {
            return Err(PinakesError::InvalidOperation(format!(
                "xdg-open exited with status {status}"
            )));
        }
        Ok(())
    }
}
/// macOS opener using the `open` command
pub struct MacOpener;

impl Opener for MacOpener {
    fn open(&self, path: &Path) -> Result<()> {
        let status = Command::new("open")
            .arg(path)
            .status()
            .map_err(|e| PinakesError::InvalidOperation(format!("failed to run open: {e}")))?;
        // Map a successful exit to Ok(()), anything else to an error.
        status.success().then_some(()).ok_or_else(|| {
            PinakesError::InvalidOperation(format!("open exited with status {status}"))
        })
    }
}
/// Windows opener using `cmd /c start`
pub struct WindowsOpener;

impl Opener for WindowsOpener {
    fn open(&self, path: &Path) -> Result<()> {
        // The empty string is the window-title argument `start` would
        // otherwise consume; the path follows as its own argument.
        let status = Command::new("cmd")
            .args(["/C", "start", ""])
            .arg(path)
            .status()
            .map_err(|e| {
                PinakesError::InvalidOperation(format!("failed to run cmd /c start: {e}"))
            })?;
        match status.success() {
            true => Ok(()),
            false => Err(PinakesError::InvalidOperation(format!(
                "cmd /c start exited with status {status}"
            ))),
        }
    }
}
/// Returns the platform-appropriate opener.
pub fn default_opener() -> Box<dyn Opener> {
    // The cfg! branches are mutually exclusive, so the check order does
    // not affect the result; Linux/other falls through to xdg-open.
    if cfg!(target_os = "windows") {
        return Box::new(WindowsOpener);
    }
    if cfg!(target_os = "macos") {
        return Box::new(MacOpener);
    }
    Box::new(XdgOpener)
}

View file

@ -0,0 +1,283 @@
use std::path::{Path, PathBuf};
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use notify::{PollWatcher, RecursiveMode, Watcher};
use tokio::sync::mpsc;
use tracing::{info, warn};
use crate::error::Result;
use crate::import;
use crate::storage::DynStorageBackend;
/// Result summary of one directory scan.
pub struct ScanStatus {
    /// Whether a scan is still in progress (always `false` for a finished
    /// scan's return value).
    pub scanning: bool,
    /// Total files the importer returned results for (successes + failures).
    pub files_found: usize,
    /// Files imported successfully.
    pub files_processed: usize,
    pub errors: Vec<String>,
}
/// Shared scan progress that can be read by the status endpoint while a scan runs.
#[derive(Clone)]
pub struct ScanProgress {
    pub is_scanning: Arc<AtomicBool>,
    pub files_found: Arc<AtomicUsize>,
    pub files_processed: Arc<AtomicUsize>,
    /// Total error count — keeps counting past `MAX_STORED_ERRORS`.
    pub error_count: Arc<AtomicUsize>,
    /// At most `MAX_STORED_ERRORS` messages are retained here.
    pub error_messages: Arc<Mutex<Vec<String>>>,
}
/// Cap on stored error messages; `error_count` is not capped.
const MAX_STORED_ERRORS: usize = 100;
impl ScanProgress {
pub fn new() -> Self {
Self {
is_scanning: Arc::new(AtomicBool::new(false)),
files_found: Arc::new(AtomicUsize::new(0)),
files_processed: Arc::new(AtomicUsize::new(0)),
error_count: Arc::new(AtomicUsize::new(0)),
error_messages: Arc::new(Mutex::new(Vec::new())),
}
}
pub fn snapshot(&self) -> ScanStatus {
let errors = self
.error_messages
.lock()
.map(|v| v.clone())
.unwrap_or_default();
ScanStatus {
scanning: self.is_scanning.load(Ordering::Acquire),
files_found: self.files_found.load(Ordering::Acquire),
files_processed: self.files_processed.load(Ordering::Acquire),
errors,
}
}
fn begin(&self) {
self.is_scanning.store(true, Ordering::Release);
self.files_found.store(0, Ordering::Release);
self.files_processed.store(0, Ordering::Release);
self.error_count.store(0, Ordering::Release);
if let Ok(mut msgs) = self.error_messages.lock() {
msgs.clear();
}
}
fn record_error(&self, message: String) {
self.error_count.fetch_add(1, Ordering::Release);
if let Ok(mut msgs) = self.error_messages.lock()
&& msgs.len() < MAX_STORED_ERRORS
{
msgs.push(message);
}
}
fn finish(&self) {
self.is_scanning.store(false, Ordering::Release);
}
}
impl Default for ScanProgress {
    /// Equivalent to [`ScanProgress::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Scans `dir` and imports all recognized files, without progress
/// reporting. Convenience wrapper over [`scan_directory_with_progress`].
pub async fn scan_directory(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
) -> Result<ScanStatus> {
    scan_directory_with_progress(storage, dir, ignore_patterns, None).await
}
/// Imports every recognized file under `dir`, optionally publishing
/// counters and error messages into `progress` for the status endpoint.
pub async fn scan_directory_with_progress(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
    progress: Option<&ScanProgress>,
) -> Result<ScanStatus> {
    info!(dir = %dir.display(), "starting directory scan");
    if let Some(p) = progress {
        p.begin();
    }
    // Note: for configurable concurrency, use import_directory_with_concurrency directly
    let results = import::import_directory(storage, dir, ignore_patterns).await?;
    let mut processed = 0usize;
    let mut errors = Vec::new();
    for outcome in &results {
        if let Err(e) = outcome {
            let message = e.to_string();
            if let Some(p) = progress {
                p.record_error(message.clone());
            }
            errors.push(message);
        } else {
            processed += 1;
        }
    }
    if let Some(p) = progress {
        p.files_found.store(results.len(), Ordering::Release);
        p.files_processed.store(processed, Ordering::Release);
        p.finish();
    }
    Ok(ScanStatus {
        scanning: false,
        files_found: results.len(),
        files_processed: processed,
        errors,
    })
}
/// Scans every registered root directory without progress reporting.
/// Convenience wrapper over [`scan_all_roots_with_progress`].
pub async fn scan_all_roots(
    storage: &DynStorageBackend,
    ignore_patterns: &[String],
) -> Result<Vec<ScanStatus>> {
    scan_all_roots_with_progress(storage, ignore_patterns, None).await
}
/// Scans every registered root directory in sequence, returning one
/// [`ScanStatus`] per root. A failing root is logged and reported as a
/// status with a single error rather than aborting the remaining roots.
pub async fn scan_all_roots_with_progress(
    storage: &DynStorageBackend,
    ignore_patterns: &[String],
    progress: Option<&ScanProgress>,
) -> Result<Vec<ScanStatus>> {
    let roots = storage.list_root_dirs().await?;
    let mut statuses = Vec::with_capacity(roots.len());
    for root in roots {
        let status = match scan_directory_with_progress(storage, &root, ignore_patterns, progress)
            .await
        {
            Ok(status) => status,
            Err(e) => {
                warn!(root = %root.display(), error = %e, "failed to scan root directory");
                ScanStatus {
                    scanning: false,
                    files_found: 0,
                    files_processed: 0,
                    errors: vec![e.to_string()],
                }
            }
        };
        statuses.push(status);
    }
    Ok(statuses)
}
/// Watches directories for filesystem changes and exposes changed paths
/// through an async channel.
pub struct FileWatcher {
    // Held only to keep the underlying watcher alive; dropping it stops
    // event delivery.
    _watcher: Box<dyn Watcher + Send>,
    rx: mpsc::Receiver<PathBuf>,
}
impl FileWatcher {
    /// Creates a watcher over `dirs`, preferring the platform-native
    /// backend and falling back to a 5-second polling watcher when the
    /// native one cannot be created.
    pub fn new(dirs: &[PathBuf]) -> Result<Self> {
        // Bounded channel: if the consumer falls behind, delivery blocks in
        // the watcher callback instead of growing without limit.
        let (tx, rx) = mpsc::channel(1024);
        // Try the recommended (native) watcher first, fall back to polling
        let watcher: Box<dyn Watcher + Send> = match Self::try_native_watcher(dirs, tx.clone()) {
            Ok(w) => {
                info!("using native filesystem watcher");
                w
            }
            Err(native_err) => {
                warn!(error = %native_err, "native watcher failed, falling back to polling");
                Self::polling_watcher(dirs, tx)?
            }
        };
        Ok(Self {
            _watcher: watcher,
            rx,
        })
    }

    /// Builds the event-forwarding callback shared by both backends: every
    /// path in a filesystem event is sent into `tx`. Extracted so the
    /// native and polling constructors stay in sync (previously the same
    /// closure was duplicated in both, each with a redundant `tx.clone()`).
    fn event_forwarder(
        tx: mpsc::Sender<PathBuf>,
    ) -> impl Fn(notify::Result<notify::Event>) + Send + 'static {
        move |res| {
            if let Ok(event) = res {
                for path in event.paths {
                    // blocking_send is appropriate here: notify invokes the
                    // callback on its own (non-async) thread.
                    if tx.blocking_send(path).is_err() {
                        tracing::warn!("filesystem watcher channel closed, stopping");
                        break;
                    }
                }
            }
        }
    }

    /// Attempts to create the OS-native watcher and register every dir.
    fn try_native_watcher(
        dirs: &[PathBuf],
        tx: mpsc::Sender<PathBuf>,
    ) -> std::result::Result<Box<dyn Watcher + Send>, notify::Error> {
        let mut watcher = notify::recommended_watcher(Self::event_forwarder(tx))?;
        for dir in dirs {
            watcher.watch(dir, RecursiveMode::Recursive)?;
        }
        Ok(Box::new(watcher))
    }

    /// Polling fallback used when the native watcher cannot be created.
    fn polling_watcher(
        dirs: &[PathBuf],
        tx: mpsc::Sender<PathBuf>,
    ) -> Result<Box<dyn Watcher + Send>> {
        let poll_interval = std::time::Duration::from_secs(5);
        let config = notify::Config::default().with_poll_interval(poll_interval);
        let mut watcher = PollWatcher::new(Self::event_forwarder(tx), config)
            .map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))?;
        for dir in dirs {
            watcher
                .watch(dir, RecursiveMode::Recursive)
                .map_err(|e| crate::error::PinakesError::Io(std::io::Error::other(e)))?;
        }
        Ok(Box::new(watcher))
    }

    /// Awaits the next changed path; `None` once the watcher side of the
    /// channel has shut down.
    pub async fn next_change(&mut self) -> Option<PathBuf> {
        self.rx.recv().await
    }
}
/// Runs forever (until the watcher channel closes): watches `dirs` and
/// re-imports any changed file whose type is recognized and which is not
/// matched by `ignore_patterns`.
pub async fn watch_and_import(
    storage: DynStorageBackend,
    dirs: Vec<PathBuf>,
    ignore_patterns: Vec<String>,
) -> Result<()> {
    let mut watcher = FileWatcher::new(&dirs)?;
    info!("filesystem watcher started");
    while let Some(path) = watcher.next_change().await {
        // Guard clauses, in the same short-circuit order as before:
        // regular file, recognized media type, not ignored.
        if !path.is_file() {
            continue;
        }
        if crate::media_type::MediaType::from_path(&path).is_none() {
            continue;
        }
        if crate::import::should_ignore(&path, &ignore_patterns) {
            continue;
        }
        info!(path = %path.display(), "detected file change, importing");
        if let Err(e) = import::import_file(&storage, &path).await {
            warn!(path = %path.display(), error = %e, "failed to import changed file");
        }
    }
    Ok(())
}

View file

@ -0,0 +1,517 @@
use std::path::PathBuf;
use std::sync::Arc;
use chrono::{DateTime, Datelike, Utc};
use serde::{Deserialize, Serialize};
use tokio::sync::RwLock;
use tokio_util::sync::CancellationToken;
use uuid::Uuid;
use crate::config::Config;
use crate::jobs::{JobKind, JobQueue};
/// When a scheduled task should run. All times are UTC.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum Schedule {
    /// Every `secs` seconds.
    Interval { secs: u64 },
    /// Once a day at `hour:minute`.
    Daily { hour: u32, minute: u32 },
    /// Once a week; `day` counts from Monday (0 = Monday … 6 = Sunday).
    Weekly { day: u32, hour: u32, minute: u32 },
}
impl Schedule {
    /// Returns the first run time strictly after `from`.
    ///
    /// Out-of-range `hour`/`minute` values are clamped to 23/59. The
    /// previous `and_hms_opt(..).unwrap_or_default()` silently fell back to
    /// the epoch for invalid times, which produced a next-run far in the
    /// past and made the task permanently "due".
    pub fn next_run(&self, from: DateTime<Utc>) -> DateTime<Utc> {
        match self {
            Schedule::Interval { secs } => from + chrono::Duration::seconds(*secs as i64),
            Schedule::Daily { hour, minute } => {
                let (hour, minute) = clamp_time(*hour, *minute);
                let today_utc = from
                    .date_naive()
                    .and_hms_opt(hour, minute, 0)
                    .expect("clamped time is always valid")
                    .and_utc();
                // Run later today if the time is still ahead, else tomorrow.
                if today_utc > from {
                    today_utc
                } else {
                    today_utc + chrono::Duration::days(1)
                }
            }
            Schedule::Weekly { day, hour, minute } => {
                let (hour, minute) = clamp_time(*hour, *minute);
                let current_day = from.weekday().num_days_from_monday();
                let target_day = *day;
                let days_ahead = if target_day > current_day {
                    target_day - current_day
                } else if target_day < current_day {
                    7 - (current_day - target_day)
                } else {
                    // Same weekday: run today if the time is still ahead,
                    // otherwise wait a full week.
                    let today = from
                        .date_naive()
                        .and_hms_opt(hour, minute, 0)
                        .expect("clamped time is always valid")
                        .and_utc();
                    if today > from {
                        return today;
                    }
                    7
                };
                let target_date = from.date_naive() + chrono::Duration::days(days_ahead as i64);
                target_date
                    .and_hms_opt(hour, minute, 0)
                    .expect("clamped time is always valid")
                    .and_utc()
            }
        }
    }

    /// Human-readable summary, e.g. "Every 1h", "Daily 03:00", "Mon 03:00".
    pub fn display_string(&self) -> String {
        match self {
            Schedule::Interval { secs } => {
                if *secs >= 3600 {
                    format!("Every {}h", secs / 3600)
                } else if *secs >= 60 {
                    format!("Every {}m", secs / 60)
                } else {
                    format!("Every {}s", secs)
                }
            }
            Schedule::Daily { hour, minute } => format!("Daily {hour:02}:{minute:02}"),
            Schedule::Weekly { day, hour, minute } => {
                // Days count from Monday; anything past Saturday renders as Sunday.
                let day_name = match day {
                    0 => "Mon",
                    1 => "Tue",
                    2 => "Wed",
                    3 => "Thu",
                    4 => "Fri",
                    5 => "Sat",
                    _ => "Sun",
                };
                format!("{day_name} {hour:02}:{minute:02}")
            }
        }
    }
}

/// Clamps an hour/minute pair into the valid 0-23 / 0-59 range.
fn clamp_time(hour: u32, minute: u32) -> (u32, u32) {
    (hour.min(23), minute.min(59))
}
/// One recurring background task tracked by the [`TaskScheduler`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScheduledTask {
    /// Stable identifier (e.g. "periodic_scan") used for persistence lookup.
    pub id: String,
    /// Human-readable display name.
    pub name: String,
    /// The job submitted to the queue when the task fires.
    pub kind: JobKind,
    pub schedule: Schedule,
    pub enabled: bool,
    pub last_run: Option<DateTime<Utc>>,
    /// `None` while the task is disabled.
    pub next_run: Option<DateTime<Utc>>,
    /// Last observed outcome: "running", "completed", "cancelled", or "failed: …".
    pub last_status: Option<String>,
    /// Whether a job for this task is currently running. Skipped during serialization.
    #[serde(default, skip_serializing)]
    pub running: bool,
    /// The job ID of the last submitted job. Skipped during serialization/deserialization.
    #[serde(skip)]
    pub last_job_id: Option<Uuid>,
}
/// Periodic task runner: keeps the task table, submits due jobs to the
/// [`JobQueue`], and persists task settings into the shared [`Config`].
pub struct TaskScheduler {
    tasks: Arc<RwLock<Vec<ScheduledTask>>>,
    job_queue: Arc<JobQueue>,
    /// Triggering this token stops the `run` loop.
    cancel: CancellationToken,
    config: Arc<RwLock<Config>>,
    /// Where to save config state; `None` disables on-disk persistence.
    config_path: Option<PathBuf>,
}
impl TaskScheduler {
    /// Creates a scheduler seeded with the built-in task set. Only
    /// `periodic_scan` starts enabled; call [`Self::restore_state`] once
    /// afterwards to layer persisted settings over these defaults.
    pub fn new(
        job_queue: Arc<JobQueue>,
        cancel: CancellationToken,
        config: Arc<RwLock<Config>>,
        config_path: Option<PathBuf>,
    ) -> Self {
        let now = Utc::now();
        let default_tasks = vec![
            ScheduledTask {
                id: "periodic_scan".to_string(),
                name: "Periodic Scan".to_string(),
                kind: JobKind::Scan { path: None },
                schedule: Schedule::Interval { secs: 3600 },
                enabled: true,
                last_run: None,
                next_run: Some(now + chrono::Duration::seconds(3600)),
                last_status: None,
                running: false,
                last_job_id: None,
            },
            ScheduledTask {
                id: "integrity_check".to_string(),
                name: "Integrity Check".to_string(),
                kind: JobKind::VerifyIntegrity { media_ids: vec![] },
                schedule: Schedule::Weekly {
                    day: 0,
                    hour: 3,
                    minute: 0,
                },
                enabled: false,
                last_run: None,
                next_run: None,
                last_status: None,
                running: false,
                last_job_id: None,
            },
            ScheduledTask {
                id: "orphan_detection".to_string(),
                name: "Orphan Detection".to_string(),
                kind: JobKind::OrphanDetection,
                schedule: Schedule::Daily { hour: 2, minute: 0 },
                enabled: false,
                last_run: None,
                next_run: None,
                last_status: None,
                running: false,
                last_job_id: None,
            },
            ScheduledTask {
                id: "thumbnail_cleanup".to_string(),
                name: "Thumbnail Cleanup".to_string(),
                kind: JobKind::CleanupThumbnails,
                schedule: Schedule::Weekly {
                    day: 6,
                    hour: 4,
                    minute: 0,
                },
                enabled: false,
                last_run: None,
                next_run: None,
                last_status: None,
                running: false,
                last_job_id: None,
            },
        ];
        Self {
            tasks: Arc::new(RwLock::new(default_tasks)),
            job_queue,
            cancel,
            config,
            config_path,
        }
    }
    /// Restore saved task state from config. Should be called once after construction.
    pub async fn restore_state(&self) {
        let saved = self.config.read().await.scheduled_tasks.clone();
        if saved.is_empty() {
            return;
        }
        let mut tasks = self.tasks.write().await;
        for saved_task in &saved {
            // Only known task ids are restored; stale config entries are ignored.
            if let Some(task) = tasks.iter_mut().find(|t| t.id == saved_task.id) {
                task.enabled = saved_task.enabled;
                task.schedule = saved_task.schedule.clone();
                // last_run is persisted as an RFC 3339 string; unparseable
                // values are silently dropped.
                if let Some(Ok(dt)) = saved_task
                    .last_run
                    .as_ref()
                    .map(|s| DateTime::parse_from_rfc3339(s))
                {
                    task.last_run = Some(dt.with_timezone(&Utc));
                }
                if task.enabled {
                    let from = task.last_run.unwrap_or_else(Utc::now);
                    task.next_run = Some(task.schedule.next_run(from));
                } else {
                    task.next_run = None;
                }
            }
        }
    }
    /// Persist current task state to config file.
    async fn persist_task_state(&self) {
        let tasks = self.tasks.read().await;
        let task_configs: Vec<crate::config::ScheduledTaskConfig> = tasks
            .iter()
            .map(|t| crate::config::ScheduledTaskConfig {
                id: t.id.clone(),
                enabled: t.enabled,
                schedule: t.schedule.clone(),
                last_run: t.last_run.map(|dt| dt.to_rfc3339()),
            })
            .collect();
        drop(tasks);
        {
            let mut config = self.config.write().await;
            config.scheduled_tasks = task_configs;
        }
        // Saving is best-effort: a write failure is logged, not propagated.
        if let Some(ref path) = self.config_path {
            let config = self.config.read().await;
            if let Err(e) = config.save_to_file(path) {
                tracing::warn!(error = %e, "failed to persist scheduler state to config file");
            }
        }
    }
    /// Returns a snapshot of all scheduled tasks.
    pub async fn list_tasks(&self) -> Vec<ScheduledTask> {
        self.tasks.read().await.clone()
    }
    /// Flips the enabled flag of the task with this `id` and persists the
    /// change. Returns the new state, or `None` if no task matches.
    pub async fn toggle_task(&self, id: &str) -> Option<bool> {
        let result = {
            let mut tasks = self.tasks.write().await;
            if let Some(task) = tasks.iter_mut().find(|t| t.id == id) {
                task.enabled = !task.enabled;
                if task.enabled {
                    task.next_run = Some(task.schedule.next_run(Utc::now()));
                } else {
                    task.next_run = None;
                }
                Some(task.enabled)
            } else {
                None
            }
        };
        if result.is_some() {
            self.persist_task_state().await;
        }
        result
    }
    /// Run a task immediately. Uses a single write lock to avoid TOCTOU races.
    pub async fn run_now(&self, id: &str) -> Option<String> {
        let result = {
            let mut tasks = self.tasks.write().await;
            let task = tasks.iter_mut().find(|t| t.id == id)?;
            // Submit the job (cheap: sends to mpsc channel)
            let job_id = self.job_queue.submit(task.kind.clone()).await;
            task.last_run = Some(Utc::now());
            task.last_status = Some("running".to_string());
            task.running = true;
            task.last_job_id = Some(job_id);
            if task.enabled {
                task.next_run = Some(task.schedule.next_run(Utc::now()));
            }
            Some(job_id.to_string())
        };
        if result.is_some() {
            self.persist_task_state().await;
        }
        result
    }
    /// Main scheduler loop. Uses a two-phase approach per tick to avoid
    /// holding the write lock across await points. Returns when the
    /// cancellation token is triggered.
    pub async fn run(&self) {
        // 30-second tick granularity: a due task starts at most one tick late.
        let mut interval = tokio::time::interval(std::time::Duration::from_secs(30));
        loop {
            tokio::select! {
                _ = interval.tick() => {}
                _ = self.cancel.cancelled() => {
                    tracing::info!("scheduler shutting down");
                    return;
                }
            }
            // Phase 1: Check completed jobs and update running status
            // NOTE(review): this phase does await `job_queue.status` while
            // holding the write lock (safe with tokio's RwLock, but contra
            // the doc comment above) — consider collecting job ids first.
            {
                use crate::jobs::JobStatus;
                let mut tasks = self.tasks.write().await;
                for task in tasks.iter_mut() {
                    if !task.running {
                        continue;
                    }
                    let Some(job_id) = task.last_job_id else {
                        continue;
                    };
                    let Some(job) = self.job_queue.status(job_id).await else {
                        continue;
                    };
                    match &job.status {
                        JobStatus::Completed { .. } => {
                            task.running = false;
                            task.last_status = Some("completed".to_string());
                        }
                        JobStatus::Failed { error } => {
                            task.running = false;
                            task.last_status = Some(format!("failed: {error}"));
                        }
                        JobStatus::Cancelled => {
                            task.running = false;
                            task.last_status = Some("cancelled".to_string());
                        }
                        _ => {} // still pending or running
                    }
                }
            }
            // Phase 2: Collect due tasks and submit jobs
            let now = Utc::now();
            let mut to_submit: Vec<(usize, JobKind)> = Vec::new();
            {
                let mut tasks = self.tasks.write().await;
                for (i, task) in tasks.iter_mut().enumerate() {
                    if !task.enabled || task.running {
                        continue;
                    }
                    let due = task.next_run.is_some_and(|next| now >= next);
                    if due {
                        to_submit.push((i, task.kind.clone()));
                        task.last_run = Some(now);
                        task.last_status = Some("running".to_string());
                        task.running = true;
                        task.next_run = Some(task.schedule.next_run(now));
                    }
                }
            }
            // Submit jobs without holding the lock
            for (idx, kind) in to_submit {
                let job_id = self.job_queue.submit(kind).await;
                let mut tasks = self.tasks.write().await;
                if let Some(task) = tasks.get_mut(idx) {
                    task.last_job_id = Some(job_id);
                }
            }
        }
    }
}
#[cfg(test)]
mod tests {
    //! Unit tests for `Schedule::next_run`, `ScheduledTask` serde
    //! behavior, and `Schedule::display_string`.
    use super::*;
    use chrono::TimeZone;
    #[test]
    fn test_interval_next_run() {
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 12, 0, 0).unwrap();
        let schedule = Schedule::Interval { secs: 3600 };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 13, 0, 0).unwrap());
    }
    #[test]
    fn test_daily_next_run_future_today() {
        // 10:00 UTC, schedule is 14:00 => same day
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
        let schedule = Schedule::Daily {
            hour: 14,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 14, 0, 0).unwrap());
    }
    #[test]
    fn test_daily_next_run_past_today() {
        // 16:00 UTC, schedule is 14:00 => next day
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 16, 0, 0).unwrap();
        let schedule = Schedule::Daily {
            hour: 14,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 16, 14, 0, 0).unwrap());
    }
    #[test]
    fn test_weekly_next_run() {
        // 2025-06-15 is a Sunday (day 6). Target is Monday (day 0) at 03:00.
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 12, 0, 0).unwrap();
        let schedule = Schedule::Weekly {
            day: 0,
            hour: 3,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 16, 3, 0, 0).unwrap());
    }
    #[test]
    fn test_weekly_same_day_future() {
        // 2025-06-15 is Sunday (day 6). Schedule is Sunday 14:00, current is 10:00 => today.
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
        let schedule = Schedule::Weekly {
            day: 6,
            hour: 14,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 15, 14, 0, 0).unwrap());
    }
    #[test]
    fn test_weekly_same_day_past() {
        // 2025-06-15 is Sunday (day 6). Schedule is Sunday 08:00, current is 10:00 => next week.
        let from = Utc.with_ymd_and_hms(2025, 6, 15, 10, 0, 0).unwrap();
        let schedule = Schedule::Weekly {
            day: 6,
            hour: 8,
            minute: 0,
        };
        let next = schedule.next_run(from);
        assert_eq!(next, Utc.with_ymd_and_hms(2025, 6, 22, 8, 0, 0).unwrap());
    }
    #[test]
    fn test_serde_roundtrip() {
        // Verifies that skip/skip_serializing fields reset on round-trip.
        let task = ScheduledTask {
            id: "test".to_string(),
            name: "Test Task".to_string(),
            kind: JobKind::Scan { path: None },
            schedule: Schedule::Interval { secs: 3600 },
            enabled: true,
            last_run: Some(Utc::now()),
            next_run: Some(Utc::now()),
            last_status: Some("completed".to_string()),
            running: true,
            last_job_id: Some(Uuid::now_v7()),
        };
        let json = serde_json::to_string(&task).unwrap();
        let deserialized: ScheduledTask = serde_json::from_str(&json).unwrap();
        assert_eq!(deserialized.id, "test");
        assert_eq!(deserialized.enabled, true);
        // running defaults to false on deserialization (skip_serializing)
        assert!(!deserialized.running);
        // last_job_id is skipped entirely
        assert!(deserialized.last_job_id.is_none());
    }
    #[test]
    fn test_display_string() {
        assert_eq!(
            Schedule::Interval { secs: 3600 }.display_string(),
            "Every 1h"
        );
        assert_eq!(
            Schedule::Interval { secs: 300 }.display_string(),
            "Every 5m"
        );
        assert_eq!(
            Schedule::Interval { secs: 30 }.display_string(),
            "Every 30s"
        );
        assert_eq!(
            Schedule::Daily { hour: 3, minute: 0 }.display_string(),
            "Daily 03:00"
        );
        assert_eq!(
            Schedule::Weekly {
                day: 0,
                hour: 3,
                minute: 0
            }
            .display_string(),
            "Mon 03:00"
        );
        assert_eq!(
            Schedule::Weekly {
                day: 6,
                hour: 14,
                minute: 30
            }
            .display_string(),
            "Sun 14:30"
        );
    }
}

View file

@ -0,0 +1,256 @@
use serde::{Deserialize, Serialize};
use winnow::combinator::{alt, delimited, preceded, repeat};
use winnow::token::{take_till, take_while};
use winnow::{ModalResult, Parser};
/// AST of a parsed search query (see `parse_search_query`).
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub enum SearchQuery {
    /// A plain term, matched against full-text content.
    FullText(String),
    /// `field:value` match against a named metadata field.
    FieldMatch { field: String, value: String },
    /// All sub-queries must match (implicit when terms are juxtaposed).
    And(Vec<SearchQuery>),
    /// Any sub-query may match (explicit `OR` keyword).
    Or(Vec<SearchQuery>),
    /// Negation, written `-term`.
    Not(Box<SearchQuery>),
    /// Prefix match, written `term*`.
    Prefix(String),
    /// Fuzzy match, written `term~`.
    Fuzzy(String),
    /// Media-type filter, written `type:value`.
    TypeFilter(String),
    /// Tag filter, written `tag:value`.
    TagFilter(String),
}
/// A complete search: query AST plus sort order and pagination window.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchRequest {
    pub query: SearchQuery,
    pub sort: SortOrder,
    pub pagination: crate::model::Pagination,
}
/// One page of results plus the total match count across all pages.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SearchResults {
    pub items: Vec<crate::model::MediaItem>,
    pub total_count: u64,
}
/// Sort order for search results; defaults to relevance ranking.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
#[derive(Default)]
pub enum SortOrder {
    #[default]
    Relevance,
    DateAsc,
    DateDesc,
    NameAsc,
    NameDesc,
    SizeAsc,
    SizeDesc,
}
/// Consumes zero or more whitespace characters.
///
/// Previously only ASCII spaces matched, so a tab or newline between terms
/// ended the parse early (and the remainder was silently dropped by
/// `parse_search_query`). Matching any whitespace keeps the separator rule
/// consistent with `bare_word`, which already stops at any whitespace.
fn ws<'i>(input: &mut &'i str) -> ModalResult<&'i str> {
    take_while(0.., |c: char| c.is_whitespace()).parse_next(input)
}
/// Parses a double-quoted string, returning its contents.
/// NOTE(review): no escape handling — an embedded `\"` cannot be expressed.
fn quoted_string(input: &mut &str) -> ModalResult<String> {
    delimited('"', take_till(0.., '"'), '"')
        .map(|s: &str| s.to_string())
        .parse_next(input)
}
/// Parses a run of non-whitespace, non-parenthesis characters.
fn bare_word(input: &mut &str) -> ModalResult<String> {
    take_while(1.., |c: char| !c.is_whitespace() && c != ')' && c != '(')
        .map(|s: &str| s.to_string())
        .parse_next(input)
}
/// A quoted string or, failing that, a bare word.
fn word_or_quoted(input: &mut &str) -> ModalResult<String> {
    alt((quoted_string, bare_word)).parse_next(input)
}
/// Negation: `-` followed by any atom.
fn not_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    preceded(('-', ws), atom)
        .map(|q| SearchQuery::Not(Box::new(q)))
        .parse_next(input)
}
/// `field:value`. The fields `type` and `tag` are special-cased into
/// dedicated filter variants; everything else becomes a generic FieldMatch.
fn field_match(input: &mut &str) -> ModalResult<SearchQuery> {
    let field_name =
        take_while(1.., |c: char| c.is_alphanumeric() || c == '_').map(|s: &str| s.to_string());
    (field_name, ':', word_or_quoted)
        .map(|(field, _, value)| match field.as_str() {
            "type" => SearchQuery::TypeFilter(value),
            "tag" => SearchQuery::TagFilter(value),
            _ => SearchQuery::FieldMatch { field, value },
        })
        .parse_next(input)
}
/// Prefix query: a word immediately followed by `*`.
fn prefix_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    let word = take_while(1.., |c: char| {
        !c.is_whitespace() && c != ')' && c != '(' && c != '*'
    })
    .map(|s: &str| s.to_string());
    (word, '*')
        .map(|(w, _)| SearchQuery::Prefix(w))
        .parse_next(input)
}
/// Fuzzy query: a word immediately followed by `~`.
fn fuzzy_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    let word = take_while(1.., |c: char| {
        !c.is_whitespace() && c != ')' && c != '(' && c != '~'
    })
    .map(|s: &str| s.to_string());
    (word, '~')
        .map(|(w, _)| SearchQuery::Fuzzy(w))
        .parse_next(input)
}
/// A parenthesized sub-expression, restarting at OR precedence.
fn paren_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    delimited(('(', ws), or_expr, (ws, ')')).parse_next(input)
}
/// Guard that fails (without consuming input) when the input starts with
/// the standalone keyword `OR`, so `full_text` never swallows the operator.
fn not_or_keyword(input: &mut &str) -> ModalResult<()> {
    if let Some(rest) = input.strip_prefix("OR")
        && (rest.is_empty() || rest.starts_with(' ') || rest.starts_with(')'))
    {
        return Err(winnow::error::ErrMode::Backtrack(
            winnow::error::ContextError::new(),
        ));
    }
    Ok(())
}
/// A plain full-text term (any word or quoted string that is not `OR`).
fn full_text(input: &mut &str) -> ModalResult<SearchQuery> {
    not_or_keyword.parse_next(input)?;
    word_or_quoted.map(SearchQuery::FullText).parse_next(input)
}
/// One query atom. Alternative order is significant: more specific forms
/// (parens, negation, field:value, prefix, fuzzy) are tried before the
/// catch-all full-text term.
fn atom(input: &mut &str) -> ModalResult<SearchQuery> {
    alt((
        paren_expr,
        not_expr,
        field_match,
        prefix_expr,
        fuzzy_expr,
        full_text,
    ))
    .parse_next(input)
}
/// One or more atoms separated by whitespace; two or more collapse into
/// an implicit `And`.
fn and_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    let head = atom.parse_next(input)?;
    let tail: Vec<SearchQuery> = repeat(0.., preceded(ws, atom)).parse_next(input)?;
    if tail.is_empty() {
        return Ok(head);
    }
    let mut terms = Vec::with_capacity(tail.len() + 1);
    terms.push(head);
    terms.extend(tail);
    Ok(SearchQuery::And(terms))
}
/// One or more `and_expr`s joined by the `OR` keyword; a lone operand is
/// returned unwrapped.
fn or_expr(input: &mut &str) -> ModalResult<SearchQuery> {
    let head = and_expr.parse_next(input)?;
    let tail: Vec<SearchQuery> =
        repeat(0.., preceded((ws, "OR", ws), and_expr)).parse_next(input)?;
    if tail.is_empty() {
        return Ok(head);
    }
    let mut terms = Vec::with_capacity(tail.len() + 1);
    terms.push(head);
    terms.extend(tail);
    Ok(SearchQuery::Or(terms))
}
/// Parses a user-supplied query string into a [`SearchQuery`] AST.
///
/// Empty (or all-whitespace) input yields an empty full-text query.
///
/// NOTE(review): `parse_next` does not require the whole input to be
/// consumed, so trailing text the grammar cannot parse is silently
/// ignored rather than reported — confirm this leniency is intended.
pub fn parse_search_query(input: &str) -> crate::error::Result<SearchQuery> {
    let trimmed = input.trim();
    if trimmed.is_empty() {
        return Ok(SearchQuery::FullText(String::new()));
    }
    let mut input = trimmed;
    or_expr
        .parse_next(&mut input)
        .map_err(|e| crate::error::PinakesError::SearchParse(format!("{e}")))
}
#[cfg(test)]
mod tests {
    //! One test per grammar production of the search query language.
    use super::*;
    #[test]
    fn test_simple_text() {
        let q = parse_search_query("hello").unwrap();
        assert_eq!(q, SearchQuery::FullText("hello".into()));
    }
    #[test]
    fn test_field_match() {
        let q = parse_search_query("artist:Beatles").unwrap();
        assert_eq!(
            q,
            SearchQuery::FieldMatch {
                field: "artist".into(),
                value: "Beatles".into()
            }
        );
    }
    #[test]
    fn test_type_filter() {
        let q = parse_search_query("type:pdf").unwrap();
        assert_eq!(q, SearchQuery::TypeFilter("pdf".into()));
    }
    #[test]
    fn test_tag_filter() {
        let q = parse_search_query("tag:music").unwrap();
        assert_eq!(q, SearchQuery::TagFilter("music".into()));
    }
    #[test]
    fn test_and_implicit() {
        // Adjacent terms combine into an implicit And.
        let q = parse_search_query("hello world").unwrap();
        assert_eq!(
            q,
            SearchQuery::And(vec![
                SearchQuery::FullText("hello".into()),
                SearchQuery::FullText("world".into()),
            ])
        );
    }
    #[test]
    fn test_or() {
        let q = parse_search_query("hello OR world").unwrap();
        assert_eq!(
            q,
            SearchQuery::Or(vec![
                SearchQuery::FullText("hello".into()),
                SearchQuery::FullText("world".into()),
            ])
        );
    }
    #[test]
    fn test_not() {
        let q = parse_search_query("-excluded").unwrap();
        assert_eq!(
            q,
            SearchQuery::Not(Box::new(SearchQuery::FullText("excluded".into())))
        );
    }
    #[test]
    fn test_prefix() {
        let q = parse_search_query("hel*").unwrap();
        assert_eq!(q, SearchQuery::Prefix("hel".into()));
    }
    #[test]
    fn test_fuzzy() {
        let q = parse_search_query("hello~").unwrap();
        assert_eq!(q, SearchQuery::Fuzzy("hello".into()));
    }
    #[test]
    fn test_quoted() {
        // Quotes allow whitespace inside a single full-text term.
        let q = parse_search_query("\"hello world\"").unwrap();
        assert_eq!(q, SearchQuery::FullText("hello world".into()));
    }
}

View file

@ -0,0 +1,26 @@
use crate::error::{PinakesError, Result};
// SQLite migration scripts embedded into the binary by refinery at
// compile time.
mod sqlite_migrations {
    use refinery::embed_migrations;
    embed_migrations!("../../migrations/sqlite");
}
// Postgres migration scripts, likewise embedded at compile time.
mod postgres_migrations {
    use refinery::embed_migrations;
    embed_migrations!("../../migrations/postgres");
}
/// Applies all pending embedded SQLite migrations on `conn`, mapping any
/// refinery failure into `PinakesError::Migration`.
pub fn run_sqlite_migrations(conn: &mut rusqlite::Connection) -> Result<()> {
    sqlite_migrations::migrations::runner()
        .run(conn)
        .map(|_report| ())
        .map_err(|e| PinakesError::Migration(e.to_string()))
}
/// Applies all pending embedded Postgres migrations through `client`,
/// mapping any refinery failure into `PinakesError::Migration`.
pub async fn run_postgres_migrations(client: &mut tokio_postgres::Client) -> Result<()> {
    let outcome = postgres_migrations::migrations::runner()
        .run_async(client)
        .await;
    match outcome {
        Ok(_report) => Ok(()),
        Err(e) => Err(PinakesError::Migration(e.to_string())),
    }
}

View file

@ -0,0 +1,209 @@
pub mod migrations;
pub mod postgres;
pub mod sqlite;
use std::path::PathBuf;
use std::sync::Arc;
use uuid::Uuid;
use crate::error::Result;
use crate::model::*;
use crate::search::{SearchRequest, SearchResults};
/// Statistics about the database.
#[derive(Debug, Clone, Default)]
pub struct DatabaseStats {
    pub media_count: u64,
    pub tag_count: u64,
    pub collection_count: u64,
    pub audit_count: u64,
    pub database_size_bytes: u64,
    /// Identifies which backend produced these stats (e.g. SQLite/Postgres).
    pub backend_name: String,
}
/// Abstraction over the persistence layer, implemented by each database
/// backend. Methods with default bodies are naive per-item loops that a
/// backend may override with transactional equivalents.
#[async_trait::async_trait]
pub trait StorageBackend: Send + Sync + 'static {
    // Migrations
    /// Applies all pending schema migrations.
    async fn run_migrations(&self) -> Result<()>;
    // Root directories
    async fn add_root_dir(&self, path: PathBuf) -> Result<()>;
    async fn list_root_dirs(&self) -> Result<Vec<PathBuf>>;
    async fn remove_root_dir(&self, path: &std::path::Path) -> Result<()>;
    // Media CRUD
    async fn insert_media(&self, item: &MediaItem) -> Result<()>;
    async fn get_media(&self, id: MediaId) -> Result<MediaItem>;
    async fn count_media(&self) -> Result<u64>;
    async fn get_media_by_hash(&self, hash: &ContentHash) -> Result<Option<MediaItem>>;
    async fn list_media(&self, pagination: &Pagination) -> Result<Vec<MediaItem>>;
    async fn update_media(&self, item: &MediaItem) -> Result<()>;
    async fn delete_media(&self, id: MediaId) -> Result<()>;
    /// Deletes every media row; returns how many were removed.
    async fn delete_all_media(&self) -> Result<u64>;
    // Tags
    async fn create_tag(&self, name: &str, parent_id: Option<Uuid>) -> Result<Tag>;
    async fn get_tag(&self, id: Uuid) -> Result<Tag>;
    async fn list_tags(&self) -> Result<Vec<Tag>>;
    async fn delete_tag(&self, id: Uuid) -> Result<()>;
    async fn tag_media(&self, media_id: MediaId, tag_id: Uuid) -> Result<()>;
    async fn untag_media(&self, media_id: MediaId, tag_id: Uuid) -> Result<()>;
    async fn get_media_tags(&self, media_id: MediaId) -> Result<Vec<Tag>>;
    /// All tags nested (transitively) under `tag_id`.
    async fn get_tag_descendants(&self, tag_id: Uuid) -> Result<Vec<Tag>>;
    // Collections
    async fn create_collection(
        &self,
        name: &str,
        kind: CollectionKind,
        description: Option<&str>,
        filter_query: Option<&str>,
    ) -> Result<Collection>;
    async fn get_collection(&self, id: Uuid) -> Result<Collection>;
    async fn list_collections(&self) -> Result<Vec<Collection>>;
    async fn delete_collection(&self, id: Uuid) -> Result<()>;
    async fn add_to_collection(
        &self,
        collection_id: Uuid,
        media_id: MediaId,
        position: i32,
    ) -> Result<()>;
    async fn remove_from_collection(&self, collection_id: Uuid, media_id: MediaId) -> Result<()>;
    async fn get_collection_members(&self, collection_id: Uuid) -> Result<Vec<MediaItem>>;
    // Search
    async fn search(&self, request: &SearchRequest) -> Result<SearchResults>;
    // Audit
    async fn record_audit(&self, entry: &AuditEntry) -> Result<()>;
    async fn list_audit_entries(
        &self,
        media_id: Option<MediaId>,
        pagination: &Pagination,
    ) -> Result<Vec<AuditEntry>>;
    // Custom fields
    async fn set_custom_field(
        &self,
        media_id: MediaId,
        name: &str,
        field: &CustomField,
    ) -> Result<()>;
    async fn get_custom_fields(
        &self,
        media_id: MediaId,
    ) -> Result<std::collections::HashMap<String, CustomField>>;
    async fn delete_custom_field(&self, media_id: MediaId, name: &str) -> Result<()>;
    // Batch operations (transactional where supported)
    /// Deletes each id in turn; returns the delete count. This default is
    /// NOT atomic — an error mid-way leaves earlier deletions in place.
    async fn batch_delete_media(&self, ids: &[MediaId]) -> Result<u64> {
        let mut count = 0u64;
        for id in ids {
            self.delete_media(*id).await?;
            count += 1;
        }
        Ok(count)
    }
    /// Applies every tag to every media item (cartesian product); returns
    /// the number of (media, tag) pairs written.
    async fn batch_tag_media(&self, media_ids: &[MediaId], tag_ids: &[Uuid]) -> Result<u64> {
        let mut count = 0u64;
        for media_id in media_ids {
            for tag_id in tag_ids {
                self.tag_media(*media_id, *tag_id).await?;
                count += 1;
            }
        }
        Ok(count)
    }
    // Integrity
    async fn list_media_paths(&self) -> Result<Vec<(MediaId, std::path::PathBuf, ContentHash)>>;
    // Batch metadata update
    /// Overwrites the given metadata fields on every item in `ids`; `None`
    /// arguments leave the corresponding field untouched. Bumps
    /// `updated_at` on each item and returns how many were updated.
    async fn batch_update_media(
        &self,
        ids: &[MediaId],
        title: Option<&str>,
        artist: Option<&str>,
        album: Option<&str>,
        genre: Option<&str>,
        year: Option<i32>,
        description: Option<&str>,
    ) -> Result<u64> {
        let mut count = 0u64;
        for id in ids {
            let mut item = self.get_media(*id).await?;
            if let Some(v) = title {
                item.title = Some(v.to_string());
            }
            if let Some(v) = artist {
                item.artist = Some(v.to_string());
            }
            if let Some(v) = album {
                item.album = Some(v.to_string());
            }
            if let Some(v) = genre {
                item.genre = Some(v.to_string());
            }
            // Option<i32> is Copy — match it by value instead of through a
            // reference as before.
            if let Some(v) = year {
                item.year = Some(v);
            }
            if let Some(v) = description {
                item.description = Some(v.to_string());
            }
            item.updated_at = chrono::Utc::now();
            self.update_media(&item).await?;
            count += 1;
        }
        Ok(count)
    }
    // Saved searches
    async fn save_search(
        &self,
        id: uuid::Uuid,
        name: &str,
        query: &str,
        sort_order: Option<&str>,
    ) -> Result<()>;
    async fn list_saved_searches(&self) -> Result<Vec<crate::model::SavedSearch>>;
    async fn delete_saved_search(&self, id: uuid::Uuid) -> Result<()>;
    // Duplicates
    /// Groups of duplicate items; presumably grouped by content hash —
    /// confirm against the backend implementations.
    async fn find_duplicates(&self) -> Result<Vec<Vec<MediaItem>>>;
    // Database management
    async fn database_stats(&self) -> Result<DatabaseStats>;
    async fn vacuum(&self) -> Result<()>;
    async fn clear_all_data(&self) -> Result<()>;
    // Thumbnail helpers
    /// List all media IDs, optionally filtering to those missing thumbnails.
    async fn list_media_ids_for_thumbnails(
        &self,
        only_missing: bool,
    ) -> Result<Vec<crate::model::MediaId>>;
    // Library statistics
    async fn library_statistics(&self) -> Result<LibraryStatistics>;
}
/// Comprehensive library statistics.
#[derive(Debug, Clone, Default)]
pub struct LibraryStatistics {
    /// Total number of media items in the library.
    pub total_media: u64,
    /// Combined size of all media files, in bytes.
    pub total_size_bytes: u64,
    /// Mean file size across all items, in bytes.
    pub avg_file_size_bytes: u64,
    /// Per-media-type item counts as `(type name, count)` pairs.
    pub media_by_type: Vec<(String, u64)>,
    /// Per-media-type storage usage as `(type name, bytes)` pairs.
    pub storage_by_type: Vec<(String, u64)>,
    /// Newest item (as a display string), if the library is non-empty.
    pub newest_item: Option<String>,
    /// Oldest item (as a display string), if the library is non-empty.
    pub oldest_item: Option<String>,
    /// Most-used tags as `(tag name, count)` pairs.
    pub top_tags: Vec<(String, u64)>,
    /// Largest collections as `(collection name, count)` pairs.
    pub top_collections: Vec<(String, u64)>,
    /// Total number of tags defined.
    pub total_tags: u64,
    /// Total number of collections defined.
    pub total_collections: u64,
    /// Number of duplicate items detected.
    pub total_duplicates: u64,
}
/// Shared, dynamically-dispatched handle to a [`StorageBackend`] implementation.
pub type DynStorageBackend = Arc<dyn StorageBackend>;

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,43 @@
use uuid::Uuid;
use crate::error::Result;
use crate::model::{AuditAction, MediaId, Tag};
use crate::storage::DynStorageBackend;
/// Create a new tag named `name`, optionally nested under `parent_id`.
pub async fn create_tag(
    storage: &DynStorageBackend,
    name: &str,
    parent_id: Option<Uuid>,
) -> Result<Tag> {
    let tag = storage.create_tag(name, parent_id).await?;
    Ok(tag)
}
/// Attach `tag_id` to `media_id`, then record the action in the audit log.
pub async fn tag_media(storage: &DynStorageBackend, media_id: MediaId, tag_id: Uuid) -> Result<()> {
    storage.tag_media(media_id, tag_id).await?;
    let details = format!("tag_id={tag_id}");
    crate::audit::record_action(storage, Some(media_id), AuditAction::Tagged, Some(details)).await
}
/// Detach `tag_id` from `media_id`, then record the action in the audit log.
pub async fn untag_media(
    storage: &DynStorageBackend,
    media_id: MediaId,
    tag_id: Uuid,
) -> Result<()> {
    storage.untag_media(media_id, tag_id).await?;
    let details = format!("tag_id={tag_id}");
    crate::audit::record_action(storage, Some(media_id), AuditAction::Untagged, Some(details)).await
}
/// Return every descendant of `tag_id` in the tag hierarchy.
pub async fn get_tag_tree(storage: &DynStorageBackend, tag_id: Uuid) -> Result<Vec<Tag>> {
    let descendants = storage.get_tag_descendants(tag_id).await?;
    Ok(descendants)
}

View file

@ -0,0 +1,278 @@
use std::path::{Path, PathBuf};
use std::process::Command;
use tracing::{info, warn};
use crate::config::ThumbnailConfig;
use crate::error::{PinakesError, Result};
use crate::media_type::{MediaCategory, MediaType};
use crate::model::MediaId;
/// Generate a thumbnail for a media file and return the path to the thumbnail.
///
/// Supports images (via `image` crate), videos (via ffmpeg), PDFs (via pdftoppm),
/// and EPUBs (via cover image extraction).
pub fn generate_thumbnail(
    media_id: MediaId,
    source_path: &Path,
    media_type: MediaType,
    thumbnail_dir: &Path,
) -> Result<Option<PathBuf>> {
    // Delegate to the configurable variant using default settings.
    let config = ThumbnailConfig::default();
    generate_thumbnail_with_config(media_id, source_path, media_type, thumbnail_dir, &config)
}
/// Generate a thumbnail for `source_path` using the given [`ThumbnailConfig`].
///
/// The output is written as `<media_id>.jpg` inside `thumbnail_dir`, which is
/// created if it does not exist. Returns:
/// - `Ok(Some(path))` when a thumbnail was produced;
/// - `Ok(None)` for unsupported media categories *and* for generation
///   failures (failures are logged with `warn!`, not propagated);
/// - `Err(..)` only when the thumbnail directory cannot be created.
pub fn generate_thumbnail_with_config(
    media_id: MediaId,
    source_path: &Path,
    media_type: MediaType,
    thumbnail_dir: &Path,
    config: &ThumbnailConfig,
) -> Result<Option<PathBuf>> {
    std::fs::create_dir_all(thumbnail_dir)?;
    let thumb_path = thumbnail_dir.join(format!("{}.jpg", media_id));
    // Dispatch on media category; RAW and HEIC images go through external tools.
    let result = match media_type.category() {
        MediaCategory::Image => {
            if media_type.is_raw() {
                generate_raw_thumbnail(source_path, &thumb_path, config)
            } else if media_type == MediaType::Heic {
                generate_heic_thumbnail(source_path, &thumb_path, config)
            } else {
                generate_image_thumbnail(source_path, &thumb_path, config)
            }
        }
        MediaCategory::Video => generate_video_thumbnail(source_path, &thumb_path, config),
        MediaCategory::Document => match media_type {
            MediaType::Pdf => generate_pdf_thumbnail(source_path, &thumb_path, config),
            MediaType::Epub => generate_epub_thumbnail(source_path, &thumb_path, config),
            _ => return Ok(None),
        },
        _ => return Ok(None),
    };
    // Generation failure is non-fatal: log it and report "no thumbnail".
    match result {
        Ok(()) => {
            info!(media_id = %media_id, category = ?media_type.category(), "generated thumbnail");
            Ok(Some(thumb_path))
        }
        Err(e) => {
            warn!(media_id = %media_id, error = %e, "failed to generate thumbnail");
            Ok(None)
        }
    }
}
/// Decode an image file and write a JPEG thumbnail of it to `dest`.
fn generate_image_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    let decoded = image::open(source)
        .map_err(|e| PinakesError::MetadataExtraction(format!("image open: {e}")))?;
    // `thumbnail` fits the image inside a size x size box, preserving aspect ratio.
    let scaled = decoded.thumbnail(config.size, config.size);
    let mut file = std::fs::File::create(dest)?;
    scaled
        .write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality(
            &mut file,
            config.quality,
        ))
        .map_err(|e| PinakesError::MetadataExtraction(format!("thumbnail encode: {e}")))?;
    Ok(())
}
/// Extract a single video frame with `ffmpeg` and write it to `dest`.
///
/// Requires an `ffmpeg` binary (configurable via `config.ffmpeg_path`).
fn generate_video_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    let ffmpeg = config.ffmpeg_path.as_deref().unwrap_or("ffmpeg");
    let seek = config.video_seek_secs.to_string();
    // NOTE(review): `scale=W:H` forces exact output dimensions and may distort
    // the aspect ratio — confirm whether `scale=W:-1` was intended.
    let scale_filter = format!("scale={}:{}", config.size, config.size);
    let status = Command::new(ffmpeg)
        .args(["-ss", &seek, "-i"])
        .arg(source)
        .args(["-vframes", "1", "-vf", &scale_filter, "-y"])
        .arg(dest)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status()
        .map_err(|e| {
            PinakesError::MetadataExtraction(format!("ffmpeg not found or failed to execute: {e}"))
        })?;
    if status.success() {
        Ok(())
    } else {
        Err(PinakesError::MetadataExtraction(format!(
            "ffmpeg exited with status {status}"
        )))
    }
}
fn generate_pdf_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
// Use pdftoppm to render first page, then resize with image crate
let temp_prefix = dest.with_extension("tmp");
let status = Command::new("pdftoppm")
.args(["-jpeg", "-f", "1", "-l", "1", "-singlefile"])
.arg(source)
.arg(&temp_prefix)
.stdout(std::process::Stdio::null())
.stderr(std::process::Stdio::null())
.status()
.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"pdftoppm not found or failed to execute: {e}"
))
})?;
if !status.success() {
return Err(PinakesError::MetadataExtraction(format!(
"pdftoppm exited with status {}",
status
)));
}
// pdftoppm outputs <prefix>.jpg
let rendered = temp_prefix.with_extension("jpg");
if rendered.exists() {
// Resize to thumbnail size
let img = image::open(&rendered)
.map_err(|e| PinakesError::MetadataExtraction(format!("pdf thumbnail open: {e}")))?;
let thumb = img.thumbnail(config.size, config.size);
let mut output = std::fs::File::create(dest)?;
let encoder =
image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, config.quality);
thumb
.write_with_encoder(encoder)
.map_err(|e| PinakesError::MetadataExtraction(format!("pdf thumbnail encode: {e}")))?;
let _ = std::fs::remove_file(&rendered);
Ok(())
} else {
Err(PinakesError::MetadataExtraction(
"pdftoppm did not produce output".to_string(),
))
}
}
/// Extract the EPUB cover image (if any) and write a JPEG thumbnail to `dest`.
fn generate_epub_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    let mut doc = epub::doc::EpubDoc::new(source)
        .map_err(|e| PinakesError::MetadataExtraction(format!("epub open: {e}")))?;
    // Prefer the declared cover; fall back to conventionally named resources.
    let mut cover = doc.get_cover().map(|(bytes, _mime)| bytes);
    if cover.is_none() {
        cover = doc.get_resource("cover-image").map(|(bytes, _)| bytes);
    }
    if cover.is_none() {
        cover = doc.get_resource("cover").map(|(bytes, _)| bytes);
    }
    let bytes = match cover {
        Some(bytes) => bytes,
        None => {
            return Err(PinakesError::MetadataExtraction(
                "no cover image found in epub".to_string(),
            ));
        }
    };
    let img = image::load_from_memory(&bytes)
        .map_err(|e| PinakesError::MetadataExtraction(format!("epub cover decode: {e}")))?;
    let thumb = img.thumbnail(config.size, config.size);
    let mut output = std::fs::File::create(dest)?;
    thumb
        .write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality(
            &mut output,
            config.quality,
        ))
        .map_err(|e| PinakesError::MetadataExtraction(format!("epub thumbnail encode: {e}")))?;
    Ok(())
}
/// Extract the embedded preview from a camera RAW file via `dcraw` and write a
/// JPEG thumbnail to `dest`.
///
/// Requires the `dcraw` binary on `PATH`.
fn generate_raw_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    // Try dcraw to extract embedded JPEG preview, then resize.
    // `-e` extracts the embedded thumbnail, `-c` writes it to stdout, which is
    // redirected into the temp file via the child's stdout handle.
    let temp_ppm = dest.with_extension("ppm");
    let status = Command::new("dcraw")
        .args(["-e", "-c"])
        .arg(source)
        .stdout(std::fs::File::create(&temp_ppm).map_err(|e| {
            PinakesError::MetadataExtraction(format!("failed to create temp file: {e}"))
        })?)
        .stderr(std::process::Stdio::null())
        .status()
        .map_err(|e| PinakesError::MetadataExtraction(format!("dcraw not found or failed: {e}")))?;
    if !status.success() {
        let _ = std::fs::remove_file(&temp_ppm);
        return Err(PinakesError::MetadataExtraction(format!(
            "dcraw exited with status {}",
            status
        )));
    }
    // The extracted preview is typically a JPEG — try loading it.
    // NOTE(review): the temp file carries a ".ppm" extension but usually holds
    // JPEG bytes; this relies on `image::open` falling back to content-based
    // format detection — confirm the pinned `image` version guarantees this.
    if temp_ppm.exists() {
        let result = image::open(&temp_ppm);
        let _ = std::fs::remove_file(&temp_ppm);
        let img = result
            .map_err(|e| PinakesError::MetadataExtraction(format!("raw preview decode: {e}")))?;
        let thumb = img.thumbnail(config.size, config.size);
        let mut output = std::fs::File::create(dest)?;
        let encoder =
            image::codecs::jpeg::JpegEncoder::new_with_quality(&mut output, config.quality);
        thumb
            .write_with_encoder(encoder)
            .map_err(|e| PinakesError::MetadataExtraction(format!("raw thumbnail encode: {e}")))?;
        Ok(())
    } else {
        Err(PinakesError::MetadataExtraction(
            "dcraw did not produce output".to_string(),
        ))
    }
}
/// Convert a HEIC/HEIF file to JPEG via `heif-convert`, then resize it to a
/// thumbnail at `dest`.
///
/// Requires the `heif-convert` binary (libheif tools) on `PATH`.
fn generate_heic_thumbnail(source: &Path, dest: &Path, config: &ThumbnailConfig) -> Result<()> {
    let temp_jpg = dest.with_extension("tmp.jpg");
    let status = Command::new("heif-convert")
        .arg(source)
        .arg(&temp_jpg)
        .stdout(std::process::Stdio::null())
        .stderr(std::process::Stdio::null())
        .status()
        .map_err(|e| {
            PinakesError::MetadataExtraction(format!("heif-convert not found or failed: {e}"))
        })?;
    if !status.success() {
        let _ = std::fs::remove_file(&temp_jpg);
        return Err(PinakesError::MetadataExtraction(format!(
            "heif-convert exited with status {status}"
        )));
    }
    if !temp_jpg.exists() {
        return Err(PinakesError::MetadataExtraction(
            "heif-convert did not produce output".to_string(),
        ));
    }
    // Load the converted JPEG, then delete the temp file regardless of outcome.
    let loaded = image::open(&temp_jpg);
    let _ = std::fs::remove_file(&temp_jpg);
    let img = loaded.map_err(|e| PinakesError::MetadataExtraction(format!("heic decode: {e}")))?;
    let thumb = img.thumbnail(config.size, config.size);
    let mut output = std::fs::File::create(dest)?;
    thumb
        .write_with_encoder(image::codecs::jpeg::JpegEncoder::new_with_quality(
            &mut output,
            config.quality,
        ))
        .map_err(|e| PinakesError::MetadataExtraction(format!("heic thumbnail encode: {e}")))?;
    Ok(())
}
/// Returns the default thumbnail directory under the data dir.
pub fn default_thumbnail_dir() -> PathBuf {
    let data_dir = crate::config::Config::default_data_dir();
    data_dir.join("thumbnails")
}

View file

@ -0,0 +1,414 @@
use std::collections::HashMap;
use std::sync::Arc;
use pinakes_core::model::*;
use pinakes_core::storage::StorageBackend;
use pinakes_core::storage::sqlite::SqliteBackend;
/// Build a fresh in-memory SQLite backend with all migrations applied.
async fn setup() -> Arc<SqliteBackend> {
    let backend = Arc::new(SqliteBackend::in_memory().expect("in-memory SQLite"));
    backend.run_migrations().await.expect("migrations");
    backend
}
#[tokio::test]
/// Full insert → get → get-by-hash → update → list → delete cycle for media items.
async fn test_media_crud() {
    let storage = setup().await;
    let now = chrono::Utc::now();
    let id = MediaId::new();
    // Minimal plain-text fixture used throughout this test.
    let item = MediaItem {
        id,
        path: "/tmp/test.txt".into(),
        file_name: "test.txt".to_string(),
        media_type: pinakes_core::media_type::MediaType::PlainText,
        content_hash: ContentHash::new("abc123".to_string()),
        file_size: 100,
        title: Some("Test Title".to_string()),
        artist: None,
        album: None,
        genre: None,
        year: Some(2024),
        duration_secs: None,
        description: Some("A test file".to_string()),
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    // Insert
    storage.insert_media(&item).await.unwrap();
    // Get: round-trip must preserve the stored fields.
    let fetched = storage.get_media(id).await.unwrap();
    assert_eq!(fetched.id, id);
    assert_eq!(fetched.title.as_deref(), Some("Test Title"));
    assert_eq!(fetched.file_size, 100);
    // Get by hash: lookup by content hash must resolve to the same item.
    let by_hash = storage
        .get_media_by_hash(&ContentHash::new("abc123".into()))
        .await
        .unwrap();
    assert!(by_hash.is_some());
    assert_eq!(by_hash.unwrap().id, id);
    // Update: changed title must survive a re-fetch.
    let mut updated = fetched;
    updated.title = Some("Updated Title".to_string());
    storage.update_media(&updated).await.unwrap();
    let re_fetched = storage.get_media(id).await.unwrap();
    assert_eq!(re_fetched.title.as_deref(), Some("Updated Title"));
    // List
    let list = storage.list_media(&Pagination::default()).await.unwrap();
    assert_eq!(list.len(), 1);
    // Delete: subsequent lookups must fail.
    storage.delete_media(id).await.unwrap();
    let result = storage.get_media(id).await;
    assert!(result.is_err());
}
#[tokio::test]
/// Tag hierarchy: create, list, descendants, attach/detach to media, delete.
async fn test_tags() {
    let storage = setup().await;
    // Create tags: "Rock" is nested under "Music".
    let parent = storage.create_tag("Music", None).await.unwrap();
    let child = storage.create_tag("Rock", Some(parent.id)).await.unwrap();
    assert_eq!(parent.name, "Music");
    assert_eq!(child.parent_id, Some(parent.id));
    // List tags
    let tags = storage.list_tags().await.unwrap();
    assert_eq!(tags.len(), 2);
    // Get descendants: the child must appear under the parent.
    let descendants = storage.get_tag_descendants(parent.id).await.unwrap();
    assert!(descendants.iter().any(|t| t.name == "Rock"));
    // Tag media: attach the parent tag to a fixture item.
    let now = chrono::Utc::now();
    let id = MediaId::new();
    let item = MediaItem {
        id,
        path: "/tmp/song.mp3".into(),
        file_name: "song.mp3".to_string(),
        media_type: pinakes_core::media_type::MediaType::Mp3,
        content_hash: ContentHash::new("hash1".to_string()),
        file_size: 5000,
        title: Some("Test Song".to_string()),
        artist: Some("Test Artist".to_string()),
        album: None,
        genre: None,
        year: None,
        duration_secs: Some(180.0),
        description: None,
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await.unwrap();
    storage.tag_media(id, parent.id).await.unwrap();
    let media_tags = storage.get_media_tags(id).await.unwrap();
    assert_eq!(media_tags.len(), 1);
    assert_eq!(media_tags[0].name, "Music");
    // Untag
    storage.untag_media(id, parent.id).await.unwrap();
    let media_tags = storage.get_media_tags(id).await.unwrap();
    assert_eq!(media_tags.len(), 0);
    // Delete tag: only the parent should remain afterwards.
    storage.delete_tag(child.id).await.unwrap();
    let tags = storage.list_tags().await.unwrap();
    assert_eq!(tags.len(), 1);
}
#[tokio::test]
/// Collections: create, add/remove members, list, delete.
async fn test_collections() {
    let storage = setup().await;
    let col = storage
        .create_collection("Favorites", CollectionKind::Manual, Some("My faves"), None)
        .await
        .unwrap();
    assert_eq!(col.name, "Favorites");
    assert_eq!(col.kind, CollectionKind::Manual);
    // Fixture item to place into the collection.
    let now = chrono::Utc::now();
    let id = MediaId::new();
    let item = MediaItem {
        id,
        path: "/tmp/doc.pdf".into(),
        file_name: "doc.pdf".to_string(),
        media_type: pinakes_core::media_type::MediaType::Pdf,
        content_hash: ContentHash::new("pdfhash".to_string()),
        file_size: 10000,
        title: None,
        artist: None,
        album: None,
        genre: None,
        year: None,
        duration_secs: None,
        description: None,
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await.unwrap();
    // Membership: add at position 0, verify, then remove and verify.
    storage.add_to_collection(col.id, id, 0).await.unwrap();
    let members = storage.get_collection_members(col.id).await.unwrap();
    assert_eq!(members.len(), 1);
    assert_eq!(members[0].id, id);
    storage.remove_from_collection(col.id, id).await.unwrap();
    let members = storage.get_collection_members(col.id).await.unwrap();
    assert_eq!(members.len(), 0);
    // List collections
    let cols = storage.list_collections().await.unwrap();
    assert_eq!(cols.len(), 1);
    storage.delete_collection(col.id).await.unwrap();
    let cols = storage.list_collections().await.unwrap();
    assert_eq!(cols.len(), 0);
}
#[tokio::test]
/// Custom fields: set, read back (directly and via `get_media`), delete.
async fn test_custom_fields() {
    let storage = setup().await;
    let now = chrono::Utc::now();
    let id = MediaId::new();
    // Fixture item the custom field is attached to.
    let item = MediaItem {
        id,
        path: "/tmp/test.md".into(),
        file_name: "test.md".to_string(),
        media_type: pinakes_core::media_type::MediaType::Markdown,
        content_hash: ContentHash::new("mdhash".to_string()),
        file_size: 500,
        title: None,
        artist: None,
        album: None,
        genre: None,
        year: None,
        duration_secs: None,
        description: None,
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await.unwrap();
    // Set custom field
    let field = CustomField {
        field_type: CustomFieldType::Text,
        value: "important".to_string(),
    };
    storage
        .set_custom_field(id, "priority", &field)
        .await
        .unwrap();
    // Get custom fields
    let fields = storage.get_custom_fields(id).await.unwrap();
    assert_eq!(fields.len(), 1);
    assert_eq!(fields["priority"].value, "important");
    // Verify custom fields are loaded with get_media
    let media = storage.get_media(id).await.unwrap();
    assert_eq!(media.custom_fields.len(), 1);
    assert_eq!(media.custom_fields["priority"].value, "important");
    // Delete custom field
    storage.delete_custom_field(id, "priority").await.unwrap();
    let fields = storage.get_custom_fields(id).await.unwrap();
    assert_eq!(fields.len(), 0);
}
#[tokio::test]
/// Search: full-text title match and `type:` filter over a small fixture set.
async fn test_search() {
    let storage = setup().await;
    let now = chrono::Utc::now();
    // Insert a few items; an empty artist string maps to `None`.
    for (i, (name, title, artist)) in [
        ("song1.mp3", "Bohemian Rhapsody", "Queen"),
        ("song2.mp3", "Stairway to Heaven", "Led Zeppelin"),
        ("doc.pdf", "Rust Programming", ""),
    ]
    .iter()
    .enumerate()
    {
        let item = MediaItem {
            id: MediaId::new(),
            path: format!("/tmp/{name}").into(),
            file_name: name.to_string(),
            media_type: pinakes_core::media_type::MediaType::from_path(std::path::Path::new(name))
                .unwrap(),
            content_hash: ContentHash::new(format!("hash{i}")),
            file_size: 1000 * (i as u64 + 1),
            title: Some(title.to_string()),
            artist: if artist.is_empty() {
                None
            } else {
                Some(artist.to_string())
            },
            album: None,
            genre: None,
            year: None,
            duration_secs: None,
            description: None,
            thumbnail_path: None,
            custom_fields: HashMap::new(),
            created_at: now,
            updated_at: now,
        };
        storage.insert_media(&item).await.unwrap();
    }
    // Full-text search: a title word should match exactly one item.
    let request = pinakes_core::search::SearchRequest {
        query: pinakes_core::search::parse_search_query("Bohemian").unwrap(),
        sort: pinakes_core::search::SortOrder::Relevance,
        pagination: Pagination::new(0, 50, None),
    };
    let results = storage.search(&request).await.unwrap();
    assert_eq!(results.total_count, 1);
    assert_eq!(results.items[0].title.as_deref(), Some("Bohemian Rhapsody"));
    // Type filter: `type:pdf` should match only the PDF fixture.
    let request = pinakes_core::search::SearchRequest {
        query: pinakes_core::search::parse_search_query("type:pdf").unwrap(),
        sort: pinakes_core::search::SortOrder::Relevance,
        pagination: Pagination::new(0, 50, None),
    };
    let results = storage.search(&request).await.unwrap();
    assert_eq!(results.total_count, 1);
    assert_eq!(results.items[0].file_name, "doc.pdf");
}
#[tokio::test]
/// A recorded audit entry must be returned by a paginated listing.
async fn test_audit_log() {
    let storage = setup().await;
    let entry = AuditEntry {
        id: uuid::Uuid::now_v7(),
        media_id: None,
        action: AuditAction::Scanned,
        details: Some("test scan".to_string()),
        timestamp: chrono::Utc::now(),
    };
    storage.record_audit(&entry).await.unwrap();
    let page = Pagination::new(0, 10, None);
    let entries = storage.list_audit_entries(None, &page).await.unwrap();
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].action, AuditAction::Scanned);
}
#[tokio::test]
/// Importing the same file twice must dedupe: the second import returns the
/// original media id and is flagged as a duplicate.
async fn test_import_with_dedup() {
    let storage = setup().await as pinakes_core::storage::DynStorageBackend;
    // Write a small file to import.
    let dir = tempfile::tempdir().unwrap();
    let file_path = dir.path().join("test.txt");
    std::fs::write(&file_path, "hello world").unwrap();
    let first = pinakes_core::import::import_file(&storage, &file_path)
        .await
        .unwrap();
    let second = pinakes_core::import::import_file(&storage, &file_path)
        .await
        .unwrap();
    assert!(!first.was_duplicate);
    assert!(second.was_duplicate);
    assert_eq!(first.media_id, second.media_id);
}
#[tokio::test]
/// Root directories can be added, listed, and removed individually.
async fn test_root_dirs() {
    let storage = setup().await;
    for dir in ["/tmp/music", "/tmp/docs"] {
        storage.add_root_dir(dir.into()).await.unwrap();
    }
    assert_eq!(storage.list_root_dirs().await.unwrap().len(), 2);
    storage
        .remove_root_dir(std::path::Path::new("/tmp/music"))
        .await
        .unwrap();
    let remaining = storage.list_root_dirs().await.unwrap();
    assert_eq!(remaining.len(), 1);
    assert_eq!(remaining[0], std::path::PathBuf::from("/tmp/docs"));
}
#[tokio::test]
/// A brand-new library must report all-zero / all-empty statistics.
async fn test_library_statistics_empty() {
    let storage = setup().await;
    let stats = storage.library_statistics().await.unwrap();
    assert_eq!(stats.total_media, 0);
    assert_eq!(stats.total_size_bytes, 0);
    assert_eq!(stats.avg_file_size_bytes, 0);
    assert_eq!(stats.total_tags, 0);
    assert_eq!(stats.total_collections, 0);
    assert_eq!(stats.total_duplicates, 0);
    assert!(stats.media_by_type.is_empty());
    assert!(stats.storage_by_type.is_empty());
    assert!(stats.top_tags.is_empty());
    assert!(stats.top_collections.is_empty());
    assert!(stats.newest_item.is_none());
    assert!(stats.oldest_item.is_none());
}
#[tokio::test]
/// After inserting one item, statistics must reflect its count and size.
async fn test_library_statistics_with_data() {
    let storage = setup().await;
    let now = chrono::Utc::now();
    // Single MP3 fixture; size/avg checks below depend on `file_size: 5000`.
    let item = MediaItem {
        id: MediaId::new(),
        path: "/tmp/stats_test.mp3".into(),
        file_name: "stats_test.mp3".to_string(),
        media_type: pinakes_core::media_type::MediaType::Mp3,
        content_hash: ContentHash::new("stats_hash".to_string()),
        file_size: 5000,
        title: Some("Stats Song".to_string()),
        artist: None,
        album: None,
        genre: None,
        year: None,
        duration_secs: Some(120.0),
        description: None,
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        created_at: now,
        updated_at: now,
    };
    storage.insert_media(&item).await.unwrap();
    let stats = storage.library_statistics().await.unwrap();
    assert_eq!(stats.total_media, 1);
    assert_eq!(stats.total_size_bytes, 5000);
    assert_eq!(stats.avg_file_size_bytes, 5000);
    assert!(!stats.media_by_type.is_empty());
    assert!(stats.newest_item.is_some());
    assert!(stats.oldest_item.is_some());
}