pinakes: import in parallel; various UI improvements
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I1eb47cd79cd4145c56af966f6756fe1d6a6a6964
This commit is contained in:
parent
278bcaa4b0
commit
116fe7b059
42 changed files with 4316 additions and 316 deletions
|
|
@ -1,91 +1,501 @@
|
|||
use std::collections::HashMap;
|
||||
//! High-performance caching layer using moka.
|
||||
//!
|
||||
//! This module provides a comprehensive caching solution with:
|
||||
//! - LRU eviction with configurable size limits
|
||||
//! - TTL-based expiration
|
||||
//! - Smart cache invalidation
|
||||
//! - Metrics tracking (hit rate, size, evictions)
|
||||
//! - Specialized caches for different data types
|
||||
|
||||
use std::hash::Hash;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio::sync::RwLock;
|
||||
use moka::future::Cache as MokaCache;
|
||||
|
||||
struct CacheEntry<V> {
|
||||
value: V,
|
||||
inserted_at: Instant,
|
||||
use crate::model::MediaId;
|
||||
|
||||
/// Cache statistics for monitoring and debugging.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Number of lookups answered from the cache.
    pub hits: u64,
    /// Number of lookups that found no entry.
    pub misses: u64,
    /// Number of evicted entries. Not populated by the moka-backed cache,
    /// which does not expose eviction counts directly.
    pub evictions: u64,
    /// Number of entries currently held.
    pub size: u64,
}

impl CacheStats {
    /// Fraction of lookups that were hits, in `[0.0, 1.0]`.
    ///
    /// Returns `0.0` when no lookups have been recorded, avoiding a
    /// division by zero.
    pub fn hit_rate(&self) -> f64 {
        let total = self.hits + self.misses;
        if total == 0 {
            0.0
        } else {
            self.hits as f64 / total as f64
        }
    }
}
|
||||
|
||||
/// Atomic counters for cache metrics.
///
/// `AtomicU64::default()` is zero, so `#[derive(Default)]` replaces the
/// previous hand-written `Default` impl.
#[derive(Default)]
struct CacheMetrics {
    /// Lookups served from the cache.
    hits: AtomicU64,
    /// Lookups that found nothing.
    misses: AtomicU64,
}

impl CacheMetrics {
    /// Record a cache hit.
    ///
    /// Relaxed ordering is sufficient: these are independent statistics
    /// counters, not synchronization points.
    fn record_hit(&self) {
        self.hits.fetch_add(1, Ordering::Relaxed);
    }

    /// Record a cache miss.
    fn record_miss(&self) {
        self.misses.fetch_add(1, Ordering::Relaxed);
    }

    /// Snapshot of `(hits, misses)`.
    fn stats(&self) -> (u64, u64) {
        (
            self.hits.load(Ordering::Relaxed),
            self.misses.load(Ordering::Relaxed),
        )
    }
}
|
||||
|
||||
/// A high-performance cache with LRU eviction and TTL support.
|
||||
pub struct Cache<K, V>
|
||||
where
|
||||
K: Hash + Eq + Send + Sync + 'static,
|
||||
V: Clone + Send + Sync + 'static,
|
||||
{
|
||||
inner: MokaCache<K, V>,
|
||||
metrics: Arc<CacheMetrics>,
|
||||
}
|
||||
|
||||
impl<K, V> Cache<K, V>
|
||||
where
|
||||
K: Eq + Hash + Clone + Send + Sync + 'static,
|
||||
K: Hash + Eq + Send + Sync + 'static,
|
||||
V: Clone + Send + Sync + 'static,
|
||||
{
|
||||
pub fn new(ttl: Duration) -> Self {
|
||||
let cache = Self {
|
||||
entries: Arc::new(RwLock::new(HashMap::new())),
|
||||
ttl,
|
||||
};
|
||||
/// Create a new cache with the specified TTL and maximum capacity.
|
||||
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
|
||||
let inner = MokaCache::builder()
|
||||
.time_to_live(ttl)
|
||||
.max_capacity(max_capacity)
|
||||
.build();
|
||||
|
||||
// Spawn periodic eviction task
|
||||
let entries = cache.entries.clone();
|
||||
let ttl = cache.ttl;
|
||||
tokio::spawn(async move {
|
||||
let mut interval = tokio::time::interval(ttl);
|
||||
loop {
|
||||
interval.tick().await;
|
||||
let now = Instant::now();
|
||||
let mut map = entries.write().await;
|
||||
map.retain(|_, entry| now.duration_since(entry.inserted_at) < ttl);
|
||||
}
|
||||
});
|
||||
|
||||
cache
|
||||
}
|
||||
|
||||
pub async fn get(&self, key: &K) -> Option<V> {
|
||||
let map = self.entries.read().await;
|
||||
if let Some(entry) = map.get(key)
|
||||
&& entry.inserted_at.elapsed() < self.ttl
|
||||
{
|
||||
return Some(entry.value.clone());
|
||||
Self {
|
||||
inner,
|
||||
metrics: Arc::new(CacheMetrics::default()),
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Create a new cache with TTL, max capacity, and time-to-idle.
|
||||
pub fn new_with_idle(ttl: Duration, tti: Duration, max_capacity: u64) -> Self {
|
||||
let inner = MokaCache::builder()
|
||||
.time_to_live(ttl)
|
||||
.time_to_idle(tti)
|
||||
.max_capacity(max_capacity)
|
||||
.build();
|
||||
|
||||
Self {
|
||||
inner,
|
||||
metrics: Arc::new(CacheMetrics::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a value from the cache.
|
||||
pub async fn get(&self, key: &K) -> Option<V> {
|
||||
match self.inner.get(key).await {
|
||||
Some(value) => {
|
||||
self.metrics.record_hit();
|
||||
Some(value)
|
||||
}
|
||||
None => {
|
||||
self.metrics.record_miss();
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a value into the cache.
|
||||
pub async fn insert(&self, key: K, value: V) {
|
||||
let mut map = self.entries.write().await;
|
||||
map.insert(
|
||||
key,
|
||||
CacheEntry {
|
||||
value,
|
||||
inserted_at: Instant::now(),
|
||||
},
|
||||
);
|
||||
self.inner.insert(key, value).await;
|
||||
}
|
||||
|
||||
/// Remove a specific key from the cache.
|
||||
pub async fn invalidate(&self, key: &K) {
|
||||
let mut map = self.entries.write().await;
|
||||
map.remove(key);
|
||||
self.inner.invalidate(key).await;
|
||||
}
|
||||
|
||||
/// Clear all entries from the cache.
|
||||
pub async fn invalidate_all(&self) {
|
||||
self.inner.invalidate_all();
|
||||
// Run pending tasks to ensure immediate invalidation
|
||||
self.inner.run_pending_tasks().await;
|
||||
}
|
||||
|
||||
/// Get the current number of entries in the cache.
|
||||
pub fn entry_count(&self) -> u64 {
|
||||
self.inner.entry_count()
|
||||
}
|
||||
|
||||
/// Get cache statistics.
|
||||
pub fn stats(&self) -> CacheStats {
|
||||
let (hits, misses) = self.metrics.stats();
|
||||
CacheStats {
|
||||
hits,
|
||||
misses,
|
||||
evictions: 0, // Moka doesn't expose this directly
|
||||
size: self.entry_count(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Specialized cache for search query results.
|
||||
pub struct QueryCache {
|
||||
/// Cache keyed by (query_hash, offset, limit)
|
||||
inner: Cache<String, String>,
|
||||
}
|
||||
|
||||
impl QueryCache {
|
||||
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
|
||||
Self {
|
||||
inner: Cache::new(ttl, max_capacity),
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a cache key from query parameters.
|
||||
fn make_key(query: &str, offset: u64, limit: u64, sort: Option<&str>) -> String {
|
||||
use std::hash::{DefaultHasher, Hasher};
|
||||
let mut hasher = DefaultHasher::new();
|
||||
hasher.write(query.as_bytes());
|
||||
hasher.write(&offset.to_le_bytes());
|
||||
hasher.write(&limit.to_le_bytes());
|
||||
if let Some(s) = sort {
|
||||
hasher.write(s.as_bytes());
|
||||
}
|
||||
format!("q:{:016x}", hasher.finish())
|
||||
}
|
||||
|
||||
pub async fn get(
|
||||
&self,
|
||||
query: &str,
|
||||
offset: u64,
|
||||
limit: u64,
|
||||
sort: Option<&str>,
|
||||
) -> Option<String> {
|
||||
let key = Self::make_key(query, offset, limit, sort);
|
||||
self.inner.get(&key).await
|
||||
}
|
||||
|
||||
pub async fn insert(
|
||||
&self,
|
||||
query: &str,
|
||||
offset: u64,
|
||||
limit: u64,
|
||||
sort: Option<&str>,
|
||||
result: String,
|
||||
) {
|
||||
let key = Self::make_key(query, offset, limit, sort);
|
||||
self.inner.insert(key, result).await;
|
||||
}
|
||||
|
||||
pub async fn invalidate_all(&self) {
|
||||
let mut map = self.entries.write().await;
|
||||
map.clear();
|
||||
self.inner.invalidate_all().await;
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> CacheStats {
|
||||
self.inner.stats()
|
||||
}
|
||||
}
|
||||
|
||||
/// Application-level cache layer wrapping multiple caches for different data types.
|
||||
pub struct CacheLayer {
|
||||
/// Cache for serialized API responses, keyed by request path + query string.
|
||||
pub responses: Cache<String, String>,
|
||||
/// Specialized cache for metadata extraction results.
|
||||
pub struct MetadataCache {
|
||||
/// Cache keyed by content hash
|
||||
inner: Cache<String, String>,
|
||||
}
|
||||
|
||||
impl CacheLayer {
|
||||
pub fn new(ttl_secs: u64) -> Self {
|
||||
let ttl = Duration::from_secs(ttl_secs);
|
||||
impl MetadataCache {
|
||||
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
|
||||
Self {
|
||||
responses: Cache::new(ttl),
|
||||
inner: Cache::new(ttl, max_capacity),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get(&self, content_hash: &str) -> Option<String> {
|
||||
self.inner.get(&content_hash.to_string()).await
|
||||
}
|
||||
|
||||
pub async fn insert(&self, content_hash: &str, metadata_json: String) {
|
||||
self.inner
|
||||
.insert(content_hash.to_string(), metadata_json)
|
||||
.await;
|
||||
}
|
||||
|
||||
pub async fn invalidate(&self, content_hash: &str) {
|
||||
self.inner.invalidate(&content_hash.to_string()).await;
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> CacheStats {
|
||||
self.inner.stats()
|
||||
}
|
||||
}
|
||||
|
||||
/// Specialized cache for media item data.
|
||||
pub struct MediaCache {
|
||||
inner: Cache<String, String>,
|
||||
}
|
||||
|
||||
impl MediaCache {
|
||||
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
|
||||
Self {
|
||||
inner: Cache::new(ttl, max_capacity),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get(&self, media_id: MediaId) -> Option<String> {
|
||||
self.inner.get(&media_id.to_string()).await
|
||||
}
|
||||
|
||||
pub async fn insert(&self, media_id: MediaId, item_json: String) {
|
||||
self.inner.insert(media_id.to_string(), item_json).await;
|
||||
}
|
||||
|
||||
pub async fn invalidate(&self, media_id: MediaId) {
|
||||
self.inner.invalidate(&media_id.to_string()).await;
|
||||
}
|
||||
|
||||
pub async fn invalidate_all(&self) {
|
||||
self.inner.invalidate_all().await;
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> CacheStats {
|
||||
self.inner.stats()
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for the cache layer.
///
/// Each specialized cache gets its own TTL and capacity bound.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// TTL for response cache in seconds
    pub response_ttl_secs: u64,
    /// Maximum number of cached responses
    pub response_max_entries: u64,
    /// TTL for query cache in seconds
    pub query_ttl_secs: u64,
    /// Maximum number of cached query results
    pub query_max_entries: u64,
    /// TTL for metadata cache in seconds
    pub metadata_ttl_secs: u64,
    /// Maximum number of cached metadata entries
    pub metadata_max_entries: u64,
    /// TTL for media cache in seconds
    pub media_ttl_secs: u64,
    /// Maximum number of cached media items
    pub media_max_entries: u64,
}

impl Default for CacheConfig {
    /// Conservative defaults: responses expire quickly (1 min), queries and
    /// media after 5 min, metadata after an hour (keyed by content hash, so
    /// it rarely goes stale).
    fn default() -> Self {
        Self {
            response_ttl_secs: 60,
            response_max_entries: 1000,
            query_ttl_secs: 300,
            query_max_entries: 500,
            metadata_ttl_secs: 3600,
            metadata_max_entries: 10000,
            media_ttl_secs: 300,
            media_max_entries: 5000,
        }
    }
}
|
||||
|
||||
/// Application-level cache layer wrapping multiple specialized caches.
|
||||
pub struct CacheLayer {
|
||||
/// Cache for serialized API responses
|
||||
pub responses: Cache<String, String>,
|
||||
/// Cache for search query results
|
||||
pub queries: QueryCache,
|
||||
/// Cache for metadata extraction results
|
||||
pub metadata: MetadataCache,
|
||||
/// Cache for individual media items
|
||||
pub media: MediaCache,
|
||||
/// Configuration
|
||||
config: CacheConfig,
|
||||
}
|
||||
|
||||
impl CacheLayer {
|
||||
/// Create a new cache layer with the specified TTL (using defaults for other settings).
|
||||
pub fn new(ttl_secs: u64) -> Self {
|
||||
let config = CacheConfig {
|
||||
response_ttl_secs: ttl_secs,
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config)
|
||||
}
|
||||
|
||||
/// Create a new cache layer with full configuration.
|
||||
pub fn with_config(config: CacheConfig) -> Self {
|
||||
Self {
|
||||
responses: Cache::new(
|
||||
Duration::from_secs(config.response_ttl_secs),
|
||||
config.response_max_entries,
|
||||
),
|
||||
queries: QueryCache::new(
|
||||
Duration::from_secs(config.query_ttl_secs),
|
||||
config.query_max_entries,
|
||||
),
|
||||
metadata: MetadataCache::new(
|
||||
Duration::from_secs(config.metadata_ttl_secs),
|
||||
config.metadata_max_entries,
|
||||
),
|
||||
media: MediaCache::new(
|
||||
Duration::from_secs(config.media_ttl_secs),
|
||||
config.media_max_entries,
|
||||
),
|
||||
config,
|
||||
}
|
||||
}
|
||||
|
||||
/// Invalidate all caches related to a media item update.
|
||||
pub async fn invalidate_for_media_update(&self, media_id: MediaId) {
|
||||
self.media.invalidate(media_id).await;
|
||||
// Query cache should be invalidated as search results may change
|
||||
self.queries.invalidate_all().await;
|
||||
}
|
||||
|
||||
/// Invalidate all caches related to a media item deletion.
|
||||
pub async fn invalidate_for_media_delete(&self, media_id: MediaId) {
|
||||
self.media.invalidate(media_id).await;
|
||||
self.queries.invalidate_all().await;
|
||||
}
|
||||
|
||||
/// Invalidate all caches (useful after bulk imports or major changes).
|
||||
pub async fn invalidate_all(&self) {
|
||||
self.responses.invalidate_all().await;
|
||||
self.queries.invalidate_all().await;
|
||||
self.media.invalidate_all().await;
|
||||
// Keep metadata cache as it's keyed by content hash which doesn't change
|
||||
}
|
||||
|
||||
/// Get aggregated statistics for all caches.
|
||||
pub fn stats(&self) -> CacheLayerStats {
|
||||
CacheLayerStats {
|
||||
responses: self.responses.stats(),
|
||||
queries: self.queries.stats(),
|
||||
metadata: self.metadata.stats(),
|
||||
media: self.media.stats(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the current configuration.
|
||||
pub fn config(&self) -> &CacheConfig {
|
||||
&self.config
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregated statistics for the entire cache layer.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CacheLayerStats {
|
||||
pub responses: CacheStats,
|
||||
pub queries: CacheStats,
|
||||
pub metadata: CacheStats,
|
||||
pub media: CacheStats,
|
||||
}
|
||||
|
||||
impl CacheLayerStats {
|
||||
/// Get the overall hit rate across all caches.
|
||||
pub fn overall_hit_rate(&self) -> f64 {
|
||||
let total_hits =
|
||||
self.responses.hits + self.queries.hits + self.metadata.hits + self.media.hits;
|
||||
let total_requests = total_hits
|
||||
+ self.responses.misses
|
||||
+ self.queries.misses
|
||||
+ self.metadata.misses
|
||||
+ self.media.misses;
|
||||
|
||||
if total_requests == 0 {
|
||||
0.0
|
||||
} else {
|
||||
total_hits as f64 / total_requests as f64
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the total number of entries across all caches.
|
||||
pub fn total_entries(&self) -> u64 {
|
||||
self.responses.size + self.queries.size + self.metadata.size + self.media.size
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_cache_basic_operations() {
        let cache: Cache<String, String> = Cache::new(Duration::from_secs(60), 100);

        // Round-trip: an inserted value can be read back.
        cache.insert("key1".to_string(), "value1".to_string()).await;
        assert_eq!(
            cache.get(&"key1".to_string()).await,
            Some("value1".to_string())
        );

        // A key never inserted is a miss.
        assert_eq!(cache.get(&"key2".to_string()).await, None);

        // Invalidation removes the entry.
        cache.invalidate(&"key1".to_string()).await;
        assert_eq!(cache.get(&"key1".to_string()).await, None);
    }

    #[tokio::test]
    async fn test_cache_stats() {
        let cache: Cache<String, String> = Cache::new(Duration::from_secs(60), 100);

        cache.insert("key1".to_string(), "value1".to_string()).await;
        let _ = cache.get(&"key1".to_string()).await; // hit
        let _ = cache.get(&"key2".to_string()).await; // miss

        // One hit and one miss gives a 50% hit rate.
        let stats = cache.stats();
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);
        assert!((stats.hit_rate() - 0.5).abs() < 0.01);
    }

    #[tokio::test]
    async fn test_query_cache() {
        let cache = QueryCache::new(Duration::from_secs(60), 100);

        cache
            .insert("test query", 0, 10, Some("name"), "results".to_string())
            .await;
        // Same parameters hit the cached entry.
        assert_eq!(
            cache.get("test query", 0, 10, Some("name")).await,
            Some("results".to_string())
        );

        // Different parameters should miss
        assert_eq!(cache.get("test query", 10, 10, Some("name")).await, None);
    }

    #[tokio::test]
    async fn test_cache_layer() {
        let layer = CacheLayer::new(60);

        let media_id = MediaId::new();
        layer.media.insert(media_id, "{}".to_string()).await;
        assert!(layer.media.get(media_id).await.is_some());

        // Deleting a media item also clears its cached entry.
        layer.invalidate_for_media_delete(media_id).await;
        assert!(layer.media.get(media_id).await.is_none());
    }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue