pinakes: import in parallel; various UI improvements
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I1eb47cd79cd4145c56af966f6756fe1d6a6a6964
This commit is contained in:
parent
278bcaa4b0
commit
116fe7b059
42 changed files with 4316 additions and 316 deletions
|
|
@ -1,91 +1,501 @@
|
|||
use std::collections::HashMap;
|
||||
//! High-performance caching layer using moka.
|
||||
//!
|
||||
//! This module provides a comprehensive caching solution with:
|
||||
//! - LRU eviction with configurable size limits
|
||||
//! - TTL-based expiration
|
||||
//! - Smart cache invalidation
|
||||
//! - Metrics tracking (hit rate, size, evictions)
|
||||
//! - Specialized caches for different data types
|
||||
|
||||
use std::hash::Hash;
|
||||
use std::sync::Arc;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::sync::atomic::{AtomicU64, Ordering};
|
||||
use std::time::Duration;
|
||||
|
||||
use tokio::sync::RwLock;
|
||||
use moka::future::Cache as MokaCache;
|
||||
|
||||
struct CacheEntry<V> {
|
||||
value: V,
|
||||
inserted_at: Instant,
|
||||
use crate::model::MediaId;
|
||||
|
||||
/// Cache statistics for monitoring and debugging.
#[derive(Debug, Clone, Default)]
pub struct CacheStats {
    /// Number of lookups answered from the cache.
    pub hits: u64,
    /// Number of lookups that found no entry.
    pub misses: u64,
    /// Number of evicted entries. Not populated by the moka-backed cache,
    /// which does not expose eviction counts directly.
    pub evictions: u64,
    /// Number of entries currently held.
    pub size: u64,
}

impl CacheStats {
    /// Fraction of lookups that were hits, in `[0.0, 1.0]`.
    ///
    /// Returns `0.0` when no lookups have been recorded, avoiding a
    /// division by zero.
    pub fn hit_rate(&self) -> f64 {
        let total = self.hits + self.misses;
        if total == 0 {
            0.0
        } else {
            self.hits as f64 / total as f64
        }
    }
}
|
||||
|
||||
/// Atomic counters for cache metrics.
///
/// `AtomicU64::default()` is zero, so `#[derive(Default)]` replaces the
/// previous hand-written `Default` impl.
#[derive(Default)]
struct CacheMetrics {
    /// Lookups served from the cache.
    hits: AtomicU64,
    /// Lookups that found nothing.
    misses: AtomicU64,
}

impl CacheMetrics {
    /// Record a cache hit.
    ///
    /// Relaxed ordering is sufficient: these are independent statistics
    /// counters, not synchronization points.
    fn record_hit(&self) {
        self.hits.fetch_add(1, Ordering::Relaxed);
    }

    /// Record a cache miss.
    fn record_miss(&self) {
        self.misses.fetch_add(1, Ordering::Relaxed);
    }

    /// Snapshot of `(hits, misses)`.
    fn stats(&self) -> (u64, u64) {
        (
            self.hits.load(Ordering::Relaxed),
            self.misses.load(Ordering::Relaxed),
        )
    }
}
|
||||
|
||||
/// A high-performance cache with LRU eviction and TTL support.
|
||||
pub struct Cache<K, V>
|
||||
where
|
||||
K: Hash + Eq + Send + Sync + 'static,
|
||||
V: Clone + Send + Sync + 'static,
|
||||
{
|
||||
inner: MokaCache<K, V>,
|
||||
metrics: Arc<CacheMetrics>,
|
||||
}
|
||||
|
||||
impl<K, V> Cache<K, V>
|
||||
where
|
||||
K: Eq + Hash + Clone + Send + Sync + 'static,
|
||||
K: Hash + Eq + Send + Sync + 'static,
|
||||
V: Clone + Send + Sync + 'static,
|
||||
{
|
||||
pub fn new(ttl: Duration) -> Self {
|
||||
let cache = Self {
|
||||
entries: Arc::new(RwLock::new(HashMap::new())),
|
||||
ttl,
|
||||
};
|
||||
/// Create a new cache with the specified TTL and maximum capacity.
|
||||
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
|
||||
let inner = MokaCache::builder()
|
||||
.time_to_live(ttl)
|
||||
.max_capacity(max_capacity)
|
||||
.build();
|
||||
|
||||
// Spawn periodic eviction task
|
||||
let entries = cache.entries.clone();
|
||||
let ttl = cache.ttl;
|
||||
tokio::spawn(async move {
|
||||
let mut interval = tokio::time::interval(ttl);
|
||||
loop {
|
||||
interval.tick().await;
|
||||
let now = Instant::now();
|
||||
let mut map = entries.write().await;
|
||||
map.retain(|_, entry| now.duration_since(entry.inserted_at) < ttl);
|
||||
}
|
||||
});
|
||||
|
||||
cache
|
||||
}
|
||||
|
||||
pub async fn get(&self, key: &K) -> Option<V> {
|
||||
let map = self.entries.read().await;
|
||||
if let Some(entry) = map.get(key)
|
||||
&& entry.inserted_at.elapsed() < self.ttl
|
||||
{
|
||||
return Some(entry.value.clone());
|
||||
Self {
|
||||
inner,
|
||||
metrics: Arc::new(CacheMetrics::default()),
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Create a new cache with TTL, max capacity, and time-to-idle.
|
||||
pub fn new_with_idle(ttl: Duration, tti: Duration, max_capacity: u64) -> Self {
|
||||
let inner = MokaCache::builder()
|
||||
.time_to_live(ttl)
|
||||
.time_to_idle(tti)
|
||||
.max_capacity(max_capacity)
|
||||
.build();
|
||||
|
||||
Self {
|
||||
inner,
|
||||
metrics: Arc::new(CacheMetrics::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a value from the cache.
|
||||
pub async fn get(&self, key: &K) -> Option<V> {
|
||||
match self.inner.get(key).await {
|
||||
Some(value) => {
|
||||
self.metrics.record_hit();
|
||||
Some(value)
|
||||
}
|
||||
None => {
|
||||
self.metrics.record_miss();
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Insert a value into the cache.
|
||||
pub async fn insert(&self, key: K, value: V) {
|
||||
let mut map = self.entries.write().await;
|
||||
map.insert(
|
||||
key,
|
||||
CacheEntry {
|
||||
value,
|
||||
inserted_at: Instant::now(),
|
||||
},
|
||||
);
|
||||
self.inner.insert(key, value).await;
|
||||
}
|
||||
|
||||
/// Remove a specific key from the cache.
|
||||
pub async fn invalidate(&self, key: &K) {
|
||||
let mut map = self.entries.write().await;
|
||||
map.remove(key);
|
||||
self.inner.invalidate(key).await;
|
||||
}
|
||||
|
||||
/// Clear all entries from the cache.
|
||||
pub async fn invalidate_all(&self) {
|
||||
self.inner.invalidate_all();
|
||||
// Run pending tasks to ensure immediate invalidation
|
||||
self.inner.run_pending_tasks().await;
|
||||
}
|
||||
|
||||
/// Get the current number of entries in the cache.
|
||||
pub fn entry_count(&self) -> u64 {
|
||||
self.inner.entry_count()
|
||||
}
|
||||
|
||||
/// Get cache statistics.
|
||||
pub fn stats(&self) -> CacheStats {
|
||||
let (hits, misses) = self.metrics.stats();
|
||||
CacheStats {
|
||||
hits,
|
||||
misses,
|
||||
evictions: 0, // Moka doesn't expose this directly
|
||||
size: self.entry_count(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Specialized cache for search query results.
|
||||
pub struct QueryCache {
|
||||
/// Cache keyed by (query_hash, offset, limit)
|
||||
inner: Cache<String, String>,
|
||||
}
|
||||
|
||||
impl QueryCache {
|
||||
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
|
||||
Self {
|
||||
inner: Cache::new(ttl, max_capacity),
|
||||
}
|
||||
}
|
||||
|
||||
/// Generate a cache key from query parameters.
|
||||
fn make_key(query: &str, offset: u64, limit: u64, sort: Option<&str>) -> String {
|
||||
use std::hash::{DefaultHasher, Hasher};
|
||||
let mut hasher = DefaultHasher::new();
|
||||
hasher.write(query.as_bytes());
|
||||
hasher.write(&offset.to_le_bytes());
|
||||
hasher.write(&limit.to_le_bytes());
|
||||
if let Some(s) = sort {
|
||||
hasher.write(s.as_bytes());
|
||||
}
|
||||
format!("q:{:016x}", hasher.finish())
|
||||
}
|
||||
|
||||
pub async fn get(
|
||||
&self,
|
||||
query: &str,
|
||||
offset: u64,
|
||||
limit: u64,
|
||||
sort: Option<&str>,
|
||||
) -> Option<String> {
|
||||
let key = Self::make_key(query, offset, limit, sort);
|
||||
self.inner.get(&key).await
|
||||
}
|
||||
|
||||
pub async fn insert(
|
||||
&self,
|
||||
query: &str,
|
||||
offset: u64,
|
||||
limit: u64,
|
||||
sort: Option<&str>,
|
||||
result: String,
|
||||
) {
|
||||
let key = Self::make_key(query, offset, limit, sort);
|
||||
self.inner.insert(key, result).await;
|
||||
}
|
||||
|
||||
pub async fn invalidate_all(&self) {
|
||||
let mut map = self.entries.write().await;
|
||||
map.clear();
|
||||
self.inner.invalidate_all().await;
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> CacheStats {
|
||||
self.inner.stats()
|
||||
}
|
||||
}
|
||||
|
||||
/// Application-level cache layer wrapping multiple caches for different data types.
|
||||
pub struct CacheLayer {
|
||||
/// Cache for serialized API responses, keyed by request path + query string.
|
||||
pub responses: Cache<String, String>,
|
||||
/// Specialized cache for metadata extraction results.
|
||||
pub struct MetadataCache {
|
||||
/// Cache keyed by content hash
|
||||
inner: Cache<String, String>,
|
||||
}
|
||||
|
||||
impl CacheLayer {
|
||||
pub fn new(ttl_secs: u64) -> Self {
|
||||
let ttl = Duration::from_secs(ttl_secs);
|
||||
impl MetadataCache {
|
||||
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
|
||||
Self {
|
||||
responses: Cache::new(ttl),
|
||||
inner: Cache::new(ttl, max_capacity),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get(&self, content_hash: &str) -> Option<String> {
|
||||
self.inner.get(&content_hash.to_string()).await
|
||||
}
|
||||
|
||||
pub async fn insert(&self, content_hash: &str, metadata_json: String) {
|
||||
self.inner
|
||||
.insert(content_hash.to_string(), metadata_json)
|
||||
.await;
|
||||
}
|
||||
|
||||
pub async fn invalidate(&self, content_hash: &str) {
|
||||
self.inner.invalidate(&content_hash.to_string()).await;
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> CacheStats {
|
||||
self.inner.stats()
|
||||
}
|
||||
}
|
||||
|
||||
/// Specialized cache for media item data.
|
||||
pub struct MediaCache {
|
||||
inner: Cache<String, String>,
|
||||
}
|
||||
|
||||
impl MediaCache {
|
||||
pub fn new(ttl: Duration, max_capacity: u64) -> Self {
|
||||
Self {
|
||||
inner: Cache::new(ttl, max_capacity),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn get(&self, media_id: MediaId) -> Option<String> {
|
||||
self.inner.get(&media_id.to_string()).await
|
||||
}
|
||||
|
||||
pub async fn insert(&self, media_id: MediaId, item_json: String) {
|
||||
self.inner.insert(media_id.to_string(), item_json).await;
|
||||
}
|
||||
|
||||
pub async fn invalidate(&self, media_id: MediaId) {
|
||||
self.inner.invalidate(&media_id.to_string()).await;
|
||||
}
|
||||
|
||||
pub async fn invalidate_all(&self) {
|
||||
self.inner.invalidate_all().await;
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> CacheStats {
|
||||
self.inner.stats()
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration for the cache layer.
///
/// Each specialized cache gets its own TTL and capacity bound.
#[derive(Debug, Clone)]
pub struct CacheConfig {
    /// TTL for response cache in seconds
    pub response_ttl_secs: u64,
    /// Maximum number of cached responses
    pub response_max_entries: u64,
    /// TTL for query cache in seconds
    pub query_ttl_secs: u64,
    /// Maximum number of cached query results
    pub query_max_entries: u64,
    /// TTL for metadata cache in seconds
    pub metadata_ttl_secs: u64,
    /// Maximum number of cached metadata entries
    pub metadata_max_entries: u64,
    /// TTL for media cache in seconds
    pub media_ttl_secs: u64,
    /// Maximum number of cached media items
    pub media_max_entries: u64,
}

impl Default for CacheConfig {
    /// Conservative defaults: responses expire quickly (1 min), queries and
    /// media after 5 min, metadata after an hour (keyed by content hash, so
    /// it rarely goes stale).
    fn default() -> Self {
        Self {
            response_ttl_secs: 60,
            response_max_entries: 1000,
            query_ttl_secs: 300,
            query_max_entries: 500,
            metadata_ttl_secs: 3600,
            metadata_max_entries: 10000,
            media_ttl_secs: 300,
            media_max_entries: 5000,
        }
    }
}
|
||||
|
||||
/// Application-level cache layer wrapping multiple specialized caches.
|
||||
pub struct CacheLayer {
|
||||
/// Cache for serialized API responses
|
||||
pub responses: Cache<String, String>,
|
||||
/// Cache for search query results
|
||||
pub queries: QueryCache,
|
||||
/// Cache for metadata extraction results
|
||||
pub metadata: MetadataCache,
|
||||
/// Cache for individual media items
|
||||
pub media: MediaCache,
|
||||
/// Configuration
|
||||
config: CacheConfig,
|
||||
}
|
||||
|
||||
impl CacheLayer {
|
||||
/// Create a new cache layer with the specified TTL (using defaults for other settings).
|
||||
pub fn new(ttl_secs: u64) -> Self {
|
||||
let config = CacheConfig {
|
||||
response_ttl_secs: ttl_secs,
|
||||
..Default::default()
|
||||
};
|
||||
Self::with_config(config)
|
||||
}
|
||||
|
||||
/// Create a new cache layer with full configuration.
|
||||
pub fn with_config(config: CacheConfig) -> Self {
|
||||
Self {
|
||||
responses: Cache::new(
|
||||
Duration::from_secs(config.response_ttl_secs),
|
||||
config.response_max_entries,
|
||||
),
|
||||
queries: QueryCache::new(
|
||||
Duration::from_secs(config.query_ttl_secs),
|
||||
config.query_max_entries,
|
||||
),
|
||||
metadata: MetadataCache::new(
|
||||
Duration::from_secs(config.metadata_ttl_secs),
|
||||
config.metadata_max_entries,
|
||||
),
|
||||
media: MediaCache::new(
|
||||
Duration::from_secs(config.media_ttl_secs),
|
||||
config.media_max_entries,
|
||||
),
|
||||
config,
|
||||
}
|
||||
}
|
||||
|
||||
/// Invalidate all caches related to a media item update.
|
||||
pub async fn invalidate_for_media_update(&self, media_id: MediaId) {
|
||||
self.media.invalidate(media_id).await;
|
||||
// Query cache should be invalidated as search results may change
|
||||
self.queries.invalidate_all().await;
|
||||
}
|
||||
|
||||
/// Invalidate all caches related to a media item deletion.
|
||||
pub async fn invalidate_for_media_delete(&self, media_id: MediaId) {
|
||||
self.media.invalidate(media_id).await;
|
||||
self.queries.invalidate_all().await;
|
||||
}
|
||||
|
||||
/// Invalidate all caches (useful after bulk imports or major changes).
|
||||
pub async fn invalidate_all(&self) {
|
||||
self.responses.invalidate_all().await;
|
||||
self.queries.invalidate_all().await;
|
||||
self.media.invalidate_all().await;
|
||||
// Keep metadata cache as it's keyed by content hash which doesn't change
|
||||
}
|
||||
|
||||
/// Get aggregated statistics for all caches.
|
||||
pub fn stats(&self) -> CacheLayerStats {
|
||||
CacheLayerStats {
|
||||
responses: self.responses.stats(),
|
||||
queries: self.queries.stats(),
|
||||
metadata: self.metadata.stats(),
|
||||
media: self.media.stats(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the current configuration.
|
||||
pub fn config(&self) -> &CacheConfig {
|
||||
&self.config
|
||||
}
|
||||
}
|
||||
|
||||
/// Aggregated statistics for the entire cache layer.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CacheLayerStats {
|
||||
pub responses: CacheStats,
|
||||
pub queries: CacheStats,
|
||||
pub metadata: CacheStats,
|
||||
pub media: CacheStats,
|
||||
}
|
||||
|
||||
impl CacheLayerStats {
|
||||
/// Get the overall hit rate across all caches.
|
||||
pub fn overall_hit_rate(&self) -> f64 {
|
||||
let total_hits =
|
||||
self.responses.hits + self.queries.hits + self.metadata.hits + self.media.hits;
|
||||
let total_requests = total_hits
|
||||
+ self.responses.misses
|
||||
+ self.queries.misses
|
||||
+ self.metadata.misses
|
||||
+ self.media.misses;
|
||||
|
||||
if total_requests == 0 {
|
||||
0.0
|
||||
} else {
|
||||
total_hits as f64 / total_requests as f64
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the total number of entries across all caches.
|
||||
pub fn total_entries(&self) -> u64 {
|
||||
self.responses.size + self.queries.size + self.metadata.size + self.media.size
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_cache_basic_operations() {
        let cache: Cache<String, String> = Cache::new(Duration::from_secs(60), 100);

        // Round-trip: an inserted value can be read back.
        cache.insert("key1".to_string(), "value1".to_string()).await;
        assert_eq!(
            cache.get(&"key1".to_string()).await,
            Some("value1".to_string())
        );

        // A key never inserted is a miss.
        assert_eq!(cache.get(&"key2".to_string()).await, None);

        // Invalidation removes the entry.
        cache.invalidate(&"key1".to_string()).await;
        assert_eq!(cache.get(&"key1".to_string()).await, None);
    }

    #[tokio::test]
    async fn test_cache_stats() {
        let cache: Cache<String, String> = Cache::new(Duration::from_secs(60), 100);

        cache.insert("key1".to_string(), "value1".to_string()).await;
        let _ = cache.get(&"key1".to_string()).await; // hit
        let _ = cache.get(&"key2".to_string()).await; // miss

        // One hit and one miss gives a 50% hit rate.
        let stats = cache.stats();
        assert_eq!(stats.hits, 1);
        assert_eq!(stats.misses, 1);
        assert!((stats.hit_rate() - 0.5).abs() < 0.01);
    }

    #[tokio::test]
    async fn test_query_cache() {
        let cache = QueryCache::new(Duration::from_secs(60), 100);

        cache
            .insert("test query", 0, 10, Some("name"), "results".to_string())
            .await;
        // Same parameters hit the cached entry.
        assert_eq!(
            cache.get("test query", 0, 10, Some("name")).await,
            Some("results".to_string())
        );

        // Different parameters should miss
        assert_eq!(cache.get("test query", 10, 10, Some("name")).await, None);
    }

    #[tokio::test]
    async fn test_cache_layer() {
        let layer = CacheLayer::new(60);

        let media_id = MediaId::new();
        layer.media.insert(media_id, "{}".to_string()).await;
        assert!(layer.media.get(media_id).await.is_some());

        // Deleting a media item also clears its cached entry.
        layer.invalidate_for_media_delete(media_id).await;
        assert!(layer.media.get(media_id).await.is_none());
    }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue