//! Upload processing for managed storage. //! //! Handles file uploads, metadata extraction, and MediaItem creation //! for files stored in managed content-addressable storage. use std::collections::HashMap; use std::path::Path; use chrono::Utc; use tokio::io::AsyncRead; use tracing::{debug, info}; use crate::error::{PinakesError, Result}; use crate::managed_storage::ManagedStorageService; use crate::media_type::MediaType; use crate::metadata; use crate::model::{MediaId, MediaItem, StorageMode, UploadResult}; use crate::storage::DynStorageBackend; /// Process an upload from an async reader. /// /// This function: /// 1. Stores the file in managed storage /// 2. Checks for duplicates by content hash /// 3. Extracts metadata from the file /// 4. Creates or updates the MediaItem pub async fn process_upload( storage: &DynStorageBackend, managed: &ManagedStorageService, reader: R, original_filename: &str, mime_type: Option<&str>, ) -> Result { // Store the file let (content_hash, file_size) = managed.store_stream(reader).await?; // Check if we already have a media item with this hash if let Some(existing) = storage.get_media_by_hash(&content_hash).await? { debug!(hash = %content_hash, media_id = %existing.id, "upload matched existing media item"); return Ok(UploadResult { media_id: existing.id, content_hash, was_duplicate: true, file_size, }); } // Determine media type from filename let media_type = MediaType::from_path(Path::new(original_filename)) .unwrap_or_else(|| MediaType::custom("unknown")); // Get the actual file path in managed storage for metadata extraction let blob_path = managed.path(&content_hash); // Extract metadata let extracted = metadata::extract_metadata(&blob_path, media_type.clone()).ok(); // Create or get blob record let mime = mime_type .map(String::from) .unwrap_or_else(|| media_type.mime_type().to_string()); let _blob = storage .get_or_create_blob(&content_hash, file_size, &mime) .await?; // Create the media item let now = Utc::now(); let media_id = MediaId::new(); let item = MediaItem { id: media_id, path: blob_path, file_name: sanitize_filename(original_filename), media_type, content_hash: content_hash.clone(), file_size, title: extracted.as_ref().and_then(|m| m.title.clone()), artist: extracted.as_ref().and_then(|m| m.artist.clone()), album: extracted.as_ref().and_then(|m| m.album.clone()), genre: extracted.as_ref().and_then(|m| m.genre.clone()), year: extracted.as_ref().and_then(|m| m.year), duration_secs: extracted.as_ref().and_then(|m| m.duration_secs), description: extracted.as_ref().and_then(|m| m.description.clone()), thumbnail_path: None, custom_fields: HashMap::new(), file_mtime: None, date_taken: extracted.as_ref().and_then(|m| m.date_taken), latitude: extracted.as_ref().and_then(|m| m.latitude), longitude: extracted.as_ref().and_then(|m| m.longitude), camera_make: extracted.as_ref().and_then(|m| m.camera_make.clone()), camera_model: extracted.as_ref().and_then(|m| m.camera_model.clone()), rating: None, perceptual_hash: None, storage_mode: StorageMode::Managed, original_filename: Some(original_filename.to_string()), uploaded_at: Some(now), storage_key: Some(content_hash.0.clone()), created_at: now, updated_at: now, deleted_at: None, links_extracted_at: None, }; // Store the media item storage.insert_managed_media(&item).await?; info!( media_id = %media_id, hash = %content_hash, filename = %original_filename, size = file_size, "processed upload" ); Ok(UploadResult { media_id, content_hash, was_duplicate: false, file_size, }) } /// Process an upload from bytes. pub async fn process_upload_bytes( storage: &DynStorageBackend, managed: &ManagedStorageService, data: &[u8], original_filename: &str, mime_type: Option<&str>, ) -> Result { use std::io::Cursor; let cursor = Cursor::new(data); process_upload(storage, managed, cursor, original_filename, mime_type).await } /// Process an upload from a local file path. /// /// This is useful for migrating existing external files to managed storage. pub async fn process_upload_file( storage: &DynStorageBackend, managed: &ManagedStorageService, path: &Path, original_filename: Option<&str>, ) -> Result { let file = tokio::fs::File::open(path).await?; let reader = tokio::io::BufReader::new(file); let filename = original_filename.unwrap_or_else(|| { path.file_name() .and_then(|n| n.to_str()) .unwrap_or("unknown") }); let mime = mime_guess::from_path(path).first().map(|m| m.to_string()); process_upload(storage, managed, reader, filename, mime.as_deref()).await } /// Migrate an existing external media item to managed storage. pub async fn migrate_to_managed( storage: &DynStorageBackend, managed: &ManagedStorageService, media_id: MediaId, ) -> Result<()> { let item = storage.get_media(media_id).await?; if item.storage_mode == StorageMode::Managed { return Err(PinakesError::InvalidOperation( "media item is already in managed storage".into(), )); } // Check if the external file exists if !item.path.exists() { return Err(PinakesError::FileNotFound(item.path.clone())); } // Store the file in managed storage let (new_hash, new_size) = managed.store_file(&item.path).await?; // Verify the hash matches (it should, unless the file changed) if new_hash.0 != item.content_hash.0 { return Err(PinakesError::StorageIntegrity(format!( "hash changed during migration: {} -> {}", item.content_hash, new_hash ))); } // Get or create blob record let mime = item.media_type.mime_type().to_string(); let _blob = storage .get_or_create_blob(&new_hash, new_size, &mime) .await?; // Update the media item let mut updated = item.clone(); updated.storage_mode = StorageMode::Managed; updated.storage_key = Some(new_hash.0.clone()); updated.uploaded_at = Some(Utc::now()); updated.path = managed.path(&new_hash); updated.updated_at = Utc::now(); storage.update_media(&updated).await?; info!( media_id = %media_id, hash = %new_hash, "migrated media item to managed storage" ); Ok(()) } /// Sanitize a filename for storage. fn sanitize_filename(name: &str) -> String { // Remove path separators and null bytes name.replace(['/', '\\', '\0'], "_") // Trim whitespace .trim() // Truncate to reasonable length .chars() .take(255) .collect() } /// Delete a managed media item and clean up the blob if orphaned. pub async fn delete_managed_media( storage: &DynStorageBackend, managed: &ManagedStorageService, media_id: MediaId, ) -> Result<()> { let item = storage.get_media(media_id).await?; if item.storage_mode != StorageMode::Managed { return Err(PinakesError::InvalidOperation( "media item is not in managed storage".into(), )); } // Decrement blob reference count let should_delete = storage.decrement_blob_ref(&item.content_hash).await?; // Delete the media item storage.delete_media(media_id).await?; // If blob is orphaned, delete it from storage if should_delete { managed.delete(&item.content_hash).await?; storage.delete_blob(&item.content_hash).await?; info!(hash = %item.content_hash, "deleted orphaned blob"); } Ok(()) } #[cfg(test)] mod tests { use super::*; #[test] fn test_sanitize_filename() { assert_eq!(sanitize_filename("test.txt"), "test.txt"); assert_eq!(sanitize_filename("path/to/file.txt"), "path_to_file.txt"); assert_eq!(sanitize_filename(" spaces "), "spaces"); assert_eq!(sanitize_filename("a".repeat(300).as_str()), "a".repeat(255)); } }