// pinakes/crates/pinakes-core/src/upload.rs
// NotAShelf 80a8b5c7ca
// various: markdown improvements
// Signed-off-by: NotAShelf <raf@notashelf.dev>
// Change-Id: I81fda8247814da19eed1e76dbe97bd5b6a6a6964
// 2026-02-09 15:49:23 +03:00
//
// (267 lines, 8.3 KiB, Rust)

//! Upload processing for managed storage.
//!
//! Handles file uploads, metadata extraction, and MediaItem creation
//! for files stored in managed content-addressable storage.
use std::collections::HashMap;
use std::path::Path;
use chrono::Utc;
use tokio::io::AsyncRead;
use tracing::{debug, info};
use crate::error::{PinakesError, Result};
use crate::managed_storage::ManagedStorageService;
use crate::media_type::MediaType;
use crate::metadata;
use crate::model::{MediaId, MediaItem, StorageMode, UploadResult};
use crate::storage::DynStorageBackend;
/// Process an upload from an async reader.
///
/// The incoming stream is first written to content-addressable managed
/// storage. If an existing media item already owns the resulting hash,
/// the upload is reported back as a duplicate; otherwise metadata is
/// extracted from the stored blob and a fresh `MediaItem` is persisted.
pub async fn process_upload<R: AsyncRead + Unpin>(
    storage: &DynStorageBackend,
    managed: &ManagedStorageService,
    reader: R,
    original_filename: &str,
    mime_type: Option<&str>,
) -> Result<UploadResult> {
    // Persist the stream first; dedup is decided by the resulting hash.
    let (content_hash, file_size) = managed.store_stream(reader).await?;

    // Short-circuit when an item with identical content already exists.
    if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
        debug!(hash = %content_hash, media_id = %existing.id, "upload matched existing media item");
        return Ok(UploadResult {
            media_id: existing.id,
            content_hash,
            was_duplicate: true,
            file_size,
        });
    }

    // Infer the media type from the original name; fall back to a
    // catch-all custom type when the extension is unrecognized.
    let media_type = MediaType::from_path(Path::new(original_filename))
        .unwrap_or_else(|| MediaType::custom("unknown"));

    // Metadata is read from the blob's on-disk location; extraction
    // failures are non-fatal and simply leave the optional fields empty.
    let blob_path = managed.path(&content_hash);
    let extracted = metadata::extract_metadata(&blob_path, media_type.clone()).ok();
    let meta = extracted.as_ref();

    // Prefer the caller-supplied MIME type over the inferred one.
    let mime = match mime_type {
        Some(m) => m.to_string(),
        None => media_type.mime_type().to_string(),
    };
    let _blob = storage
        .get_or_create_blob(&content_hash, file_size, &mime)
        .await?;

    // Assemble the new media item; `now` is shared so created/updated
    // timestamps match exactly.
    let now = Utc::now();
    let media_id = MediaId::new();
    let item = MediaItem {
        id: media_id,
        path: blob_path,
        file_name: sanitize_filename(original_filename),
        media_type,
        content_hash: content_hash.clone(),
        file_size,
        title: meta.and_then(|m| m.title.clone()),
        artist: meta.and_then(|m| m.artist.clone()),
        album: meta.and_then(|m| m.album.clone()),
        genre: meta.and_then(|m| m.genre.clone()),
        year: meta.and_then(|m| m.year),
        duration_secs: meta.and_then(|m| m.duration_secs),
        description: meta.and_then(|m| m.description.clone()),
        thumbnail_path: None,
        custom_fields: HashMap::new(),
        file_mtime: None,
        date_taken: meta.and_then(|m| m.date_taken),
        latitude: meta.and_then(|m| m.latitude),
        longitude: meta.and_then(|m| m.longitude),
        camera_make: meta.and_then(|m| m.camera_make.clone()),
        camera_model: meta.and_then(|m| m.camera_model.clone()),
        rating: None,
        perceptual_hash: None,
        storage_mode: StorageMode::Managed,
        original_filename: Some(original_filename.to_string()),
        uploaded_at: Some(now),
        storage_key: Some(content_hash.0.clone()),
        created_at: now,
        updated_at: now,
        deleted_at: None,
        links_extracted_at: None,
    };

    storage.insert_managed_media(&item).await?;
    info!(
        media_id = %media_id,
        hash = %content_hash,
        filename = %original_filename,
        size = file_size,
        "processed upload"
    );

    Ok(UploadResult {
        media_id,
        content_hash,
        was_duplicate: false,
        file_size,
    })
}
/// Process an upload whose full contents are already in memory.
///
/// Wraps the byte slice in a cursor (which implements `AsyncRead`) and
/// delegates to [`process_upload`].
pub async fn process_upload_bytes(
    storage: &DynStorageBackend,
    managed: &ManagedStorageService,
    data: &[u8],
    original_filename: &str,
    mime_type: Option<&str>,
) -> Result<UploadResult> {
    let reader = std::io::Cursor::new(data);
    process_upload(storage, managed, reader, original_filename, mime_type).await
}
/// Process an upload from a local file path.
///
/// This is useful for migrating existing external files to managed storage.
pub async fn process_upload_file(
    storage: &DynStorageBackend,
    managed: &ManagedStorageService,
    path: &Path,
    original_filename: Option<&str>,
) -> Result<UploadResult> {
    // Buffer the file so the hashing stream reads in large chunks.
    let reader = tokio::io::BufReader::new(tokio::fs::File::open(path).await?);

    // Prefer the caller's name, then the on-disk name, then a fixed
    // placeholder for paths with no usable UTF-8 file name.
    let filename = match original_filename {
        Some(name) => name,
        None => path
            .file_name()
            .and_then(|n| n.to_str())
            .unwrap_or("unknown"),
    };

    // Best-effort MIME detection from the path's extension.
    let mime = mime_guess::from_path(path).first().map(|m| m.to_string());
    process_upload(storage, managed, reader, filename, mime.as_deref()).await
}
/// Migrate an existing external media item to managed storage.
pub async fn migrate_to_managed(
storage: &DynStorageBackend,
managed: &ManagedStorageService,
media_id: MediaId,
) -> Result<()> {
let item = storage.get_media(media_id).await?;
if item.storage_mode == StorageMode::Managed {
return Err(PinakesError::InvalidOperation(
"media item is already in managed storage".into(),
));
}
// Check if the external file exists
if !item.path.exists() {
return Err(PinakesError::FileNotFound(item.path.clone()));
}
// Store the file in managed storage
let (new_hash, new_size) = managed.store_file(&item.path).await?;
// Verify the hash matches (it should, unless the file changed)
if new_hash.0 != item.content_hash.0 {
return Err(PinakesError::StorageIntegrity(format!(
"hash changed during migration: {} -> {}",
item.content_hash, new_hash
)));
}
// Get or create blob record
let mime = item.media_type.mime_type().to_string();
let _blob = storage
.get_or_create_blob(&new_hash, new_size, &mime)
.await?;
// Update the media item
let mut updated = item.clone();
updated.storage_mode = StorageMode::Managed;
updated.storage_key = Some(new_hash.0.clone());
updated.uploaded_at = Some(Utc::now());
updated.path = managed.path(&new_hash);
updated.updated_at = Utc::now();
storage.update_media(&updated).await?;
info!(
media_id = %media_id,
hash = %new_hash,
"migrated media item to managed storage"
);
Ok(())
}
/// Sanitize a filename for storage.
///
/// Replaces path separators and NUL bytes with underscores, trims
/// surrounding whitespace, and truncates the result so it fits within
/// the common 255-*byte* filesystem limit for a path component. The
/// cut always lands on a `char` boundary, so multi-byte UTF-8 names
/// are never split mid-character. (The previous implementation took
/// 255 chars, which could produce up to ~1020 bytes for non-ASCII
/// names; for ASCII input the behavior is unchanged.)
fn sanitize_filename(name: &str) -> String {
    // Path separators and NUL could be abused to escape the storage
    // directory or corrupt C-string APIs; neutralize them first.
    let cleaned = name.replace(['/', '\\', '\0'], "_");
    let trimmed = cleaned.trim();

    // Most filesystems cap a single path component at 255 bytes.
    const MAX_BYTES: usize = 255;
    if trimmed.len() <= MAX_BYTES {
        return trimmed.to_string();
    }
    // Walk back from the limit to the nearest char boundary before slicing.
    let mut end = MAX_BYTES;
    while !trimmed.is_char_boundary(end) {
        end -= 1;
    }
    trimmed[..end].to_string()
}
/// Delete a managed media item and clean up the blob if orphaned.
///
/// The item's blob reference count is decremented first; when that was
/// the last reference, the blob's bytes and its record are removed too.
pub async fn delete_managed_media(
    storage: &DynStorageBackend,
    managed: &ManagedStorageService,
    media_id: MediaId,
) -> Result<()> {
    let item = storage.get_media(media_id).await?;

    // Only items owned by managed storage may be deleted through here;
    // external files are never removed from disk by this path.
    if !matches!(item.storage_mode, StorageMode::Managed) {
        return Err(PinakesError::InvalidOperation(
            "media item is not in managed storage".into(),
        ));
    }

    // Drop this item's reference; the backend reports whether the blob
    // just became orphaned.
    let orphaned = storage.decrement_blob_ref(&item.content_hash).await?;
    storage.delete_media(media_id).await?;

    // Reclaim the blob's bytes and record once nothing points at it.
    if orphaned {
        managed.delete(&item.content_hash).await?;
        storage.delete_blob(&item.content_hash).await?;
        info!(hash = %item.content_hash, "deleted orphaned blob");
    }
    Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_sanitize_filename() {
        // Ordinary names pass through untouched.
        assert_eq!(sanitize_filename("test.txt"), "test.txt");
        // Path separators become underscores.
        assert_eq!(sanitize_filename("path/to/file.txt"), "path_to_file.txt");
        // Surrounding whitespace is stripped.
        assert_eq!(sanitize_filename(" spaces "), "spaces");
        // Over-long ASCII names are truncated to 255.
        let long = "a".repeat(300);
        assert_eq!(sanitize_filename(&long), "a".repeat(255));
    }
}