// Signed-off-by: NotAShelf <raf@notashelf.dev>
// Change-Id: I81fda8247814da19eed1e76dbe97bd5b6a6a6964
//! Upload processing for managed storage.
//!
//! Handles file uploads, metadata extraction, and MediaItem creation
//! for files stored in managed content-addressable storage.

use std::collections::HashMap;
|
|
use std::path::Path;
|
|
|
|
use chrono::Utc;
|
|
use tokio::io::AsyncRead;
|
|
use tracing::{debug, info};
|
|
|
|
use crate::error::{PinakesError, Result};
|
|
use crate::managed_storage::ManagedStorageService;
|
|
use crate::media_type::MediaType;
|
|
use crate::metadata;
|
|
use crate::model::{MediaId, MediaItem, StorageMode, UploadResult};
|
|
use crate::storage::DynStorageBackend;
|
|
|
|
/// Process an upload from an async reader.
|
|
///
|
|
/// This function:
|
|
/// 1. Stores the file in managed storage
|
|
/// 2. Checks for duplicates by content hash
|
|
/// 3. Extracts metadata from the file
|
|
/// 4. Creates or updates the MediaItem
|
|
pub async fn process_upload<R: AsyncRead + Unpin>(
|
|
storage: &DynStorageBackend,
|
|
managed: &ManagedStorageService,
|
|
reader: R,
|
|
original_filename: &str,
|
|
mime_type: Option<&str>,
|
|
) -> Result<UploadResult> {
|
|
// Store the file
|
|
let (content_hash, file_size) = managed.store_stream(reader).await?;
|
|
|
|
// Check if we already have a media item with this hash
|
|
if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
|
|
debug!(hash = %content_hash, media_id = %existing.id, "upload matched existing media item");
|
|
return Ok(UploadResult {
|
|
media_id: existing.id,
|
|
content_hash,
|
|
was_duplicate: true,
|
|
file_size,
|
|
});
|
|
}
|
|
|
|
// Determine media type from filename
|
|
let media_type = MediaType::from_path(Path::new(original_filename))
|
|
.unwrap_or_else(|| MediaType::custom("unknown"));
|
|
|
|
// Get the actual file path in managed storage for metadata extraction
|
|
let blob_path = managed.path(&content_hash);
|
|
|
|
// Extract metadata
|
|
let extracted = metadata::extract_metadata(&blob_path, media_type.clone()).ok();
|
|
|
|
// Create or get blob record
|
|
let mime = mime_type
|
|
.map(String::from)
|
|
.unwrap_or_else(|| media_type.mime_type().to_string());
|
|
let _blob = storage
|
|
.get_or_create_blob(&content_hash, file_size, &mime)
|
|
.await?;
|
|
|
|
// Create the media item
|
|
let now = Utc::now();
|
|
let media_id = MediaId::new();
|
|
|
|
let item = MediaItem {
|
|
id: media_id,
|
|
path: blob_path,
|
|
file_name: sanitize_filename(original_filename),
|
|
media_type,
|
|
content_hash: content_hash.clone(),
|
|
file_size,
|
|
title: extracted.as_ref().and_then(|m| m.title.clone()),
|
|
artist: extracted.as_ref().and_then(|m| m.artist.clone()),
|
|
album: extracted.as_ref().and_then(|m| m.album.clone()),
|
|
genre: extracted.as_ref().and_then(|m| m.genre.clone()),
|
|
year: extracted.as_ref().and_then(|m| m.year),
|
|
duration_secs: extracted.as_ref().and_then(|m| m.duration_secs),
|
|
description: extracted.as_ref().and_then(|m| m.description.clone()),
|
|
thumbnail_path: None,
|
|
custom_fields: HashMap::new(),
|
|
file_mtime: None,
|
|
date_taken: extracted.as_ref().and_then(|m| m.date_taken),
|
|
latitude: extracted.as_ref().and_then(|m| m.latitude),
|
|
longitude: extracted.as_ref().and_then(|m| m.longitude),
|
|
camera_make: extracted.as_ref().and_then(|m| m.camera_make.clone()),
|
|
camera_model: extracted.as_ref().and_then(|m| m.camera_model.clone()),
|
|
rating: None,
|
|
perceptual_hash: None,
|
|
storage_mode: StorageMode::Managed,
|
|
original_filename: Some(original_filename.to_string()),
|
|
uploaded_at: Some(now),
|
|
storage_key: Some(content_hash.0.clone()),
|
|
created_at: now,
|
|
updated_at: now,
|
|
deleted_at: None,
|
|
links_extracted_at: None,
|
|
};
|
|
|
|
// Store the media item
|
|
storage.insert_managed_media(&item).await?;
|
|
|
|
info!(
|
|
media_id = %media_id,
|
|
hash = %content_hash,
|
|
filename = %original_filename,
|
|
size = file_size,
|
|
"processed upload"
|
|
);
|
|
|
|
Ok(UploadResult {
|
|
media_id,
|
|
content_hash,
|
|
was_duplicate: false,
|
|
file_size,
|
|
})
|
|
}
|
|
|
|
/// Process an upload from bytes.
|
|
pub async fn process_upload_bytes(
|
|
storage: &DynStorageBackend,
|
|
managed: &ManagedStorageService,
|
|
data: &[u8],
|
|
original_filename: &str,
|
|
mime_type: Option<&str>,
|
|
) -> Result<UploadResult> {
|
|
use std::io::Cursor;
|
|
let cursor = Cursor::new(data);
|
|
process_upload(storage, managed, cursor, original_filename, mime_type).await
|
|
}
|
|
|
|
/// Process an upload from a local file path.
|
|
///
|
|
/// This is useful for migrating existing external files to managed storage.
|
|
pub async fn process_upload_file(
|
|
storage: &DynStorageBackend,
|
|
managed: &ManagedStorageService,
|
|
path: &Path,
|
|
original_filename: Option<&str>,
|
|
) -> Result<UploadResult> {
|
|
let file = tokio::fs::File::open(path).await?;
|
|
let reader = tokio::io::BufReader::new(file);
|
|
|
|
let filename = original_filename.unwrap_or_else(|| {
|
|
path.file_name()
|
|
.and_then(|n| n.to_str())
|
|
.unwrap_or("unknown")
|
|
});
|
|
|
|
let mime = mime_guess::from_path(path).first().map(|m| m.to_string());
|
|
|
|
process_upload(storage, managed, reader, filename, mime.as_deref()).await
|
|
}
|
|
|
|
/// Migrate an existing external media item to managed storage.
|
|
pub async fn migrate_to_managed(
|
|
storage: &DynStorageBackend,
|
|
managed: &ManagedStorageService,
|
|
media_id: MediaId,
|
|
) -> Result<()> {
|
|
let item = storage.get_media(media_id).await?;
|
|
|
|
if item.storage_mode == StorageMode::Managed {
|
|
return Err(PinakesError::InvalidOperation(
|
|
"media item is already in managed storage".into(),
|
|
));
|
|
}
|
|
|
|
// Check if the external file exists
|
|
if !item.path.exists() {
|
|
return Err(PinakesError::FileNotFound(item.path.clone()));
|
|
}
|
|
|
|
// Store the file in managed storage
|
|
let (new_hash, new_size) = managed.store_file(&item.path).await?;
|
|
|
|
// Verify the hash matches (it should, unless the file changed)
|
|
if new_hash.0 != item.content_hash.0 {
|
|
return Err(PinakesError::StorageIntegrity(format!(
|
|
"hash changed during migration: {} -> {}",
|
|
item.content_hash, new_hash
|
|
)));
|
|
}
|
|
|
|
// Get or create blob record
|
|
let mime = item.media_type.mime_type().to_string();
|
|
let _blob = storage
|
|
.get_or_create_blob(&new_hash, new_size, &mime)
|
|
.await?;
|
|
|
|
// Update the media item
|
|
let mut updated = item.clone();
|
|
updated.storage_mode = StorageMode::Managed;
|
|
updated.storage_key = Some(new_hash.0.clone());
|
|
updated.uploaded_at = Some(Utc::now());
|
|
updated.path = managed.path(&new_hash);
|
|
updated.updated_at = Utc::now();
|
|
|
|
storage.update_media(&updated).await?;
|
|
|
|
info!(
|
|
media_id = %media_id,
|
|
hash = %new_hash,
|
|
"migrated media item to managed storage"
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Sanitize a filename for storage.
///
/// Replaces path separators and NUL bytes with underscores, trims
/// surrounding whitespace, and truncates to 255 characters. Returns
/// `"unnamed"` when the result would otherwise be empty (e.g. an
/// all-whitespace input), so the stored `file_name` is never blank.
fn sanitize_filename(name: &str) -> String {
    let cleaned: String = name
        // Remove path separators and null bytes
        .replace(['/', '\\', '\0'], "_")
        // Trim whitespace
        .trim()
        // Truncate to reasonable length
        .chars()
        .take(255)
        .collect();
    if cleaned.is_empty() {
        "unnamed".to_string()
    } else {
        cleaned
    }
}
|
|
|
|
/// Delete a managed media item and clean up the blob if orphaned.
|
|
pub async fn delete_managed_media(
|
|
storage: &DynStorageBackend,
|
|
managed: &ManagedStorageService,
|
|
media_id: MediaId,
|
|
) -> Result<()> {
|
|
let item = storage.get_media(media_id).await?;
|
|
|
|
if item.storage_mode != StorageMode::Managed {
|
|
return Err(PinakesError::InvalidOperation(
|
|
"media item is not in managed storage".into(),
|
|
));
|
|
}
|
|
|
|
// Decrement blob reference count
|
|
let should_delete = storage.decrement_blob_ref(&item.content_hash).await?;
|
|
|
|
// Delete the media item
|
|
storage.delete_media(media_id).await?;
|
|
|
|
// If blob is orphaned, delete it from storage
|
|
if should_delete {
|
|
managed.delete(&item.content_hash).await?;
|
|
storage.delete_blob(&item.content_hash).await?;
|
|
info!(hash = %item.content_hash, "deleted orphaned blob");
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn sanitize_passes_clean_names_through() {
        assert_eq!(sanitize_filename("test.txt"), "test.txt");
    }

    #[test]
    fn sanitize_replaces_path_separators() {
        assert_eq!(sanitize_filename("path/to/file.txt"), "path_to_file.txt");
    }

    #[test]
    fn sanitize_trims_surrounding_whitespace() {
        assert_eq!(sanitize_filename("  spaces  "), "spaces");
    }

    #[test]
    fn sanitize_truncates_overlong_names() {
        assert_eq!(sanitize_filename("a".repeat(300).as_str()), "a".repeat(255));
    }
}
|