treewide: better cross-device sync capabilities; in-database storage
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Id99798df6f7e4470caae8a193c2654aa6a6a6964
This commit is contained in:
parent
5521488a93
commit
f34c78b238
41 changed files with 8806 additions and 138 deletions
265
crates/pinakes-core/src/upload.rs
Normal file
265
crates/pinakes-core/src/upload.rs
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
//! Upload processing for managed storage.
|
||||
//!
|
||||
//! Handles file uploads, metadata extraction, and MediaItem creation
|
||||
//! for files stored in managed content-addressable storage.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
|
||||
use chrono::Utc;
|
||||
use tokio::io::AsyncRead;
|
||||
use tracing::{debug, info};
|
||||
|
||||
use crate::error::{PinakesError, Result};
|
||||
use crate::managed_storage::ManagedStorageService;
|
||||
use crate::media_type::MediaType;
|
||||
use crate::metadata;
|
||||
use crate::model::{MediaId, MediaItem, StorageMode, UploadResult};
|
||||
use crate::storage::DynStorageBackend;
|
||||
|
||||
/// Process an upload from an async reader.
|
||||
///
|
||||
/// This function:
|
||||
/// 1. Stores the file in managed storage
|
||||
/// 2. Checks for duplicates by content hash
|
||||
/// 3. Extracts metadata from the file
|
||||
/// 4. Creates or updates the MediaItem
|
||||
pub async fn process_upload<R: AsyncRead + Unpin>(
|
||||
storage: &DynStorageBackend,
|
||||
managed: &ManagedStorageService,
|
||||
reader: R,
|
||||
original_filename: &str,
|
||||
mime_type: Option<&str>,
|
||||
) -> Result<UploadResult> {
|
||||
// Store the file
|
||||
let (content_hash, file_size) = managed.store_stream(reader).await?;
|
||||
|
||||
// Check if we already have a media item with this hash
|
||||
if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
|
||||
debug!(hash = %content_hash, media_id = %existing.id, "upload matched existing media item");
|
||||
return Ok(UploadResult {
|
||||
media_id: existing.id,
|
||||
content_hash,
|
||||
was_duplicate: true,
|
||||
file_size,
|
||||
});
|
||||
}
|
||||
|
||||
// Determine media type from filename
|
||||
let media_type = MediaType::from_path(Path::new(original_filename))
|
||||
.unwrap_or_else(|| MediaType::custom("unknown"));
|
||||
|
||||
// Get the actual file path in managed storage for metadata extraction
|
||||
let blob_path = managed.path(&content_hash);
|
||||
|
||||
// Extract metadata
|
||||
let extracted = metadata::extract_metadata(&blob_path, media_type.clone()).ok();
|
||||
|
||||
// Create or get blob record
|
||||
let mime = mime_type
|
||||
.map(String::from)
|
||||
.unwrap_or_else(|| media_type.mime_type().to_string());
|
||||
let _blob = storage
|
||||
.get_or_create_blob(&content_hash, file_size, &mime)
|
||||
.await?;
|
||||
|
||||
// Create the media item
|
||||
let now = Utc::now();
|
||||
let media_id = MediaId::new();
|
||||
|
||||
let item = MediaItem {
|
||||
id: media_id,
|
||||
path: blob_path,
|
||||
file_name: sanitize_filename(original_filename),
|
||||
media_type,
|
||||
content_hash: content_hash.clone(),
|
||||
file_size,
|
||||
title: extracted.as_ref().and_then(|m| m.title.clone()),
|
||||
artist: extracted.as_ref().and_then(|m| m.artist.clone()),
|
||||
album: extracted.as_ref().and_then(|m| m.album.clone()),
|
||||
genre: extracted.as_ref().and_then(|m| m.genre.clone()),
|
||||
year: extracted.as_ref().and_then(|m| m.year),
|
||||
duration_secs: extracted.as_ref().and_then(|m| m.duration_secs),
|
||||
description: extracted.as_ref().and_then(|m| m.description.clone()),
|
||||
thumbnail_path: None,
|
||||
custom_fields: HashMap::new(),
|
||||
file_mtime: None,
|
||||
date_taken: extracted.as_ref().and_then(|m| m.date_taken),
|
||||
latitude: extracted.as_ref().and_then(|m| m.latitude),
|
||||
longitude: extracted.as_ref().and_then(|m| m.longitude),
|
||||
camera_make: extracted.as_ref().and_then(|m| m.camera_make.clone()),
|
||||
camera_model: extracted.as_ref().and_then(|m| m.camera_model.clone()),
|
||||
rating: None,
|
||||
perceptual_hash: None,
|
||||
storage_mode: StorageMode::Managed,
|
||||
original_filename: Some(original_filename.to_string()),
|
||||
uploaded_at: Some(now),
|
||||
storage_key: Some(content_hash.0.clone()),
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
};
|
||||
|
||||
// Store the media item
|
||||
storage.insert_managed_media(&item).await?;
|
||||
|
||||
info!(
|
||||
media_id = %media_id,
|
||||
hash = %content_hash,
|
||||
filename = %original_filename,
|
||||
size = file_size,
|
||||
"processed upload"
|
||||
);
|
||||
|
||||
Ok(UploadResult {
|
||||
media_id,
|
||||
content_hash,
|
||||
was_duplicate: false,
|
||||
file_size,
|
||||
})
|
||||
}
|
||||
|
||||
/// Process an upload from bytes.
|
||||
pub async fn process_upload_bytes(
|
||||
storage: &DynStorageBackend,
|
||||
managed: &ManagedStorageService,
|
||||
data: &[u8],
|
||||
original_filename: &str,
|
||||
mime_type: Option<&str>,
|
||||
) -> Result<UploadResult> {
|
||||
use std::io::Cursor;
|
||||
let cursor = Cursor::new(data);
|
||||
process_upload(storage, managed, cursor, original_filename, mime_type).await
|
||||
}
|
||||
|
||||
/// Process an upload from a local file path.
|
||||
///
|
||||
/// This is useful for migrating existing external files to managed storage.
|
||||
pub async fn process_upload_file(
|
||||
storage: &DynStorageBackend,
|
||||
managed: &ManagedStorageService,
|
||||
path: &Path,
|
||||
original_filename: Option<&str>,
|
||||
) -> Result<UploadResult> {
|
||||
let file = tokio::fs::File::open(path).await?;
|
||||
let reader = tokio::io::BufReader::new(file);
|
||||
|
||||
let filename = original_filename.unwrap_or_else(|| {
|
||||
path.file_name()
|
||||
.and_then(|n| n.to_str())
|
||||
.unwrap_or("unknown")
|
||||
});
|
||||
|
||||
let mime = mime_guess::from_path(path).first().map(|m| m.to_string());
|
||||
|
||||
process_upload(storage, managed, reader, filename, mime.as_deref()).await
|
||||
}
|
||||
|
||||
/// Migrate an existing external media item to managed storage.
|
||||
pub async fn migrate_to_managed(
|
||||
storage: &DynStorageBackend,
|
||||
managed: &ManagedStorageService,
|
||||
media_id: MediaId,
|
||||
) -> Result<()> {
|
||||
let item = storage.get_media(media_id).await?;
|
||||
|
||||
if item.storage_mode == StorageMode::Managed {
|
||||
return Err(PinakesError::InvalidOperation(
|
||||
"media item is already in managed storage".into(),
|
||||
));
|
||||
}
|
||||
|
||||
// Check if the external file exists
|
||||
if !item.path.exists() {
|
||||
return Err(PinakesError::FileNotFound(item.path.clone()));
|
||||
}
|
||||
|
||||
// Store the file in managed storage
|
||||
let (new_hash, new_size) = managed.store_file(&item.path).await?;
|
||||
|
||||
// Verify the hash matches (it should, unless the file changed)
|
||||
if new_hash.0 != item.content_hash.0 {
|
||||
return Err(PinakesError::StorageIntegrity(format!(
|
||||
"hash changed during migration: {} -> {}",
|
||||
item.content_hash, new_hash
|
||||
)));
|
||||
}
|
||||
|
||||
// Get or create blob record
|
||||
let mime = item.media_type.mime_type().to_string();
|
||||
let _blob = storage
|
||||
.get_or_create_blob(&new_hash, new_size, &mime)
|
||||
.await?;
|
||||
|
||||
// Update the media item
|
||||
let mut updated = item.clone();
|
||||
updated.storage_mode = StorageMode::Managed;
|
||||
updated.storage_key = Some(new_hash.0.clone());
|
||||
updated.uploaded_at = Some(Utc::now());
|
||||
updated.path = managed.path(&new_hash);
|
||||
updated.updated_at = Utc::now();
|
||||
|
||||
storage.update_media(&updated).await?;
|
||||
|
||||
info!(
|
||||
media_id = %media_id,
|
||||
hash = %new_hash,
|
||||
"migrated media item to managed storage"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Sanitize a filename for storage.
///
/// Replaces path separators and NUL bytes with `_`, trims surrounding
/// whitespace, and truncates the result to at most 255 *bytes* (the common
/// filesystem filename limit) without splitting a UTF-8 character.
///
/// The previous implementation truncated to 255 *characters*, which for
/// multi-byte names could still exceed the 255-byte filesystem limit.
fn sanitize_filename(name: &str) -> String {
    // Most filesystems cap filenames at 255 bytes, not 255 characters.
    const MAX_LEN: usize = 255;

    // Path separators and NUL would let a name escape its directory or
    // corrupt C-string APIs; neutralize them before trimming.
    let cleaned = name.replace(['/', '\\', '\0'], "_");
    let trimmed = cleaned.trim();

    if trimmed.len() <= MAX_LEN {
        return trimmed.to_string();
    }

    // Walk back from the byte limit to the nearest UTF-8 char boundary so
    // the truncated name remains valid UTF-8.
    let mut end = MAX_LEN;
    while !trimmed.is_char_boundary(end) {
        end -= 1;
    }
    trimmed[..end].to_string()
}
|
||||
|
||||
/// Delete a managed media item and clean up the blob if orphaned.
|
||||
pub async fn delete_managed_media(
|
||||
storage: &DynStorageBackend,
|
||||
managed: &ManagedStorageService,
|
||||
media_id: MediaId,
|
||||
) -> Result<()> {
|
||||
let item = storage.get_media(media_id).await?;
|
||||
|
||||
if item.storage_mode != StorageMode::Managed {
|
||||
return Err(PinakesError::InvalidOperation(
|
||||
"media item is not in managed storage".into(),
|
||||
));
|
||||
}
|
||||
|
||||
// Decrement blob reference count
|
||||
let should_delete = storage.decrement_blob_ref(&item.content_hash).await?;
|
||||
|
||||
// Delete the media item
|
||||
storage.delete_media(media_id).await?;
|
||||
|
||||
// If blob is orphaned, delete it from storage
|
||||
if should_delete {
|
||||
managed.delete(&item.content_hash).await?;
|
||||
storage.delete_blob(&item.content_hash).await?;
|
||||
info!(hash = %item.content_hash, "deleted orphaned blob");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Exercises filename sanitization: pass-through, separator replacement,
    // whitespace trimming, and length truncation.
    #[test]
    fn test_sanitize_filename() {
        // Ordinary names are unchanged.
        assert_eq!(sanitize_filename("test.txt"), "test.txt");
        // Path separators are replaced so names cannot traverse directories.
        assert_eq!(sanitize_filename("path/to/file.txt"), "path_to_file.txt");
        // Surrounding whitespace is trimmed.
        assert_eq!(sanitize_filename(" spaces "), "spaces");
        // Over-long names are truncated (here: 300 `a`s down to 255).
        assert_eq!(sanitize_filename("a".repeat(300).as_str()), "a".repeat(255));
    }
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue