use std::{ path::{Path, PathBuf}, time::SystemTime, }; use tracing::info; use crate::{ audit, error::{PinakesError, Result}, hash::compute_file_hash, links, media_type::{BuiltinMediaType, MediaType}, metadata, model::*, storage::DynStorageBackend, thumbnail, }; pub struct ImportResult { pub media_id: MediaId, pub was_duplicate: bool, /// True if the file was skipped because it hasn't changed since last scan pub was_skipped: bool, pub path: PathBuf, } /// Options for import operations #[derive(Debug, Clone)] pub struct ImportOptions { /// Skip files that haven't changed since last scan (based on mtime) pub incremental: bool, /// Force re-import even if mtime hasn't changed pub force: bool, /// Photo configuration for toggleable features pub photo_config: crate::config::PhotoConfig, } impl Default for ImportOptions { fn default() -> Self { Self { incremental: false, force: false, photo_config: crate::config::PhotoConfig::default(), } } } /// Get the modification time of a file as a Unix timestamp fn get_file_mtime(path: &Path) -> Option { std::fs::metadata(path) .ok() .and_then(|m| m.modified().ok()) .and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok()) .map(|d| d.as_secs() as i64) } /// Check that a canonicalized path falls under at least one configured root /// directory. If no roots are configured, all paths are allowed (for ad-hoc /// imports). pub async fn validate_path_in_roots( storage: &DynStorageBackend, path: &Path, ) -> Result<()> { let roots = storage.list_root_dirs().await?; if roots.is_empty() { return Ok(()); } for root in &roots { if let Ok(canonical_root) = root.canonicalize() && path.starts_with(&canonical_root) { return Ok(()); } } Err(PinakesError::InvalidOperation(format!( "path {} is not within any configured root directory", path.display() ))) } pub async fn import_file( storage: &DynStorageBackend, path: &Path, ) -> Result { import_file_with_options(storage, path, &ImportOptions::default()).await } /// Import a file with configurable options for incremental scanning pub async fn import_file_with_options( storage: &DynStorageBackend, path: &Path, options: &ImportOptions, ) -> Result { let path = path.canonicalize()?; if !path.exists() { return Err(PinakesError::FileNotFound(path)); } validate_path_in_roots(storage, &path).await?; let media_type = MediaType::from_path(&path) .ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?; let current_mtime = get_file_mtime(&path); // Check for incremental scan: skip if file hasn't changed if options.incremental && !options.force && let Some(existing) = storage.get_media_by_path(&path).await? && let (Some(stored_mtime), Some(curr_mtime)) = (existing.file_mtime, current_mtime) && stored_mtime == curr_mtime { return Ok(ImportResult { media_id: existing.id, was_duplicate: false, was_skipped: true, path: path.clone(), }); } let content_hash = compute_file_hash(&path).await?; if let Some(existing) = storage.get_media_by_hash(&content_hash).await? { // Update the mtime even for duplicates so incremental scan works if current_mtime.is_some() && existing.file_mtime != current_mtime { let mut updated = existing.clone(); updated.file_mtime = current_mtime; let _ = storage.update_media(&updated).await; } return Ok(ImportResult { media_id: existing.id, was_duplicate: true, was_skipped: false, path: path.clone(), }); } let file_meta = std::fs::metadata(&path)?; let file_size = file_meta.len(); let extracted = { let path_clone = path.clone(); let media_type_clone = media_type.clone(); tokio::task::spawn_blocking(move || { metadata::extract_metadata(&path_clone, media_type_clone) }) .await .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))?? }; let file_name = path .file_name() .unwrap_or_default() .to_string_lossy() .to_string(); let now = chrono::Utc::now(); let media_id = MediaId::new(); // Generate thumbnail for image types let thumb_path = { let source = path.clone(); let thumb_dir = thumbnail::default_thumbnail_dir(); let media_type_clone = media_type.clone(); tokio::task::spawn_blocking(move || { thumbnail::generate_thumbnail( media_id, &source, media_type_clone, &thumb_dir, ) }) .await .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))?? }; // Generate perceptual hash for image files (if enabled in config) let perceptual_hash = if options.photo_config.generate_perceptual_hash && media_type.category() == crate::media_type::MediaCategory::Image { crate::metadata::image::generate_perceptual_hash(&path) } else { None }; // Check if this is a markdown file for link extraction let is_markdown = media_type == MediaType::Builtin(BuiltinMediaType::Markdown); let item = MediaItem { id: media_id, path: path.clone(), file_name, media_type, content_hash, file_size, title: extracted.title, artist: extracted.artist, album: extracted.album, genre: extracted.genre, year: extracted.year, duration_secs: extracted.duration_secs, description: extracted.description, thumbnail_path: thumb_path, custom_fields: std::collections::HashMap::new(), file_mtime: current_mtime, // Photo-specific metadata from extraction date_taken: extracted.date_taken, latitude: extracted.latitude, longitude: extracted.longitude, camera_make: extracted.camera_make, camera_model: extracted.camera_model, rating: extracted.rating, perceptual_hash, // Managed storage fields - external files use defaults storage_mode: StorageMode::External, original_filename: None, uploaded_at: None, storage_key: None, created_at: now, updated_at: now, // New items are not deleted deleted_at: None, // Links will be extracted separately links_extracted_at: None, }; storage.insert_media(&item).await?; // Extract and store markdown links for markdown files if is_markdown { if let Err(e) = extract_and_store_links(storage, media_id, &path).await { tracing::warn!( media_id = %media_id, path = %path.display(), error = %e, "failed to extract markdown links" ); } } // Store extracted extra metadata as custom fields for (key, value) in &extracted.extra { let field = CustomField { field_type: CustomFieldType::Text, value: value.clone(), }; if let Err(e) = storage.set_custom_field(media_id, key, &field).await { tracing::warn!( media_id = %media_id, field = %key, error = %e, "failed to store extracted metadata as custom field" ); } } audit::record_action( storage, Some(media_id), AuditAction::Imported, Some(format!("path={}", path.display())), ) .await?; info!(media_id = %media_id, path = %path.display(), "imported media file"); Ok(ImportResult { media_id, was_duplicate: false, was_skipped: false, path: path.clone(), }) } pub(crate) fn should_ignore( path: &std::path::Path, patterns: &[String], ) -> bool { for component in path.components() { if let std::path::Component::Normal(name) = component { let name_str = name.to_string_lossy(); for pattern in patterns { if pattern.starts_with('.') && name_str.starts_with('.') && pattern == name_str.as_ref() { return true; } // Simple glob: ".*" matches any dotfile if pattern == ".*" && name_str.starts_with('.') { return true; } if name_str == pattern.as_str() { return true; } } } } false } /// Default number of concurrent import tasks. const DEFAULT_IMPORT_CONCURRENCY: usize = 8; pub async fn import_directory( storage: &DynStorageBackend, dir: &Path, ignore_patterns: &[String], ) -> Result>> { import_directory_with_options( storage, dir, ignore_patterns, DEFAULT_IMPORT_CONCURRENCY, &ImportOptions::default(), ) .await } pub async fn import_directory_with_concurrency( storage: &DynStorageBackend, dir: &Path, ignore_patterns: &[String], concurrency: usize, ) -> Result>> { import_directory_with_options( storage, dir, ignore_patterns, concurrency, &ImportOptions::default(), ) .await } /// Import a directory with full options including incremental scanning support pub async fn import_directory_with_options( storage: &DynStorageBackend, dir: &Path, ignore_patterns: &[String], concurrency: usize, options: &ImportOptions, ) -> Result>> { let concurrency = concurrency.clamp(1, 256); let dir = dir.to_path_buf(); let patterns = ignore_patterns.to_vec(); let options = options.clone(); let entries: Vec = { let dir = dir.clone(); tokio::task::spawn_blocking(move || { walkdir::WalkDir::new(&dir) .follow_links(true) .into_iter() .filter_map(|e| e.ok()) .filter(|e| e.file_type().is_file()) .filter(|e| MediaType::from_path(e.path()).is_some()) .filter(|e| !should_ignore(e.path(), &patterns)) .map(|e| e.path().to_path_buf()) .collect() }) .await .map_err(|e| PinakesError::Io(std::io::Error::other(e)))? }; let mut results = Vec::with_capacity(entries.len()); let mut join_set = tokio::task::JoinSet::new(); for entry_path in entries { let storage = storage.clone(); let path = entry_path.clone(); let opts = options.clone(); join_set.spawn(async move { let result = import_file_with_options(&storage, &path, &opts).await; (path, result) }); // Limit concurrency by draining when we hit the cap if join_set.len() >= concurrency && let Some(Ok((path, result))) = join_set.join_next().await { match result { Ok(r) => results.push(Ok(r)), Err(e) => { tracing::warn!(path = %path.display(), error = %e, "failed to import file"); results.push(Err(e)); }, } } } // Drain remaining tasks while let Some(Ok((path, result))) = join_set.join_next().await { match result { Ok(r) => results.push(Ok(r)), Err(e) => { tracing::warn!(path = %path.display(), error = %e, "failed to import file"); results.push(Err(e)); }, } } Ok(results) } /// Extract markdown links from a file and store them in the database. async fn extract_and_store_links( storage: &DynStorageBackend, media_id: MediaId, path: &Path, ) -> Result<()> { // Read file content let content = tokio::fs::read_to_string(path).await.map_err(|e| { PinakesError::Io(std::io::Error::new( std::io::ErrorKind::Other, format!("failed to read markdown file for link extraction: {e}"), )) })?; // Extract links let extracted_links = links::extract_links(media_id, &content); if extracted_links.is_empty() { // No links found, just mark as extracted storage.mark_links_extracted(media_id).await?; return Ok(()); } // Clear any existing links for this media (in case of re-import) storage.clear_links_for_media(media_id).await?; // Save extracted links storage .save_markdown_links(media_id, &extracted_links) .await?; // Mark links as extracted storage.mark_links_extracted(media_id).await?; tracing::debug!( media_id = %media_id, link_count = extracted_links.len(), "extracted markdown links" ); Ok(()) }