Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I81fda8247814da19eed1e76dbe97bd5b6a6a6964
434 lines
13 KiB
Rust
434 lines
13 KiB
Rust
use std::path::{Path, PathBuf};
|
|
use std::time::SystemTime;
|
|
|
|
use tracing::info;
|
|
|
|
use crate::audit;
|
|
use crate::error::{PinakesError, Result};
|
|
use crate::hash::compute_file_hash;
|
|
use crate::links;
|
|
use crate::media_type::{BuiltinMediaType, MediaType};
|
|
use crate::metadata;
|
|
use crate::model::*;
|
|
use crate::storage::DynStorageBackend;
|
|
use crate::thumbnail;
|
|
|
|
pub struct ImportResult {
|
|
pub media_id: MediaId,
|
|
pub was_duplicate: bool,
|
|
/// True if the file was skipped because it hasn't changed since last scan
|
|
pub was_skipped: bool,
|
|
pub path: PathBuf,
|
|
}
|
|
|
|
/// Options for import operations
|
|
#[derive(Debug, Clone)]
|
|
pub struct ImportOptions {
|
|
/// Skip files that haven't changed since last scan (based on mtime)
|
|
pub incremental: bool,
|
|
/// Force re-import even if mtime hasn't changed
|
|
pub force: bool,
|
|
/// Photo configuration for toggleable features
|
|
pub photo_config: crate::config::PhotoConfig,
|
|
}
|
|
|
|
impl Default for ImportOptions {
|
|
fn default() -> Self {
|
|
Self {
|
|
incremental: false,
|
|
force: false,
|
|
photo_config: crate::config::PhotoConfig::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Get the modification time of a file as a Unix timestamp (whole seconds).
///
/// Returns `None` when the file's metadata or modification time cannot be
/// read, when the modification time lies before the Unix epoch, or when the
/// number of seconds does not fit in an `i64`.
fn get_file_mtime(path: &Path) -> Option<i64> {
    let modified = std::fs::metadata(path).ok()?.modified().ok()?;
    let since_epoch = modified.duration_since(SystemTime::UNIX_EPOCH).ok()?;
    // Checked conversion: a plain `as i64` would silently wrap out-of-range values.
    i64::try_from(since_epoch.as_secs()).ok()
}
|
|
|
|
/// Check that a canonicalized path falls under at least one configured root directory.
|
|
/// If no roots are configured, all paths are allowed (for ad-hoc imports).
|
|
pub async fn validate_path_in_roots(storage: &DynStorageBackend, path: &Path) -> Result<()> {
|
|
let roots = storage.list_root_dirs().await?;
|
|
if roots.is_empty() {
|
|
return Ok(());
|
|
}
|
|
for root in &roots {
|
|
if let Ok(canonical_root) = root.canonicalize()
|
|
&& path.starts_with(&canonical_root)
|
|
{
|
|
return Ok(());
|
|
}
|
|
}
|
|
Err(PinakesError::InvalidOperation(format!(
|
|
"path {} is not within any configured root directory",
|
|
path.display()
|
|
)))
|
|
}
|
|
|
|
pub async fn import_file(storage: &DynStorageBackend, path: &Path) -> Result<ImportResult> {
|
|
import_file_with_options(storage, path, &ImportOptions::default()).await
|
|
}
|
|
|
|
/// Import a file with configurable options for incremental scanning
|
|
pub async fn import_file_with_options(
|
|
storage: &DynStorageBackend,
|
|
path: &Path,
|
|
options: &ImportOptions,
|
|
) -> Result<ImportResult> {
|
|
let path = path.canonicalize()?;
|
|
|
|
if !path.exists() {
|
|
return Err(PinakesError::FileNotFound(path));
|
|
}
|
|
|
|
validate_path_in_roots(storage, &path).await?;
|
|
|
|
let media_type = MediaType::from_path(&path)
|
|
.ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?;
|
|
|
|
let current_mtime = get_file_mtime(&path);
|
|
|
|
// Check for incremental scan: skip if file hasn't changed
|
|
if options.incremental
|
|
&& !options.force
|
|
&& let Some(existing) = storage.get_media_by_path(&path).await?
|
|
&& let (Some(stored_mtime), Some(curr_mtime)) = (existing.file_mtime, current_mtime)
|
|
&& stored_mtime == curr_mtime
|
|
{
|
|
return Ok(ImportResult {
|
|
media_id: existing.id,
|
|
was_duplicate: false,
|
|
was_skipped: true,
|
|
path: path.clone(),
|
|
});
|
|
}
|
|
|
|
let content_hash = compute_file_hash(&path).await?;
|
|
|
|
if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
|
|
// Update the mtime even for duplicates so incremental scan works
|
|
if current_mtime.is_some() && existing.file_mtime != current_mtime {
|
|
let mut updated = existing.clone();
|
|
updated.file_mtime = current_mtime;
|
|
let _ = storage.update_media(&updated).await;
|
|
}
|
|
return Ok(ImportResult {
|
|
media_id: existing.id,
|
|
was_duplicate: true,
|
|
was_skipped: false,
|
|
path: path.clone(),
|
|
});
|
|
}
|
|
|
|
let file_meta = std::fs::metadata(&path)?;
|
|
let file_size = file_meta.len();
|
|
|
|
let extracted = {
|
|
let path_clone = path.clone();
|
|
let media_type_clone = media_type.clone();
|
|
tokio::task::spawn_blocking(move || {
|
|
metadata::extract_metadata(&path_clone, media_type_clone)
|
|
})
|
|
.await
|
|
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
|
|
};
|
|
|
|
let file_name = path
|
|
.file_name()
|
|
.unwrap_or_default()
|
|
.to_string_lossy()
|
|
.to_string();
|
|
|
|
let now = chrono::Utc::now();
|
|
let media_id = MediaId::new();
|
|
|
|
// Generate thumbnail for image types
|
|
let thumb_path = {
|
|
let source = path.clone();
|
|
let thumb_dir = thumbnail::default_thumbnail_dir();
|
|
let media_type_clone = media_type.clone();
|
|
tokio::task::spawn_blocking(move || {
|
|
thumbnail::generate_thumbnail(media_id, &source, media_type_clone, &thumb_dir)
|
|
})
|
|
.await
|
|
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
|
|
};
|
|
|
|
// Generate perceptual hash for image files (if enabled in config)
|
|
let perceptual_hash = if options.photo_config.generate_perceptual_hash
|
|
&& media_type.category() == crate::media_type::MediaCategory::Image
|
|
{
|
|
crate::metadata::image::generate_perceptual_hash(&path)
|
|
} else {
|
|
None
|
|
};
|
|
|
|
// Check if this is a markdown file for link extraction
|
|
let is_markdown = media_type == MediaType::Builtin(BuiltinMediaType::Markdown);
|
|
|
|
let item = MediaItem {
|
|
id: media_id,
|
|
path: path.clone(),
|
|
file_name,
|
|
media_type,
|
|
content_hash,
|
|
file_size,
|
|
title: extracted.title,
|
|
artist: extracted.artist,
|
|
album: extracted.album,
|
|
genre: extracted.genre,
|
|
year: extracted.year,
|
|
duration_secs: extracted.duration_secs,
|
|
description: extracted.description,
|
|
thumbnail_path: thumb_path,
|
|
custom_fields: std::collections::HashMap::new(),
|
|
file_mtime: current_mtime,
|
|
|
|
// Photo-specific metadata from extraction
|
|
date_taken: extracted.date_taken,
|
|
latitude: extracted.latitude,
|
|
longitude: extracted.longitude,
|
|
camera_make: extracted.camera_make,
|
|
camera_model: extracted.camera_model,
|
|
rating: extracted.rating,
|
|
perceptual_hash,
|
|
|
|
// Managed storage fields - external files use defaults
|
|
storage_mode: StorageMode::External,
|
|
original_filename: None,
|
|
uploaded_at: None,
|
|
storage_key: None,
|
|
|
|
created_at: now,
|
|
updated_at: now,
|
|
|
|
// New items are not deleted
|
|
deleted_at: None,
|
|
|
|
// Links will be extracted separately
|
|
links_extracted_at: None,
|
|
};
|
|
|
|
storage.insert_media(&item).await?;
|
|
|
|
// Extract and store markdown links for markdown files
|
|
if is_markdown {
|
|
if let Err(e) = extract_and_store_links(storage, media_id, &path).await {
|
|
tracing::warn!(
|
|
media_id = %media_id,
|
|
path = %path.display(),
|
|
error = %e,
|
|
"failed to extract markdown links"
|
|
);
|
|
}
|
|
}
|
|
|
|
// Store extracted extra metadata as custom fields
|
|
for (key, value) in &extracted.extra {
|
|
let field = CustomField {
|
|
field_type: CustomFieldType::Text,
|
|
value: value.clone(),
|
|
};
|
|
if let Err(e) = storage.set_custom_field(media_id, key, &field).await {
|
|
tracing::warn!(
|
|
media_id = %media_id,
|
|
field = %key,
|
|
error = %e,
|
|
"failed to store extracted metadata as custom field"
|
|
);
|
|
}
|
|
}
|
|
|
|
audit::record_action(
|
|
storage,
|
|
Some(media_id),
|
|
AuditAction::Imported,
|
|
Some(format!("path={}", path.display())),
|
|
)
|
|
.await?;
|
|
|
|
info!(media_id = %media_id, path = %path.display(), "imported media file");
|
|
|
|
Ok(ImportResult {
|
|
media_id,
|
|
was_duplicate: false,
|
|
was_skipped: false,
|
|
path: path.clone(),
|
|
})
|
|
}
|
|
|
|
/// Decide whether `path` should be excluded from an import.
///
/// Each normal component of `path` (directory and file names; root, prefix,
/// `.` and `..` components are not considered) is compared against every
/// entry in `patterns`. The path is ignored when any component either
///
/// * equals a pattern exactly, or
/// * starts with `.` while the pattern is the literal `".*"` (the only glob
///   form supported: "any dotfile").
///
/// Returns `false` when `patterns` is empty.
pub(crate) fn should_ignore(path: &std::path::Path, patterns: &[String]) -> bool {
    path.components()
        .filter_map(|component| match component {
            std::path::Component::Normal(name) => Some(name.to_string_lossy()),
            _ => None,
        })
        .any(|name| {
            patterns.iter().any(|pattern| {
                // Exact match also covers dot-prefixed names such as ".git",
                // so no separate dotfile-equality check is needed.
                (pattern == ".*" && name.starts_with('.')) || name == pattern.as_str()
            })
        })
}
|
|
|
|
/// Default number of concurrent import tasks.
|
|
const DEFAULT_IMPORT_CONCURRENCY: usize = 8;
|
|
|
|
pub async fn import_directory(
|
|
storage: &DynStorageBackend,
|
|
dir: &Path,
|
|
ignore_patterns: &[String],
|
|
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
|
|
import_directory_with_options(
|
|
storage,
|
|
dir,
|
|
ignore_patterns,
|
|
DEFAULT_IMPORT_CONCURRENCY,
|
|
&ImportOptions::default(),
|
|
)
|
|
.await
|
|
}
|
|
|
|
pub async fn import_directory_with_concurrency(
|
|
storage: &DynStorageBackend,
|
|
dir: &Path,
|
|
ignore_patterns: &[String],
|
|
concurrency: usize,
|
|
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
|
|
import_directory_with_options(
|
|
storage,
|
|
dir,
|
|
ignore_patterns,
|
|
concurrency,
|
|
&ImportOptions::default(),
|
|
)
|
|
.await
|
|
}
|
|
|
|
/// Import a directory with full options including incremental scanning support
|
|
pub async fn import_directory_with_options(
|
|
storage: &DynStorageBackend,
|
|
dir: &Path,
|
|
ignore_patterns: &[String],
|
|
concurrency: usize,
|
|
options: &ImportOptions,
|
|
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
|
|
let concurrency = concurrency.clamp(1, 256);
|
|
let dir = dir.to_path_buf();
|
|
let patterns = ignore_patterns.to_vec();
|
|
let options = options.clone();
|
|
|
|
let entries: Vec<PathBuf> = {
|
|
let dir = dir.clone();
|
|
tokio::task::spawn_blocking(move || {
|
|
walkdir::WalkDir::new(&dir)
|
|
.follow_links(true)
|
|
.into_iter()
|
|
.filter_map(|e| e.ok())
|
|
.filter(|e| e.file_type().is_file())
|
|
.filter(|e| MediaType::from_path(e.path()).is_some())
|
|
.filter(|e| !should_ignore(e.path(), &patterns))
|
|
.map(|e| e.path().to_path_buf())
|
|
.collect()
|
|
})
|
|
.await
|
|
.map_err(|e| PinakesError::Io(std::io::Error::other(e)))?
|
|
};
|
|
|
|
let mut results = Vec::with_capacity(entries.len());
|
|
let mut join_set = tokio::task::JoinSet::new();
|
|
|
|
for entry_path in entries {
|
|
let storage = storage.clone();
|
|
let path = entry_path.clone();
|
|
let opts = options.clone();
|
|
|
|
join_set.spawn(async move {
|
|
let result = import_file_with_options(&storage, &path, &opts).await;
|
|
(path, result)
|
|
});
|
|
|
|
// Limit concurrency by draining when we hit the cap
|
|
if join_set.len() >= concurrency
|
|
&& let Some(Ok((path, result))) = join_set.join_next().await
|
|
{
|
|
match result {
|
|
Ok(r) => results.push(Ok(r)),
|
|
Err(e) => {
|
|
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
|
|
results.push(Err(e));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Drain remaining tasks
|
|
while let Some(Ok((path, result))) = join_set.join_next().await {
|
|
match result {
|
|
Ok(r) => results.push(Ok(r)),
|
|
Err(e) => {
|
|
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
|
|
results.push(Err(e));
|
|
}
|
|
}
|
|
}
|
|
|
|
Ok(results)
|
|
}
|
|
|
|
/// Extract markdown links from a file and store them in the database.
|
|
async fn extract_and_store_links(
|
|
storage: &DynStorageBackend,
|
|
media_id: MediaId,
|
|
path: &Path,
|
|
) -> Result<()> {
|
|
// Read file content
|
|
let content = tokio::fs::read_to_string(path).await.map_err(|e| {
|
|
PinakesError::Io(std::io::Error::new(
|
|
std::io::ErrorKind::Other,
|
|
format!("failed to read markdown file for link extraction: {e}"),
|
|
))
|
|
})?;
|
|
|
|
// Extract links
|
|
let extracted_links = links::extract_links(media_id, &content);
|
|
|
|
if extracted_links.is_empty() {
|
|
// No links found, just mark as extracted
|
|
storage.mark_links_extracted(media_id).await?;
|
|
return Ok(());
|
|
}
|
|
|
|
// Clear any existing links for this media (in case of re-import)
|
|
storage.clear_links_for_media(media_id).await?;
|
|
|
|
// Save extracted links
|
|
storage.save_markdown_links(media_id, &extracted_links).await?;
|
|
|
|
// Mark links as extracted
|
|
storage.mark_links_extracted(media_id).await?;
|
|
|
|
tracing::debug!(
|
|
media_id = %media_id,
|
|
link_count = extracted_links.len(),
|
|
"extracted markdown links"
|
|
);
|
|
|
|
Ok(())
|
|
}
|