pinakes/crates/pinakes-core/src/import.rs

use std::path::{Path, PathBuf};
use std::time::SystemTime;

use tracing::info;

use crate::audit;
use crate::error::{PinakesError, Result};
use crate::hash::compute_file_hash;
use crate::links;
use crate::media_type::{BuiltinMediaType, MediaType};
use crate::metadata;
use crate::model::*;
use crate::storage::DynStorageBackend;
use crate::thumbnail;

/// Outcome of importing a single file.
pub struct ImportResult {
    /// Id of the media item the file resolved to: the newly inserted item, or the
    /// already-stored item when the import was a duplicate or was skipped.
    pub media_id: MediaId,
    /// True if an item with the same content hash was already stored, so no new
    /// item was inserted.
    pub was_duplicate: bool,
    /// True if the file was skipped because it hasn't changed since last scan
    pub was_skipped: bool,
    /// Canonicalized path of the file that was processed.
    pub path: PathBuf,
}

/// Options for import operations
#[derive(Debug, Clone)]
pub struct ImportOptions {
    /// Skip files that haven't changed since last scan (based on mtime).
    /// Only takes effect when `force` is false.
    pub incremental: bool,
    /// Force re-import even if mtime hasn't changed.
    /// This only bypasses the mtime-based skip; a file whose content hash is
    /// already stored is still reported as a duplicate.
    pub force: bool,
    /// Photo configuration for toggleable features
    /// (the importer reads `generate_perceptual_hash` from it).
    pub photo_config: crate::config::PhotoConfig,
}

impl Default for ImportOptions {
    /// Non-incremental, non-forced import with the default photo configuration.
    fn default() -> Self {
        let photo_config = crate::config::PhotoConfig::default();
        Self {
            photo_config,
            force: false,
            incremental: false,
        }
    }
}

/// Read a file's modification time as whole seconds since the Unix epoch.
///
/// Returns `None` when the metadata cannot be read, when the modification time is
/// unavailable, or when it lies before the Unix epoch.
fn get_file_mtime(path: &Path) -> Option<i64> {
    let modified = std::fs::metadata(path).ok()?.modified().ok()?;
    let since_epoch = modified.duration_since(SystemTime::UNIX_EPOCH).ok()?;
    Some(since_epoch.as_secs() as i64)
}

/// Verify that `path` (expected to be already canonicalized) lies inside one of the
/// root directories configured in `storage`.
///
/// When no roots are configured every path is accepted (for ad-hoc imports).
/// Roots that cannot be canonicalized are ignored.
///
/// # Errors
/// Returns `PinakesError::InvalidOperation` when roots exist but none contains
/// `path`, and propagates any error from listing the roots.
pub async fn validate_path_in_roots(storage: &DynStorageBackend, path: &Path) -> Result<()> {
    let roots = storage.list_root_dirs().await?;

    let allowed = roots.is_empty()
        || roots.iter().any(|root| {
            root.canonicalize()
                .map(|canonical| path.starts_with(&canonical))
                .unwrap_or(false)
        });

    if allowed {
        Ok(())
    } else {
        Err(PinakesError::InvalidOperation(format!(
            "path {} is not within any configured root directory",
            path.display()
        )))
    }
}

/// Import a single file using the default [`ImportOptions`].
pub async fn import_file(storage: &DynStorageBackend, path: &Path) -> Result<ImportResult> {
    let default_options = ImportOptions::default();
    import_file_with_options(storage, path, &default_options).await
}

/// Import a file with configurable options for incremental scanning.
///
/// The import proceeds in this order:
/// 1. Canonicalize `path` and check it against the configured root directories.
/// 2. Resolve the media type from the path.
/// 3. If `options.incremental` is set and `options.force` is not, return early with
///    `was_skipped = true` when an item is already stored for this path with the
///    same mtime.
/// 4. Hash the file; if an item with the same content hash exists, refresh its
///    stored mtime (best effort) and return with `was_duplicate = true`.
/// 5. Otherwise extract metadata, generate a thumbnail and (optionally) a perceptual
///    hash, insert the new item, extract markdown links, store extra metadata as
///    custom fields and record an audit entry.
///
/// # Errors
/// - `PinakesError::FileNotFound` if `path` does not exist.
/// - `PinakesError::UnsupportedMediaType` if no media type matches the path.
/// - `PinakesError::MetadataExtraction` if a blocking extraction/thumbnail task
///   cannot be joined.
/// - Any error propagated from path validation, hashing, metadata extraction,
///   thumbnail generation, storage or audit recording.
///
/// Failures while extracting markdown links, storing custom fields, or refreshing
/// the mtime of a duplicate are logged and do not fail the import.
// NOTE(review): when a file at an already-imported path has changed content, this
// inserts a new item instead of updating the stored one — confirm that the storage
// backend handles the path collision as intended.
pub async fn import_file_with_options(
    storage: &DynStorageBackend,
    path: &Path,
    options: &ImportOptions,
) -> Result<ImportResult> {
    // Canonicalization itself fails for a missing file, so the "not found" case has
    // to be recognised here; a separate `exists()` check afterwards can never fire.
    let path = match tokio::fs::canonicalize(path).await {
        Ok(canonical) => canonical,
        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
            return Err(PinakesError::FileNotFound(path.to_path_buf()));
        }
        Err(e) => return Err(e.into()),
    };

    validate_path_in_roots(storage, &path).await?;

    let media_type = MediaType::from_path(&path)
        .ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?;

    let current_mtime = get_file_mtime(&path);

    // Check for incremental scan: skip if file hasn't changed
    if options.incremental
        && !options.force
        && let Some(existing) = storage.get_media_by_path(&path).await?
        && let (Some(stored_mtime), Some(curr_mtime)) = (existing.file_mtime, current_mtime)
        && stored_mtime == curr_mtime
    {
        return Ok(ImportResult {
            media_id: existing.id,
            was_duplicate: false,
            was_skipped: true,
            path,
        });
    }

    let content_hash = compute_file_hash(&path).await?;

    if let Some(mut existing) = storage.get_media_by_hash(&content_hash).await? {
        // Update the mtime even for duplicates so incremental scan works.
        // This is best effort: a failure is logged but the duplicate is still reported.
        if current_mtime.is_some() && existing.file_mtime != current_mtime {
            existing.file_mtime = current_mtime;
            if let Err(e) = storage.update_media(&existing).await {
                tracing::warn!(
                    media_id = %existing.id,
                    path = %path.display(),
                    error = %e,
                    "failed to update mtime of duplicate media item"
                );
            }
        }
        return Ok(ImportResult {
            media_id: existing.id,
            was_duplicate: true,
            was_skipped: false,
            path,
        });
    }

    // Use the async filesystem API so the executor thread is not blocked.
    let file_size = tokio::fs::metadata(&path).await?.len();

    // Metadata extraction is synchronous, so it runs on the blocking pool.
    let extracted = {
        let path_clone = path.clone();
        let media_type_clone = media_type.clone();
        tokio::task::spawn_blocking(move || {
            metadata::extract_metadata(&path_clone, media_type_clone)
        })
        .await
        .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
    };

    let file_name = path
        .file_name()
        .unwrap_or_default()
        .to_string_lossy()
        .to_string();

    let now = chrono::Utc::now();
    let media_id = MediaId::new();

    // Generate thumbnail (also on the blocking pool)
    let thumb_path = {
        let source = path.clone();
        let thumb_dir = thumbnail::default_thumbnail_dir();
        let media_type_clone = media_type.clone();
        tokio::task::spawn_blocking(move || {
            thumbnail::generate_thumbnail(media_id, &source, media_type_clone, &thumb_dir)
        })
        .await
        .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
    };

    // Generate perceptual hash for image files (if enabled in config)
    // NOTE(review): this runs on the async task; consider the blocking pool if it
    // turns out to be expensive.
    let perceptual_hash = if options.photo_config.generate_perceptual_hash
        && media_type.category() == crate::media_type::MediaCategory::Image
    {
        crate::metadata::image::generate_perceptual_hash(&path)
    } else {
        None
    };

    // Check if this is a markdown file for link extraction
    let is_markdown = media_type == MediaType::Builtin(BuiltinMediaType::Markdown);

    let item = MediaItem {
        id: media_id,
        path: path.clone(),
        file_name,
        media_type,
        content_hash,
        file_size,
        title: extracted.title,
        artist: extracted.artist,
        album: extracted.album,
        genre: extracted.genre,
        year: extracted.year,
        duration_secs: extracted.duration_secs,
        description: extracted.description,
        thumbnail_path: thumb_path,
        custom_fields: std::collections::HashMap::new(),
        file_mtime: current_mtime,

        // Photo-specific metadata from extraction
        date_taken: extracted.date_taken,
        latitude: extracted.latitude,
        longitude: extracted.longitude,
        camera_make: extracted.camera_make,
        camera_model: extracted.camera_model,
        rating: extracted.rating,
        perceptual_hash,

        // Managed storage fields - external files use defaults
        storage_mode: StorageMode::External,
        original_filename: None,
        uploaded_at: None,
        storage_key: None,

        created_at: now,
        updated_at: now,

        // New items are not deleted
        deleted_at: None,

        // Links will be extracted separately
        links_extracted_at: None,
    };

    storage.insert_media(&item).await?;

    // Extract and store markdown links for markdown files (best effort)
    if is_markdown
        && let Err(e) = extract_and_store_links(storage, media_id, &path).await
    {
        tracing::warn!(
            media_id = %media_id,
            path = %path.display(),
            error = %e,
            "failed to extract markdown links"
        );
    }

    // Store extracted extra metadata as custom fields (best effort per field)
    for (key, value) in &extracted.extra {
        let field = CustomField {
            field_type: CustomFieldType::Text,
            value: value.clone(),
        };
        if let Err(e) = storage.set_custom_field(media_id, key, &field).await {
            tracing::warn!(
                media_id = %media_id,
                field = %key,
                error = %e,
                "failed to store extracted metadata as custom field"
            );
        }
    }

    audit::record_action(
        storage,
        Some(media_id),
        AuditAction::Imported,
        Some(format!("path={}", path.display())),
    )
    .await?;

    info!(media_id = %media_id, path = %path.display(), "imported media file");

    Ok(ImportResult {
        media_id,
        was_duplicate: false,
        was_skipped: false,
        path,
    })
}

/// Decide whether `path` should be excluded according to `patterns`.
///
/// Every normal component of the path (directories and the file name alike) is
/// compared against each pattern. A component is ignored when it equals a pattern
/// exactly, or when the pattern is the special glob `".*"` and the component starts
/// with a dot. Root, prefix, `.` and `..` components are never matched.
pub(crate) fn should_ignore(path: &std::path::Path, patterns: &[String]) -> bool {
    use std::path::Component;

    path.components()
        .filter_map(|part| match part {
            Component::Normal(os_name) => Some(os_name.to_string_lossy()),
            _ => None,
        })
        .any(|name| {
            let is_dotfile = name.starts_with('.');
            patterns.iter().any(|candidate| {
                // Exact name match, or the ".*" shorthand for any dotfile.
                name == candidate.as_str() || (is_dotfile && candidate.as_str() == ".*")
            })
        })
}

/// Default number of concurrent import tasks.
///
/// Used by `import_directory`, which does not take an explicit concurrency argument.
const DEFAULT_IMPORT_CONCURRENCY: usize = 8;

pub async fn import_directory(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
    import_directory_with_options(
        storage,
        dir,
        ignore_patterns,
        DEFAULT_IMPORT_CONCURRENCY,
        &ImportOptions::default(),
    )
    .await
}

/// Import every supported file under `dir` with default [`ImportOptions`], running
/// up to `concurrency` import tasks at a time.
///
/// Paths matching `ignore_patterns` are skipped. The returned vector holds the
/// per-file outcomes.
pub async fn import_directory_with_concurrency(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
    concurrency: usize,
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
    let default_options = ImportOptions::default();
    import_directory_with_options(
        storage,
        dir,
        ignore_patterns,
        concurrency,
        &default_options,
    )
    .await
}

/// Import a directory with full options including incremental scanning support
pub async fn import_directory_with_options(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
    concurrency: usize,
    options: &ImportOptions,
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
    let concurrency = concurrency.clamp(1, 256);
    let dir = dir.to_path_buf();
    let patterns = ignore_patterns.to_vec();
    let options = options.clone();

    let entries: Vec<PathBuf> = {
        let dir = dir.clone();
        tokio::task::spawn_blocking(move || {
            walkdir::WalkDir::new(&dir)
                .follow_links(true)
                .into_iter()
                .filter_map(|e| e.ok())
                .filter(|e| e.file_type().is_file())
                .filter(|e| MediaType::from_path(e.path()).is_some())
                .filter(|e| !should_ignore(e.path(), &patterns))
                .map(|e| e.path().to_path_buf())
                .collect()
        })
        .await
        .map_err(|e| PinakesError::Io(std::io::Error::other(e)))?
    };

    let mut results = Vec::with_capacity(entries.len());
    let mut join_set = tokio::task::JoinSet::new();

    for entry_path in entries {
        let storage = storage.clone();
        let path = entry_path.clone();
        let opts = options.clone();

        join_set.spawn(async move {
            let result = import_file_with_options(&storage, &path, &opts).await;
            (path, result)
        });

        // Limit concurrency by draining when we hit the cap
        if join_set.len() >= concurrency
            && let Some(Ok((path, result))) = join_set.join_next().await
        {
            match result {
                Ok(r) => results.push(Ok(r)),
                Err(e) => {
                    tracing::warn!(path = %path.display(), error = %e, "failed to import file");
                    results.push(Err(e));
                }
            }
        }
    }

    // Drain remaining tasks
    while let Some(Ok((path, result))) = join_set.join_next().await {
        match result {
            Ok(r) => results.push(Ok(r)),
            Err(e) => {
                tracing::warn!(path = %path.display(), error = %e, "failed to import file");
                results.push(Err(e));
            }
        }
    }

    Ok(results)
}

/// Extract markdown links from a file and store them in the database.
///
/// Reads the file at `path` as UTF-8 text, extracts its links, replaces whatever
/// links are currently stored for `media_id` with the new set, and marks the item
/// as having had its links extracted.
///
/// # Errors
/// Returns `PinakesError::Io` (keeping the original I/O error kind) if the file
/// cannot be read, and propagates any storage error.
async fn extract_and_store_links(
    storage: &DynStorageBackend,
    media_id: MediaId,
    path: &Path,
) -> Result<()> {
    // Read file content; keep the original error kind while adding context.
    let content = tokio::fs::read_to_string(path).await.map_err(|e| {
        PinakesError::Io(std::io::Error::new(
            e.kind(),
            format!("failed to read markdown file for link extraction: {e}"),
        ))
    })?;

    // Extract links
    let extracted_links = links::extract_links(media_id, &content);

    // Always clear existing links first (in case of re-import). Doing this only
    // when new links were found would leave stale links behind for a file whose
    // links have all been removed.
    storage.clear_links_for_media(media_id).await?;

    // Save extracted links, if any
    if !extracted_links.is_empty() {
        storage.save_markdown_links(media_id, &extracted_links).await?;
    }

    // Mark links as extracted
    storage.mark_links_extracted(media_id).await?;

    tracing::debug!(
        media_id = %media_id,
        link_count = extracted_links.len(),
        "extracted markdown links"
    );

    Ok(())
}