treewide: fix various UI bugs; optimize crypto dependencies & format
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: If8fe8b38c1d9c4fecd40ff71f88d2ae06a6a6964
This commit is contained in:
parent
764aafa88d
commit
3ccddce7fd
178 changed files with 58342 additions and 54241 deletions
|
|
@ -1,436 +1,457 @@
|
|||
use std::path::{Path, PathBuf};
|
||||
use std::time::SystemTime;
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
time::SystemTime,
|
||||
};
|
||||
|
||||
use tracing::info;
|
||||
|
||||
use crate::audit;
|
||||
use crate::error::{PinakesError, Result};
|
||||
use crate::hash::compute_file_hash;
|
||||
use crate::links;
|
||||
use crate::media_type::{BuiltinMediaType, MediaType};
|
||||
use crate::metadata;
|
||||
use crate::model::*;
|
||||
use crate::storage::DynStorageBackend;
|
||||
use crate::thumbnail;
|
||||
use crate::{
|
||||
audit,
|
||||
error::{PinakesError, Result},
|
||||
hash::compute_file_hash,
|
||||
links,
|
||||
media_type::{BuiltinMediaType, MediaType},
|
||||
metadata,
|
||||
model::*,
|
||||
storage::DynStorageBackend,
|
||||
thumbnail,
|
||||
};
|
||||
|
||||
pub struct ImportResult {
|
||||
pub media_id: MediaId,
|
||||
pub was_duplicate: bool,
|
||||
/// True if the file was skipped because it hasn't changed since last scan
|
||||
pub was_skipped: bool,
|
||||
pub path: PathBuf,
|
||||
pub media_id: MediaId,
|
||||
pub was_duplicate: bool,
|
||||
/// True if the file was skipped because it hasn't changed since last scan
|
||||
pub was_skipped: bool,
|
||||
pub path: PathBuf,
|
||||
}
|
||||
|
||||
/// Options for import operations
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct ImportOptions {
|
||||
/// Skip files that haven't changed since last scan (based on mtime)
|
||||
pub incremental: bool,
|
||||
/// Force re-import even if mtime hasn't changed
|
||||
pub force: bool,
|
||||
/// Photo configuration for toggleable features
|
||||
pub photo_config: crate::config::PhotoConfig,
|
||||
/// Skip files that haven't changed since last scan (based on mtime)
|
||||
pub incremental: bool,
|
||||
/// Force re-import even if mtime hasn't changed
|
||||
pub force: bool,
|
||||
/// Photo configuration for toggleable features
|
||||
pub photo_config: crate::config::PhotoConfig,
|
||||
}
|
||||
|
||||
impl Default for ImportOptions {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
incremental: false,
|
||||
force: false,
|
||||
photo_config: crate::config::PhotoConfig::default(),
|
||||
}
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
incremental: false,
|
||||
force: false,
|
||||
photo_config: crate::config::PhotoConfig::default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the modification time of a file as a Unix timestamp
|
||||
fn get_file_mtime(path: &Path) -> Option<i64> {
|
||||
std::fs::metadata(path)
|
||||
.ok()
|
||||
.and_then(|m| m.modified().ok())
|
||||
.and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
|
||||
.map(|d| d.as_secs() as i64)
|
||||
std::fs::metadata(path)
|
||||
.ok()
|
||||
.and_then(|m| m.modified().ok())
|
||||
.and_then(|t| t.duration_since(SystemTime::UNIX_EPOCH).ok())
|
||||
.map(|d| d.as_secs() as i64)
|
||||
}
|
||||
|
||||
/// Check that a canonicalized path falls under at least one configured root directory.
|
||||
/// If no roots are configured, all paths are allowed (for ad-hoc imports).
|
||||
pub async fn validate_path_in_roots(storage: &DynStorageBackend, path: &Path) -> Result<()> {
|
||||
let roots = storage.list_root_dirs().await?;
|
||||
if roots.is_empty() {
|
||||
return Ok(());
|
||||
/// Check that a canonicalized path falls under at least one configured root
|
||||
/// directory. If no roots are configured, all paths are allowed (for ad-hoc
|
||||
/// imports).
|
||||
pub async fn validate_path_in_roots(
|
||||
storage: &DynStorageBackend,
|
||||
path: &Path,
|
||||
) -> Result<()> {
|
||||
let roots = storage.list_root_dirs().await?;
|
||||
if roots.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
for root in &roots {
|
||||
if let Ok(canonical_root) = root.canonicalize()
|
||||
&& path.starts_with(&canonical_root)
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
for root in &roots {
|
||||
if let Ok(canonical_root) = root.canonicalize()
|
||||
&& path.starts_with(&canonical_root)
|
||||
{
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
Err(PinakesError::InvalidOperation(format!(
|
||||
"path {} is not within any configured root directory",
|
||||
path.display()
|
||||
)))
|
||||
}
|
||||
Err(PinakesError::InvalidOperation(format!(
|
||||
"path {} is not within any configured root directory",
|
||||
path.display()
|
||||
)))
|
||||
}
|
||||
|
||||
pub async fn import_file(storage: &DynStorageBackend, path: &Path) -> Result<ImportResult> {
|
||||
import_file_with_options(storage, path, &ImportOptions::default()).await
|
||||
pub async fn import_file(
|
||||
storage: &DynStorageBackend,
|
||||
path: &Path,
|
||||
) -> Result<ImportResult> {
|
||||
import_file_with_options(storage, path, &ImportOptions::default()).await
|
||||
}
|
||||
|
||||
/// Import a file with configurable options for incremental scanning
|
||||
pub async fn import_file_with_options(
|
||||
storage: &DynStorageBackend,
|
||||
path: &Path,
|
||||
options: &ImportOptions,
|
||||
storage: &DynStorageBackend,
|
||||
path: &Path,
|
||||
options: &ImportOptions,
|
||||
) -> Result<ImportResult> {
|
||||
let path = path.canonicalize()?;
|
||||
let path = path.canonicalize()?;
|
||||
|
||||
if !path.exists() {
|
||||
return Err(PinakesError::FileNotFound(path));
|
||||
if !path.exists() {
|
||||
return Err(PinakesError::FileNotFound(path));
|
||||
}
|
||||
|
||||
validate_path_in_roots(storage, &path).await?;
|
||||
|
||||
let media_type = MediaType::from_path(&path)
|
||||
.ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?;
|
||||
|
||||
let current_mtime = get_file_mtime(&path);
|
||||
|
||||
// Check for incremental scan: skip if file hasn't changed
|
||||
if options.incremental
|
||||
&& !options.force
|
||||
&& let Some(existing) = storage.get_media_by_path(&path).await?
|
||||
&& let (Some(stored_mtime), Some(curr_mtime)) =
|
||||
(existing.file_mtime, current_mtime)
|
||||
&& stored_mtime == curr_mtime
|
||||
{
|
||||
return Ok(ImportResult {
|
||||
media_id: existing.id,
|
||||
was_duplicate: false,
|
||||
was_skipped: true,
|
||||
path: path.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
let content_hash = compute_file_hash(&path).await?;
|
||||
|
||||
if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
|
||||
// Update the mtime even for duplicates so incremental scan works
|
||||
if current_mtime.is_some() && existing.file_mtime != current_mtime {
|
||||
let mut updated = existing.clone();
|
||||
updated.file_mtime = current_mtime;
|
||||
let _ = storage.update_media(&updated).await;
|
||||
}
|
||||
return Ok(ImportResult {
|
||||
media_id: existing.id,
|
||||
was_duplicate: true,
|
||||
was_skipped: false,
|
||||
path: path.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
validate_path_in_roots(storage, &path).await?;
|
||||
let file_meta = std::fs::metadata(&path)?;
|
||||
let file_size = file_meta.len();
|
||||
|
||||
let media_type = MediaType::from_path(&path)
|
||||
.ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?;
|
||||
|
||||
let current_mtime = get_file_mtime(&path);
|
||||
|
||||
// Check for incremental scan: skip if file hasn't changed
|
||||
if options.incremental
|
||||
&& !options.force
|
||||
&& let Some(existing) = storage.get_media_by_path(&path).await?
|
||||
&& let (Some(stored_mtime), Some(curr_mtime)) = (existing.file_mtime, current_mtime)
|
||||
&& stored_mtime == curr_mtime
|
||||
{
|
||||
return Ok(ImportResult {
|
||||
media_id: existing.id,
|
||||
was_duplicate: false,
|
||||
was_skipped: true,
|
||||
path: path.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
let content_hash = compute_file_hash(&path).await?;
|
||||
|
||||
if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
|
||||
// Update the mtime even for duplicates so incremental scan works
|
||||
if current_mtime.is_some() && existing.file_mtime != current_mtime {
|
||||
let mut updated = existing.clone();
|
||||
updated.file_mtime = current_mtime;
|
||||
let _ = storage.update_media(&updated).await;
|
||||
}
|
||||
return Ok(ImportResult {
|
||||
media_id: existing.id,
|
||||
was_duplicate: true,
|
||||
was_skipped: false,
|
||||
path: path.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
let file_meta = std::fs::metadata(&path)?;
|
||||
let file_size = file_meta.len();
|
||||
|
||||
let extracted = {
|
||||
let path_clone = path.clone();
|
||||
let media_type_clone = media_type.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
metadata::extract_metadata(&path_clone, media_type_clone)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
|
||||
};
|
||||
|
||||
let file_name = path
|
||||
.file_name()
|
||||
.unwrap_or_default()
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
|
||||
let now = chrono::Utc::now();
|
||||
let media_id = MediaId::new();
|
||||
|
||||
// Generate thumbnail for image types
|
||||
let thumb_path = {
|
||||
let source = path.clone();
|
||||
let thumb_dir = thumbnail::default_thumbnail_dir();
|
||||
let media_type_clone = media_type.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
thumbnail::generate_thumbnail(media_id, &source, media_type_clone, &thumb_dir)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
|
||||
};
|
||||
|
||||
// Generate perceptual hash for image files (if enabled in config)
|
||||
let perceptual_hash = if options.photo_config.generate_perceptual_hash
|
||||
&& media_type.category() == crate::media_type::MediaCategory::Image
|
||||
{
|
||||
crate::metadata::image::generate_perceptual_hash(&path)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Check if this is a markdown file for link extraction
|
||||
let is_markdown = media_type == MediaType::Builtin(BuiltinMediaType::Markdown);
|
||||
|
||||
let item = MediaItem {
|
||||
id: media_id,
|
||||
path: path.clone(),
|
||||
file_name,
|
||||
media_type,
|
||||
content_hash,
|
||||
file_size,
|
||||
title: extracted.title,
|
||||
artist: extracted.artist,
|
||||
album: extracted.album,
|
||||
genre: extracted.genre,
|
||||
year: extracted.year,
|
||||
duration_secs: extracted.duration_secs,
|
||||
description: extracted.description,
|
||||
thumbnail_path: thumb_path,
|
||||
custom_fields: std::collections::HashMap::new(),
|
||||
file_mtime: current_mtime,
|
||||
|
||||
// Photo-specific metadata from extraction
|
||||
date_taken: extracted.date_taken,
|
||||
latitude: extracted.latitude,
|
||||
longitude: extracted.longitude,
|
||||
camera_make: extracted.camera_make,
|
||||
camera_model: extracted.camera_model,
|
||||
rating: extracted.rating,
|
||||
perceptual_hash,
|
||||
|
||||
// Managed storage fields - external files use defaults
|
||||
storage_mode: StorageMode::External,
|
||||
original_filename: None,
|
||||
uploaded_at: None,
|
||||
storage_key: None,
|
||||
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
|
||||
// New items are not deleted
|
||||
deleted_at: None,
|
||||
|
||||
// Links will be extracted separately
|
||||
links_extracted_at: None,
|
||||
};
|
||||
|
||||
storage.insert_media(&item).await?;
|
||||
|
||||
// Extract and store markdown links for markdown files
|
||||
if is_markdown {
|
||||
if let Err(e) = extract_and_store_links(storage, media_id, &path).await {
|
||||
tracing::warn!(
|
||||
media_id = %media_id,
|
||||
path = %path.display(),
|
||||
error = %e,
|
||||
"failed to extract markdown links"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Store extracted extra metadata as custom fields
|
||||
for (key, value) in &extracted.extra {
|
||||
let field = CustomField {
|
||||
field_type: CustomFieldType::Text,
|
||||
value: value.clone(),
|
||||
};
|
||||
if let Err(e) = storage.set_custom_field(media_id, key, &field).await {
|
||||
tracing::warn!(
|
||||
media_id = %media_id,
|
||||
field = %key,
|
||||
error = %e,
|
||||
"failed to store extracted metadata as custom field"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
audit::record_action(
|
||||
storage,
|
||||
Some(media_id),
|
||||
AuditAction::Imported,
|
||||
Some(format!("path={}", path.display())),
|
||||
)
|
||||
.await?;
|
||||
|
||||
info!(media_id = %media_id, path = %path.display(), "imported media file");
|
||||
|
||||
Ok(ImportResult {
|
||||
media_id,
|
||||
was_duplicate: false,
|
||||
was_skipped: false,
|
||||
path: path.clone(),
|
||||
let extracted = {
|
||||
let path_clone = path.clone();
|
||||
let media_type_clone = media_type.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
metadata::extract_metadata(&path_clone, media_type_clone)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
|
||||
};
|
||||
|
||||
let file_name = path
|
||||
.file_name()
|
||||
.unwrap_or_default()
|
||||
.to_string_lossy()
|
||||
.to_string();
|
||||
|
||||
let now = chrono::Utc::now();
|
||||
let media_id = MediaId::new();
|
||||
|
||||
// Generate thumbnail for image types
|
||||
let thumb_path = {
|
||||
let source = path.clone();
|
||||
let thumb_dir = thumbnail::default_thumbnail_dir();
|
||||
let media_type_clone = media_type.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
thumbnail::generate_thumbnail(
|
||||
media_id,
|
||||
&source,
|
||||
media_type_clone,
|
||||
&thumb_dir,
|
||||
)
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
|
||||
};
|
||||
|
||||
// Generate perceptual hash for image files (if enabled in config)
|
||||
let perceptual_hash = if options.photo_config.generate_perceptual_hash
|
||||
&& media_type.category() == crate::media_type::MediaCategory::Image
|
||||
{
|
||||
crate::metadata::image::generate_perceptual_hash(&path)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Check if this is a markdown file for link extraction
|
||||
let is_markdown =
|
||||
media_type == MediaType::Builtin(BuiltinMediaType::Markdown);
|
||||
|
||||
let item = MediaItem {
|
||||
id: media_id,
|
||||
path: path.clone(),
|
||||
file_name,
|
||||
media_type,
|
||||
content_hash,
|
||||
file_size,
|
||||
title: extracted.title,
|
||||
artist: extracted.artist,
|
||||
album: extracted.album,
|
||||
genre: extracted.genre,
|
||||
year: extracted.year,
|
||||
duration_secs: extracted.duration_secs,
|
||||
description: extracted.description,
|
||||
thumbnail_path: thumb_path,
|
||||
custom_fields: std::collections::HashMap::new(),
|
||||
file_mtime: current_mtime,
|
||||
|
||||
// Photo-specific metadata from extraction
|
||||
date_taken: extracted.date_taken,
|
||||
latitude: extracted.latitude,
|
||||
longitude: extracted.longitude,
|
||||
camera_make: extracted.camera_make,
|
||||
camera_model: extracted.camera_model,
|
||||
rating: extracted.rating,
|
||||
perceptual_hash,
|
||||
|
||||
// Managed storage fields - external files use defaults
|
||||
storage_mode: StorageMode::External,
|
||||
original_filename: None,
|
||||
uploaded_at: None,
|
||||
storage_key: None,
|
||||
|
||||
created_at: now,
|
||||
updated_at: now,
|
||||
|
||||
// New items are not deleted
|
||||
deleted_at: None,
|
||||
|
||||
// Links will be extracted separately
|
||||
links_extracted_at: None,
|
||||
};
|
||||
|
||||
storage.insert_media(&item).await?;
|
||||
|
||||
// Extract and store markdown links for markdown files
|
||||
if is_markdown {
|
||||
if let Err(e) = extract_and_store_links(storage, media_id, &path).await {
|
||||
tracing::warn!(
|
||||
media_id = %media_id,
|
||||
path = %path.display(),
|
||||
error = %e,
|
||||
"failed to extract markdown links"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Store extracted extra metadata as custom fields
|
||||
for (key, value) in &extracted.extra {
|
||||
let field = CustomField {
|
||||
field_type: CustomFieldType::Text,
|
||||
value: value.clone(),
|
||||
};
|
||||
if let Err(e) = storage.set_custom_field(media_id, key, &field).await {
|
||||
tracing::warn!(
|
||||
media_id = %media_id,
|
||||
field = %key,
|
||||
error = %e,
|
||||
"failed to store extracted metadata as custom field"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
audit::record_action(
|
||||
storage,
|
||||
Some(media_id),
|
||||
AuditAction::Imported,
|
||||
Some(format!("path={}", path.display())),
|
||||
)
|
||||
.await?;
|
||||
|
||||
info!(media_id = %media_id, path = %path.display(), "imported media file");
|
||||
|
||||
Ok(ImportResult {
|
||||
media_id,
|
||||
was_duplicate: false,
|
||||
was_skipped: false,
|
||||
path: path.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn should_ignore(path: &std::path::Path, patterns: &[String]) -> bool {
|
||||
for component in path.components() {
|
||||
if let std::path::Component::Normal(name) = component {
|
||||
let name_str = name.to_string_lossy();
|
||||
for pattern in patterns {
|
||||
if pattern.starts_with('.')
|
||||
&& name_str.starts_with('.')
|
||||
&& pattern == name_str.as_ref()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
// Simple glob: ".*" matches any dotfile
|
||||
if pattern == ".*" && name_str.starts_with('.') {
|
||||
return true;
|
||||
}
|
||||
if name_str == pattern.as_str() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
pub(crate) fn should_ignore(
|
||||
path: &std::path::Path,
|
||||
patterns: &[String],
|
||||
) -> bool {
|
||||
for component in path.components() {
|
||||
if let std::path::Component::Normal(name) = component {
|
||||
let name_str = name.to_string_lossy();
|
||||
for pattern in patterns {
|
||||
if pattern.starts_with('.')
|
||||
&& name_str.starts_with('.')
|
||||
&& pattern == name_str.as_ref()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
// Simple glob: ".*" matches any dotfile
|
||||
if pattern == ".*" && name_str.starts_with('.') {
|
||||
return true;
|
||||
}
|
||||
if name_str == pattern.as_str() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Default number of concurrent import tasks.
|
||||
const DEFAULT_IMPORT_CONCURRENCY: usize = 8;
|
||||
|
||||
pub async fn import_directory(
|
||||
storage: &DynStorageBackend,
|
||||
dir: &Path,
|
||||
ignore_patterns: &[String],
|
||||
storage: &DynStorageBackend,
|
||||
dir: &Path,
|
||||
ignore_patterns: &[String],
|
||||
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
|
||||
import_directory_with_options(
|
||||
storage,
|
||||
dir,
|
||||
ignore_patterns,
|
||||
DEFAULT_IMPORT_CONCURRENCY,
|
||||
&ImportOptions::default(),
|
||||
)
|
||||
.await
|
||||
import_directory_with_options(
|
||||
storage,
|
||||
dir,
|
||||
ignore_patterns,
|
||||
DEFAULT_IMPORT_CONCURRENCY,
|
||||
&ImportOptions::default(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn import_directory_with_concurrency(
|
||||
storage: &DynStorageBackend,
|
||||
dir: &Path,
|
||||
ignore_patterns: &[String],
|
||||
concurrency: usize,
|
||||
storage: &DynStorageBackend,
|
||||
dir: &Path,
|
||||
ignore_patterns: &[String],
|
||||
concurrency: usize,
|
||||
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
|
||||
import_directory_with_options(
|
||||
storage,
|
||||
dir,
|
||||
ignore_patterns,
|
||||
concurrency,
|
||||
&ImportOptions::default(),
|
||||
)
|
||||
.await
|
||||
import_directory_with_options(
|
||||
storage,
|
||||
dir,
|
||||
ignore_patterns,
|
||||
concurrency,
|
||||
&ImportOptions::default(),
|
||||
)
|
||||
.await
|
||||
}
|
||||
|
||||
/// Import a directory with full options including incremental scanning support
|
||||
pub async fn import_directory_with_options(
|
||||
storage: &DynStorageBackend,
|
||||
dir: &Path,
|
||||
ignore_patterns: &[String],
|
||||
concurrency: usize,
|
||||
options: &ImportOptions,
|
||||
storage: &DynStorageBackend,
|
||||
dir: &Path,
|
||||
ignore_patterns: &[String],
|
||||
concurrency: usize,
|
||||
options: &ImportOptions,
|
||||
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
|
||||
let concurrency = concurrency.clamp(1, 256);
|
||||
let dir = dir.to_path_buf();
|
||||
let patterns = ignore_patterns.to_vec();
|
||||
let options = options.clone();
|
||||
let concurrency = concurrency.clamp(1, 256);
|
||||
let dir = dir.to_path_buf();
|
||||
let patterns = ignore_patterns.to_vec();
|
||||
let options = options.clone();
|
||||
|
||||
let entries: Vec<PathBuf> = {
|
||||
let dir = dir.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
walkdir::WalkDir::new(&dir)
|
||||
.follow_links(true)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| e.file_type().is_file())
|
||||
.filter(|e| MediaType::from_path(e.path()).is_some())
|
||||
.filter(|e| !should_ignore(e.path(), &patterns))
|
||||
.map(|e| e.path().to_path_buf())
|
||||
.collect()
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::Io(std::io::Error::other(e)))?
|
||||
};
|
||||
let entries: Vec<PathBuf> = {
|
||||
let dir = dir.clone();
|
||||
tokio::task::spawn_blocking(move || {
|
||||
walkdir::WalkDir::new(&dir)
|
||||
.follow_links(true)
|
||||
.into_iter()
|
||||
.filter_map(|e| e.ok())
|
||||
.filter(|e| e.file_type().is_file())
|
||||
.filter(|e| MediaType::from_path(e.path()).is_some())
|
||||
.filter(|e| !should_ignore(e.path(), &patterns))
|
||||
.map(|e| e.path().to_path_buf())
|
||||
.collect()
|
||||
})
|
||||
.await
|
||||
.map_err(|e| PinakesError::Io(std::io::Error::other(e)))?
|
||||
};
|
||||
|
||||
let mut results = Vec::with_capacity(entries.len());
|
||||
let mut join_set = tokio::task::JoinSet::new();
|
||||
let mut results = Vec::with_capacity(entries.len());
|
||||
let mut join_set = tokio::task::JoinSet::new();
|
||||
|
||||
for entry_path in entries {
|
||||
let storage = storage.clone();
|
||||
let path = entry_path.clone();
|
||||
let opts = options.clone();
|
||||
for entry_path in entries {
|
||||
let storage = storage.clone();
|
||||
let path = entry_path.clone();
|
||||
let opts = options.clone();
|
||||
|
||||
join_set.spawn(async move {
|
||||
let result = import_file_with_options(&storage, &path, &opts).await;
|
||||
(path, result)
|
||||
});
|
||||
join_set.spawn(async move {
|
||||
let result = import_file_with_options(&storage, &path, &opts).await;
|
||||
(path, result)
|
||||
});
|
||||
|
||||
// Limit concurrency by draining when we hit the cap
|
||||
if join_set.len() >= concurrency
|
||||
&& let Some(Ok((path, result))) = join_set.join_next().await
|
||||
{
|
||||
match result {
|
||||
Ok(r) => results.push(Ok(r)),
|
||||
Err(e) => {
|
||||
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
|
||||
results.push(Err(e));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Limit concurrency by draining when we hit the cap
|
||||
if join_set.len() >= concurrency
|
||||
&& let Some(Ok((path, result))) = join_set.join_next().await
|
||||
{
|
||||
match result {
|
||||
Ok(r) => results.push(Ok(r)),
|
||||
Err(e) => {
|
||||
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
|
||||
results.push(Err(e));
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Drain remaining tasks
|
||||
while let Some(Ok((path, result))) = join_set.join_next().await {
|
||||
match result {
|
||||
Ok(r) => results.push(Ok(r)),
|
||||
Err(e) => {
|
||||
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
|
||||
results.push(Err(e));
|
||||
}
|
||||
}
|
||||
// Drain remaining tasks
|
||||
while let Some(Ok((path, result))) = join_set.join_next().await {
|
||||
match result {
|
||||
Ok(r) => results.push(Ok(r)),
|
||||
Err(e) => {
|
||||
tracing::warn!(path = %path.display(), error = %e, "failed to import file");
|
||||
results.push(Err(e));
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results)
|
||||
Ok(results)
|
||||
}
|
||||
|
||||
/// Extract markdown links from a file and store them in the database.
|
||||
async fn extract_and_store_links(
|
||||
storage: &DynStorageBackend,
|
||||
media_id: MediaId,
|
||||
path: &Path,
|
||||
storage: &DynStorageBackend,
|
||||
media_id: MediaId,
|
||||
path: &Path,
|
||||
) -> Result<()> {
|
||||
// Read file content
|
||||
let content = tokio::fs::read_to_string(path).await.map_err(|e| {
|
||||
PinakesError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
format!("failed to read markdown file for link extraction: {e}"),
|
||||
))
|
||||
})?;
|
||||
// Read file content
|
||||
let content = tokio::fs::read_to_string(path).await.map_err(|e| {
|
||||
PinakesError::Io(std::io::Error::new(
|
||||
std::io::ErrorKind::Other,
|
||||
format!("failed to read markdown file for link extraction: {e}"),
|
||||
))
|
||||
})?;
|
||||
|
||||
// Extract links
|
||||
let extracted_links = links::extract_links(media_id, &content);
|
||||
// Extract links
|
||||
let extracted_links = links::extract_links(media_id, &content);
|
||||
|
||||
if extracted_links.is_empty() {
|
||||
// No links found, just mark as extracted
|
||||
storage.mark_links_extracted(media_id).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Clear any existing links for this media (in case of re-import)
|
||||
storage.clear_links_for_media(media_id).await?;
|
||||
|
||||
// Save extracted links
|
||||
storage
|
||||
.save_markdown_links(media_id, &extracted_links)
|
||||
.await?;
|
||||
|
||||
// Mark links as extracted
|
||||
if extracted_links.is_empty() {
|
||||
// No links found, just mark as extracted
|
||||
storage.mark_links_extracted(media_id).await?;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
tracing::debug!(
|
||||
media_id = %media_id,
|
||||
link_count = extracted_links.len(),
|
||||
"extracted markdown links"
|
||||
);
|
||||
// Clear any existing links for this media (in case of re-import)
|
||||
storage.clear_links_for_media(media_id).await?;
|
||||
|
||||
Ok(())
|
||||
// Save extracted links
|
||||
storage
|
||||
.save_markdown_links(media_id, &extracted_links)
|
||||
.await?;
|
||||
|
||||
// Mark links as extracted
|
||||
storage.mark_links_extracted(media_id).await?;
|
||||
|
||||
tracing::debug!(
|
||||
media_id = %media_id,
|
||||
link_count = extracted_links.len(),
|
||||
"extracted markdown links"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue