initial commit

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I4a6b498153eccd5407510dd541b7f4816a6a6964
commit 6a73d11c4b
124 changed files with 34856 additions and 0 deletions

crates/pinakes-core/src/import.rs (new file, 250 lines)
@@ -0,0 +1,250 @@
use std::path::{Path, PathBuf};

use tracing::info;

use crate::audit;
use crate::error::{PinakesError, Result};
use crate::hash::compute_file_hash;
use crate::media_type::MediaType;
use crate::metadata;
use crate::model::*;
use crate::storage::DynStorageBackend;
use crate::thumbnail;

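/// The outcome of importing a single file: the media ID now associated with
/// the file, whether it was recognized as a duplicate by content hash, and
/// the canonicalized source path.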
pub struct ImportResult {
    pub media_id: MediaId,
    pub was_duplicate: bool,
    pub path: PathBuf,
}

/// Check that a canonicalized path falls under at least one configured root directory.
/// If no roots are configured, all paths are allowed (for ad-hoc imports).
pub async fn validate_path_in_roots(storage: &DynStorageBackend, path: &Path) -> Result<()> {
    let roots = storage.list_root_dirs().await?;
    if roots.is_empty() {
        return Ok(());
    }
    for root in &roots {
        if let Ok(canonical_root) = root.canonicalize()
            && path.starts_with(&canonical_root)
        {
            return Ok(());
        }
    }
    Err(PinakesError::InvalidOperation(format!(
        "path {} is not within any configured root directory",
        path.display()
    )))
}

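/// Import a single file: canonicalize and validate the path, hash the
/// contents for duplicate detection, extract metadata and a thumbnail on
/// blocking threads, then persist the resulting `MediaItem` along with an
/// audit record.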
pub async fn import_file(storage: &DynStorageBackend, path: &Path) -> Result<ImportResult> {
    // Check for existence before canonicalizing: `canonicalize` already fails
    // with a generic I/O error on a missing path, which would make the
    // `FileNotFound` variant unreachable if checked afterwards.
    if !path.exists() {
        return Err(PinakesError::FileNotFound(path.to_path_buf()));
    }
    let path = path.canonicalize()?;

    validate_path_in_roots(storage, &path).await?;

    let media_type = MediaType::from_path(&path)
        .ok_or_else(|| PinakesError::UnsupportedMediaType(path.clone()))?;

    let content_hash = compute_file_hash(&path).await?;

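    // Duplicate detection is content-addressed: if this hash is already
    // indexed, reuse the existing record instead of inserting a new one.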
    if let Some(existing) = storage.get_media_by_hash(&content_hash).await? {
        return Ok(ImportResult {
            media_id: existing.id,
            was_duplicate: true,
            path: path.clone(),
        });
    }

    let file_meta = std::fs::metadata(&path)?;
    let file_size = file_meta.len();

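    // Metadata extraction is synchronous and potentially CPU-heavy, so it
    // runs on the blocking thread pool instead of stalling the async runtime.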
    let extracted = {
        let path_clone = path.clone();
        tokio::task::spawn_blocking(move || metadata::extract_metadata(&path_clone, media_type))
            .await
            .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
    };

    let file_name = path
        .file_name()
        .unwrap_or_default()
        .to_string_lossy()
        .to_string();

    let now = chrono::Utc::now();
    let media_id = MediaId::new();

    // Generate thumbnail for image types
    let thumb_path = {
        let source = path.clone();
        let thumb_dir = thumbnail::default_thumbnail_dir();
        tokio::task::spawn_blocking(move || {
            thumbnail::generate_thumbnail(media_id, &source, media_type, &thumb_dir)
        })
        .await
        .map_err(|e| PinakesError::MetadataExtraction(e.to_string()))??
    };

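    // Assemble the media record from the canonicalized path, content hash,
    // and the metadata extracted above.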
    let item = MediaItem {
        id: media_id,
        path: path.clone(),
        file_name,
        media_type,
        content_hash,
        file_size,
        title: extracted.title,
        artist: extracted.artist,
        album: extracted.album,
        genre: extracted.genre,
        year: extracted.year,
        duration_secs: extracted.duration_secs,
        description: extracted.description,
        thumbnail_path: thumb_path,
        custom_fields: std::collections::HashMap::new(),
        created_at: now,
        updated_at: now,
    };

    storage.insert_media(&item).await?;

    // Store extracted extra metadata as custom fields
    for (key, value) in &extracted.extra {
        let field = CustomField {
            field_type: CustomFieldType::Text,
            value: value.clone(),
        };
        if let Err(e) = storage.set_custom_field(media_id, key, &field).await {
            tracing::warn!(
                media_id = %media_id,
                field = %key,
                error = %e,
                "failed to store extracted metadata as custom field"
            );
        }
    }

    audit::record_action(
        storage,
        Some(media_id),
        AuditAction::Imported,
        Some(format!("path={}", path.display())),
    )
    .await?;

    info!(media_id = %media_id, path = %path.display(), "imported media file");

    Ok(ImportResult {
        media_id,
        was_duplicate: false,
        path: path.clone(),
    })
}

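/// Return true if any normal component of `path` matches one of the ignore
/// patterns, either by exact name or via the special ".*" pattern, which
/// matches every dotfile.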
pub(crate) fn should_ignore(path: &std::path::Path, patterns: &[String]) -> bool {
    for component in path.components() {
        if let std::path::Component::Normal(name) = component {
            let name_str = name.to_string_lossy();
            for pattern in patterns {
                // Simple glob: ".*" matches any dotfile
                if pattern == ".*" && name_str.starts_with('.') {
                    return true;
                }
                // Otherwise, match component names exactly (dotfile patterns
                // like ".git" are covered by this case as well)
                if name_str == pattern.as_str() {
                    return true;
                }
            }
        }
    }
    false
}

/// Default number of concurrent import tasks.
const DEFAULT_IMPORT_CONCURRENCY: usize = 8;

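/// Recursively import every supported file under `dir`, skipping paths that
/// match `ignore_patterns`, using the default concurrency limit.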
pub async fn import_directory(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
    import_directory_with_concurrency(storage, dir, ignore_patterns, DEFAULT_IMPORT_CONCURRENCY)
        .await
}

pub async fn import_directory_with_concurrency(
    storage: &DynStorageBackend,
    dir: &Path,
    ignore_patterns: &[String],
    concurrency: usize,
) -> Result<Vec<std::result::Result<ImportResult, PinakesError>>> {
    let concurrency = concurrency.clamp(1, 256);
    let dir = dir.to_path_buf();
    let patterns = ignore_patterns.to_vec();

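    // Walk the tree on the blocking pool (walkdir does synchronous I/O),
    // keeping only regular files with a recognized media type that do not
    // match an ignore pattern.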
    let entries: Vec<PathBuf> = {
        let dir = dir.clone();
        tokio::task::spawn_blocking(move || {
            walkdir::WalkDir::new(&dir)
                .follow_links(true)
                .into_iter()
                .filter_map(|e| e.ok())
                .filter(|e| e.file_type().is_file())
                .filter(|e| MediaType::from_path(e.path()).is_some())
                .filter(|e| !should_ignore(e.path(), &patterns))
                .map(|e| e.path().to_path_buf())
                .collect()
        })
        .await
        .map_err(|e| PinakesError::Io(std::io::Error::other(e)))?
    };

    let mut results = Vec::with_capacity(entries.len());
    let mut join_set = tokio::task::JoinSet::new();

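    // Spawn one task per file, but keep at most `concurrency` tasks in
    // flight; each task returns its path alongside the result so failures
    // can be logged per file.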
    for path in entries {
        let storage = storage.clone();

        join_set.spawn(async move {
            let result = import_file(&storage, &path).await;
            (path, result)
        });

        // Limit concurrency by draining when we hit the cap
        if join_set.len() >= concurrency
            && let Some(Ok((path, result))) = join_set.join_next().await
        {
            match result {
                Ok(r) => results.push(Ok(r)),
                Err(e) => {
                    tracing::warn!(path = %path.display(), error = %e, "failed to import file");
                    results.push(Err(e));
                }
            }
        }
    }

    // Drain remaining tasks
    while let Some(Ok((path, result))) = join_set.join_next().await {
        match result {
            Ok(r) => results.push(Ok(r)),
            Err(e) => {
                tracing::warn!(path = %path.display(), error = %e, "failed to import file");
                results.push(Err(e));
            }
        }
    }

    Ok(results)
}
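
// A minimal test sketch for `should_ignore`, exercising the exact-name and
// ".*" dotfile rules implemented above; the paths and patterns used here are
// illustrative only.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn ignores_by_component_name_and_dotfile_glob() {
        let patterns = vec![".*".to_string(), "node_modules".to_string()];
        // ".*" matches the ".git" component
        assert!(should_ignore(Path::new("media/.git/config"), &patterns));
        // exact component-name match
        assert!(should_ignore(Path::new("media/node_modules/track.mp3"), &patterns));
        // no component matches any pattern
        assert!(!should_ignore(Path::new("media/album/track.mp3"), &patterns));
    }
}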