pinakes-server: TLS support; session persistence and security polish
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: If2c9c3e3af62bbf9f33a97be89ac40bc6a6a6964
This commit is contained in:
parent
758aba0f7a
commit
87a4482576
19 changed files with 1835 additions and 111 deletions
|
|
@ -1,3 +1,4 @@
|
|||
use std::collections::{HashMap, HashSet};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -5,6 +6,7 @@ use tracing::{info, warn};
|
|||
|
||||
use crate::error::Result;
|
||||
use crate::hash::compute_file_hash;
|
||||
use crate::media_type::MediaType;
|
||||
use crate::model::{ContentHash, MediaId};
|
||||
use crate::storage::DynStorageBackend;
|
||||
|
||||
|
|
@ -66,31 +68,202 @@ impl std::str::FromStr for IntegrityStatus {
|
|||
}
|
||||
}
|
||||
|
||||
/// Detect orphaned media items (files that no longer exist on disk).
|
||||
/// Detect orphaned media items (files that no longer exist on disk),
|
||||
/// untracked files (files on disk not in database), and moved files (same hash, different path).
|
||||
pub async fn detect_orphans(storage: &DynStorageBackend) -> Result<OrphanReport> {
|
||||
let media_paths = storage.list_media_paths().await?;
|
||||
let mut orphaned_ids = Vec::new();
|
||||
let moved_files = Vec::new();
|
||||
|
||||
// Build hash index: ContentHash -> Vec<(MediaId, PathBuf)>
|
||||
let mut hash_index: HashMap<ContentHash, Vec<(MediaId, PathBuf)>> = HashMap::new();
|
||||
for (id, path, hash) in &media_paths {
|
||||
hash_index
|
||||
.entry(hash.clone())
|
||||
.or_insert_with(Vec::new)
|
||||
.push((*id, path.clone()));
|
||||
}
|
||||
|
||||
// Detect orphaned files (in DB but not on disk)
|
||||
for (id, path, _hash) in &media_paths {
|
||||
if !path.exists() {
|
||||
orphaned_ids.push(*id);
|
||||
}
|
||||
}
|
||||
|
||||
// Detect moved files (orphaned items with same hash existing elsewhere)
|
||||
let moved_files = detect_moved_files(&orphaned_ids, &media_paths, &hash_index);
|
||||
|
||||
// Detect untracked files (on disk but not in DB)
|
||||
let untracked_paths = detect_untracked_files(storage, &media_paths).await?;
|
||||
|
||||
info!(
|
||||
orphaned = orphaned_ids.len(),
|
||||
untracked = untracked_paths.len(),
|
||||
moved = moved_files.len(),
|
||||
total = media_paths.len(),
|
||||
"orphan detection complete"
|
||||
);
|
||||
|
||||
Ok(OrphanReport {
|
||||
orphaned_ids,
|
||||
untracked_paths: Vec::new(),
|
||||
untracked_paths,
|
||||
moved_files,
|
||||
})
|
||||
}
|
||||
|
||||
/// Detect files that appear to have moved (same content hash, different path).
|
||||
fn detect_moved_files(
|
||||
orphaned_ids: &[MediaId],
|
||||
media_paths: &[(MediaId, PathBuf, ContentHash)],
|
||||
hash_index: &HashMap<ContentHash, Vec<(MediaId, PathBuf)>>,
|
||||
) -> Vec<(MediaId, PathBuf, PathBuf)> {
|
||||
let mut moved = Vec::new();
|
||||
|
||||
// Build lookup map for orphaned items: MediaId -> (PathBuf, ContentHash)
|
||||
let orphaned_map: HashMap<MediaId, (PathBuf, ContentHash)> = media_paths
|
||||
.iter()
|
||||
.filter(|(id, _, _)| orphaned_ids.contains(id))
|
||||
.map(|(id, path, hash)| (*id, (path.clone(), hash.clone())))
|
||||
.collect();
|
||||
|
||||
// For each orphaned item, check if there's another file with the same hash
|
||||
for (orphaned_id, (old_path, hash)) in &orphaned_map {
|
||||
if let Some(items_with_hash) = hash_index.get(hash) {
|
||||
// Find other items with same hash that exist on disk
|
||||
for (other_id, new_path) in items_with_hash {
|
||||
// Skip if it's the same item
|
||||
if other_id == orphaned_id {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if the new path exists
|
||||
if new_path.exists() {
|
||||
moved.push((*orphaned_id, old_path.clone(), new_path.clone()));
|
||||
// Only report first match (most likely candidate)
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
moved
|
||||
}
|
||||
|
||||
/// Detect files on disk that are not tracked in the database.
|
||||
async fn detect_untracked_files(
|
||||
storage: &DynStorageBackend,
|
||||
media_paths: &[(MediaId, PathBuf, ContentHash)],
|
||||
) -> Result<Vec<PathBuf>> {
|
||||
// Get root directories
|
||||
let roots = storage.list_root_dirs().await?;
|
||||
if roots.is_empty() {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
// Build set of tracked paths for fast lookup
|
||||
let tracked_paths: HashSet<PathBuf> = media_paths
|
||||
.iter()
|
||||
.map(|(_, path, _)| path.clone())
|
||||
.collect();
|
||||
|
||||
// Get ignore patterns (we'll need to load config somehow, for now use empty)
|
||||
let ignore_patterns: Vec<String> = vec![
|
||||
".*".to_string(),
|
||||
"node_modules".to_string(),
|
||||
"__pycache__".to_string(),
|
||||
"target".to_string(),
|
||||
];
|
||||
|
||||
// Walk filesystem for each root in parallel (limit concurrency to 4)
|
||||
let mut filesystem_paths = HashSet::new();
|
||||
let mut tasks = tokio::task::JoinSet::new();
|
||||
|
||||
for root in roots {
|
||||
let ignore_patterns = ignore_patterns.clone();
|
||||
tasks.spawn_blocking(move || -> Result<Vec<PathBuf>> {
|
||||
let mut paths = Vec::new();
|
||||
|
||||
let walker = walkdir::WalkDir::new(&root)
|
||||
.follow_links(false)
|
||||
.into_iter()
|
||||
.filter_entry(|e| {
|
||||
// Skip directories that match ignore patterns
|
||||
if e.file_type().is_dir() {
|
||||
let name = e.file_name().to_string_lossy();
|
||||
for pattern in &ignore_patterns {
|
||||
if pattern.starts_with("*.") {
|
||||
// Extension pattern
|
||||
if let Some(ext) = pattern.strip_prefix("*.") {
|
||||
if name.ends_with(ext) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if pattern.contains('*') {
|
||||
// Glob pattern - simplified matching
|
||||
let pattern_without_stars = pattern.replace('*', "");
|
||||
if name.contains(&pattern_without_stars) {
|
||||
return false;
|
||||
}
|
||||
} else if name.as_ref() == pattern
|
||||
|| name.starts_with(&format!("{pattern}."))
|
||||
{
|
||||
// Exact match or starts with pattern
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
});
|
||||
|
||||
for entry in walker {
|
||||
match entry {
|
||||
Ok(entry) => {
|
||||
let path = entry.path();
|
||||
|
||||
// Only process files
|
||||
if !path.is_file() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Check if it's a supported media type
|
||||
if MediaType::from_path(path).is_some() {
|
||||
paths.push(path.to_path_buf());
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "failed to read directory entry");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(paths)
|
||||
});
|
||||
}
|
||||
|
||||
// Collect results from all tasks
|
||||
while let Some(result) = tasks.join_next().await {
|
||||
match result {
|
||||
Ok(Ok(paths)) => {
|
||||
filesystem_paths.extend(paths);
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
warn!(error = %e, "failed to walk directory");
|
||||
}
|
||||
Err(e) => {
|
||||
warn!(error = %e, "task join error");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compute set difference: filesystem - tracked
|
||||
let untracked: Vec<PathBuf> = filesystem_paths
|
||||
.difference(&tracked_paths)
|
||||
.cloned()
|
||||
.collect();
|
||||
|
||||
Ok(untracked)
|
||||
}
|
||||
|
||||
/// Resolve orphaned media items by deleting them from the database.
|
||||
pub async fn resolve_orphans(
|
||||
storage: &DynStorageBackend,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue