treewide: extract various components from pinakes-core into their own crates

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ida2b25c66d62b40e75eeee924fe9c39c6a6a6964
This commit is contained in:
raf 2026-05-20 17:34:42 +03:00
commit e955f167b9
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
40 changed files with 10654 additions and 26 deletions

View file

@ -1,6 +1,12 @@
[workspace]
members = ["crates/*", "packages/*", "xtask"]
exclude = ["crates/pinakes-core/tests/fixtures/test-plugin"]
exclude = [
"crates/pinakes-core/tests/fixtures/test-plugin",
"examples/plugins/auto-tagger",
"examples/plugins/text-enrichment",
"examples/plugins/subtitle-detector",
"examples/plugins/cbz-comics",
]
resolver = "3"
[workspace.package]
@ -15,6 +21,12 @@ rust-version = "1.95.0" # follows nightly Rust
# while building any package.
pinakes-core = { path = "./crates/pinakes-core" }
pinakes-plugin-api = { path = "./crates/pinakes-plugin-api" }
pinakes-migrations = { path = "./crates/pinakes-migrations" }
pinakes-types = { path = "./crates/pinakes-types" }
pinakes-metadata = { path = "./crates/pinakes-metadata" }
pinakes-plugin = { path = "./crates/pinakes-plugin" }
pinakes-enrichment = { path = "./crates/pinakes-enrichment" }
pinakes-sync = { path = "./crates/pinakes-sync" }
# Pinakes itself is a REST API server. UI and TUI are official visual components
# that connect to the server. Using the API documentation, the user can write
@ -27,53 +39,54 @@ pinakes-tui = { path = "./packages/pinakes-tui" }
# Other dependencies. Declaring them in the virtual manifests lets use reuse the crates
# without having to track individual crate version across different types of crates. This
# also includes *dev* dependencies.
tokio = { version = "1.50.0", features = ["full"] }
tokio = { version = "1.52.3", features = ["full"] }
tokio-util = { version = "0.7.18", features = ["rt"] }
serde = { version = "1.0.228", features = ["derive"] }
serde_json = "1.0.149"
toml = "1.0.7"
clap = { version = "4.6.0", features = ["derive", "env"] }
toml = "1.1.2"
clap = { version = "4.6.1", features = ["derive", "env"] }
chrono = { version = "0.4.44", features = ["serde"] }
uuid = { version = "1.22.0", features = ["v7", "serde"] }
uuid = { version = "1.23.1", features = ["v7", "serde"] }
thiserror = "2.0.18"
anyhow = "1.0.102"
tracing = "0.1.44"
tracing-subscriber = { version = "0.3.23", features = ["env-filter", "json"] }
blake3 = "1.8.3"
rustc-hash = "2.1.1"
blake3 = "1.8.5"
rustc-hash = "2.1.2"
ed25519-dalek = { version = "2.2.0", features = ["std"] }
lofty = "0.23.3"
lofty = "0.24.0"
lopdf = "0.40.0"
epub = "2.1.5"
matroska = "0.30.0"
matroska = "0.30.1"
gray_matter = "0.3.2"
kamadak-exif = "0.6.1"
rusqlite = { version = "0.37.0", features = ["bundled", "column_decltype"] }
tokio-postgres = { version = "0.7.16", features = [
rusqlite = { version = "0.39.0", features = ["bundled", "column_decltype"] }
tokio-postgres = { version = "0.7.17", features = [
"with-uuid-1",
"with-chrono-0_4",
"with-serde_json-1",
] }
deadpool-postgres = "0.14.1"
postgres-types = { version = "0.2.12", features = ["derive"] }
postgres-native-tls = "0.5.2"
postgres-types = { version = "0.2.13", features = ["derive"] }
postgres-native-tls = "0.5.3"
native-tls = "0.2.18"
refinery = { version = "0.9.0", features = ["rusqlite", "tokio-postgres"] }
refinery = { version = "0.9.1", features = ["tokio-postgres"] }
rusqlite_migration = "2.5.0"
walkdir = "2.5.0"
notify = { version = "8.2.0", features = ["macos_fsevent"] }
winnow = "1.0.0"
axum = { version = "0.8.8", features = ["macros", "multipart"] }
winnow = "1.0.3"
axum = { version = "0.8.9", features = ["macros", "multipart"] }
axum-server = { version = "0.8.0" }
tower = "0.5.3"
tower-http = { version = "0.6.8", features = ["cors", "trace", "set-header"] }
tower-http = { version = "0.6.11", features = ["cors", "trace", "set-header"] }
governor = "0.10.4"
tower_governor = "0.8.0"
reqwest = { version = "0.13.2", features = ["json", "query", "blocking"] }
reqwest = { version = "0.13.3", features = ["json", "query", "blocking"] }
url = "2.5"
ratatui = "0.30.0"
crossterm = "0.29.0"
dioxus = { version = "0.7.3", features = ["desktop", "router"] }
dioxus-core = { version = "0.7.3" }
dioxus = { version = "0.7.9", features = ["desktop", "router"] }
dioxus-core = { version = "0.7.9" }
async-trait = "0.1.89"
futures = "0.3.32"
image = { version = "0.25.10", default-features = false, features = [
@ -84,24 +97,24 @@ image = { version = "0.25.10", default-features = false, features = [
"tiff",
"bmp",
] }
pulldown-cmark = "0.13.3"
pulldown-cmark = "0.13.4"
ammonia = "4.1.2"
argon2 = { version = "0.5.3", features = ["std"] }
mime_guess = "2.0.5"
regex = "1.12.3"
dioxus-free-icons = { version = "0.10.0", features = ["font-awesome-solid"] }
rfd = "0.17.2"
gloo-timers = { version = "0.3.0", features = ["futures"] }
rand = "0.10.0"
gloo-timers = { version = "0.4.0", features = ["futures"] }
rand = "0.10.1"
moka = { version = "0.12.15", features = ["future"] }
urlencoding = "2.1.3"
image_hasher = "3.1.1"
percent-encoding = "2.3.2"
http = "1.4.0"
wasmtime = { version = "43.0.0", features = ["component-model"] }
wit-bindgen = "0.54.0"
wasmtime = { version = "44.0.1", features = ["component-model"] }
wit-bindgen = "0.57.1"
tempfile = "3.27.0"
utoipa = { version = "5.4.0", features = ["axum_extras", "uuid", "chrono"] }
utoipa = { version = "5.5.0", features = ["axum_extras", "uuid", "chrono"] }
utoipa-axum = { version = "0.2.0" }
utoipa-swagger-ui = { version = "9.0.2", features = ["axum"] }
http-body-util = "0.1.3"

View file

@ -0,0 +1,22 @@
[package]
name = "pinakes-enrichment"
edition.workspace = true
version.workspace = true
license.workspace = true
[dependencies]
pinakes-types = { workspace = true }
reqwest = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
url = { workspace = true }
chrono = { workspace = true }
uuid = { workspace = true }
async-trait = { workspace = true }
regex = { workspace = true }
urlencoding = { workspace = true }
[lints]
workspace = true

View file

@ -0,0 +1,298 @@
use std::sync::LazyLock;
use chrono::Utc;
use pinakes_types::{
error::{PinakesError, Result},
model::MediaItem,
};
use uuid::Uuid;
use super::{
EnrichmentSourceType,
ExternalMetadata,
MetadataEnricher,
googlebooks::GoogleBooksClient,
openlibrary::OpenLibraryClient,
};
// --- ISBN helper (duplicated from pinakes-core::books to avoid circular dep)
// ---
static ISBN_PATTERNS: LazyLock<Vec<regex::Regex>> = LazyLock::new(|| {
[
r"ISBN(?:-13)?(?:\s+is|:)?\s*(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)",
r"ISBN(?:-10)?(?:\s+is|:)?\s*(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])",
r"ISBN(?:-13)?\s+(\d{13})",
r"ISBN(?:-10)?\s+(\d{9}[\dXx])",
r"\b(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)\b",
r"\b(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])\b",
]
.iter()
.filter_map(|p| regex::Regex::new(p).ok())
.collect()
});
fn extract_isbn_from_text(text: &str) -> Option<String> {
for pattern in ISBN_PATTERNS.iter() {
if let Some(captures) = pattern.captures(text)
&& let Some(isbn) = captures.get(1)
{
return Some(isbn.as_str().to_string());
}
}
None
}
/// Book enricher that tries `OpenLibrary` first, then falls back to Google
/// Books
pub struct BookEnricher {
openlibrary: OpenLibraryClient,
googlebooks: GoogleBooksClient,
}
impl BookEnricher {
#[must_use]
pub fn new(google_api_key: Option<String>) -> Self {
Self {
openlibrary: OpenLibraryClient::new(),
googlebooks: GoogleBooksClient::new(google_api_key),
}
}
/// Try to enrich from `OpenLibrary` first
///
/// # Errors
///
/// Returns an error if the metadata cannot be serialized.
pub async fn try_openlibrary(
&self,
isbn: &str,
) -> Result<Option<ExternalMetadata>> {
match self.openlibrary.fetch_by_isbn(isbn).await {
Ok(book) => {
let metadata_json = serde_json::to_string(&book).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {e}"))
})?;
Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: pinakes_types::model::MediaId(Uuid::nil()), /* Will be set by caller */
source: EnrichmentSourceType::OpenLibrary,
external_id: None,
metadata_json,
confidence: calculate_openlibrary_confidence(&book),
last_updated: Utc::now(),
}))
},
Err(_) => Ok(None),
}
}
/// Try to enrich from Google Books
///
/// # Errors
///
/// Returns an error if the metadata cannot be serialized.
pub async fn try_googlebooks(
&self,
isbn: &str,
) -> Result<Option<ExternalMetadata>> {
match self.googlebooks.fetch_by_isbn(isbn).await {
Ok(books) if !books.is_empty() => {
let book = &books[0];
let metadata_json = serde_json::to_string(book).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {e}"))
})?;
Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: pinakes_types::model::MediaId(Uuid::nil()), /* Will be set by caller */
source: EnrichmentSourceType::GoogleBooks,
external_id: Some(book.id.clone()),
metadata_json,
confidence: calculate_googlebooks_confidence(&book.volume_info),
last_updated: Utc::now(),
}))
},
_ => Ok(None),
}
}
/// Try to enrich by searching with title and author
///
/// # Errors
///
/// Returns an error if the metadata cannot be serialized.
pub async fn enrich_by_search(
&self,
title: &str,
author: Option<&str>,
) -> Result<Option<ExternalMetadata>> {
// Try OpenLibrary search first
if let Ok(results) = self.openlibrary.search(title, author).await
&& let Some(result) = results.first()
{
let metadata_json = serde_json::to_string(result).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {e}"))
})?;
return Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: pinakes_types::model::MediaId(Uuid::nil()),
source: EnrichmentSourceType::OpenLibrary,
external_id: result.key.clone(),
metadata_json,
confidence: 0.6, // Lower confidence for search results
last_updated: Utc::now(),
}));
}
// Fall back to Google Books
if let Ok(results) = self.googlebooks.search(title, author).await
&& let Some(book) = results.first()
{
let metadata_json = serde_json::to_string(book).map_err(|e| {
PinakesError::External(format!("Failed to serialize metadata: {e}"))
})?;
return Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: pinakes_types::model::MediaId(Uuid::nil()),
source: EnrichmentSourceType::GoogleBooks,
external_id: Some(book.id.clone()),
metadata_json,
confidence: 0.6,
last_updated: Utc::now(),
}));
}
Ok(None)
}
}
#[async_trait::async_trait]
impl MetadataEnricher for BookEnricher {
fn source(&self) -> EnrichmentSourceType {
// Returns the preferred source
EnrichmentSourceType::OpenLibrary
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
// Try ISBN-based enrichment first by checking title/description for ISBN
// patterns
if let Some(ref title) = item.title {
if let Some(isbn) = extract_isbn_from_text(title) {
if let Some(mut metadata) = self.try_openlibrary(&isbn).await? {
metadata.media_id = item.id;
return Ok(Some(metadata));
}
if let Some(mut metadata) = self.try_googlebooks(&isbn).await? {
metadata.media_id = item.id;
return Ok(Some(metadata));
}
}
// Fall back to title/author search
let author = item.artist.as_deref();
return self.enrich_by_search(title, author).await;
}
// No title available
Ok(None)
}
}
/// Calculate confidence score for `OpenLibrary` metadata
#[must_use]
pub fn calculate_openlibrary_confidence(
book: &super::openlibrary::OpenLibraryBook,
) -> f64 {
let mut score: f64 = 0.5; // Base score
if book.title.is_some() {
score += 0.1;
}
if !book.authors.is_empty() {
score += 0.1;
}
if !book.publishers.is_empty() {
score += 0.05;
}
if book.publish_date.is_some() {
score += 0.05;
}
if book.description.is_some() {
score += 0.1;
}
if !book.covers.is_empty() {
score += 0.1;
}
score.min(1.0)
}
/// Calculate confidence score for Google Books metadata
#[must_use]
pub fn calculate_googlebooks_confidence(
info: &super::googlebooks::VolumeInfo,
) -> f64 {
let mut score: f64 = 0.5; // Base score
if info.title.is_some() {
score += 0.1;
}
if !info.authors.is_empty() {
score += 0.1;
}
if info.publisher.is_some() {
score += 0.05;
}
if info.published_date.is_some() {
score += 0.05;
}
if info.description.is_some() {
score += 0.1;
}
if info.image_links.is_some() {
score += 0.1;
}
score.min(1.0)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_openlibrary_confidence_calculation() {
let book = super::super::openlibrary::OpenLibraryBook {
title: Some("Test Book".to_string()),
subtitle: None,
authors: vec![],
publishers: vec![],
publish_date: None,
number_of_pages: None,
subjects: vec![],
covers: vec![],
isbn_10: vec![],
isbn_13: vec![],
series: vec![],
description: None,
languages: vec![],
};
let confidence = calculate_openlibrary_confidence(&book);
assert_eq!(confidence, 0.6); // 0.5 base + 0.1 for title
}
#[test]
fn test_googlebooks_confidence_calculation() {
let info = super::super::googlebooks::VolumeInfo {
title: Some("Test Book".to_string()),
..Default::default()
};
let confidence = calculate_googlebooks_confidence(&info);
assert_eq!(confidence, 0.6); // 0.5 base + 0.1 for title
}
}

View file

@ -0,0 +1,294 @@
use std::fmt::Write as _;
use pinakes_types::error::{PinakesError, Result};
use serde::{Deserialize, Serialize};
/// Google Books API client for book metadata enrichment
pub struct GoogleBooksClient {
client: reqwest::Client,
api_key: Option<String>,
}
impl GoogleBooksClient {
/// Create a new `GoogleBooksClient`.
#[must_use]
pub fn new(api_key: Option<String>) -> Self {
let client = reqwest::Client::builder()
.user_agent("Pinakes/1.0")
.timeout(std::time::Duration::from_secs(10))
.build()
.unwrap_or_else(|_| reqwest::Client::new());
Self { client, api_key }
}
/// Fetch book metadata by ISBN
///
/// # Errors
///
/// Returns an error if the HTTP request fails or the response cannot be
/// parsed.
pub async fn fetch_by_isbn(&self, isbn: &str) -> Result<Vec<GoogleBook>> {
let mut url =
format!("https://www.googleapis.com/books/v1/volumes?q=isbn:{isbn}");
if let Some(ref key) = self.api_key {
let _ = write!(url, "&key={key}");
}
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Google Books request failed: {e}"))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Google Books returned status: {}",
response.status()
)));
}
let volumes: GoogleBooksResponse = response.json().await.map_err(|e| {
PinakesError::External(format!(
"Failed to parse Google Books response: {e}"
))
})?;
Ok(volumes.items)
}
/// Search for books by title and author
///
/// # Errors
///
/// Returns an error if the HTTP request fails or the response cannot be
/// parsed.
pub async fn search(
&self,
title: &str,
author: Option<&str>,
) -> Result<Vec<GoogleBook>> {
let mut query = format!("intitle:{}", urlencoding::encode(title));
if let Some(author) = author {
let _ = write!(query, "+inauthor:{}", urlencoding::encode(author));
}
let mut url = format!(
"https://www.googleapis.com/books/v1/volumes?q={query}&maxResults=5"
);
if let Some(ref key) = self.api_key {
let _ = write!(url, "&key={key}");
}
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Google Books search failed: {e}"))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Google Books search returned status: {}",
response.status()
)));
}
let volumes: GoogleBooksResponse = response.json().await.map_err(|e| {
PinakesError::External(format!("Failed to parse search results: {e}"))
})?;
Ok(volumes.items)
}
/// Download cover image from Google Books
///
/// # Errors
///
/// Returns an error if the HTTP request fails or the response cannot be
/// read.
pub async fn fetch_cover(&self, image_link: &str) -> Result<Vec<u8>> {
// Replace thumbnail link with higher resolution if possible
let high_res_link = image_link
.replace("&zoom=1", "&zoom=2")
.replace("&edge=curl", "");
let response =
self.client.get(&high_res_link).send().await.map_err(|e| {
PinakesError::External(format!("Cover download failed: {e}"))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Cover download returned status: {}",
response.status()
)));
}
response.bytes().await.map(|b| b.to_vec()).map_err(|e| {
PinakesError::External(format!("Failed to read cover data: {e}"))
})
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GoogleBooksResponse {
#[serde(default)]
pub items: Vec<GoogleBook>,
#[serde(default)]
pub total_items: i32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GoogleBook {
pub id: String,
#[serde(default)]
pub volume_info: VolumeInfo,
}
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct VolumeInfo {
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub subtitle: Option<String>,
#[serde(default)]
pub authors: Vec<String>,
#[serde(default)]
pub publisher: Option<String>,
#[serde(default)]
pub published_date: Option<String>,
#[serde(default)]
pub description: Option<String>,
#[serde(default)]
pub page_count: Option<i32>,
#[serde(default)]
pub categories: Vec<String>,
#[serde(default)]
pub average_rating: Option<f64>,
#[serde(default)]
pub ratings_count: Option<i32>,
#[serde(default)]
pub image_links: Option<ImageLinks>,
#[serde(default)]
pub language: Option<String>,
#[serde(default)]
pub industry_identifiers: Vec<IndustryIdentifier>,
#[serde(default)]
pub main_category: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageLinks {
#[serde(default)]
pub small_thumbnail: Option<String>,
#[serde(default)]
pub thumbnail: Option<String>,
#[serde(default)]
pub small: Option<String>,
#[serde(default)]
pub medium: Option<String>,
#[serde(default)]
pub large: Option<String>,
#[serde(default)]
pub extra_large: Option<String>,
}
impl ImageLinks {
/// Get the best available image link (highest resolution)
#[must_use]
pub fn best_link(&self) -> Option<&String> {
self
.extra_large
.as_ref()
.or(self.large.as_ref())
.or(self.medium.as_ref())
.or(self.small.as_ref())
.or(self.thumbnail.as_ref())
.or(self.small_thumbnail.as_ref())
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndustryIdentifier {
#[serde(rename = "type")]
pub identifier_type: String,
pub identifier: String,
}
impl IndustryIdentifier {
/// Check if this is an ISBN-13
#[must_use]
pub fn is_isbn13(&self) -> bool {
self.identifier_type == "ISBN_13"
}
/// Check if this is an ISBN-10
#[must_use]
pub fn is_isbn10(&self) -> bool {
self.identifier_type == "ISBN_10"
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_googlebooks_client_creation() {
let client = GoogleBooksClient::new(None);
assert!(client.api_key.is_none());
let client_with_key = GoogleBooksClient::new(Some("test-key".to_string()));
assert_eq!(client_with_key.api_key, Some("test-key".to_string()));
}
#[test]
fn test_image_links_best_link() {
let links = ImageLinks {
small_thumbnail: Some("small.jpg".to_string()),
thumbnail: Some("thumb.jpg".to_string()),
small: None,
medium: Some("medium.jpg".to_string()),
large: Some("large.jpg".to_string()),
extra_large: None,
};
assert_eq!(links.best_link(), Some(&"large.jpg".to_string()));
}
#[test]
fn test_industry_identifier_type_checks() {
let isbn13 = IndustryIdentifier {
identifier_type: "ISBN_13".to_string(),
identifier: "9780123456789".to_string(),
};
assert!(isbn13.is_isbn13());
assert!(!isbn13.is_isbn10());
let isbn10 = IndustryIdentifier {
identifier_type: "ISBN_10".to_string(),
identifier: "0123456789".to_string(),
};
assert!(!isbn10.is_isbn13());
assert!(isbn10.is_isbn10());
}
}

View file

@ -0,0 +1,116 @@
//! Last.fm metadata enrichment for audio files.
use std::time::Duration;
use chrono::Utc;
use pinakes_types::{
error::{PinakesError, Result},
model::MediaItem,
};
use uuid::Uuid;
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
pub struct LastFmEnricher {
client: reqwest::Client,
api_key: String,
base_url: String,
}
impl LastFmEnricher {
/// Create a new `LastFmEnricher`.
#[must_use]
pub fn new(api_key: String) -> Self {
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(10))
.connect_timeout(Duration::from_secs(5))
.build()
.unwrap_or_else(|_| reqwest::Client::new());
Self {
client,
api_key,
base_url: "https://ws.audioscrobbler.com/2.0".to_string(),
}
}
}
#[async_trait::async_trait]
impl MetadataEnricher for LastFmEnricher {
fn source(&self) -> EnrichmentSourceType {
EnrichmentSourceType::LastFm
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
let artist = match &item.artist {
Some(a) if !a.is_empty() => a,
_ => return Ok(None),
};
let title = match &item.title {
Some(t) if !t.is_empty() => t,
_ => return Ok(None),
};
let url = format!("{}/", self.base_url);
let resp = self
.client
.get(&url)
.query(&[
("method", "track.getInfo"),
("api_key", self.api_key.as_str()),
("artist", artist.as_str()),
("track", title.as_str()),
("format", "json"),
])
.send()
.await
.map_err(|e| {
PinakesError::MetadataExtraction(format!("Last.fm request failed: {e}"))
})?;
if !resp.status().is_success() {
return Ok(None);
}
let body = resp.text().await.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"Last.fm response read failed: {e}"
))
})?;
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
PinakesError::MetadataExtraction(format!(
"Last.fm JSON parse failed: {e}"
))
})?;
// Check for error response
if json.get("error").is_some() {
return Ok(None);
}
let Some(track) = json.get("track") else {
return Ok(None);
};
let mbid = track.get("mbid").and_then(|m| m.as_str()).map(String::from);
let listeners = track
.get("listeners")
.and_then(|l| l.as_str())
.and_then(|l| l.parse::<f64>().ok())
.unwrap_or(0.0);
// Normalize listeners to confidence (arbitrary scale)
let confidence = (listeners / 1_000_000.0).min(1.0);
Ok(Some(ExternalMetadata {
id: Uuid::now_v7(),
media_id: item.id,
source: EnrichmentSourceType::LastFm,
external_id: mbid,
metadata_json: body,
confidence,
last_updated: Utc::now(),
}))
}
}

View file

@ -0,0 +1,76 @@
pub mod books;
pub mod googlebooks;
pub mod lastfm;
pub mod musicbrainz;
pub mod openlibrary;
pub mod tmdb;
use chrono::{DateTime, Utc};
use pinakes_types::{
error::Result,
model::{MediaId, MediaItem},
};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
/// Externally-sourced metadata for a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExternalMetadata {
pub id: Uuid,
pub media_id: MediaId,
pub source: EnrichmentSourceType,
pub external_id: Option<String>,
pub metadata_json: String,
pub confidence: f64,
pub last_updated: DateTime<Utc>,
}
/// Supported enrichment data sources.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum EnrichmentSourceType {
#[serde(rename = "musicbrainz")]
MusicBrainz,
#[serde(rename = "tmdb")]
Tmdb,
#[serde(rename = "lastfm")]
LastFm,
#[serde(rename = "openlibrary")]
OpenLibrary,
#[serde(rename = "googlebooks")]
GoogleBooks,
}
impl std::fmt::Display for EnrichmentSourceType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
Self::MusicBrainz => "musicbrainz",
Self::Tmdb => "tmdb",
Self::LastFm => "lastfm",
Self::OpenLibrary => "openlibrary",
Self::GoogleBooks => "googlebooks",
};
write!(f, "{s}")
}
}
impl std::str::FromStr for EnrichmentSourceType {
type Err = String;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"musicbrainz" => Ok(Self::MusicBrainz),
"tmdb" => Ok(Self::Tmdb),
"lastfm" => Ok(Self::LastFm),
"openlibrary" => Ok(Self::OpenLibrary),
"googlebooks" => Ok(Self::GoogleBooks),
_ => Err(format!("unknown enrichment source: {s}")),
}
}
}
/// Trait for metadata enrichment providers.
#[async_trait::async_trait]
pub trait MetadataEnricher: Send + Sync {
fn source(&self) -> EnrichmentSourceType;
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>>;
}

View file

@ -0,0 +1,79 @@
//! Metadata enrichment from external sources.
pub mod books;
pub mod googlebooks;
pub mod lastfm;
pub mod musicbrainz;
pub mod openlibrary;
pub mod tmdb;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use pinakes_types::{
error::Result,
model::{MediaId, MediaItem},
};
/// Externally-sourced metadata for a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ExternalMetadata {
pub id: Uuid,
pub media_id: MediaId,
pub source: EnrichmentSourceType,
pub external_id: Option<String>,
pub metadata_json: String,
pub confidence: f64,
pub last_updated: DateTime<Utc>,
}
/// Supported enrichment data sources.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum EnrichmentSourceType {
#[serde(rename = "musicbrainz")]
MusicBrainz,
#[serde(rename = "tmdb")]
Tmdb,
#[serde(rename = "lastfm")]
LastFm,
#[serde(rename = "openlibrary")]
OpenLibrary,
#[serde(rename = "googlebooks")]
GoogleBooks,
}
impl std::fmt::Display for EnrichmentSourceType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let s = match self {
Self::MusicBrainz => "musicbrainz",
Self::Tmdb => "tmdb",
Self::LastFm => "lastfm",
Self::OpenLibrary => "openlibrary",
Self::GoogleBooks => "googlebooks",
};
write!(f, "{s}")
}
}
impl std::str::FromStr for EnrichmentSourceType {
type Err = String;
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
match s {
"musicbrainz" => Ok(Self::MusicBrainz),
"tmdb" => Ok(Self::Tmdb),
"lastfm" => Ok(Self::LastFm),
"openlibrary" => Ok(Self::OpenLibrary),
"googlebooks" => Ok(Self::GoogleBooks),
_ => Err(format!("unknown enrichment source: {s}")),
}
}
}
/// Trait for metadata enrichment providers.
#[async_trait::async_trait]
pub trait MetadataEnricher: Send + Sync {
fn source(&self) -> EnrichmentSourceType;
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>>;
}

View file

@ -0,0 +1,148 @@
//! `MusicBrainz` metadata enrichment for audio files.
use std::{fmt::Write as _, time::Duration};
use chrono::Utc;
use pinakes_types::{
error::{PinakesError, Result},
model::MediaItem,
};
use uuid::Uuid;
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
pub struct MusicBrainzEnricher {
client: reqwest::Client,
base_url: String,
}
impl Default for MusicBrainzEnricher {
fn default() -> Self {
Self::new()
}
}
impl MusicBrainzEnricher {
/// Create a new `MusicBrainzEnricher`.
#[must_use]
pub fn new() -> Self {
let client = reqwest::Client::builder()
.user_agent("Pinakes/0.1 (https://github.com/notashelf/pinakes)")
.timeout(Duration::from_secs(10))
.connect_timeout(Duration::from_secs(5))
.build()
.unwrap_or_else(|_| reqwest::Client::new());
Self {
client,
base_url: "https://musicbrainz.org/ws/2".to_string(),
}
}
}
fn escape_lucene_query(s: &str) -> String {
let special_chars = [
'+', '-', '&', '|', '!', '(', ')', '{', '}', '[', ']', '^', '"', '~', '*',
'?', ':', '\\', '/',
];
let mut escaped = String::with_capacity(s.len() * 2);
for c in s.chars() {
if special_chars.contains(&c) {
escaped.push('\\');
}
escaped.push(c);
}
escaped
}
#[async_trait::async_trait]
impl MetadataEnricher for MusicBrainzEnricher {
fn source(&self) -> EnrichmentSourceType {
EnrichmentSourceType::MusicBrainz
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
let title = match &item.title {
Some(t) if !t.is_empty() => t,
_ => return Ok(None),
};
let mut query = format!("recording:{}", escape_lucene_query(title));
if let Some(ref artist) = item.artist {
let _ = write!(query, " AND artist:{}", escape_lucene_query(artist));
}
let url = format!("{}/recording/", self.base_url);
let resp = self
.client
.get(&url)
.query(&[
("query", &query),
("fmt", &"json".to_string()),
("limit", &"1".to_string()),
])
.send()
.await
.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"MusicBrainz request failed: {e}"
))
})?;
if !resp.status().is_success() {
let status = resp.status();
if status == reqwest::StatusCode::TOO_MANY_REQUESTS
|| status == reqwest::StatusCode::SERVICE_UNAVAILABLE
{
return Err(PinakesError::MetadataExtraction(format!(
"MusicBrainz rate limited (HTTP {})",
status.as_u16()
)));
}
return Ok(None);
}
let body = resp.text().await.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"MusicBrainz response read failed: {e}"
))
})?;
// Parse to check if we got results
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
PinakesError::MetadataExtraction(format!(
"MusicBrainz JSON parse failed: {e}"
))
})?;
let recordings = json.get("recordings").and_then(|r| r.as_array());
if recordings.is_none_or(std::vec::Vec::is_empty) {
return Ok(None);
}
let Some(recordings) = recordings else {
return Ok(None);
};
let recording = &recordings[0];
let external_id = recording
.get("id")
.and_then(|id| id.as_str())
.map(String::from);
let score = (recording
.get("score")
.and_then(serde_json::Value::as_f64)
.unwrap_or(0.0)
/ 100.0)
.min(1.0);
Ok(Some(ExternalMetadata {
id: Uuid::now_v7(),
media_id: item.id,
source: EnrichmentSourceType::MusicBrainz,
external_id,
metadata_json: body,
confidence: score,
last_updated: Utc::now(),
}))
}
}

View file

@ -0,0 +1,307 @@
use std::fmt::Write as _;
use pinakes_types::error::{PinakesError, Result};
use serde::{Deserialize, Serialize};
/// `OpenLibrary` API client for book metadata enrichment
pub struct OpenLibraryClient {
client: reqwest::Client,
base_url: String,
}
impl Default for OpenLibraryClient {
fn default() -> Self {
Self::new()
}
}
impl OpenLibraryClient {
/// Create a new `OpenLibraryClient`.
#[must_use]
pub fn new() -> Self {
let client = reqwest::Client::builder()
.user_agent("Pinakes/1.0")
.timeout(std::time::Duration::from_secs(10))
.build()
.unwrap_or_else(|_| reqwest::Client::new());
Self {
client,
base_url: "https://openlibrary.org".to_string(),
}
}
/// Fetch book metadata by ISBN
///
/// # Errors
///
/// Returns an error if the HTTP request fails or the response cannot be
/// parsed.
pub async fn fetch_by_isbn(&self, isbn: &str) -> Result<OpenLibraryBook> {
let url = format!("{}/isbn/{}.json", self.base_url, isbn);
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("OpenLibrary request failed: {e}"))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"OpenLibrary returned status: {}",
response.status()
)));
}
response.json::<OpenLibraryBook>().await.map_err(|e| {
PinakesError::External(format!(
"Failed to parse OpenLibrary response: {e}"
))
})
}
/// Search for books by title and author
///
/// # Errors
///
/// Returns an error if the HTTP request fails or the response cannot be
/// parsed.
pub async fn search(
&self,
title: &str,
author: Option<&str>,
) -> Result<Vec<OpenLibrarySearchResult>> {
let mut url = format!(
"{}/search.json?title={}",
self.base_url,
urlencoding::encode(title)
);
if let Some(author) = author {
let _ = write!(url, "&author={}", urlencoding::encode(author));
}
url.push_str("&limit=5");
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("OpenLibrary search failed: {e}"))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"OpenLibrary search returned status: {}",
response.status()
)));
}
let search_response: OpenLibrarySearchResponse =
response.json().await.map_err(|e| {
PinakesError::External(format!("Failed to parse search results: {e}"))
})?;
Ok(search_response.docs)
}
/// Fetch cover image by cover ID
///
/// # Errors
///
/// Returns an error if the HTTP request fails or the response cannot be
/// read.
pub async fn fetch_cover(
&self,
cover_id: i64,
size: CoverSize,
) -> Result<Vec<u8>> {
let size_str = match size {
CoverSize::Small => "S",
CoverSize::Medium => "M",
CoverSize::Large => "L",
};
let url =
format!("https://covers.openlibrary.org/b/id/{cover_id}-{size_str}.jpg");
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Cover download failed: {e}"))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Cover download returned status: {}",
response.status()
)));
}
response.bytes().await.map(|b| b.to_vec()).map_err(|e| {
PinakesError::External(format!("Failed to read cover data: {e}"))
})
}
/// Fetch cover by ISBN
///
/// # Errors
///
/// Returns an error if the HTTP request fails or the response cannot be
/// read.
pub async fn fetch_cover_by_isbn(
&self,
isbn: &str,
size: CoverSize,
) -> Result<Vec<u8>> {
let size_str = match size {
CoverSize::Small => "S",
CoverSize::Medium => "M",
CoverSize::Large => "L",
};
let url =
format!("https://covers.openlibrary.org/b/isbn/{isbn}-{size_str}.jpg");
let response = self.client.get(&url).send().await.map_err(|e| {
PinakesError::External(format!("Cover download failed: {e}"))
})?;
if !response.status().is_success() {
return Err(PinakesError::External(format!(
"Cover download returned status: {}",
response.status()
)));
}
response.bytes().await.map(|b| b.to_vec()).map_err(|e| {
PinakesError::External(format!("Failed to read cover data: {e}"))
})
}
}
#[derive(Debug, Clone, Copy)]
pub enum CoverSize {
Small, // 256x256
Medium, // 600x800
Large, // Original
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenLibraryBook {
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub subtitle: Option<String>,
#[serde(default)]
pub authors: Vec<AuthorRef>,
#[serde(default)]
pub publishers: Vec<String>,
#[serde(default)]
pub publish_date: Option<String>,
#[serde(default)]
pub number_of_pages: Option<i32>,
#[serde(default)]
pub subjects: Vec<String>,
#[serde(default)]
pub covers: Vec<i64>,
#[serde(default)]
pub isbn_10: Vec<String>,
#[serde(default)]
pub isbn_13: Vec<String>,
#[serde(default)]
pub series: Vec<String>,
#[serde(default)]
pub description: Option<StringOrObject>,
#[serde(default)]
pub languages: Vec<LanguageRef>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorRef {
pub key: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageRef {
pub key: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrObject {
String(String),
Object { value: String },
}
impl StringOrObject {
#[must_use]
pub fn as_str(&self) -> &str {
match self {
Self::String(s) => s,
Self::Object { value } => value,
}
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenLibrarySearchResponse {
#[serde(default)]
pub docs: Vec<OpenLibrarySearchResult>,
#[serde(default)]
pub num_found: i32,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenLibrarySearchResult {
#[serde(default)]
pub key: Option<String>,
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub author_name: Vec<String>,
#[serde(default)]
pub first_publish_year: Option<i32>,
#[serde(default)]
pub publisher: Vec<String>,
#[serde(default)]
pub isbn: Vec<String>,
#[serde(default)]
pub cover_i: Option<i64>,
#[serde(default)]
pub subject: Vec<String>,
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_openlibrary_client_creation() {
let client = OpenLibraryClient::new();
assert_eq!(client.base_url, "https://openlibrary.org");
}
#[test]
fn test_string_or_object_parsing() {
let string_desc: StringOrObject =
serde_json::from_str(r#""Simple description""#).unwrap();
assert_eq!(string_desc.as_str(), "Simple description");
let object_desc: StringOrObject =
serde_json::from_str(r#"{"value": "Object description"}"#).unwrap();
assert_eq!(object_desc.as_str(), "Object description");
}
}

View file

@ -0,0 +1,125 @@
//! TMDB (The Movie Database) metadata enrichment for video files.
use std::time::Duration;
use chrono::Utc;
use pinakes_types::{
error::{PinakesError, Result},
model::MediaItem,
};
use uuid::Uuid;
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
pub struct TmdbEnricher {
client: reqwest::Client,
api_key: String,
base_url: String,
}
impl TmdbEnricher {
/// Create a new `TMDb` enricher.
///
/// # Panics
///
/// Panics if the HTTP client cannot be built (programming error in client
/// configuration).
#[must_use]
pub fn new(api_key: String) -> Self {
Self {
client: reqwest::Client::builder()
.timeout(Duration::from_secs(10))
.connect_timeout(Duration::from_secs(5))
.build()
.expect("failed to build HTTP client with configured timeouts"),
api_key,
base_url: "https://api.themoviedb.org/3".to_string(),
}
}
}
#[async_trait::async_trait]
impl MetadataEnricher for TmdbEnricher {
fn source(&self) -> EnrichmentSourceType {
EnrichmentSourceType::Tmdb
}
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
let title = match &item.title {
Some(t) if !t.is_empty() => t,
_ => return Ok(None),
};
let url = format!("{}/search/movie", self.base_url);
let resp = self
.client
.get(&url)
.query(&[
("api_key", &self.api_key),
("query", &title.clone()),
("page", &"1".to_string()),
])
.send()
.await
.map_err(|e| {
PinakesError::MetadataExtraction(format!("TMDB request failed: {e}"))
})?;
if !resp.status().is_success() {
let status = resp.status();
if status == reqwest::StatusCode::UNAUTHORIZED {
return Err(PinakesError::MetadataExtraction(
"TMDB API key is invalid (401)".into(),
));
}
if status == reqwest::StatusCode::TOO_MANY_REQUESTS {
tracing::warn!("TMDB rate limit exceeded (429)");
return Ok(None);
}
tracing::debug!(status = %status, "TMDB search returned non-success status");
return Ok(None);
}
let body = resp.text().await.map_err(|e| {
PinakesError::MetadataExtraction(format!(
"TMDB response read failed: {e}"
))
})?;
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
PinakesError::MetadataExtraction(format!("TMDB JSON parse failed: {e}"))
})?;
let results = json.get("results").and_then(|r| r.as_array());
if results.is_none_or(std::vec::Vec::is_empty) {
return Ok(None);
}
let Some(results) = results else {
return Ok(None);
};
let movie = &results[0];
let external_id = match movie.get("id").and_then(serde_json::Value::as_i64)
{
Some(id) => id.to_string(),
None => return Ok(None),
};
let popularity = movie
.get("popularity")
.and_then(serde_json::Value::as_f64)
.unwrap_or(0.0);
// Normalize popularity to 0-1 range (TMDB popularity can be very high)
let confidence = (popularity / 100.0).min(1.0);
Ok(Some(ExternalMetadata {
id: Uuid::now_v7(),
media_id: item.id,
source: EnrichmentSourceType::Tmdb,
external_id: Some(external_id),
metadata_json: body,
confidence,
last_updated: Utc::now(),
}))
}
}

View file

@ -0,0 +1,23 @@
[package]
name = "pinakes-metadata"
edition.workspace = true
version.workspace = true
license.workspace = true
[dependencies]
pinakes-types = { workspace = true }
lofty = { workspace = true }
lopdf = { workspace = true }
epub = { workspace = true }
matroska = { workspace = true }
image = { workspace = true }
kamadak-exif = { workspace = true }
gray_matter = { workspace = true }
rustc-hash = { workspace = true }
chrono = { workspace = true }
image_hasher = { workspace = true }
tracing = { workspace = true }
regex = { workspace = true }
[lints]
workspace = true

View file

@ -0,0 +1,91 @@
use std::path::Path;
use lofty::{
file::{AudioFile, TaggedFileExt},
tag::Accessor,
};
use pinakes_types::{
error::{PinakesError, Result},
media_type::{BuiltinMediaType, MediaType},
};
use super::{ExtractedMetadata, MetadataExtractor};
pub struct AudioExtractor;
impl MetadataExtractor for AudioExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let tagged_file = lofty::read_from_path(path).map_err(|e| {
PinakesError::MetadataExtraction(format!("audio metadata: {e}"))
})?;
let mut meta = ExtractedMetadata::default();
if let Some(tag) = tagged_file
.primary_tag()
.or_else(|| tagged_file.first_tag())
{
meta.title = tag.title().map(|s| s.to_string());
meta.artist = tag.artist().map(|s| s.to_string());
meta.album = tag.album().map(|s| s.to_string());
meta.genre = tag.genre().map(|s| s.to_string());
meta.year = tag.date().map(|ts| i32::from(ts.year));
}
if let Some(tag) = tagged_file
.primary_tag()
.or_else(|| tagged_file.first_tag())
{
if let Some(track) = tag.track() {
meta
.extra
.insert("track_number".to_string(), track.to_string());
}
if let Some(disc) = tag.disk() {
meta
.extra
.insert("disc_number".to_string(), disc.to_string());
}
if let Some(comment) = tag.comment() {
meta
.extra
.insert("comment".to_string(), comment.to_string());
}
}
let properties = tagged_file.properties();
let duration = properties.duration();
if !duration.is_zero() {
meta.duration_secs = Some(duration.as_secs_f64());
}
if let Some(bitrate) = properties.audio_bitrate() {
meta
.extra
.insert("bitrate".to_string(), format!("{bitrate} kbps"));
}
if let Some(sample_rate) = properties.sample_rate() {
meta
.extra
.insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
}
if let Some(channels) = properties.channels() {
meta
.extra
.insert("channels".to_string(), channels.to_string());
}
Ok(meta)
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Mp3),
MediaType::Builtin(BuiltinMediaType::Flac),
MediaType::Builtin(BuiltinMediaType::Ogg),
MediaType::Builtin(BuiltinMediaType::Wav),
MediaType::Builtin(BuiltinMediaType::Aac),
MediaType::Builtin(BuiltinMediaType::Opus),
]
}
}

View file

@ -0,0 +1,460 @@
use std::{path::Path, sync::LazyLock};
use pinakes_types::{
error::{PinakesError, Result},
media_type::{BuiltinMediaType, MediaType},
};
use super::{ExtractedMetadata, MetadataExtractor};
// --- ISBN helpers (duplicated from pinakes-core::books to avoid circular dep)
// ---
static ISBN_PATTERNS: LazyLock<Vec<regex::Regex>> = LazyLock::new(|| {
[
r"ISBN(?:-13)?(?:\s+is|:)?\s*(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)",
r"ISBN(?:-10)?(?:\s+is|:)?\s*(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])",
r"ISBN(?:-13)?\s+(\d{13})",
r"ISBN(?:-10)?\s+(\d{9}[\dXx])",
r"\b(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)\b",
r"\b(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])\b",
]
.iter()
.filter_map(|p| regex::Regex::new(p).ok())
.collect()
});
fn extract_isbn_from_text(text: &str) -> Option<String> {
for pattern in ISBN_PATTERNS.iter() {
if let Some(captures) = pattern.captures(text)
&& let Some(isbn) = captures.get(1)
&& let Ok(normalized) = normalize_isbn(isbn.as_str())
{
return Some(normalized);
}
}
None
}
fn normalize_isbn(isbn: &str) -> std::result::Result<String, ()> {
let clean: String = isbn
.chars()
.filter(|c| c.is_ascii_digit() || *c == 'X' || *c == 'x')
.collect();
match clean.len() {
10 => isbn10_to_isbn13(&clean),
13 => {
if is_valid_isbn13(&clean) {
Ok(clean)
} else {
Err(())
}
},
_ => Err(()),
}
}
fn isbn10_to_isbn13(isbn10: &str) -> std::result::Result<String, ()> {
if isbn10.len() != 10 {
return Err(());
}
let mut isbn13 = format!("978{}", &isbn10[..9]);
let check_digit = calculate_isbn13_check_digit(&isbn13).ok_or(())?;
isbn13.push_str(&check_digit.to_string());
Ok(isbn13)
}
fn calculate_isbn13_check_digit(isbn_without_check: &str) -> Option<u32> {
if isbn_without_check.len() != 12 {
return None;
}
let sum: u32 = isbn_without_check
.chars()
.enumerate()
.filter_map(|(i, c)| {
c.to_digit(10).map(|d| if i % 2 == 0 { d } else { d * 3 })
})
.sum();
Some((10 - (sum % 10)) % 10)
}
fn is_valid_isbn13(isbn13: &str) -> bool {
if isbn13.len() != 13 {
return false;
}
let sum: u32 = isbn13
.chars()
.enumerate()
.filter_map(|(i, c)| {
c.to_digit(10).map(|d| if i % 2 == 0 { d } else { d * 3 })
})
.sum();
sum.is_multiple_of(10)
}
pub struct DocumentExtractor;
impl MetadataExtractor for DocumentExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
match MediaType::from_path(path) {
Some(MediaType::Builtin(BuiltinMediaType::Pdf)) => extract_pdf(path),
Some(MediaType::Builtin(BuiltinMediaType::Epub)) => extract_epub(path),
Some(MediaType::Builtin(BuiltinMediaType::Djvu)) => extract_djvu(path),
_ => Ok(ExtractedMetadata::default()),
}
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Pdf),
MediaType::Builtin(BuiltinMediaType::Epub),
MediaType::Builtin(BuiltinMediaType::Djvu),
]
}
}
fn extract_pdf(path: &Path) -> Result<ExtractedMetadata> {
let doc = lopdf::Document::load(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("PDF load: {e}")))?;
let mut meta = ExtractedMetadata::default();
let mut book_meta = pinakes_types::model::BookMetadata::default();
// Find the Info dictionary via the trailer
if let Ok(info_ref) = doc.trailer.get(b"Info") {
let info_obj = info_ref
.as_reference()
.map_or(Some(info_ref), |reference| doc.get_object(reference).ok());
if let Some(obj) = info_obj
&& let Ok(dict) = obj.as_dict()
{
if let Ok(title) = dict.get(b"Title") {
meta.title = pdf_object_to_string(title);
}
if let Ok(author) = dict.get(b"Author") {
let author_str = pdf_object_to_string(author);
meta.artist.clone_from(&author_str);
// Parse multiple authors if separated by semicolon, comma, or "and"
if let Some(authors_str) = author_str {
book_meta.authors = authors_str
.split(&[';', ','][..])
.flat_map(|part| part.split(" and "))
.map(|name| name.trim().to_string())
.filter(|name| !name.is_empty())
.enumerate()
.map(|(pos, name)| {
let mut author = pinakes_types::model::AuthorInfo::new(name);
author.position = i32::try_from(pos).unwrap_or(i32::MAX);
author
})
.collect();
}
}
if let Ok(subject) = dict.get(b"Subject") {
meta.description = pdf_object_to_string(subject);
}
if let Ok(creator) = dict.get(b"Creator") {
meta.extra.insert(
"creator".to_string(),
pdf_object_to_string(creator).unwrap_or_default(),
);
}
if let Ok(producer) = dict.get(b"Producer") {
meta.extra.insert(
"producer".to_string(),
pdf_object_to_string(producer).unwrap_or_default(),
);
}
}
}
// Page count
let pages = doc.get_pages();
let page_count = pages.len();
if page_count > 0 {
book_meta.page_count = Some(i32::try_from(page_count).unwrap_or(i32::MAX));
}
// Try to extract ISBN from first few pages
// Extract text from up to the first 5 pages and search for ISBN patterns
let mut extracted_text = String::new();
let max_pages = page_count.min(5);
for (_page_num, page_id) in pages.iter().take(max_pages) {
if let Ok(content) = doc.get_page_content(*page_id) {
// PDF content streams contain raw operators, but may have text strings
if let Ok(text) = std::str::from_utf8(&content) {
extracted_text.push_str(text);
extracted_text.push(' ');
}
}
}
// Extract ISBN from the text
if let Some(isbn) = extract_isbn_from_text(&extracted_text)
&& let Ok(normalized) = normalize_isbn(&isbn)
{
book_meta.isbn13 = Some(normalized);
book_meta.isbn = Some(isbn);
}
// Set format
book_meta.format = Some("pdf".to_string());
meta.book_metadata = Some(book_meta);
Ok(meta)
}
fn pdf_object_to_string(obj: &lopdf::Object) -> Option<String> {
match obj {
lopdf::Object::String(bytes, _) => {
Some(String::from_utf8_lossy(bytes).into_owned())
},
lopdf::Object::Name(name) => {
Some(String::from_utf8_lossy(name).into_owned())
},
_ => None,
}
}
fn extract_epub(path: &Path) -> Result<ExtractedMetadata> {
let mut doc = epub::doc::EpubDoc::new(path).map_err(|e| {
PinakesError::MetadataExtraction(format!("EPUB parse: {e}"))
})?;
let mut meta = ExtractedMetadata {
title: doc.mdata("title").map(|item| item.value.clone()),
artist: doc.mdata("creator").map(|item| item.value.clone()),
description: doc.mdata("description").map(|item| item.value.clone()),
..Default::default()
};
let mut book_meta = pinakes_types::model::BookMetadata::default();
// Extract basic metadata
if let Some(lang) = doc.mdata("language") {
book_meta.language = Some(lang.value.clone());
}
if let Some(publisher) = doc.mdata("publisher") {
book_meta.publisher = Some(publisher.value.clone());
}
if let Some(date) = doc.mdata("date") {
// Try to parse as YYYY-MM-DD or just YYYY
if let Ok(parsed_date) =
chrono::NaiveDate::parse_from_str(&date.value, "%Y-%m-%d")
{
book_meta.publication_date = Some(parsed_date);
} else if let Ok(year) = date.value.parse::<i32>() {
book_meta.publication_date = chrono::NaiveDate::from_ymd_opt(year, 1, 1);
}
}
// Extract authors - iterate through all metadata items
let mut authors = Vec::new();
let mut position = 0;
for item in &doc.metadata {
if item.property == "creator" || item.property == "dc:creator" {
let mut author =
pinakes_types::model::AuthorInfo::new(item.value.clone());
author.position = position;
position += 1;
// Check for file-as in refinements
if let Some(file_as_ref) = item.refinement("file-as") {
author.file_as = Some(file_as_ref.value.clone());
}
// Check for role in refinements
if let Some(role_ref) = item.refinement("role") {
author.role.clone_from(&role_ref.value);
}
authors.push(author);
}
}
book_meta.authors = authors;
// Extract ISBNs from identifiers
let mut identifiers = rustc_hash::FxHashMap::default();
for item in &doc.metadata {
if item.property == "identifier" || item.property == "dc:identifier" {
// Try to get scheme from refinements
let scheme = item
.refinement("identifier-type")
.map(|r| r.value.to_lowercase());
let id_type = match scheme.as_deref() {
Some("isbn" | "isbn-10" | "isbn10") => "isbn",
Some("isbn-13" | "isbn13") => "isbn13",
Some("asin") => "asin",
Some("doi") => "doi",
_ => {
// Fallback: detect from value pattern.
// ISBN-10 = 10 chars bare, ISBN-13 = 13 chars bare,
// hyphenated ISBN-13 = 17 chars (e.g. 978-0-123-45678-9).
// Parentheses required: && binds tighter than ||.
if (item.value.len() == 10 || item.value.len() == 13)
|| (item.value.contains('-')
&& (item.value.len() == 13 || item.value.len() == 17))
{
"isbn"
} else {
"other"
}
},
};
// Try to normalize ISBN
if (id_type == "isbn" || id_type == "isbn13")
&& let Ok(normalized) = normalize_isbn(&item.value)
{
book_meta.isbn13 = Some(normalized.clone());
book_meta.isbn = Some(item.value.clone());
}
identifiers
.entry(id_type.to_string())
.or_insert_with(Vec::new)
.push(item.value.clone());
}
}
book_meta.identifiers = identifiers;
// Extract Calibre series metadata by parsing the content.opf file
// Try common OPF locations
let opf_paths = vec!["OEBPS/content.opf", "content.opf", "OPS/content.opf"];
let mut opf_data = None;
for path in opf_paths {
if let Some(data) = doc.get_resource_str_by_path(path) {
opf_data = Some(data);
break;
}
}
if let Some(opf_content) = opf_data {
// Look for <meta name="calibre:series" content="Series Name"/>
if let Some(series_start) = opf_content.find("name=\"calibre:series\"")
&& let Some(content_start) =
opf_content[series_start..].find("content=\"")
{
let after_content = &opf_content[series_start + content_start + 9..];
if let Some(quote_end) = after_content.find('"') {
book_meta.series_name = Some(after_content[..quote_end].to_string());
}
}
// Look for <meta name="calibre:series_index" content="1.0"/>
if let Some(index_start) = opf_content.find("name=\"calibre:series_index\"")
&& let Some(content_start) = opf_content[index_start..].find("content=\"")
{
let after_content = &opf_content[index_start + content_start + 9..];
if let Some(quote_end) = after_content.find('"')
&& let Ok(index) = after_content[..quote_end].parse::<f64>()
{
book_meta.series_index = Some(index);
}
}
}
// Set format
book_meta.format = Some("epub".to_string());
meta.book_metadata = Some(book_meta);
Ok(meta)
}
fn extract_djvu(path: &Path) -> Result<ExtractedMetadata> {
// DjVu files contain metadata in SEXPR (S-expression) format within
// ANTa/ANTz chunks, or in the DIRM chunk. We parse the raw bytes to
// extract any metadata fields we can find.
// Guard against loading very large DjVu files into memory.
const MAX_DJVU_SIZE: u64 = 50 * 1024 * 1024; // 50 MB
let file_meta = std::fs::metadata(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("DjVu stat: {e}")))?;
if file_meta.len() > MAX_DJVU_SIZE {
return Ok(ExtractedMetadata::default());
}
let data = std::fs::read(path)
.map_err(|e| PinakesError::MetadataExtraction(format!("DjVu read: {e}")))?;
let mut meta = ExtractedMetadata::default();
// DjVu files start with "AT&T" magic followed by FORM:DJVU or FORM:DJVM
if data.len() < 16 {
return Ok(meta);
}
// Search for metadata annotations in the file. DjVu metadata is stored
// as S-expressions like (metadata (key "value") ...) within ANTa chunks.
let content = String::from_utf8_lossy(&data);
// Look for (metadata ...) blocks
if let Some(meta_start) = content.find("(metadata") {
let remainder = &content[meta_start..];
// Extract key-value pairs like (title "Some Title")
extract_djvu_field(remainder, "title", &mut meta.title);
extract_djvu_field(remainder, "author", &mut meta.artist);
let mut desc = None;
extract_djvu_field(remainder, "subject", &mut desc);
if desc.is_none() {
extract_djvu_field(remainder, "description", &mut desc);
}
meta.description = desc;
let mut year_str = None;
extract_djvu_field(remainder, "year", &mut year_str);
if let Some(ref y) = year_str {
meta.year = y.parse().ok();
}
let mut creator = None;
extract_djvu_field(remainder, "creator", &mut creator);
if let Some(c) = creator {
meta.extra.insert("creator".to_string(), c);
}
}
// Also check for booklet-style metadata that some DjVu encoders write
// outside the metadata SEXPR
if meta.title.is_none()
&& let Some(title_start) = content.find("(bookmarks")
{
let remainder = &content[title_start..];
// First bookmark title is often the document title
if let Some(q1) = remainder.find('"') {
let after_q1 = &remainder[q1 + 1..];
if let Some(q2) = after_q1.find('"') {
let val = &after_q1[..q2];
if !val.is_empty() {
meta.title = Some(val.to_string());
}
}
}
}
Ok(meta)
}
fn extract_djvu_field(sexpr: &str, key: &str, out: &mut Option<String>) {
// Look for patterns like (key "value") in the S-expression
let pattern = format!("({key}");
if let Some(start) = sexpr.find(&pattern) {
let remainder = &sexpr[start + pattern.len()..];
// Find the quoted value
if let Some(q1) = remainder.find('"') {
let after_q1 = &remainder[q1 + 1..];
if let Some(q2) = after_q1.find('"') {
let val = &after_q1[..q2];
if !val.is_empty() {
*out = Some(val.to_string());
}
}
}
}
}

View file

@ -0,0 +1,300 @@
use std::path::Path;
use pinakes_types::{
error::Result,
media_type::{BuiltinMediaType, MediaType},
};
use super::{ExtractedMetadata, MetadataExtractor};
pub struct ImageExtractor;
impl MetadataExtractor for ImageExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let mut meta = ExtractedMetadata::default();
let file = std::fs::File::open(path)?;
let mut buf_reader = std::io::BufReader::new(&file);
let Ok(exif_data) =
exif::Reader::new().read_from_container(&mut buf_reader)
else {
return Ok(meta);
};
// Image dimensions
if let Some(width) = exif_data
.get_field(exif::Tag::PixelXDimension, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::ImageWidth, exif::In::PRIMARY))
&& let Some(w) = field_to_u32(width)
{
meta.extra.insert("width".to_string(), w.to_string());
}
if let Some(height) = exif_data
.get_field(exif::Tag::PixelYDimension, exif::In::PRIMARY)
.or_else(|| {
exif_data.get_field(exif::Tag::ImageLength, exif::In::PRIMARY)
})
&& let Some(h) = field_to_u32(height)
{
meta.extra.insert("height".to_string(), h.to_string());
}
// Camera make and model - set both in top-level fields and extra
if let Some(make) = exif_data.get_field(exif::Tag::Make, exif::In::PRIMARY)
{
let val = make.display_value().to_string().trim().to_string();
if !val.is_empty() {
meta.camera_make = Some(val.clone());
meta.extra.insert("camera_make".to_string(), val);
}
}
if let Some(model) =
exif_data.get_field(exif::Tag::Model, exif::In::PRIMARY)
{
let val = model.display_value().to_string().trim().to_string();
if !val.is_empty() {
meta.camera_model = Some(val.clone());
meta.extra.insert("camera_model".to_string(), val);
}
}
// Date taken - parse EXIF date format (YYYY:MM:DD HH:MM:SS)
if let Some(date) = exif_data
.get_field(exif::Tag::DateTimeOriginal, exif::In::PRIMARY)
.or_else(|| exif_data.get_field(exif::Tag::DateTime, exif::In::PRIMARY))
{
let val = date.display_value().to_string();
if !val.is_empty() {
// Try parsing EXIF format: "YYYY:MM:DD HH:MM:SS"
if let Some(dt) = parse_exif_datetime(&val) {
meta.date_taken = Some(dt);
}
meta.extra.insert("date_taken".to_string(), val);
}
}
// GPS coordinates - set both in top-level fields and extra
if let (Some(lat), Some(lat_ref), Some(lon), Some(lon_ref)) = (
exif_data.get_field(exif::Tag::GPSLatitude, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLatitudeRef, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLongitude, exif::In::PRIMARY),
exif_data.get_field(exif::Tag::GPSLongitudeRef, exif::In::PRIMARY),
) && let (Some(lat_val), Some(lon_val)) =
(dms_to_decimal(lat, lat_ref), dms_to_decimal(lon, lon_ref))
{
meta.latitude = Some(lat_val);
meta.longitude = Some(lon_val);
meta
.extra
.insert("gps_latitude".to_string(), format!("{lat_val:.6}"));
meta
.extra
.insert("gps_longitude".to_string(), format!("{lon_val:.6}"));
}
// Exposure info
if let Some(iso) =
exif_data.get_field(exif::Tag::PhotographicSensitivity, exif::In::PRIMARY)
{
let val = iso.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("iso".to_string(), val);
}
}
if let Some(exposure) =
exif_data.get_field(exif::Tag::ExposureTime, exif::In::PRIMARY)
{
let val = exposure.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("exposure_time".to_string(), val);
}
}
if let Some(aperture) =
exif_data.get_field(exif::Tag::FNumber, exif::In::PRIMARY)
{
let val = aperture.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("f_number".to_string(), val);
}
}
if let Some(focal) =
exif_data.get_field(exif::Tag::FocalLength, exif::In::PRIMARY)
{
let val = focal.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("focal_length".to_string(), val);
}
}
// Lens model
if let Some(lens) =
exif_data.get_field(exif::Tag::LensModel, exif::In::PRIMARY)
{
let val = lens.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta
.extra
.insert("lens_model".to_string(), val.trim_matches('"').to_string());
}
}
// Flash
if let Some(flash) =
exif_data.get_field(exif::Tag::Flash, exif::In::PRIMARY)
{
let val = flash.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("flash".to_string(), val);
}
}
// Orientation
if let Some(orientation) =
exif_data.get_field(exif::Tag::Orientation, exif::In::PRIMARY)
{
let val = orientation.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("orientation".to_string(), val);
}
}
// Software
if let Some(software) =
exif_data.get_field(exif::Tag::Software, exif::In::PRIMARY)
{
let val = software.display_value().to_string();
if !val.is_empty() {
meta.extra.insert("software".to_string(), val);
}
}
// Image description as title
if let Some(desc) =
exif_data.get_field(exif::Tag::ImageDescription, exif::In::PRIMARY)
{
let val = desc.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.title = Some(val.trim_matches('"').to_string());
}
}
// Artist
if let Some(artist) =
exif_data.get_field(exif::Tag::Artist, exif::In::PRIMARY)
{
let val = artist.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.artist = Some(val.trim_matches('"').to_string());
}
}
// Copyright as description
if let Some(copyright) =
exif_data.get_field(exif::Tag::Copyright, exif::In::PRIMARY)
{
let val = copyright.display_value().to_string();
if !val.is_empty() && val != "\"\"" {
meta.description = Some(val.trim_matches('"').to_string());
}
}
Ok(meta)
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Jpeg),
MediaType::Builtin(BuiltinMediaType::Png),
MediaType::Builtin(BuiltinMediaType::Gif),
MediaType::Builtin(BuiltinMediaType::Webp),
MediaType::Builtin(BuiltinMediaType::Avif),
MediaType::Builtin(BuiltinMediaType::Tiff),
MediaType::Builtin(BuiltinMediaType::Bmp),
// RAW formats (TIFF-based, kamadak-exif handles these)
MediaType::Builtin(BuiltinMediaType::Cr2),
MediaType::Builtin(BuiltinMediaType::Nef),
MediaType::Builtin(BuiltinMediaType::Arw),
MediaType::Builtin(BuiltinMediaType::Dng),
MediaType::Builtin(BuiltinMediaType::Orf),
MediaType::Builtin(BuiltinMediaType::Rw2),
// HEIC
MediaType::Builtin(BuiltinMediaType::Heic),
]
}
}
fn field_to_u32(field: &exif::Field) -> Option<u32> {
match &field.value {
exif::Value::Long(v) => v.first().copied(),
exif::Value::Short(v) => v.first().map(|&x| u32::from(x)),
_ => None,
}
}
fn dms_to_decimal(
dms_field: &exif::Field,
ref_field: &exif::Field,
) -> Option<f64> {
if let exif::Value::Rational(ref rationals) = dms_field.value
&& rationals.len() >= 3
{
let degrees = rationals[0].to_f64();
let minutes = rationals[1].to_f64();
let seconds = rationals[2].to_f64();
let mut decimal = degrees + minutes / 60.0 + seconds / 3600.0;
let ref_str = ref_field.display_value().to_string();
if ref_str.contains('S') || ref_str.contains('W') {
decimal = -decimal;
}
return Some(decimal);
}
None
}
/// Parse EXIF datetime format: "YYYY:MM:DD HH:MM:SS"
fn parse_exif_datetime(s: &str) -> Option<chrono::DateTime<chrono::Utc>> {
use chrono::NaiveDateTime;
// EXIF format is "YYYY:MM:DD HH:MM:SS"
let s = s.trim().trim_matches('"');
// Try standard EXIF format
if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y:%m:%d %H:%M:%S") {
return Some(dt.and_utc());
}
// Try ISO format as fallback
if let Ok(dt) = NaiveDateTime::parse_from_str(s, "%Y-%m-%d %H:%M:%S") {
return Some(dt.and_utc());
}
None
}
/// Generate a perceptual hash for an image file.
///
/// Uses DCT (Discrete Cosine Transform) hash algorithm for robust similarity
/// detection. Returns a hex-encoded hash string, or None if the image cannot be
/// processed.
#[must_use]
pub fn generate_perceptual_hash(path: &Path) -> Option<String> {
use image_hasher::{HashAlg, HasherConfig};
// Open and decode the image
let img = image::open(path).ok()?;
// Create hasher with DCT algorithm (good for finding similar images)
let hasher = HasherConfig::new()
.hash_alg(HashAlg::DoubleGradient)
.hash_size(8, 8) // 64-bit hash
.to_hasher();
// Generate hash
let hash = hasher.hash_image(&img);
// Convert to hex string for storage
Some(hash.to_base64())
}

View file

@ -0,0 +1,73 @@
pub mod audio;
pub mod document;
pub mod image;
pub mod markdown;
pub mod video;
use std::path::Path;
use pinakes_types::{
error::Result,
media_type::MediaType,
model::BookMetadata,
};
use rustc_hash::FxHashMap;
#[derive(Debug, Clone, Default)]
pub struct ExtractedMetadata {
pub title: Option<String>,
pub artist: Option<String>,
pub album: Option<String>,
pub genre: Option<String>,
pub year: Option<i32>,
pub duration_secs: Option<f64>,
pub description: Option<String>,
pub extra: FxHashMap<String, String>,
pub book_metadata: Option<BookMetadata>,
// Photo-specific metadata
pub date_taken: Option<chrono::DateTime<chrono::Utc>>,
pub latitude: Option<f64>,
pub longitude: Option<f64>,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub rating: Option<i32>,
}
pub trait MetadataExtractor: Send + Sync {
/// Extract metadata from a file at the given path.
///
/// # Errors
///
/// Returns an error if the file cannot be read or parsed.
fn extract(&self, path: &Path) -> Result<ExtractedMetadata>;
fn supported_types(&self) -> Vec<MediaType>;
}
/// Extract metadata from a file using the appropriate extractor for the given
/// media type.
///
/// # Errors
///
/// Returns an error if extraction fails. Returns a default `ExtractedMetadata`
/// when no extractor supports the media type.
pub fn extract_metadata(
path: &Path,
media_type: &MediaType,
) -> Result<ExtractedMetadata> {
let extractors: Vec<Box<dyn MetadataExtractor>> = vec![
Box::new(audio::AudioExtractor),
Box::new(document::DocumentExtractor),
Box::new(video::VideoExtractor),
Box::new(markdown::MarkdownExtractor),
Box::new(image::ImageExtractor),
];
for extractor in &extractors {
if extractor.supported_types().contains(media_type) {
return extractor.extract(path);
}
}
Ok(ExtractedMetadata::default())
}

View file

@ -0,0 +1,46 @@
use std::path::Path;
use pinakes_types::{
error::Result,
media_type::{BuiltinMediaType, MediaType},
};
use super::{ExtractedMetadata, MetadataExtractor};
pub struct MarkdownExtractor;
impl MetadataExtractor for MarkdownExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
let content = std::fs::read_to_string(path)?;
let parsed =
gray_matter::Matter::<gray_matter::engine::YAML>::new().parse(&content);
let mut meta = ExtractedMetadata::default();
if let Some(data) = parsed.ok().and_then(|p| p.data)
&& let gray_matter::Pod::Hash(map) = data
{
if let Some(gray_matter::Pod::String(title)) = map.get("title") {
meta.title = Some(title.clone());
}
if let Some(gray_matter::Pod::String(author)) = map.get("author") {
meta.artist = Some(author.clone());
}
if let Some(gray_matter::Pod::String(desc)) = map.get("description") {
meta.description = Some(desc.clone());
}
if let Some(gray_matter::Pod::String(date)) = map.get("date") {
meta.extra.insert("date".to_string(), date.clone());
}
}
Ok(meta)
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Markdown),
MediaType::Builtin(BuiltinMediaType::PlainText),
]
}
}

View file

@ -0,0 +1,70 @@
pub mod audio;
pub mod document;
pub mod image;
pub mod markdown;
pub mod video;
use std::path::Path;
use rustc_hash::FxHashMap;
use pinakes_types::{error::Result, media_type::MediaType, model::BookMetadata};
#[derive(Debug, Clone, Default)]
pub struct ExtractedMetadata {
pub title: Option<String>,
pub artist: Option<String>,
pub album: Option<String>,
pub genre: Option<String>,
pub year: Option<i32>,
pub duration_secs: Option<f64>,
pub description: Option<String>,
pub extra: FxHashMap<String, String>,
pub book_metadata: Option<BookMetadata>,
// Photo-specific metadata
pub date_taken: Option<chrono::DateTime<chrono::Utc>>,
pub latitude: Option<f64>,
pub longitude: Option<f64>,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub rating: Option<i32>,
}
pub trait MetadataExtractor: Send + Sync {
/// Extract metadata from a file at the given path.
///
/// # Errors
///
/// Returns an error if the file cannot be read or parsed.
fn extract(&self, path: &Path) -> Result<ExtractedMetadata>;
fn supported_types(&self) -> Vec<MediaType>;
}
/// Extract metadata from a file using the appropriate extractor for the given
/// media type.
///
/// # Errors
///
/// Returns an error if no extractor supports the media type, or if extraction
/// fails.
pub fn extract_metadata(
path: &Path,
media_type: &MediaType,
) -> Result<ExtractedMetadata> {
let extractors: Vec<Box<dyn MetadataExtractor>> = vec![
Box::new(audio::AudioExtractor),
Box::new(document::DocumentExtractor),
Box::new(video::VideoExtractor),
Box::new(markdown::MarkdownExtractor),
Box::new(image::ImageExtractor),
];
for extractor in &extractors {
if extractor.supported_types().contains(media_type) {
return extractor.extract(path);
}
}
Ok(ExtractedMetadata::default())
}

View file

@ -0,0 +1,129 @@
use std::path::Path;
use pinakes_types::{
error::{PinakesError, Result},
media_type::{BuiltinMediaType, MediaType},
};
use super::{ExtractedMetadata, MetadataExtractor};
pub struct VideoExtractor;
impl MetadataExtractor for VideoExtractor {
fn extract(&self, path: &Path) -> Result<ExtractedMetadata> {
match MediaType::from_path(path) {
Some(MediaType::Builtin(BuiltinMediaType::Mkv)) => extract_mkv(path),
Some(MediaType::Builtin(BuiltinMediaType::Mp4)) => extract_mp4(path),
_ => Ok(ExtractedMetadata::default()),
}
}
fn supported_types(&self) -> Vec<MediaType> {
vec![
MediaType::Builtin(BuiltinMediaType::Mp4),
MediaType::Builtin(BuiltinMediaType::Mkv),
]
}
}
fn extract_mkv(path: &Path) -> Result<ExtractedMetadata> {
let file = std::fs::File::open(path)?;
let mkv = matroska::Matroska::open(file)
.map_err(|e| PinakesError::MetadataExtraction(format!("MKV parse: {e}")))?;
let mut meta = ExtractedMetadata {
title: mkv.info.title.clone(),
duration_secs: mkv.info.duration.map(|dur| dur.as_secs_f64()),
..Default::default()
};
// Extract resolution and codec info from tracks
for track in &mkv.tracks {
match &track.settings {
matroska::Settings::Video(v) => {
meta.extra.insert(
"resolution".to_string(),
format!("{}x{}", v.pixel_width, v.pixel_height),
);
if !track.codec_id.is_empty() {
meta
.extra
.insert("video_codec".to_string(), track.codec_id.clone());
}
},
matroska::Settings::Audio(a) => {
meta.extra.insert(
"sample_rate".to_string(),
format!("{:.0} Hz", a.sample_rate),
);
meta
.extra
.insert("channels".to_string(), a.channels.to_string());
if !track.codec_id.is_empty() {
meta
.extra
.insert("audio_codec".to_string(), track.codec_id.clone());
}
},
matroska::Settings::None => {},
}
}
Ok(meta)
}
fn extract_mp4(path: &Path) -> Result<ExtractedMetadata> {
use lofty::{
file::{AudioFile, TaggedFileExt},
tag::Accessor,
};
let tagged_file = lofty::read_from_path(path).map_err(|e| {
PinakesError::MetadataExtraction(format!("MP4 metadata: {e}"))
})?;
let mut meta = ExtractedMetadata::default();
if let Some(tag) = tagged_file
.primary_tag()
.or_else(|| tagged_file.first_tag())
{
meta.title = tag
.title()
.map(|s: std::borrow::Cow<'_, str>| s.to_string());
meta.artist = tag
.artist()
.map(|s: std::borrow::Cow<'_, str>| s.to_string());
meta.album = tag
.album()
.map(|s: std::borrow::Cow<'_, str>| s.to_string());
meta.genre = tag
.genre()
.map(|s: std::borrow::Cow<'_, str>| s.to_string());
meta.year = tag.date().map(|ts| i32::from(ts.year));
}
let properties = tagged_file.properties();
let duration = properties.duration();
if !duration.is_zero() {
meta.duration_secs = Some(duration.as_secs_f64());
}
if let Some(bitrate) = properties.audio_bitrate() {
meta
.extra
.insert("audio_bitrate".to_string(), format!("{bitrate} kbps"));
}
if let Some(sample_rate) = properties.sample_rate() {
meta
.extra
.insert("sample_rate".to_string(), format!("{sample_rate} Hz"));
}
if let Some(channels) = properties.channels() {
meta
.extra
.insert("channels".to_string(), channels.to_string());
}
Ok(meta)
}

View file

@ -0,0 +1,29 @@
[package]
name = "pinakes-plugin"
edition.workspace = true
version.workspace = true
license.workspace = true
[dependencies]
pinakes-types = { workspace = true }
pinakes-plugin-api = { workspace = true }
wasmtime = { workspace = true }
ed25519-dalek = { workspace = true }
reqwest = { workspace = true }
tokio = { workspace = true }
tracing = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
anyhow = { workspace = true }
rustc-hash = { workspace = true }
walkdir = { workspace = true }
uuid = { workspace = true }
url = { workspace = true }
blake3 = { workspace = true }
rand = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
[lints]
workspace = true

View file

@ -0,0 +1,15 @@
pub mod loader;
pub mod registry;
pub mod rpc;
pub mod runtime;
pub mod security;
pub mod signature;
pub use loader::PluginLoader;
pub use registry::{PluginRegistry, RegisteredPlugin};
pub use runtime::{WasmPlugin, WasmRuntime};
pub use security::CapabilityEnforcer;
pub use signature::{SignatureStatus, verify_plugin_signature};
mod manager;
pub use manager::{PluginManager, PluginManagerConfig};

View file

@ -0,0 +1,432 @@
//! Plugin loader for discovering and loading plugins from the filesystem
use std::path::{Path, PathBuf};
use anyhow::{Result, anyhow};
use pinakes_plugin_api::PluginManifest;
use tracing::{debug, info, warn};
use walkdir::WalkDir;
/// Plugin loader handles discovery and loading of plugins from directories
pub struct PluginLoader {
/// Directories to search for plugins
plugin_dirs: Vec<PathBuf>,
}
impl PluginLoader {
/// Create a new plugin loader
#[must_use]
pub const fn new(plugin_dirs: Vec<PathBuf>) -> Self {
Self { plugin_dirs }
}
/// Discover all plugins in configured directories
///
/// # Errors
///
/// Returns an error if a plugin directory cannot be searched.
pub fn discover_plugins(&self) -> Result<Vec<PluginManifest>> {
let mut manifests = Vec::new();
for dir in &self.plugin_dirs {
if !dir.exists() {
warn!("Plugin directory does not exist: {:?}", dir);
continue;
}
info!("Discovering plugins in: {:?}", dir);
let found = Self::discover_in_directory(dir);
info!("Found {} plugins in {:?}", found.len(), dir);
manifests.extend(found);
}
Ok(manifests)
}
/// Discover plugins in a specific directory
fn discover_in_directory(dir: &Path) -> Vec<PluginManifest> {
let mut manifests = Vec::new();
// Walk the directory looking for plugin.toml files
for entry in WalkDir::new(dir)
.max_depth(3) // Don't go too deep
.follow_links(false)
{
let entry = match entry {
Ok(e) => e,
Err(e) => {
warn!("Error reading directory entry: {}", e);
continue;
},
};
let path = entry.path();
// Look for plugin.toml files
if path.file_name() == Some(std::ffi::OsStr::new("plugin.toml")) {
debug!("Found plugin manifest: {:?}", path);
match PluginManifest::from_file(path) {
Ok(manifest) => {
info!("Loaded manifest for plugin: {}", manifest.plugin.name);
manifests.push(manifest);
},
Err(e) => {
warn!("Failed to load manifest from {:?}: {}", path, e);
},
}
}
}
manifests
}
/// Resolve the WASM binary path from a manifest
///
/// # Errors
///
/// Returns an error if the WASM binary is not found or its path escapes the
/// plugin directory.
pub fn resolve_wasm_path(
&self,
manifest: &PluginManifest,
) -> Result<PathBuf> {
// The WASM path in the manifest is relative to the manifest file
// We need to search for it in the plugin directories
for dir in &self.plugin_dirs {
// Look for a directory matching the plugin name
let plugin_dir = dir.join(&manifest.plugin.name);
if !plugin_dir.exists() {
continue;
}
// Check for plugin.toml in this directory
let manifest_path = plugin_dir.join("plugin.toml");
if !manifest_path.exists() {
continue;
}
// Resolve WASM path relative to this directory
let wasm_path = plugin_dir.join(&manifest.plugin.binary.wasm);
if wasm_path.exists() {
// Verify the resolved path is within the plugin directory (prevent path
// traversal)
let canonical_wasm = wasm_path
.canonicalize()
.map_err(|e| anyhow!("Failed to canonicalize WASM path: {e}"))?;
let canonical_plugin_dir = plugin_dir
.canonicalize()
.map_err(|e| anyhow!("Failed to canonicalize plugin dir: {e}"))?;
if !canonical_wasm.starts_with(&canonical_plugin_dir) {
return Err(anyhow!(
"WASM binary path escapes plugin directory: {}",
wasm_path.display()
));
}
return Ok(canonical_wasm);
}
}
Err(anyhow!(
"WASM binary not found for plugin: {}",
manifest.plugin.name
))
}
/// Download a plugin from a URL
///
/// # Errors
///
/// Returns an error if the URL is not HTTPS, no plugin directories are
/// configured, the download fails, the archive is too large, or extraction
/// fails.
pub async fn download_plugin(&self, url: &str) -> Result<PathBuf> {
const MAX_PLUGIN_SIZE: u64 = 100 * 1024 * 1024; // 100 MB
// Only allow HTTPS downloads
if !url.starts_with("https://") {
return Err(anyhow!(
"Only HTTPS URLs are allowed for plugin downloads: {url}"
));
}
let dest_dir = self
.plugin_dirs
.first()
.ok_or_else(|| anyhow!("No plugin directories configured"))?;
std::fs::create_dir_all(dest_dir)?;
// Download the archive with timeout and size limits
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_mins(5))
.build()
.map_err(|e| anyhow!("Failed to build HTTP client: {e}"))?;
let response = client
.get(url)
.send()
.await
.map_err(|e| anyhow!("Failed to download plugin: {e}"))?;
if !response.status().is_success() {
return Err(anyhow!(
"Plugin download failed with status: {}",
response.status()
));
}
// Check content-length header before downloading
if let Some(content_length) = response.content_length()
&& content_length > MAX_PLUGIN_SIZE
{
return Err(anyhow!(
"Plugin archive too large: {content_length} bytes (max \
{MAX_PLUGIN_SIZE} bytes)"
));
}
let bytes = response
.bytes()
.await
.map_err(|e| anyhow!("Failed to read plugin response: {e}"))?;
// Check actual size after download
if bytes.len() as u64 > MAX_PLUGIN_SIZE {
return Err(anyhow!(
"Plugin archive too large: {} bytes (max {} bytes)",
bytes.len(),
MAX_PLUGIN_SIZE
));
}
// Write archive to a unique temp file
let temp_archive =
dest_dir.join(format!(".download-{}.tar.gz", uuid::Uuid::now_v7()));
std::fs::write(&temp_archive, &bytes)?;
// Extract using tar with -C to target directory
let canonical_dest = dest_dir
.canonicalize()
.map_err(|e| anyhow!("Failed to canonicalize dest dir: {e}"))?;
let output = std::process::Command::new("tar")
.args([
"xzf",
&temp_archive.to_string_lossy(),
"-C",
&canonical_dest.to_string_lossy(),
])
.output()
.map_err(|e| anyhow!("Failed to extract plugin archive: {e}"))?;
// Clean up the archive
let _ = std::fs::remove_file(&temp_archive);
if !output.status.success() {
return Err(anyhow!(
"Failed to extract plugin archive: {}",
String::from_utf8_lossy(&output.stderr)
));
}
// Validate that all extracted files are within dest_dir
for entry in WalkDir::new(&canonical_dest).follow_links(false) {
let entry = entry?;
let entry_canonical = entry.path().canonicalize()?;
if !entry_canonical.starts_with(&canonical_dest) {
return Err(anyhow!(
"Extracted file escapes destination directory: {}",
entry.path().display()
));
}
}
// Find the extracted plugin directory by looking for plugin.toml
for entry in WalkDir::new(dest_dir).max_depth(2).follow_links(false) {
let entry = entry?;
if entry.file_name() == "plugin.toml" {
let plugin_dir = entry
.path()
.parent()
.ok_or_else(|| anyhow!("Invalid plugin.toml location"))?;
// Validate the manifest
let manifest = PluginManifest::from_file(entry.path())?;
info!("Downloaded and extracted plugin: {}", manifest.plugin.name);
return Ok(plugin_dir.to_path_buf());
}
}
Err(anyhow!(
"No plugin.toml found after extracting archive from: {url}"
))
}
/// Validate a plugin package
///
/// # Errors
///
/// Returns an error if the path does not exist, is missing `plugin.toml`,
/// the WASM binary is not found, or the WASM file is invalid.
pub fn validate_plugin_package(&self, path: &Path) -> Result<()> {
// Check that the path exists
if !path.exists() {
return Err(anyhow!("Plugin path does not exist: {}", path.display()));
}
// Check for plugin.toml
let manifest_path = path.join("plugin.toml");
if !manifest_path.exists() {
return Err(anyhow!("Missing plugin.toml in {}", path.display()));
}
// Parse and validate manifest
let manifest = PluginManifest::from_file(&manifest_path)?;
// Check that WASM binary exists
let wasm_path = path.join(&manifest.plugin.binary.wasm);
if !wasm_path.exists() {
return Err(anyhow!(
"WASM binary not found: {}",
manifest.plugin.binary.wasm
));
}
// Verify the WASM path is within the plugin directory (prevent path
// traversal)
let canonical_wasm = wasm_path.canonicalize()?;
let canonical_path = path.canonicalize()?;
if !canonical_wasm.starts_with(&canonical_path) {
return Err(anyhow!(
"WASM binary path escapes plugin directory: {}",
wasm_path.display()
));
}
// Validate WASM file
let wasm_bytes = std::fs::read(&wasm_path)?;
if wasm_bytes.len() < 4 || &wasm_bytes[0..4] != b"\0asm" {
return Err(anyhow!("Invalid WASM file: {}", wasm_path.display()));
}
Ok(())
}
/// Get plugin directory path for a given plugin name
#[must_use]
pub fn get_plugin_dir(&self, plugin_name: &str) -> Option<PathBuf> {
for dir in &self.plugin_dirs {
let plugin_dir = dir.join(plugin_name);
if plugin_dir.exists() {
return Some(plugin_dir);
}
}
None
}
}
#[cfg(test)]
mod tests {
use tempfile::TempDir;
use super::*;
#[test]
fn test_discover_plugins_empty() {
let temp_dir = TempDir::new().unwrap();
let loader = PluginLoader::new(vec![temp_dir.path().to_path_buf()]);
let manifests = loader.discover_plugins().unwrap();
assert_eq!(manifests.len(), 0);
}
#[test]
fn test_discover_plugins_with_manifest() {
let temp_dir = TempDir::new().unwrap();
let plugin_dir = temp_dir.path().join("test-plugin");
std::fs::create_dir(&plugin_dir).unwrap();
// Create a valid manifest
let manifest_content = r#"
[plugin]
name = "test-plugin"
version = "1.0.0"
api_version = "1.0"
kind = ["media_type"]
[plugin.binary]
wasm = "plugin.wasm"
"#;
std::fs::write(plugin_dir.join("plugin.toml"), manifest_content).unwrap();
// Create dummy WASM file
std::fs::write(plugin_dir.join("plugin.wasm"), b"\0asm\x01\x00\x00\x00")
.unwrap();
let loader = PluginLoader::new(vec![temp_dir.path().to_path_buf()]);
let manifests = loader.discover_plugins().unwrap();
assert_eq!(manifests.len(), 1);
assert_eq!(manifests[0].plugin.name, "test-plugin");
}
#[test]
fn test_validate_plugin_package() {
let temp_dir = TempDir::new().unwrap();
let plugin_dir = temp_dir.path().join("test-plugin");
std::fs::create_dir(&plugin_dir).unwrap();
// Create a valid manifest
let manifest_content = r#"
[plugin]
name = "test-plugin"
version = "1.0.0"
api_version = "1.0"
kind = ["media_type"]
[plugin.binary]
wasm = "plugin.wasm"
"#;
std::fs::write(plugin_dir.join("plugin.toml"), manifest_content).unwrap();
let loader = PluginLoader::new(vec![]);
// Should fail without WASM file
assert!(loader.validate_plugin_package(&plugin_dir).is_err());
// Create valid WASM file (magic number only)
std::fs::write(plugin_dir.join("plugin.wasm"), b"\0asm\x01\x00\x00\x00")
.unwrap();
// Should succeed now
assert!(loader.validate_plugin_package(&plugin_dir).is_ok());
}
#[test]
fn test_validate_invalid_wasm() {
let temp_dir = TempDir::new().unwrap();
let plugin_dir = temp_dir.path().join("test-plugin");
std::fs::create_dir(&plugin_dir).unwrap();
let manifest_content = r#"
[plugin]
name = "test-plugin"
version = "1.0.0"
api_version = "1.0"
kind = ["media_type"]
[plugin.binary]
wasm = "plugin.wasm"
"#;
std::fs::write(plugin_dir.join("plugin.toml"), manifest_content).unwrap();
// Create invalid WASM file
std::fs::write(plugin_dir.join("plugin.wasm"), b"not wasm").unwrap();
let loader = PluginLoader::new(vec![]);
assert!(loader.validate_plugin_package(&plugin_dir).is_err());
}
}

View file

@ -0,0 +1,916 @@
use std::{path::PathBuf, sync::Arc};
use anyhow::Result;
use pinakes_plugin_api::{PluginContext, PluginMetadata};
use tokio::sync::RwLock;
use tracing::{debug, error, info, warn};
use crate::{
CapabilityEnforcer,
PluginLoader,
PluginRegistry,
RegisteredPlugin,
SignatureStatus,
WasmPlugin,
WasmRuntime,
signature,
};
/// Plugin manager coordinates plugin lifecycle and operations
pub struct PluginManager {
/// Plugin registry
registry: Arc<RwLock<PluginRegistry>>,
/// WASM runtime for executing plugins
runtime: Arc<WasmRuntime>,
/// Plugin loader for discovery and loading
loader: PluginLoader,
/// Capability enforcer for security
enforcer: CapabilityEnforcer,
/// Plugin data directory
data_dir: PathBuf,
/// Plugin cache directory
cache_dir: PathBuf,
/// Configuration
config: PluginManagerConfig,
}
/// Configuration for the plugin manager
#[derive(Debug, Clone)]
pub struct PluginManagerConfig {
/// Directories to search for plugins
pub plugin_dirs: Vec<PathBuf>,
/// Whether to enable hot-reload (for development)
pub enable_hot_reload: bool,
/// Whether to allow unsigned plugins
pub allow_unsigned: bool,
/// Maximum number of concurrent plugin operations
pub max_concurrent_ops: usize,
/// Plugin timeout in seconds
pub plugin_timeout_secs: u64,
/// Timeout configuration for different call types
pub timeouts: pinakes_types::config::PluginTimeoutConfig,
/// Max consecutive failures before circuit breaker disables plugin
pub max_consecutive_failures: u32,
/// Trusted Ed25519 public keys for signature verification (hex-encoded)
pub trusted_keys: Vec<String>,
}
impl Default for PluginManagerConfig {
fn default() -> Self {
Self {
plugin_dirs: vec![],
enable_hot_reload: false,
allow_unsigned: false,
max_concurrent_ops: 4,
plugin_timeout_secs: 30,
timeouts:
pinakes_types::config::PluginTimeoutConfig::default(),
max_consecutive_failures: 5,
trusted_keys: vec![],
}
}
}
impl From<pinakes_types::config::PluginsConfig> for PluginManagerConfig {
fn from(cfg: pinakes_types::config::PluginsConfig) -> Self {
Self {
plugin_dirs: cfg.plugin_dirs,
enable_hot_reload: cfg.enable_hot_reload,
allow_unsigned: cfg.allow_unsigned,
max_concurrent_ops: cfg.max_concurrent_ops,
plugin_timeout_secs: cfg.plugin_timeout_secs,
timeouts: cfg.timeouts,
max_consecutive_failures: cfg.max_consecutive_failures,
trusted_keys: cfg.trusted_keys,
}
}
}
impl PluginManager {
/// Create a new plugin manager
///
/// # Errors
///
/// Returns an error if the data or cache directories cannot be created, or
/// if the WASM runtime cannot be initialized.
pub fn new(
data_dir: PathBuf,
cache_dir: PathBuf,
config: PluginManagerConfig,
) -> Result<Self> {
// Ensure directories exist
std::fs::create_dir_all(&data_dir)?;
std::fs::create_dir_all(&cache_dir)?;
let runtime = Arc::new(WasmRuntime::new()?);
let registry = Arc::new(RwLock::new(PluginRegistry::new()));
let loader = PluginLoader::new(config.plugin_dirs.clone());
let enforcer = CapabilityEnforcer::new();
Ok(Self {
registry,
runtime,
loader,
enforcer,
data_dir,
cache_dir,
config,
})
}
/// Discover and load all plugins from configured directories.
///
/// Plugins are loaded in dependency order: if plugin A declares a
/// dependency on plugin B, B is loaded first. Cycles and missing
/// dependencies are detected and reported as warnings; affected plugins
/// are skipped rather than causing a hard failure.
///
/// # Errors
///
/// Returns an error if plugin discovery fails.
pub async fn discover_and_load_all(&self) -> Result<Vec<String>> {
info!("Discovering plugins from {:?}", self.config.plugin_dirs);
let manifests = self.loader.discover_plugins()?;
let ordered = Self::resolve_load_order(&manifests);
let mut loaded_plugins = Vec::new();
for manifest in ordered {
match self.load_plugin_from_manifest(&manifest).await {
Ok(plugin_id) => {
info!("Loaded plugin: {}", plugin_id);
loaded_plugins.push(plugin_id);
},
Err(e) => {
warn!("Failed to load plugin {}: {}", manifest.plugin.name, e);
},
}
}
Ok(loaded_plugins)
}
/// Topological sort of manifests by their declared `dependencies`.
///
/// Uses Kahn's algorithm. Plugins whose dependencies are missing or form
/// a cycle are logged as warnings and excluded from the result.
fn resolve_load_order(
manifests: &[pinakes_plugin_api::PluginManifest],
) -> Vec<pinakes_plugin_api::PluginManifest> {
use std::collections::VecDeque;
use rustc_hash::{FxHashMap, FxHashSet};
// Index manifests by name for O(1) lookup
let by_name: FxHashMap<&str, usize> = manifests
.iter()
.enumerate()
.map(|(i, m)| (m.plugin.name.as_str(), i))
.collect();
// Check for missing dependencies and warn early
let known: FxHashSet<&str> = by_name.keys().copied().collect();
for manifest in manifests {
for dep in &manifest.plugin.dependencies {
if !known.contains(dep.as_str()) {
warn!(
"Plugin '{}' depends on '{}' which was not discovered; it will be \
skipped",
manifest.plugin.name, dep
);
}
}
}
// Build adjacency: in_degree[i] = number of deps that must load before i
let mut in_degree = vec![0usize; manifests.len()];
// dependents[i] = indices that depend on i (i must load before them)
let mut dependents: Vec<Vec<usize>> = vec![vec![]; manifests.len()];
for (i, manifest) in manifests.iter().enumerate() {
for dep in &manifest.plugin.dependencies {
if let Some(&dep_idx) = by_name.get(dep.as_str()) {
in_degree[i] += 1;
dependents[dep_idx].push(i);
} else {
// Missing dep: set in_degree impossibly high so it never resolves
in_degree[i] = usize::MAX;
}
}
}
// Kahn's algorithm
let mut queue: VecDeque<usize> = VecDeque::new();
for (i, &deg) in in_degree.iter().enumerate() {
if deg == 0 {
queue.push_back(i);
}
}
let mut result = Vec::with_capacity(manifests.len());
while let Some(idx) = queue.pop_front() {
result.push(manifests[idx].clone());
for &dependent in &dependents[idx] {
if in_degree[dependent] == usize::MAX {
continue; // already poisoned by missing dep
}
in_degree[dependent] -= 1;
if in_degree[dependent] == 0 {
queue.push_back(dependent);
}
}
}
// Anything not in `result` is part of a cycle or has a missing dep
if result.len() < manifests.len() {
let loaded: FxHashSet<&str> =
result.iter().map(|m| m.plugin.name.as_str()).collect();
for manifest in manifests {
if !loaded.contains(manifest.plugin.name.as_str()) {
warn!(
"Plugin '{}' was skipped due to unresolved dependencies or a \
dependency cycle",
manifest.plugin.name
);
}
}
}
result
}
/// Load a plugin from a manifest file
///
/// # Errors
///
/// Returns an error if the plugin ID is invalid, capability validation
/// fails, the WASM binary cannot be loaded, or the plugin cannot be
/// registered.
async fn load_plugin_from_manifest(
&self,
manifest: &pinakes_plugin_api::PluginManifest,
) -> Result<String> {
let plugin_id = manifest.plugin_id();
// Validate plugin_id to prevent path traversal
if plugin_id.contains('/')
|| plugin_id.contains('\\')
|| plugin_id.contains("..")
{
return Err(anyhow::anyhow!("Invalid plugin ID: {plugin_id}"));
}
// Check if already loaded
{
let registry = self.registry.read().await;
if registry.is_loaded(&plugin_id) {
return Ok(plugin_id);
}
}
// Validate capabilities
let capabilities = manifest.to_capabilities();
self.enforcer.validate_capabilities(&capabilities)?;
// Create plugin context
let plugin_data_dir = self.data_dir.join(&plugin_id);
let plugin_cache_dir = self.cache_dir.join(&plugin_id);
tokio::fs::create_dir_all(&plugin_data_dir).await?;
tokio::fs::create_dir_all(&plugin_cache_dir).await?;
let context = PluginContext {
data_dir: plugin_data_dir,
cache_dir: plugin_cache_dir,
config: manifest
.config
.iter()
.map(|(k, v)| {
(
k.clone(),
serde_json::to_value(v).unwrap_or_else(|e| {
tracing::warn!(
"failed to serialize config value for key {}: {}",
k,
e
);
serde_json::Value::Null
}),
)
})
.collect(),
capabilities: capabilities.clone(),
};
// Load WASM binary
let wasm_path = self.loader.resolve_wasm_path(manifest)?;
// Verify plugin signature unless unsigned plugins are allowed
if !self.config.allow_unsigned {
let plugin_dir = wasm_path
.parent()
.ok_or_else(|| anyhow::anyhow!("WASM path has no parent directory"))?;
let trusted_keys: Vec<ed25519_dalek::VerifyingKey> = self
.config
.trusted_keys
.iter()
.filter_map(|hex| {
signature::parse_public_key(hex)
.map_err(|e| warn!("Ignoring malformed trusted key: {e}"))
.ok()
})
.collect();
match signature::verify_plugin_signature(
plugin_dir,
&wasm_path,
&trusted_keys,
)? {
SignatureStatus::Valid => {
debug!("Plugin '{plugin_id}' signature verified");
},
SignatureStatus::Unsigned => {
return Err(anyhow::anyhow!(
"Plugin '{plugin_id}' is unsigned and allow_unsigned is false"
));
},
SignatureStatus::Invalid(reason) => {
return Err(anyhow::anyhow!(
"Plugin '{plugin_id}' has an invalid signature: {reason}"
));
},
}
}
let wasm_plugin = self.runtime.load_plugin(&wasm_path, context)?;
// Initialize plugin
let init_succeeded = match wasm_plugin
.call_function("initialize", &[])
.await
{
Ok(_) => true,
Err(e) => {
tracing::warn!(plugin_id = %plugin_id, "plugin initialization failed: {}", e);
false
},
};
// Register plugin
let metadata = PluginMetadata {
id: plugin_id.clone(),
name: manifest.plugin.name.clone(),
version: manifest.plugin.version.clone(),
author: manifest.plugin.author.clone().unwrap_or_default(),
description: manifest
.plugin
.description
.clone()
.unwrap_or_default(),
api_version: manifest.plugin.api_version.clone(),
capabilities_required: capabilities,
};
// Derive manifest_path from the loader's plugin directories
let manifest_path = self
.loader
.get_plugin_dir(&manifest.plugin.name)
.map(|dir| dir.join("plugin.toml"));
let registered = RegisteredPlugin {
id: plugin_id.clone(),
metadata,
wasm_plugin,
manifest: manifest.clone(),
manifest_path,
enabled: init_succeeded,
};
{
let mut registry = self.registry.write().await;
registry.register(registered)?;
}
Ok(plugin_id)
}
/// Install a plugin from a file or URL
///
/// # Errors
///
/// Returns an error if the plugin cannot be downloaded, the manifest cannot
/// be read, or the plugin cannot be loaded.
pub async fn install_plugin(&self, source: &str) -> Result<String> {
info!("Installing plugin from: {}", source);
// Download/copy plugin to plugins directory
let plugin_path =
if source.starts_with("http://") || source.starts_with("https://") {
// Download from URL
self.loader.download_plugin(source).await?
} else {
// Copy from local file
PathBuf::from(source)
};
// Load the manifest
let manifest_path = plugin_path.join("plugin.toml");
let manifest =
pinakes_plugin_api::PluginManifest::from_file(&manifest_path)?;
// Load the plugin
self.load_plugin_from_manifest(&manifest).await
}
/// Uninstall a plugin
///
/// # Errors
///
/// Returns an error if the plugin ID is invalid, the plugin cannot be shut
/// down, cannot be unregistered, or its data directories cannot be removed.
pub async fn uninstall_plugin(&self, plugin_id: &str) -> Result<()> {
// Validate plugin_id to prevent path traversal
if plugin_id.contains('/')
|| plugin_id.contains('\\')
|| plugin_id.contains("..")
{
return Err(anyhow::anyhow!("Invalid plugin ID: {plugin_id}"));
}
info!("Uninstalling plugin: {}", plugin_id);
// Shutdown plugin first
self.shutdown_plugin(plugin_id).await?;
// Remove from registry
{
let mut registry = self.registry.write().await;
registry.unregister(plugin_id)?;
}
// Remove plugin data and cache
let plugin_data_dir = self.data_dir.join(plugin_id);
let plugin_cache_dir = self.cache_dir.join(plugin_id);
if plugin_data_dir.exists() {
std::fs::remove_dir_all(&plugin_data_dir)?;
}
if plugin_cache_dir.exists() {
std::fs::remove_dir_all(&plugin_cache_dir)?;
}
Ok(())
}
/// Enable a plugin
///
/// # Errors
///
/// Returns an error if the plugin ID is not found in the registry.
pub async fn enable_plugin(&self, plugin_id: &str) -> Result<()> {
let mut registry = self.registry.write().await;
registry.enable(plugin_id)
}
/// Disable a plugin
///
/// # Errors
///
/// Returns an error if the plugin ID is not found in the registry.
pub async fn disable_plugin(&self, plugin_id: &str) -> Result<()> {
let mut registry = self.registry.write().await;
registry.disable(plugin_id)
}
/// Shutdown a specific plugin
///
/// # Errors
///
/// Returns an error if the plugin ID is not found in the registry.
pub async fn shutdown_plugin(&self, plugin_id: &str) -> Result<()> {
debug!("Shutting down plugin: {}", plugin_id);
let registry = self.registry.read().await;
if let Some(plugin) = registry.get(plugin_id) {
let _ = plugin.wasm_plugin.call_function("shutdown", &[]).await;
Ok(())
} else {
Err(anyhow::anyhow!("Plugin not found: {plugin_id}"))
}
}
/// Shutdown all plugins
///
/// # Errors
///
/// This function always returns `Ok(())`. Individual plugin shutdown errors
/// are logged but do not cause the overall operation to fail.
pub async fn shutdown_all(&self) -> Result<()> {
info!("Shutting down all plugins");
let plugin_ids: Vec<String> = {
let registry = self.registry.read().await;
registry.list_all().iter().map(|p| p.id.clone()).collect()
};
for plugin_id in plugin_ids {
if let Err(e) = self.shutdown_plugin(&plugin_id).await {
error!("Failed to shutdown plugin {}: {}", plugin_id, e);
}
}
Ok(())
}
/// Get list of all registered plugins
pub async fn list_plugins(&self) -> Vec<PluginMetadata> {
let registry = self.registry.read().await;
registry
.list_all()
.iter()
.map(|p| p.metadata.clone())
.collect()
}
/// Get plugin metadata by ID
pub async fn get_plugin(&self, plugin_id: &str) -> Option<PluginMetadata> {
let registry = self.registry.read().await;
registry.get(plugin_id).map(|p| p.metadata.clone())
}
/// Get enabled plugins of a specific kind, sorted by priority (ascending).
///
/// # Returns
///
/// `(plugin_id, priority, kinds, wasm_plugin)` tuples.
pub async fn get_enabled_by_kind_sorted(
&self,
kind: &str,
) -> Vec<(String, u16, Vec<String>, WasmPlugin)> {
let registry = self.registry.read().await;
let mut plugins: Vec<_> = registry
.get_by_kind(kind)
.into_iter()
.filter(|p| p.enabled)
.map(|p| {
(
p.id.clone(),
p.manifest.plugin.priority,
p.manifest.plugin.kind.clone(),
p.wasm_plugin.clone(),
)
})
.collect();
drop(registry);
plugins.sort_by_key(|(_, priority, ..)| *priority);
plugins
}
/// Get a reference to the capability enforcer.
#[must_use]
pub const fn enforcer(&self) -> &CapabilityEnforcer {
&self.enforcer
}
/// List all UI pages provided by loaded plugins.
///
/// Returns a vector of `(plugin_id, page)` tuples for all enabled plugins
/// that provide pages in their manifests. Both inline and file-referenced
/// page entries are resolved.
pub async fn list_ui_pages(
&self,
) -> Vec<(String, pinakes_plugin_api::UiPage)> {
self
.list_ui_pages_with_endpoints()
.await
.into_iter()
.map(|(id, page, _)| (id, page))
.collect()
}
/// List all UI pages provided by loaded plugins, including each plugin's
/// declared endpoint allowlist.
///
/// Returns a vector of `(plugin_id, page, allowed_endpoints)` tuples. The
/// `allowed_endpoints` list mirrors the `required_endpoints` field from the
/// plugin manifest's `[ui]` section.
pub async fn list_ui_pages_with_endpoints(
&self,
) -> Vec<(String, pinakes_plugin_api::UiPage, Vec<String>)> {
let registry = self.registry.read().await;
let mut pages = Vec::new();
for plugin in registry.list_all() {
if !plugin.enabled {
continue;
}
let allowed = plugin.manifest.ui.required_endpoints.clone();
let plugin_dir = plugin
.manifest_path
.as_ref()
.and_then(|p| p.parent())
.map(std::path::Path::to_path_buf);
let Some(plugin_dir) = plugin_dir else {
for entry in &plugin.manifest.ui.pages {
if let pinakes_plugin_api::manifest::UiPageEntry::Inline(page) = entry
{
pages.push((plugin.id.clone(), (**page).clone(), allowed.clone()));
}
}
continue;
};
match plugin.manifest.load_ui_pages(&plugin_dir) {
Ok(loaded) => {
for page in loaded {
pages.push((plugin.id.clone(), page, allowed.clone()));
}
},
Err(e) => {
tracing::warn!(
"Failed to load UI pages for plugin '{}': {e}",
plugin.id
);
},
}
}
pages
}
/// Collect CSS custom property overrides declared by all enabled plugins.
///
/// When multiple plugins declare the same property name, later-loaded plugins
/// overwrite earlier ones. Returns an empty map if no plugins are loaded or
/// none declare theme extensions.
pub async fn list_ui_theme_extensions(
&self,
) -> rustc_hash::FxHashMap<String, String> {
let registry = self.registry.read().await;
let mut merged = rustc_hash::FxHashMap::default();
for plugin in registry.list_all() {
if !plugin.enabled {
continue;
}
for (k, v) in &plugin.manifest.ui.theme_extensions {
merged.insert(k.clone(), v.clone());
}
}
merged
}
/// List all UI widgets provided by loaded plugins.
///
/// Returns a vector of `(plugin_id, widget)` tuples for all enabled plugins
/// that provide widgets in their manifests.
pub async fn list_ui_widgets(
&self,
) -> Vec<(String, pinakes_plugin_api::UiWidget)> {
let registry = self.registry.read().await;
let mut widgets = Vec::new();
for plugin in registry.list_all() {
if !plugin.enabled {
continue;
}
for widget in &plugin.manifest.ui.widgets {
widgets.push((plugin.id.clone(), widget.clone()));
}
}
widgets
}
/// Check if a plugin is loaded and enabled
pub async fn is_plugin_enabled(&self, plugin_id: &str) -> bool {
let registry = self.registry.read().await;
registry.is_enabled(plugin_id).unwrap_or(false)
}
/// Reload a plugin (for hot-reload during development)
///
/// # Errors
///
/// Returns an error if hot-reload is disabled, the plugin is not found, it
/// cannot be shut down, or the reloaded plugin cannot be registered.
pub async fn reload_plugin(&self, plugin_id: &str) -> Result<()> {
if !self.config.enable_hot_reload {
return Err(anyhow::anyhow!("Hot-reload is disabled"));
}
info!("Reloading plugin: {}", plugin_id);
// Re-read the manifest from disk if possible, falling back to cached
// version
let manifest = {
let registry = self.registry.read().await;
let plugin = registry
.get(plugin_id)
.ok_or_else(|| anyhow::anyhow!("Plugin not found"))?;
let manifest = plugin.manifest_path.as_ref().map_or_else(
|| plugin.manifest.clone(),
|manifest_path| {
pinakes_plugin_api::PluginManifest::from_file(manifest_path)
.unwrap_or_else(|e| {
warn!(
"Failed to re-read manifest from disk, using cached: {}",
e
);
plugin.manifest.clone()
})
},
);
drop(registry);
manifest
};
// Shutdown and unload current version
self.shutdown_plugin(plugin_id).await?;
{
let mut registry = self.registry.write().await;
registry.unregister(plugin_id)?;
}
// Reload from manifest
self.load_plugin_from_manifest(&manifest).await?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use tempfile::TempDir;
use super::*;
#[tokio::test]
async fn test_plugin_manager_creation() {
let temp_dir = TempDir::new().unwrap();
let data_dir = temp_dir.path().join("data");
let cache_dir = temp_dir.path().join("cache");
let config = PluginManagerConfig::default();
let manager =
PluginManager::new(data_dir.clone(), cache_dir.clone(), config);
assert!(manager.is_ok());
assert!(data_dir.exists());
assert!(cache_dir.exists());
}
#[tokio::test]
async fn test_list_plugins_empty() {
let temp_dir = TempDir::new().unwrap();
let data_dir = temp_dir.path().join("data");
let cache_dir = temp_dir.path().join("cache");
let config = PluginManagerConfig::default();
let manager = PluginManager::new(data_dir, cache_dir, config).unwrap();
let plugins = manager.list_plugins().await;
assert_eq!(plugins.len(), 0);
}
/// Build a minimal manifest for dependency resolution tests
fn test_manifest(
name: &str,
deps: Vec<String>,
) -> pinakes_plugin_api::PluginManifest {
use pinakes_plugin_api::manifest::{PluginBinary, PluginInfo};
pinakes_plugin_api::PluginManifest {
plugin: PluginInfo {
name: name.to_string(),
version: "1.0.0".to_string(),
api_version: "1.0".to_string(),
author: None,
description: None,
homepage: None,
license: None,
priority: 500,
kind: vec!["media_type".to_string()],
binary: PluginBinary {
wasm: "plugin.wasm".to_string(),
entrypoint: None,
},
dependencies: deps,
},
capabilities: Default::default(),
config: Default::default(),
ui: Default::default(),
}
}
#[test]
fn test_resolve_load_order_no_deps() {
let manifests = vec![
test_manifest("alpha", vec![]),
test_manifest("beta", vec![]),
test_manifest("gamma", vec![]),
];
let ordered = PluginManager::resolve_load_order(&manifests);
assert_eq!(ordered.len(), 3);
}
#[test]
fn test_resolve_load_order_linear_chain() {
// gamma depends on beta, beta depends on alpha
let manifests = vec![
test_manifest("gamma", vec!["beta".to_string()]),
test_manifest("alpha", vec![]),
test_manifest("beta", vec!["alpha".to_string()]),
];
let ordered = PluginManager::resolve_load_order(&manifests);
assert_eq!(ordered.len(), 3);
let names: Vec<&str> =
ordered.iter().map(|m| m.plugin.name.as_str()).collect();
let alpha_pos = names.iter().position(|&n| n == "alpha").unwrap();
let beta_pos = names.iter().position(|&n| n == "beta").unwrap();
let gamma_pos = names.iter().position(|&n| n == "gamma").unwrap();
assert!(alpha_pos < beta_pos, "alpha must load before beta");
assert!(beta_pos < gamma_pos, "beta must load before gamma");
}
#[test]
fn test_resolve_load_order_cycle_detected() {
// A -> B -> C -> A (cycle)
let manifests = vec![
test_manifest("a", vec!["c".to_string()]),
test_manifest("b", vec!["a".to_string()]),
test_manifest("c", vec!["b".to_string()]),
];
let ordered = PluginManager::resolve_load_order(&manifests);
// All three should be excluded due to cycle
assert_eq!(ordered.len(), 0);
}
#[test]
fn test_resolve_load_order_missing_dependency() {
let manifests = vec![
test_manifest("good", vec![]),
test_manifest("bad", vec!["nonexistent".to_string()]),
];
let ordered = PluginManager::resolve_load_order(&manifests);
// Only "good" should be loaded; "bad" depends on something missing
assert_eq!(ordered.len(), 1);
assert_eq!(ordered[0].plugin.name, "good");
}
#[test]
fn test_resolve_load_order_partial_cycle() {
// "ok" has no deps, "cycle_a" and "cycle_b" form a cycle
let manifests = vec![
test_manifest("ok", vec![]),
test_manifest("cycle_a", vec!["cycle_b".to_string()]),
test_manifest("cycle_b", vec!["cycle_a".to_string()]),
];
let ordered = PluginManager::resolve_load_order(&manifests);
assert_eq!(ordered.len(), 1);
assert_eq!(ordered[0].plugin.name, "ok");
}
#[test]
fn test_resolve_load_order_diamond() {
// Man look at how beautiful my diamond is...
// A
// / \
// B C
// \ /
// D
let manifests = vec![
test_manifest("d", vec!["b".to_string(), "c".to_string()]),
test_manifest("b", vec!["a".to_string()]),
test_manifest("c", vec!["a".to_string()]),
test_manifest("a", vec![]),
];
let ordered = PluginManager::resolve_load_order(&manifests);
assert_eq!(ordered.len(), 4);
let names: Vec<&str> =
ordered.iter().map(|m| m.plugin.name.as_str()).collect();
let a_pos = names.iter().position(|&n| n == "a").unwrap();
let b_pos = names.iter().position(|&n| n == "b").unwrap();
let c_pos = names.iter().position(|&n| n == "c").unwrap();
let d_pos = names.iter().position(|&n| n == "d").unwrap();
assert!(a_pos < b_pos);
assert!(a_pos < c_pos);
assert!(b_pos < d_pos);
assert!(c_pos < d_pos);
}
}

View file

@ -0,0 +1,309 @@
//! Plugin registry for managing loaded plugins
use std::path::PathBuf;
use anyhow::{Result, anyhow};
use pinakes_plugin_api::{PluginManifest, PluginMetadata};
use rustc_hash::FxHashMap;
use super::runtime::WasmPlugin;
/// A registered plugin with its metadata and runtime state
#[derive(Clone)]
pub struct RegisteredPlugin {
pub id: String,
pub metadata: PluginMetadata,
pub wasm_plugin: WasmPlugin,
pub manifest: PluginManifest,
pub manifest_path: Option<PathBuf>,
pub enabled: bool,
}
/// Plugin registry maintains the state of all loaded plugins
pub struct PluginRegistry {
/// Map of plugin ID to registered plugin
plugins: FxHashMap<String, RegisteredPlugin>,
}
impl PluginRegistry {
/// Create a new empty registry
#[must_use]
pub fn new() -> Self {
Self {
plugins: FxHashMap::default(),
}
}
/// Register a new plugin
///
/// # Errors
///
/// Returns an error if a plugin with the same ID is already registered.
pub fn register(&mut self, plugin: RegisteredPlugin) -> Result<()> {
if self.plugins.contains_key(&plugin.id) {
return Err(anyhow!("Plugin already registered: {}", plugin.id));
}
self.plugins.insert(plugin.id.clone(), plugin);
Ok(())
}
/// Unregister a plugin by ID
///
/// # Errors
///
/// Returns an error if the plugin ID is not found.
pub fn unregister(&mut self, plugin_id: &str) -> Result<()> {
self
.plugins
.remove(plugin_id)
.ok_or_else(|| anyhow!("Plugin not found: {plugin_id}"))?;
Ok(())
}
/// Get a plugin by ID
#[must_use]
pub fn get(&self, plugin_id: &str) -> Option<&RegisteredPlugin> {
self.plugins.get(plugin_id)
}
/// Get a mutable reference to a plugin by ID
pub fn get_mut(&mut self, plugin_id: &str) -> Option<&mut RegisteredPlugin> {
self.plugins.get_mut(plugin_id)
}
/// Check if a plugin is loaded
#[must_use]
pub fn is_loaded(&self, plugin_id: &str) -> bool {
self.plugins.contains_key(plugin_id)
}
/// Check if a plugin is enabled. Returns `None` if the plugin is not found.
#[must_use]
pub fn is_enabled(&self, plugin_id: &str) -> Option<bool> {
self.plugins.get(plugin_id).map(|p| p.enabled)
}
/// Enable a plugin
///
/// # Errors
///
/// Returns an error if the plugin ID is not found.
pub fn enable(&mut self, plugin_id: &str) -> Result<()> {
let plugin = self
.plugins
.get_mut(plugin_id)
.ok_or_else(|| anyhow!("Plugin not found: {plugin_id}"))?;
plugin.enabled = true;
Ok(())
}
/// Disable a plugin
///
/// # Errors
///
/// Returns an error if the plugin ID is not found.
pub fn disable(&mut self, plugin_id: &str) -> Result<()> {
let plugin = self
.plugins
.get_mut(plugin_id)
.ok_or_else(|| anyhow!("Plugin not found: {plugin_id}"))?;
plugin.enabled = false;
Ok(())
}
/// List all registered plugins
#[must_use]
pub fn list_all(&self) -> Vec<&RegisteredPlugin> {
self.plugins.values().collect()
}
/// List all enabled plugins
#[must_use]
pub fn list_enabled(&self) -> Vec<&RegisteredPlugin> {
self.plugins.values().filter(|p| p.enabled).collect()
}
/// Get plugins by kind (e.g., "`media_type`", "`metadata_extractor`")
#[must_use]
pub fn get_by_kind(&self, kind: &str) -> Vec<&RegisteredPlugin> {
self
.plugins
.values()
.filter(|p| p.manifest.plugin.kind.iter().any(|k| k == kind))
.collect()
}
/// Get count of registered plugins
#[must_use]
pub fn count(&self) -> usize {
self.plugins.len()
}
/// Get count of enabled plugins
#[must_use]
pub fn count_enabled(&self) -> usize {
self.plugins.values().filter(|p| p.enabled).count()
}
}
impl Default for PluginRegistry {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use pinakes_plugin_api::{Capabilities, manifest::ManifestCapabilities};
use rustc_hash::FxHashMap;
use super::*;
fn create_test_plugin(id: &str, kind: Vec<String>) -> RegisteredPlugin {
let manifest = PluginManifest {
plugin: pinakes_plugin_api::manifest::PluginInfo {
name: id.to_string(),
version: "1.0.0".to_string(),
api_version: "1.0".to_string(),
author: Some("Test".to_string()),
description: Some("Test plugin".to_string()),
homepage: None,
license: None,
kind,
binary: pinakes_plugin_api::manifest::PluginBinary {
wasm: "test.wasm".to_string(),
entrypoint: None,
},
dependencies: vec![],
priority: 0,
},
capabilities: ManifestCapabilities::default(),
config: FxHashMap::default(),
ui: Default::default(),
};
RegisteredPlugin {
id: id.to_string(),
metadata: PluginMetadata {
id: id.to_string(),
name: id.to_string(),
version: "1.0.0".to_string(),
author: "Test".to_string(),
description: "Test plugin".to_string(),
api_version: "1.0".to_string(),
capabilities_required: Capabilities::default(),
},
wasm_plugin: WasmPlugin::default(),
manifest,
manifest_path: None,
enabled: true,
}
}
#[test]
fn test_registry_register_and_get() {
let mut registry = PluginRegistry::new();
let plugin =
create_test_plugin("test-plugin", vec!["media_type".to_string()]);
registry.register(plugin).unwrap();
assert!(registry.is_loaded("test-plugin"));
assert!(registry.get("test-plugin").is_some());
}
#[test]
fn test_registry_duplicate_register() {
let mut registry = PluginRegistry::new();
let plugin =
create_test_plugin("test-plugin", vec!["media_type".to_string()]);
registry.register(plugin.clone()).unwrap();
let result = registry.register(plugin);
assert!(result.is_err());
}
#[test]
fn test_registry_unregister() {
let mut registry = PluginRegistry::new();
let plugin =
create_test_plugin("test-plugin", vec!["media_type".to_string()]);
registry.register(plugin).unwrap();
registry.unregister("test-plugin").unwrap();
assert!(!registry.is_loaded("test-plugin"));
}
#[test]
fn test_registry_enable_disable() {
let mut registry = PluginRegistry::new();
let plugin =
create_test_plugin("test-plugin", vec!["media_type".to_string()]);
registry.register(plugin).unwrap();
assert_eq!(registry.is_enabled("test-plugin"), Some(true));
registry.disable("test-plugin").unwrap();
assert_eq!(registry.is_enabled("test-plugin"), Some(false));
registry.enable("test-plugin").unwrap();
assert_eq!(registry.is_enabled("test-plugin"), Some(true));
assert_eq!(registry.is_enabled("nonexistent"), None);
}
#[test]
fn test_registry_get_by_kind() {
let mut registry = PluginRegistry::new();
registry
.register(create_test_plugin("plugin1", vec![
"media_type".to_string(),
]))
.unwrap();
registry
.register(create_test_plugin("plugin2", vec![
"metadata_extractor".to_string(),
]))
.unwrap();
registry
.register(create_test_plugin("plugin3", vec![
"media_type".to_string(),
]))
.unwrap();
let media_type_plugins = registry.get_by_kind("media_type");
assert_eq!(media_type_plugins.len(), 2);
let extractor_plugins = registry.get_by_kind("metadata_extractor");
assert_eq!(extractor_plugins.len(), 1);
}
#[test]
fn test_registry_counts() {
let mut registry = PluginRegistry::new();
registry
.register(create_test_plugin("plugin1", vec![
"media_type".to_string(),
]))
.unwrap();
registry
.register(create_test_plugin("plugin2", vec![
"media_type".to_string(),
]))
.unwrap();
assert_eq!(registry.count(), 2);
assert_eq!(registry.count_enabled(), 2);
registry.disable("plugin1").unwrap();
assert_eq!(registry.count(), 2);
assert_eq!(registry.count_enabled(), 1);
}
}

View file

@ -0,0 +1,240 @@
//! JSON RPC types for structured plugin function calls.
//!
//! Each extension point maps to well-known exported function names.
//! Requests are serialized to JSON, passed to the plugin, and responses
//! are deserialized from JSON written by the plugin via `host_set_result`.
use std::path::PathBuf;
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
/// Request to check if a plugin can handle a file
#[derive(Debug, Serialize)]
pub struct CanHandleRequest {
pub path: PathBuf,
pub mime_type: Option<String>,
}
/// Response from `can_handle`
#[derive(Debug, Deserialize)]
pub struct CanHandleResponse {
pub can_handle: bool,
}
/// Media type definition returned by `supported_media_types`
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PluginMediaTypeDefinition {
pub id: String,
pub name: String,
pub category: Option<String>,
pub extensions: Vec<String>,
pub mime_types: Vec<String>,
}
/// Request to extract metadata from a file
#[derive(Debug, Serialize)]
pub struct ExtractMetadataRequest {
pub path: PathBuf,
}
/// Metadata response from a plugin (all fields optional for partial results)
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct ExtractMetadataResponse {
#[serde(default)]
pub title: Option<String>,
#[serde(default)]
pub artist: Option<String>,
#[serde(default)]
pub album: Option<String>,
#[serde(default)]
pub genre: Option<String>,
#[serde(default)]
pub year: Option<i32>,
#[serde(default)]
pub duration_secs: Option<f64>,
#[serde(default)]
pub description: Option<String>,
#[serde(default)]
pub extra: FxHashMap<String, String>,
}
/// Request to generate a thumbnail
#[derive(Debug, Serialize)]
pub struct GenerateThumbnailRequest {
pub source_path: PathBuf,
pub output_path: PathBuf,
pub max_width: u32,
pub max_height: u32,
pub format: String,
}
/// Response from thumbnail generation
#[derive(Debug, Deserialize)]
pub struct GenerateThumbnailResponse {
pub path: PathBuf,
pub width: u32,
pub height: u32,
pub format: String,
}
/// Event sent to event handler plugins
#[derive(Debug, Serialize)]
pub struct HandleEventRequest {
pub event_type: String,
pub payload: serde_json::Value,
}
/// Search request for search backend plugins
#[derive(Debug, Serialize)]
pub struct SearchRequest {
pub query: String,
pub limit: usize,
pub offset: usize,
}
/// Search response
#[derive(Debug, Clone, Deserialize)]
pub struct SearchResponse {
pub results: Vec<SearchResultItem>,
#[serde(default)]
pub total_count: Option<usize>,
}
/// Individual search result
#[derive(Debug, Clone, Deserialize)]
pub struct SearchResultItem {
pub id: String,
pub score: f64,
pub snippet: Option<String>,
}
/// Request to index a media item in a search backend
#[derive(Debug, Serialize)]
pub struct IndexItemRequest {
pub id: String,
pub title: Option<String>,
pub artist: Option<String>,
pub album: Option<String>,
pub description: Option<String>,
pub tags: Vec<String>,
pub media_type: String,
pub path: PathBuf,
}
/// Request to remove a media item from a search backend
#[derive(Debug, Serialize)]
pub struct RemoveItemRequest {
pub id: String,
}
/// A theme definition returned by a theme provider plugin
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PluginThemeDefinition {
pub id: String,
pub name: String,
pub description: Option<String>,
pub dark: bool,
}
/// Response from `load_theme`
#[derive(Debug, Clone, Deserialize)]
pub struct LoadThemeResponse {
pub css: Option<String>,
pub colors: FxHashMap<String, String>,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_extract_metadata_request_serialization() {
let req = ExtractMetadataRequest {
path: "/tmp/test.mp3".into(),
};
let json = serde_json::to_string(&req).unwrap();
assert!(json.contains("/tmp/test.mp3"));
}
#[test]
fn test_extract_metadata_response_partial() {
let json = r#"{"title":"My Song","extra":{"bpm":"120"}}"#;
let resp: ExtractMetadataResponse = serde_json::from_str(json).unwrap();
assert_eq!(resp.title.as_deref(), Some("My Song"));
assert_eq!(resp.artist, None);
assert_eq!(resp.extra.get("bpm").map(String::as_str), Some("120"));
}
#[test]
fn test_extract_metadata_response_empty() {
let json = "{}";
let resp: ExtractMetadataResponse = serde_json::from_str(json).unwrap();
assert_eq!(resp.title, None);
assert!(resp.extra.is_empty());
}
#[test]
fn test_can_handle_response() {
let json = r#"{"can_handle":true}"#;
let resp: CanHandleResponse = serde_json::from_str(json).unwrap();
assert!(resp.can_handle);
}
#[test]
fn test_can_handle_response_false() {
let json = r#"{"can_handle":false}"#;
let resp: CanHandleResponse = serde_json::from_str(json).unwrap();
assert!(!resp.can_handle);
}
#[test]
fn test_plugin_media_type_definition_round_trip() {
let def = PluginMediaTypeDefinition {
id: "heif".to_string(),
name: "HEIF Image".to_string(),
category: Some("image".to_string()),
extensions: vec!["heif".to_string(), "heic".to_string()],
mime_types: vec!["image/heif".to_string()],
};
let json = serde_json::to_string(&def).unwrap();
let parsed: PluginMediaTypeDefinition =
serde_json::from_str(&json).unwrap();
assert_eq!(parsed.id, "heif");
assert_eq!(parsed.extensions.len(), 2);
}
#[test]
fn test_search_response() {
let json =
r#"{"results":[{"id":"abc","score":0.95,"snippet":"match here"}]}"#;
let resp: SearchResponse = serde_json::from_str(json).unwrap();
assert_eq!(resp.results.len(), 1);
assert_eq!(resp.results[0].id, "abc");
}
#[test]
fn test_generate_thumbnail_request_serialization() {
let req = GenerateThumbnailRequest {
source_path: "/media/photo.heif".into(),
output_path: "/tmp/thumb.jpg".into(),
max_width: 256,
max_height: 256,
format: "jpeg".to_string(),
};
let json = serde_json::to_string(&req).unwrap();
assert!(json.contains("photo.heif"));
assert!(json.contains("256"));
}
#[test]
fn test_handle_event_request_serialization() {
let req = HandleEventRequest {
event_type: "MediaImported".to_string(),
payload: serde_json::json!({"id": "abc-123"}),
};
let json = serde_json::to_string(&req).unwrap();
assert!(json.contains("MediaImported"));
assert!(json.contains("abc-123"));
}
}

View file

@ -0,0 +1,925 @@
//! WASM runtime for executing plugins
use std::{path::Path, sync::Arc};
use anyhow::{Result, anyhow};
use pinakes_plugin_api::PluginContext;
use wasmtime::{
Caller,
Config,
Engine,
Linker,
Module,
Store,
StoreLimitsBuilder,
Val,
anyhow,
};
/// WASM runtime wrapper for executing plugins
pub struct WasmRuntime {
engine: Engine,
}
impl WasmRuntime {
/// Create a new WASM runtime
///
/// # Errors
///
/// Returns an error if the WASM engine cannot be created with the given
/// configuration.
pub fn new() -> Result<Self> {
let mut config = Config::new();
config.wasm_component_model(true);
config.max_wasm_stack(1024 * 1024); // 1MB stack
config.consume_fuel(true); // enable fuel metering for CPU limits
let engine = Engine::new(&config)?;
Ok(Self { engine })
}
/// Load a plugin from a WASM file
///
/// # Errors
///
/// Returns an error if the WASM file does not exist, cannot be read, or
/// cannot be compiled.
pub fn load_plugin(
&self,
wasm_path: &Path,
context: PluginContext,
) -> Result<WasmPlugin> {
if !wasm_path.exists() {
return Err(anyhow!("WASM file not found: {}", wasm_path.display()));
}
let wasm_bytes = std::fs::read(wasm_path)?;
let module = Module::new(&self.engine, &wasm_bytes)?;
Ok(WasmPlugin {
module: Arc::new(module),
context,
})
}
}
/// Store data passed to each WASM invocation
pub struct PluginStoreData {
pub context: PluginContext,
pub exchange_buffer: Vec<u8>,
pub pending_events: Vec<(String, String)>,
pub limiter: wasmtime::StoreLimits,
}
/// A loaded WASM plugin instance
#[derive(Clone)]
pub struct WasmPlugin {
module: Arc<Module>,
context: PluginContext,
}
impl WasmPlugin {
/// Get the plugin context
#[must_use]
pub const fn context(&self) -> &PluginContext {
&self.context
}
/// Execute a plugin function, returning both the result bytes and any
/// events the plugin queued via `host_emit_event`.
///
/// Creates a fresh store and instance per invocation with host functions
/// linked, calls the requested exported function, drains both the exchange
/// buffer and the pending events list before the store is dropped, and
/// returns both.
///
/// # Errors
///
/// Returns an error if the function cannot be found, instantiation fails,
/// or the function call returns an error.
pub async fn call_function_with_events(
&self,
function_name: &str,
params: &[u8],
) -> Result<(Vec<u8>, Vec<(String, String)>)> {
let engine = self.module.engine();
// Build memory limiter from capabilities
let memory_limit = self
.context
.capabilities
.max_memory_bytes
.unwrap_or(512 * 1024 * 1024); // default 512 MB
let limiter = StoreLimitsBuilder::new().memory_size(memory_limit).build();
let store_data = PluginStoreData {
context: self.context.clone(),
exchange_buffer: Vec::new(),
pending_events: Vec::new(),
limiter,
};
let mut store = Store::new(engine, store_data);
store.limiter(|data| &mut data.limiter);
// Set fuel limit based on capabilities
if let Some(max_cpu_time_ms) = self.context.capabilities.max_cpu_time_ms {
let fuel = max_cpu_time_ms * 100_000;
store.set_fuel(fuel)?;
} else {
store.set_fuel(1_000_000_000)?;
}
let mut linker = Linker::new(engine);
HostFunctions::setup_linker(&mut linker)?;
let instance = linker.instantiate_async(&mut store, &self.module).await?;
let memory = instance.get_memory(&mut store, "memory");
// If there are params and memory is available, write them to the module
let mut alloc_offset: i32 = 0;
if !params.is_empty()
&& let Some(mem) = &memory
{
// Call the plugin's alloc function if available, otherwise write at
// offset 0
let offset = if let Ok(alloc) =
instance.get_typed_func::<i32, i32>(&mut store, "alloc")
{
let result = alloc
.call_async(
&mut store,
i32::try_from(params.len()).unwrap_or(i32::MAX),
)
.await?;
if result < 0 {
return Err(anyhow!(
"plugin alloc returned negative offset: {result}"
));
}
u32::try_from(result).unwrap_or(0) as usize
} else {
0
};
alloc_offset = i32::try_from(offset).unwrap_or(i32::MAX);
let mem_data = mem.data_mut(&mut store);
if offset + params.len() <= mem_data.len() {
mem_data[offset..offset + params.len()].copy_from_slice(params);
}
}
let func =
instance
.get_func(&mut store, function_name)
.ok_or_else(|| {
anyhow!("exported function '{function_name}' not found")
})?;
let func_ty = func.ty(&store);
let param_count = func_ty.params().len();
let result_count = func_ty.results().len();
let mut results = vec![Val::I32(0); result_count];
// Call with appropriate params based on function signature; convention:
// (ptr, len)
if param_count == 2 && !params.is_empty() {
func
.call_async(
&mut store,
&[
Val::I32(alloc_offset),
Val::I32(i32::try_from(params.len()).unwrap_or(i32::MAX)),
],
&mut results,
)
.await?;
} else if param_count == 0 {
func.call_async(&mut store, &[], &mut results).await?;
} else {
// Generic: fill with zeroes
let params_vals: Vec<Val> =
std::iter::repeat_n(Val::I32(0), param_count).collect();
func
.call_async(&mut store, &params_vals, &mut results)
.await?;
}
// Drain both buffers before the store is dropped.
let pending_events = std::mem::take(&mut store.data_mut().pending_events);
let exchange = std::mem::take(&mut store.data_mut().exchange_buffer);
let result = if !exchange.is_empty() {
exchange
} else if let Some(Val::I32(ret)) = results.first() {
ret.to_le_bytes().to_vec()
} else {
Vec::new()
};
Ok((result, pending_events))
}
/// Execute a plugin function, discarding any events the plugin queued.
///
/// This is a thin wrapper around [`Self::call_function_with_events`].
///
/// # Errors
///
/// Returns an error if the function cannot be found, instantiation fails,
/// or the function call returns an error.
pub async fn call_function(
&self,
function_name: &str,
params: &[u8],
) -> Result<Vec<u8>> {
let (data, _events) = self
.call_function_with_events(function_name, params)
.await?;
Ok(data)
}
/// Call a plugin function with JSON request/response serialization.
///
/// Serializes `request` to JSON, calls the named function, deserializes
/// the response. Wraps the call with `tokio::time::timeout`.
///
/// # Errors
///
/// Returns an error if serialization fails, the call times out, the plugin
/// traps, or the response is malformed JSON.
#[allow(clippy::future_not_send)] // Req doesn't need Sync; called within local tasks
pub async fn call_function_json<Req, Resp>(
&self,
function_name: &str,
request: &Req,
timeout: std::time::Duration,
) -> anyhow::Result<Resp>
where
Req: serde::Serialize,
Resp: serde::de::DeserializeOwned,
{
let request_bytes = serde_json::to_vec(request)
.map_err(|e| anyhow::anyhow!("failed to serialize request: {e}"))?;
let result = tokio::time::timeout(
timeout,
self.call_function(function_name, &request_bytes),
)
.await
.map_err(|_| {
anyhow::anyhow!(
"plugin call '{function_name}' timed out after {timeout:?}"
)
})??;
serde_json::from_slice(&result).map_err(|e| {
anyhow::anyhow!(
"failed to deserialize response from '{function_name}': {e}"
)
})
}
/// Call a plugin function with JSON serialization, also returning any
/// events the plugin queued via `host_emit_event`.
///
/// Mirrors [`Self::call_function_json`] but delegates to
/// [`Self::call_function_with_events`] so the pending events list is not
/// discarded before returning.
///
/// # Errors
///
/// Returns an error if serialization fails, the call times out, the plugin
/// traps, or the response is malformed JSON.
#[allow(clippy::future_not_send)] // Req doesn't need Sync; called within local tasks
pub async fn call_function_json_with_events<Req, Resp>(
&self,
function_name: &str,
request: &Req,
timeout: std::time::Duration,
) -> anyhow::Result<(Resp, Vec<(String, String)>)>
where
Req: serde::Serialize,
Resp: serde::de::DeserializeOwned,
{
let request_bytes = serde_json::to_vec(request)
.map_err(|e| anyhow::anyhow!("failed to serialize request: {e}"))?;
let (result, pending_events) = tokio::time::timeout(
timeout,
self.call_function_with_events(function_name, &request_bytes),
)
.await
.map_err(|_| {
anyhow::anyhow!(
"plugin call '{function_name}' timed out after {timeout:?}"
)
})??;
let resp = serde_json::from_slice(&result).map_err(|e| {
anyhow::anyhow!(
"failed to deserialize response from '{function_name}': {e}"
)
})?;
Ok((resp, pending_events))
}
}
#[cfg(test)]
impl Default for WasmPlugin {
fn default() -> Self {
let engine = Engine::default();
let module = Module::new(&engine, br"(module)").unwrap();
Self {
module: Arc::new(module),
context: PluginContext {
data_dir: std::env::temp_dir(),
cache_dir: std::env::temp_dir(),
config: Default::default(),
capabilities: Default::default(),
},
}
}
}
/// Host functions that plugins can call
pub struct HostFunctions;
impl HostFunctions {
/// Registers all host ABI functions (`host_log`, `host_read_file`,
/// `host_write_file`, `host_http_request`, `host_get_config`,
/// `host_get_env`, `host_get_buffer`, `host_set_result`,
/// `host_emit_event`) into the given linker.
///
/// # Errors
///
/// Returns an error if any host function cannot be registered in the linker.
pub fn setup_linker(linker: &mut Linker<PluginStoreData>) -> Result<()> {
linker.func_wrap(
"env",
"host_log",
|mut caller: Caller<'_, PluginStoreData>,
level: i32,
ptr: i32,
len: i32| {
if ptr < 0 || len < 0 {
return;
}
let memory = caller
.get_export("memory")
.and_then(wasmtime::Extern::into_memory);
if let Some(mem) = memory {
let data = mem.data(&caller);
let start = u32::try_from(ptr).unwrap_or(0) as usize;
let end = start + u32::try_from(len).unwrap_or(0) as usize;
if end <= data.len()
&& let Ok(msg) = std::str::from_utf8(&data[start..end])
{
match level {
0 => tracing::error!(plugin = true, "{}", msg),
1 => tracing::warn!(plugin = true, "{}", msg),
2 => tracing::info!(plugin = true, "{}", msg),
_ => tracing::debug!(plugin = true, "{}", msg),
}
}
}
},
)?;
linker.func_wrap(
"env",
"host_read_file",
|mut caller: Caller<'_, PluginStoreData>,
path_ptr: i32,
path_len: i32|
-> i32 {
if path_ptr < 0 || path_len < 0 {
return -1;
}
let memory = caller
.get_export("memory")
.and_then(wasmtime::Extern::into_memory);
let Some(mem) = memory else { return -1 };
let data = mem.data(&caller);
let start = u32::try_from(path_ptr).unwrap_or(0) as usize;
let end = start + u32::try_from(path_len).unwrap_or(0) as usize;
if end > data.len() {
return -1;
}
let path_str = match std::str::from_utf8(&data[start..end]) {
Ok(s) => s.to_string(),
Err(_) => return -1,
};
// Canonicalize path before checking permissions to prevent traversal
let Ok(path) = std::path::Path::new(&path_str).canonicalize() else {
return -1;
};
// Check read permission against canonicalized path
let can_read = caller
.data()
.context
.capabilities
.filesystem
.read
.iter()
.any(|allowed| {
allowed.canonicalize().is_ok_and(|a| path.starts_with(a))
});
if !can_read {
tracing::warn!(path = %path_str, "plugin read access denied");
return -2;
}
std::fs::read(&path).map_or(-1, |contents| {
let len = i32::try_from(contents.len()).unwrap_or(i32::MAX);
caller.data_mut().exchange_buffer = contents;
len
})
},
)?;
linker.func_wrap(
"env",
"host_write_file",
|mut caller: Caller<'_, PluginStoreData>,
path_ptr: i32,
path_len: i32,
data_ptr: i32,
data_len: i32|
-> i32 {
if path_ptr < 0 || path_len < 0 || data_ptr < 0 || data_len < 0 {
return -1;
}
let memory = caller
.get_export("memory")
.and_then(wasmtime::Extern::into_memory);
let Some(mem) = memory else { return -1 };
let mem_data = mem.data(&caller);
let path_start = u32::try_from(path_ptr).unwrap_or(0) as usize;
let path_end =
path_start + u32::try_from(path_len).unwrap_or(0) as usize;
let data_start = u32::try_from(data_ptr).unwrap_or(0) as usize;
let data_end =
data_start + u32::try_from(data_len).unwrap_or(0) as usize;
if path_end > mem_data.len() || data_end > mem_data.len() {
return -1;
}
let path_str =
match std::str::from_utf8(&mem_data[path_start..path_end]) {
Ok(s) => s.to_string(),
Err(_) => return -1,
};
let file_data = mem_data[data_start..data_end].to_vec();
// Canonicalize path for write (file may not exist yet)
let path = std::path::Path::new(&path_str);
let canonical = if path.exists() {
path.canonicalize().ok()
} else {
path
.parent()
.and_then(|p| p.canonicalize().ok())
.map(|p| p.join(path.file_name().unwrap_or_default()))
};
let Some(canonical) = canonical else {
return -1;
};
// Check write permission against canonicalized path
let can_write = caller
.data()
.context
.capabilities
.filesystem
.write
.iter()
.any(|allowed| {
allowed
.canonicalize()
.is_ok_and(|a| canonical.starts_with(a))
});
if !can_write {
tracing::warn!(path = %path_str, "plugin write access denied");
return -2;
}
match std::fs::write(&canonical, &file_data) {
Ok(()) => 0,
Err(_) => -1,
}
},
)?;
linker.func_wrap(
"env",
"host_http_request",
|mut caller: Caller<'_, PluginStoreData>,
url_ptr: i32,
url_len: i32|
-> i32 {
if url_ptr < 0 || url_len < 0 {
return -1;
}
let memory = caller
.get_export("memory")
.and_then(wasmtime::Extern::into_memory);
let Some(mem) = memory else { return -1 };
let data = mem.data(&caller);
let start = u32::try_from(url_ptr).unwrap_or(0) as usize;
let end = start + u32::try_from(url_len).unwrap_or(0) as usize;
if end > data.len() {
return -1;
}
let url_str = match std::str::from_utf8(&data[start..end]) {
Ok(s) => s.to_string(),
Err(_) => return -1,
};
// Check network permission
if !caller.data().context.capabilities.network.enabled {
tracing::warn!(url = %url_str, "plugin network access denied");
return -2;
}
// Check domain whitelist if configured
if let Some(ref allowed) =
caller.data().context.capabilities.network.allowed_domains
{
let parsed = if let Ok(u) = url::Url::parse(&url_str) {
u
} else {
tracing::warn!(url = %url_str, "plugin provided invalid URL");
return -1;
};
let domain = parsed.host_str().unwrap_or("");
if !allowed.iter().any(|d| d.eq_ignore_ascii_case(domain)) {
tracing::warn!(
url = %url_str,
domain = domain,
"plugin domain not in allowlist"
);
return -3;
}
}
// Use block_in_place to avoid blocking the async runtime's thread pool.
// Falls back to a blocking client with timeout if block_in_place is
// unavailable.
let result = std::panic::catch_unwind(|| {
tokio::task::block_in_place(|| {
tokio::runtime::Handle::current().block_on(async {
let client = reqwest::Client::builder()
.timeout(std::time::Duration::from_secs(30))
.build()
.map_err(|e| e.to_string())?;
let resp = client
.get(&url_str)
.send()
.await
.map_err(|e| e.to_string())?;
let bytes = resp.bytes().await.map_err(|e| e.to_string())?;
Ok::<_, String>(bytes)
})
})
});
match result {
Ok(Ok(bytes)) => {
let len = i32::try_from(bytes.len()).unwrap_or(i32::MAX);
caller.data_mut().exchange_buffer = bytes.to_vec();
len
},
Ok(Err(_)) => -1,
Err(_) => {
// block_in_place panicked (e.g. current-thread runtime);
// fall back to blocking client with timeout
let Ok(client) = reqwest::blocking::Client::builder()
.timeout(std::time::Duration::from_secs(30))
.build()
else {
return -1;
};
client.get(&url_str).send().map_or(-1, |resp| {
resp.bytes().map_or(-1, |bytes| {
let len = i32::try_from(bytes.len()).unwrap_or(i32::MAX);
caller.data_mut().exchange_buffer = bytes.to_vec();
len
})
})
},
}
},
)?;
linker.func_wrap(
"env",
"host_get_config",
|mut caller: Caller<'_, PluginStoreData>,
key_ptr: i32,
key_len: i32|
-> i32 {
if key_ptr < 0 || key_len < 0 {
return -1;
}
let memory = caller
.get_export("memory")
.and_then(wasmtime::Extern::into_memory);
let Some(mem) = memory else { return -1 };
let data = mem.data(&caller);
let start = u32::try_from(key_ptr).unwrap_or(0) as usize;
let end = start + u32::try_from(key_len).unwrap_or(0) as usize;
if end > data.len() {
return -1;
}
let key_str = match std::str::from_utf8(&data[start..end]) {
Ok(s) => s.to_string(),
Err(_) => return -1,
};
let bytes = caller
.data()
.context
.config
.get(&key_str)
.map(|value| value.to_string().into_bytes());
bytes.map_or(-1, |b| {
let len = i32::try_from(b.len()).unwrap_or(i32::MAX);
caller.data_mut().exchange_buffer = b;
len
})
},
)?;
linker.func_wrap(
"env",
"host_get_env",
|mut caller: Caller<'_, PluginStoreData>,
key_ptr: i32,
key_len: i32|
-> i32 {
if key_ptr < 0 || key_len < 0 {
return -1;
}
let memory = caller
.get_export("memory")
.and_then(wasmtime::Extern::into_memory);
let Some(mem) = memory else { return -1 };
let data = mem.data(&caller);
let start = u32::try_from(key_ptr).unwrap_or(0) as usize;
let end = start + u32::try_from(key_len).unwrap_or(0) as usize;
if end > data.len() {
return -1;
}
let key_str = match std::str::from_utf8(&data[start..end]) {
Ok(s) => s.to_string(),
Err(_) => return -1,
};
// Check environment capability
let env_cap = &caller.data().context.capabilities.environment;
if !env_cap.enabled {
tracing::warn!(
var = %key_str,
"plugin environment access denied"
);
return -2;
}
// Check against allowed variables list if configured
if let Some(ref allowed) = env_cap.allowed_vars
&& !allowed.iter().any(|v| v == &key_str)
{
tracing::warn!(
var = %key_str,
"plugin env var not in allowlist"
);
return -2;
}
match std::env::var(&key_str) {
Ok(value) => {
let bytes = value.into_bytes();
let len = i32::try_from(bytes.len()).unwrap_or(i32::MAX);
caller.data_mut().exchange_buffer = bytes;
len
},
Err(_) => -1,
}
},
)?;
linker.func_wrap(
"env",
"host_get_buffer",
|mut caller: Caller<'_, PluginStoreData>,
dest_ptr: i32,
dest_len: i32|
-> i32 {
if dest_ptr < 0 || dest_len < 0 {
return -1;
}
let buf = caller.data().exchange_buffer.clone();
let copy_len =
buf.len().min(u32::try_from(dest_len).unwrap_or(0) as usize);
let memory = caller
.get_export("memory")
.and_then(wasmtime::Extern::into_memory);
let Some(mem) = memory else { return -1 };
let mem_data = mem.data_mut(&mut caller);
let start = u32::try_from(dest_ptr).unwrap_or(0) as usize;
if start + copy_len > mem_data.len() {
return -1;
}
mem_data[start..start + copy_len].copy_from_slice(&buf[..copy_len]);
i32::try_from(copy_len).unwrap_or(i32::MAX)
},
)?;
linker.func_wrap(
"env",
"host_set_result",
|mut caller: Caller<'_, PluginStoreData>, ptr: i32, len: i32| {
if ptr < 0 || len < 0 {
return;
}
let memory = caller
.get_export("memory")
.and_then(wasmtime::Extern::into_memory);
let Some(mem) = memory else { return };
let data = mem.data(&caller);
let start = u32::try_from(ptr).unwrap_or(0) as usize;
let end = start + u32::try_from(len).unwrap_or(0) as usize;
if end <= data.len() {
caller.data_mut().exchange_buffer = data[start..end].to_vec();
}
},
)?;
linker.func_wrap(
"env",
"host_emit_event",
|mut caller: Caller<'_, PluginStoreData>,
type_ptr: i32,
type_len: i32,
payload_ptr: i32,
payload_len: i32|
-> i32 {
const MAX_PENDING_EVENTS: usize = 1000;
if type_ptr < 0 || type_len < 0 || payload_ptr < 0 || payload_len < 0 {
return -1;
}
let memory = caller
.get_export("memory")
.and_then(wasmtime::Extern::into_memory);
let Some(mem) = memory else { return -1 };
let type_start = u32::try_from(type_ptr).unwrap_or(0) as usize;
let type_end =
type_start + u32::try_from(type_len).unwrap_or(0) as usize;
let payload_start = u32::try_from(payload_ptr).unwrap_or(0) as usize;
let payload_end =
payload_start + u32::try_from(payload_len).unwrap_or(0) as usize;
// Extract owned strings in a block so the immutable borrow of
// `caller` (via `mem.data`) is dropped before `caller.data_mut()`.
let (event_type, payload) = {
let data = mem.data(&caller);
if type_end > data.len() || payload_end > data.len() {
return -1;
}
let event_type =
match std::str::from_utf8(&data[type_start..type_end]) {
Ok(s) => s.to_string(),
Err(_) => return -1,
};
let payload =
match std::str::from_utf8(&data[payload_start..payload_end]) {
Ok(s) => s.to_string(),
Err(_) => return -1,
};
(event_type, payload)
};
if caller.data().pending_events.len() >= MAX_PENDING_EVENTS {
tracing::warn!("plugin exceeded max pending events limit");
return -4;
}
caller.data_mut().pending_events.push((event_type, payload));
0
},
)?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use pinakes_plugin_api::PluginContext;
use rustc_hash::FxHashMap;
use super::*;
#[test]
fn test_wasm_runtime_creation() {
let runtime = WasmRuntime::new();
assert!(runtime.is_ok());
}
#[test]
fn test_host_functions_file_access() {
let mut capabilities = pinakes_plugin_api::Capabilities::default();
capabilities.filesystem.read.push("/tmp".into());
capabilities.filesystem.write.push("/tmp/output".into());
let context = PluginContext {
data_dir: "/tmp/data".into(),
cache_dir: "/tmp/cache".into(),
config: Default::default(),
capabilities,
};
// Verify capability checks work via context fields
let can_read = context
.capabilities
.filesystem
.read
.iter()
.any(|p| Path::new("/tmp/test.txt").starts_with(p));
assert!(can_read);
let cant_read = context
.capabilities
.filesystem
.read
.iter()
.any(|p| Path::new("/etc/passwd").starts_with(p));
assert!(!cant_read);
let can_write = context
.capabilities
.filesystem
.write
.iter()
.any(|p| Path::new("/tmp/output/file.txt").starts_with(p));
assert!(can_write);
let cant_write = context
.capabilities
.filesystem
.write
.iter()
.any(|p| Path::new("/tmp/file.txt").starts_with(p));
assert!(!cant_write);
}
#[test]
fn test_host_functions_network_access() {
let mut context = PluginContext {
data_dir: "/tmp/data".into(),
cache_dir: "/tmp/cache".into(),
config: FxHashMap::default(),
capabilities: Default::default(),
};
assert!(!context.capabilities.network.enabled);
context.capabilities.network.enabled = true;
assert!(context.capabilities.network.enabled);
}
#[test]
fn test_linker_setup() {
let engine = Engine::default();
let mut linker = Linker::<PluginStoreData>::new(&engine);
let result = HostFunctions::setup_linker(&mut linker);
assert!(result.is_ok());
}
}

View file

@ -0,0 +1,473 @@
//! Capability-based security for plugins
use std::path::{Path, PathBuf};
use anyhow::{Result, anyhow};
use pinakes_plugin_api::Capabilities;
/// Capability enforcer validates and enforces plugin capabilities
pub struct CapabilityEnforcer {
/// Maximum allowed memory per plugin (bytes)
max_memory_limit: usize,
/// Maximum allowed CPU time per plugin (milliseconds)
max_cpu_time_limit: u64,
/// Allowed filesystem read paths (system-wide)
allowed_read_paths: Vec<PathBuf>,
/// Allowed filesystem write paths (system-wide)
allowed_write_paths: Vec<PathBuf>,
/// Whether to allow network access by default
allow_network_default: bool,
}
impl CapabilityEnforcer {
/// Create a new capability enforcer with default limits
#[must_use]
pub const fn new() -> Self {
Self {
max_memory_limit: 512 * 1024 * 1024, // 512 MB
max_cpu_time_limit: 60 * 1000, // 60 seconds
allowed_read_paths: vec![],
allowed_write_paths: vec![],
allow_network_default: false,
}
}
/// Set maximum memory limit
#[must_use]
pub const fn with_max_memory(mut self, bytes: usize) -> Self {
self.max_memory_limit = bytes;
self
}
/// Set maximum CPU time limit
#[must_use]
pub const fn with_max_cpu_time(mut self, milliseconds: u64) -> Self {
self.max_cpu_time_limit = milliseconds;
self
}
/// Add allowed read path
#[must_use]
pub fn allow_read_path(mut self, path: PathBuf) -> Self {
self.allowed_read_paths.push(path);
self
}
/// Add allowed write path
#[must_use]
pub fn allow_write_path(mut self, path: PathBuf) -> Self {
self.allowed_write_paths.push(path);
self
}
/// Set default network access policy
#[must_use]
pub const fn with_network_default(mut self, allow: bool) -> Self {
self.allow_network_default = allow;
self
}
/// Validate capabilities requested by a plugin
///
/// # Errors
///
/// Returns an error if the plugin requests capabilities that exceed the
/// configured system limits, such as memory, CPU time, filesystem paths, or
/// network access.
pub fn validate_capabilities(
&self,
capabilities: &Capabilities,
) -> Result<()> {
// Validate memory limit
if let Some(memory) = capabilities.max_memory_bytes
&& memory > self.max_memory_limit
{
return Err(anyhow!(
"Requested memory ({} bytes) exceeds limit ({} bytes)",
memory,
self.max_memory_limit
));
}
// Validate CPU time limit
if let Some(cpu_time) = capabilities.max_cpu_time_ms
&& cpu_time > self.max_cpu_time_limit
{
return Err(anyhow!(
"Requested CPU time ({} ms) exceeds limit ({} ms)",
cpu_time,
self.max_cpu_time_limit
));
}
// Validate filesystem access
self.validate_filesystem_access(capabilities)?;
// Validate network access
if capabilities.network.enabled && !self.allow_network_default {
return Err(anyhow!(
"Plugin requests network access, but network access is disabled by \
policy"
));
}
Ok(())
}
/// Validate filesystem access capabilities
fn validate_filesystem_access(
&self,
capabilities: &Capabilities,
) -> Result<()> {
// Check read paths
for path in &capabilities.filesystem.read {
if !self.is_read_allowed(path) {
return Err(anyhow!(
"Plugin requests read access to {} which is not in allowed paths",
path.display()
));
}
}
// Check write paths
for path in &capabilities.filesystem.write {
if !self.is_write_allowed(path) {
return Err(anyhow!(
"Plugin requests write access to {} which is not in allowed paths",
path.display()
));
}
}
Ok(())
}
/// Check if a path is allowed for reading
#[must_use]
pub fn is_read_allowed(&self, path: &Path) -> bool {
if self.allowed_read_paths.is_empty() {
return false; // deny-all when unconfigured
}
let Ok(canonical) = path.canonicalize() else {
return false;
};
self.allowed_read_paths.iter().any(|allowed| {
allowed
.canonicalize()
.is_ok_and(|a| canonical.starts_with(a))
})
}
/// Check if a path is allowed for writing
#[must_use]
pub fn is_write_allowed(&self, path: &Path) -> bool {
if self.allowed_write_paths.is_empty() {
return false; // deny-all when unconfigured
}
let canonical = if path.exists() {
path.canonicalize().ok()
} else {
path
.parent()
.and_then(|p| p.canonicalize().ok())
.map(|p| p.join(path.file_name().unwrap_or_default()))
};
let Some(canonical) = canonical else {
return false;
};
self.allowed_write_paths.iter().any(|allowed| {
allowed
.canonicalize()
.is_ok_and(|a| canonical.starts_with(a))
})
}
/// Check if network access is allowed for a plugin
#[must_use]
pub const fn is_network_allowed(&self, capabilities: &Capabilities) -> bool {
capabilities.network.enabled && self.allow_network_default
}
/// Check if a specific domain is allowed
#[must_use]
pub fn is_domain_allowed(
&self,
capabilities: &Capabilities,
domain: &str,
) -> bool {
if !capabilities.network.enabled {
return false;
}
// If no domain restrictions, allow all domains
if capabilities.network.allowed_domains.is_none() {
return self.allow_network_default;
}
// Check against allowed domains list
capabilities
.network
.allowed_domains
.as_ref()
.is_some_and(|domains| {
domains.iter().any(|d| d.eq_ignore_ascii_case(domain))
})
}
/// Get effective memory limit for a plugin
#[must_use]
pub fn get_memory_limit(&self, capabilities: &Capabilities) -> usize {
capabilities
.max_memory_bytes
.unwrap_or(self.max_memory_limit)
.min(self.max_memory_limit)
}
/// Get effective CPU time limit for a plugin
#[must_use]
pub fn get_cpu_time_limit(&self, capabilities: &Capabilities) -> u64 {
capabilities
.max_cpu_time_ms
.unwrap_or(self.max_cpu_time_limit)
.min(self.max_cpu_time_limit)
}
/// Validate that a function call is allowed for a plugin's declared kinds.
///
/// Defense-in-depth: even though the pipeline filters by kind, this prevents
/// bugs from calling wrong functions on plugins. Returns `true` if allowed.
#[must_use]
pub fn validate_function_call(
&self,
plugin_kinds: &[String],
function_name: &str,
) -> bool {
match function_name {
// Lifecycle functions are always allowed
"initialize" | "shutdown" | "health_check" => true,
// MediaTypeProvider
"supported_media_types" | "can_handle" => {
plugin_kinds.iter().any(|k| k == "media_type")
},
// supported_types is shared by metadata_extractor and thumbnail_generator
"supported_types" => {
plugin_kinds
.iter()
.any(|k| k == "metadata_extractor" || k == "thumbnail_generator")
},
// MetadataExtractor
"extract_metadata" => {
plugin_kinds.iter().any(|k| k == "metadata_extractor")
},
// ThumbnailGenerator
"generate_thumbnail" => {
plugin_kinds.iter().any(|k| k == "thumbnail_generator")
},
// SearchBackend
"search" | "index_item" | "remove_item" | "get_stats" => {
plugin_kinds.iter().any(|k| k == "search_backend")
},
// EventHandler
"interested_events" | "handle_event" => {
plugin_kinds.iter().any(|k| k == "event_handler")
},
// ThemeProvider
"get_themes" | "load_theme" => {
plugin_kinds.iter().any(|k| k == "theme_provider")
},
// Unknown function names are not allowed
_ => false,
}
}
}
impl Default for CapabilityEnforcer {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
#[allow(unused_imports)]
use pinakes_plugin_api::{FilesystemCapability, NetworkCapability};
use super::*;
#[test]
fn test_validate_memory_limit() {
let enforcer = CapabilityEnforcer::new().with_max_memory(100 * 1024 * 1024); // 100 MB
let mut caps = Capabilities::default();
caps.max_memory_bytes = Some(50 * 1024 * 1024); // 50 MB - OK
assert!(enforcer.validate_capabilities(&caps).is_ok());
caps.max_memory_bytes = Some(200 * 1024 * 1024); // 200 MB - exceeds limit
assert!(enforcer.validate_capabilities(&caps).is_err());
}
#[test]
fn test_validate_cpu_time_limit() {
let enforcer = CapabilityEnforcer::new().with_max_cpu_time(30_000); // 30 seconds
let mut caps = Capabilities::default();
caps.max_cpu_time_ms = Some(10_000); // 10 seconds - OK
assert!(enforcer.validate_capabilities(&caps).is_ok());
caps.max_cpu_time_ms = Some(60_000); // 60 seconds - exceeds limit
assert!(enforcer.validate_capabilities(&caps).is_err());
}
#[test]
fn test_filesystem_read_allowed() {
// Use real temp directories so canonicalize works
let tmp = tempfile::tempdir().unwrap();
let allowed_dir = tmp.path().join("allowed");
std::fs::create_dir_all(&allowed_dir).unwrap();
let test_file = allowed_dir.join("test.txt");
std::fs::write(&test_file, "test").unwrap();
let enforcer = CapabilityEnforcer::new().allow_read_path(allowed_dir);
assert!(enforcer.is_read_allowed(&test_file));
assert!(!enforcer.is_read_allowed(Path::new("/etc/passwd")));
}
#[test]
fn test_filesystem_read_denied_when_empty() {
let enforcer = CapabilityEnforcer::new();
assert!(!enforcer.is_read_allowed(Path::new("/tmp/test.txt")));
}
#[test]
fn test_filesystem_write_allowed() {
let tmp = tempfile::tempdir().unwrap();
let output_dir = tmp.path().join("output");
std::fs::create_dir_all(&output_dir).unwrap();
// Existing file in allowed dir
let existing = output_dir.join("file.txt");
std::fs::write(&existing, "test").unwrap();
let enforcer =
CapabilityEnforcer::new().allow_write_path(output_dir.clone());
assert!(enforcer.is_write_allowed(&existing));
// New file in allowed dir (parent exists)
assert!(enforcer.is_write_allowed(&output_dir.join("new_file.txt")));
assert!(!enforcer.is_write_allowed(Path::new("/etc/config")));
}
#[test]
fn test_filesystem_write_denied_when_empty() {
let enforcer = CapabilityEnforcer::new();
assert!(!enforcer.is_write_allowed(Path::new("/tmp/file.txt")));
}
#[test]
fn test_network_allowed() {
let enforcer = CapabilityEnforcer::new().with_network_default(true);
let mut caps = Capabilities::default();
caps.network.enabled = true;
assert!(enforcer.is_network_allowed(&caps));
caps.network.enabled = false;
assert!(!enforcer.is_network_allowed(&caps));
}
#[test]
fn test_domain_restrictions() {
let enforcer = CapabilityEnforcer::new().with_network_default(true);
let mut caps = Capabilities::default();
caps.network.enabled = true;
caps.network.allowed_domains = Some(vec![
"api.example.com".to_string(),
"cdn.example.com".to_string(),
]);
assert!(enforcer.is_domain_allowed(&caps, "api.example.com"));
assert!(enforcer.is_domain_allowed(&caps, "cdn.example.com"));
assert!(!enforcer.is_domain_allowed(&caps, "evil.com"));
}
#[test]
fn test_get_effective_limits() {
let enforcer = CapabilityEnforcer::new()
.with_max_memory(100 * 1024 * 1024)
.with_max_cpu_time(30_000);
let mut caps = Capabilities::default();
// No limits specified, use the defaults
assert_eq!(enforcer.get_memory_limit(&caps), 100 * 1024 * 1024);
assert_eq!(enforcer.get_cpu_time_limit(&caps), 30_000);
// Plugin requests lower limits, use plugin's
caps.max_memory_bytes = Some(50 * 1024 * 1024);
caps.max_cpu_time_ms = Some(10_000);
assert_eq!(enforcer.get_memory_limit(&caps), 50 * 1024 * 1024);
assert_eq!(enforcer.get_cpu_time_limit(&caps), 10_000);
// Plugin requests higher limits, cap at system max
caps.max_memory_bytes = Some(200 * 1024 * 1024);
caps.max_cpu_time_ms = Some(60_000);
assert_eq!(enforcer.get_memory_limit(&caps), 100 * 1024 * 1024);
assert_eq!(enforcer.get_cpu_time_limit(&caps), 30_000);
}
#[test]
fn test_validate_function_call_lifecycle_always_allowed() {
let enforcer = CapabilityEnforcer::new();
let kinds = vec!["metadata_extractor".to_string()];
assert!(enforcer.validate_function_call(&kinds, "initialize"));
assert!(enforcer.validate_function_call(&kinds, "shutdown"));
assert!(enforcer.validate_function_call(&kinds, "health_check"));
}
#[test]
fn test_validate_function_call_metadata_extractor() {
let enforcer = CapabilityEnforcer::new();
let kinds = vec!["metadata_extractor".to_string()];
assert!(enforcer.validate_function_call(&kinds, "extract_metadata"));
assert!(enforcer.validate_function_call(&kinds, "supported_types"));
assert!(!enforcer.validate_function_call(&kinds, "search"));
assert!(!enforcer.validate_function_call(&kinds, "generate_thumbnail"));
assert!(!enforcer.validate_function_call(&kinds, "can_handle"));
}
#[test]
fn test_validate_function_call_multi_kind() {
let enforcer = CapabilityEnforcer::new();
let kinds =
vec!["media_type".to_string(), "metadata_extractor".to_string()];
assert!(enforcer.validate_function_call(&kinds, "can_handle"));
assert!(enforcer.validate_function_call(&kinds, "supported_media_types"));
assert!(enforcer.validate_function_call(&kinds, "extract_metadata"));
assert!(!enforcer.validate_function_call(&kinds, "search"));
}
#[test]
fn test_validate_function_call_unknown_function() {
let enforcer = CapabilityEnforcer::new();
let kinds = vec!["metadata_extractor".to_string()];
assert!(!enforcer.validate_function_call(&kinds, "unknown_func"));
assert!(!enforcer.validate_function_call(&kinds, ""));
}
#[test]
fn test_validate_function_call_shared_supported_types() {
let enforcer = CapabilityEnforcer::new();
let extractor = vec!["metadata_extractor".to_string()];
let generator = vec!["thumbnail_generator".to_string()];
let search = vec!["search_backend".to_string()];
assert!(enforcer.validate_function_call(&extractor, "supported_types"));
assert!(enforcer.validate_function_call(&generator, "supported_types"));
assert!(!enforcer.validate_function_call(&search, "supported_types"));
}
}

View file

@ -0,0 +1,252 @@
//! Plugin signature verification using Ed25519 + BLAKE3
//!
//! Each plugin directory may contain a `plugin.sig` file alongside its
//! `plugin.toml`. The signature covers the BLAKE3 hash of the WASM binary
//! referenced by the manifest. Verification uses Ed25519 public keys
//! configured as trusted in the server's plugin settings.
//!
//! When `allow_unsigned` is false, plugins _must_ carry a valid signature
//! from one of the trusted keys or they will be rejected at load time.
use std::path::Path;
use anyhow::{Result, anyhow};
use ed25519_dalek::{Signature, Verifier, VerifyingKey};
/// Outcome of a signature check on a plugin package.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SignatureStatus {
/// Signature is present and valid against a trusted key.
Valid,
/// No signature file found.
Unsigned,
/// Signature file exists but does not match any trusted key.
Invalid(String),
}
/// Verify the signature of a plugin's WASM binary.
///
/// Reads `plugin.sig` from `plugin_dir`, computes the BLAKE3 hash of the
/// WASM binary at `wasm_path`, and verifies the signature against each of
/// the `trusted_keys`. The signature file is raw 64-byte Ed25519 signature
/// over the 32-byte BLAKE3 digest.
///
/// # Errors
///
/// Returns an error only on I/O failures, never for cryptographic rejection,
/// which is reported via [`SignatureStatus`] instead.
pub fn verify_plugin_signature(
plugin_dir: &Path,
wasm_path: &Path,
trusted_keys: &[VerifyingKey],
) -> Result<SignatureStatus> {
let sig_path = plugin_dir.join("plugin.sig");
if !sig_path.exists() {
return Ok(SignatureStatus::Unsigned);
}
let sig_bytes = std::fs::read(&sig_path)
.map_err(|e| anyhow!("failed to read plugin.sig: {e}"))?;
let signature = Signature::from_slice(&sig_bytes).map_err(|e| {
// Malformed signature file is an invalid signature, not an I/O error
tracing::warn!(path = %sig_path.display(), "malformed plugin.sig: {e}");
anyhow!("malformed plugin.sig: {e}")
});
let Ok(signature) = signature else {
return Ok(SignatureStatus::Invalid(
"malformed signature file".to_string(),
));
};
// BLAKE3 hash of the WASM binary is the signed message
let wasm_bytes = std::fs::read(wasm_path)
.map_err(|e| anyhow!("failed to read WASM binary for verification: {e}"))?;
let digest = blake3::hash(&wasm_bytes);
let message = digest.as_bytes();
for key in trusted_keys {
if key.verify(message, &signature).is_ok() {
return Ok(SignatureStatus::Valid);
}
}
Ok(SignatureStatus::Invalid(
"signature did not match any trusted key".to_string(),
))
}
/// Parse a hex-encoded Ed25519 public key (64 hex characters = 32 bytes).
///
/// # Errors
///
/// Returns an error if the string is not valid hex or is the wrong length.
pub fn parse_public_key(hex_str: &str) -> Result<VerifyingKey> {
let hex_str = hex_str.trim();
if hex_str.len() != 64 {
return Err(anyhow!(
"expected 64 hex characters for Ed25519 public key, got {}",
hex_str.len()
));
}
let mut bytes = [0u8; 32];
for (i, byte) in bytes.iter_mut().enumerate() {
*byte = u8::from_str_radix(&hex_str[i * 2..i * 2 + 2], 16)
.map_err(|e| anyhow!("invalid hex in public key: {e}"))?;
}
VerifyingKey::from_bytes(&bytes)
.map_err(|e| anyhow!("invalid Ed25519 public key: {e}"))
}
#[cfg(test)]
mod tests {
use ed25519_dalek::{Signer, SigningKey};
use rand::RngExt;
use super::*;
fn make_keypair() -> (SigningKey, VerifyingKey) {
let secret_bytes: [u8; 32] = rand::rng().random();
let signing = SigningKey::from_bytes(&secret_bytes);
let verifying = signing.verifying_key();
(signing, verifying)
}
#[test]
fn test_verify_unsigned_plugin() {
let dir = tempfile::tempdir().unwrap();
let wasm_path = dir.path().join("plugin.wasm");
std::fs::write(&wasm_path, b"\0asm\x01\x00\x00\x00").unwrap();
let (_, vk) = make_keypair();
let status =
verify_plugin_signature(dir.path(), &wasm_path, &[vk]).unwrap();
assert_eq!(status, SignatureStatus::Unsigned);
}
#[test]
fn test_verify_valid_signature() {
let dir = tempfile::tempdir().unwrap();
let wasm_path = dir.path().join("plugin.wasm");
let wasm_bytes = b"\0asm\x01\x00\x00\x00some_code_here";
std::fs::write(&wasm_path, wasm_bytes).unwrap();
let (sk, vk) = make_keypair();
// Sign the BLAKE3 hash of the WASM binary
let digest = blake3::hash(wasm_bytes);
let signature = sk.sign(digest.as_bytes());
std::fs::write(dir.path().join("plugin.sig"), signature.to_bytes())
.unwrap();
let status =
verify_plugin_signature(dir.path(), &wasm_path, &[vk]).unwrap();
assert_eq!(status, SignatureStatus::Valid);
}
#[test]
fn test_verify_wrong_key() {
let dir = tempfile::tempdir().unwrap();
let wasm_path = dir.path().join("plugin.wasm");
let wasm_bytes = b"\0asm\x01\x00\x00\x00some_code";
std::fs::write(&wasm_path, wasm_bytes).unwrap();
let (sk, _) = make_keypair();
let (_, wrong_vk) = make_keypair();
let digest = blake3::hash(wasm_bytes);
let signature = sk.sign(digest.as_bytes());
std::fs::write(dir.path().join("plugin.sig"), signature.to_bytes())
.unwrap();
let status =
verify_plugin_signature(dir.path(), &wasm_path, &[wrong_vk]).unwrap();
assert!(matches!(status, SignatureStatus::Invalid(_)));
}
#[test]
fn test_verify_tampered_wasm() {
let dir = tempfile::tempdir().unwrap();
let wasm_path = dir.path().join("plugin.wasm");
let original = b"\0asm\x01\x00\x00\x00original";
std::fs::write(&wasm_path, original).unwrap();
let (sk, vk) = make_keypair();
let digest = blake3::hash(original);
let signature = sk.sign(digest.as_bytes());
std::fs::write(dir.path().join("plugin.sig"), signature.to_bytes())
.unwrap();
// Tamper with the WASM file after signing
std::fs::write(&wasm_path, b"\0asm\x01\x00\x00\x00tampered").unwrap();
let status =
verify_plugin_signature(dir.path(), &wasm_path, &[vk]).unwrap();
assert!(matches!(status, SignatureStatus::Invalid(_)));
}
#[test]
fn test_verify_malformed_sig_file() {
let dir = tempfile::tempdir().unwrap();
let wasm_path = dir.path().join("plugin.wasm");
std::fs::write(&wasm_path, b"\0asm\x01\x00\x00\x00").unwrap();
// Write garbage to plugin.sig (wrong length)
std::fs::write(dir.path().join("plugin.sig"), b"not a signature").unwrap();
let (_, vk) = make_keypair();
let status =
verify_plugin_signature(dir.path(), &wasm_path, &[vk]).unwrap();
assert!(matches!(status, SignatureStatus::Invalid(_)));
}
#[test]
fn test_verify_multiple_trusted_keys() {
let dir = tempfile::tempdir().unwrap();
let wasm_path = dir.path().join("plugin.wasm");
let wasm_bytes = b"\0asm\x01\x00\x00\x00multi_key_test";
std::fs::write(&wasm_path, wasm_bytes).unwrap();
let (sk2, vk2) = make_keypair();
let (_, vk1) = make_keypair();
let (_, vk3) = make_keypair();
// Sign with key 2
let digest = blake3::hash(wasm_bytes);
let signature = sk2.sign(digest.as_bytes());
std::fs::write(dir.path().join("plugin.sig"), signature.to_bytes())
.unwrap();
// Verify against [vk1, vk2, vk3]; should find vk2
let status =
verify_plugin_signature(dir.path(), &wasm_path, &[vk1, vk2, vk3])
.unwrap();
assert_eq!(status, SignatureStatus::Valid);
}
#[test]
fn test_parse_public_key_valid() {
let (_, vk) = make_keypair();
let hex = hex_encode(vk.as_bytes());
let parsed = parse_public_key(&hex).unwrap();
assert_eq!(parsed, vk);
}
#[test]
fn test_parse_public_key_wrong_length() {
assert!(parse_public_key("abcdef").is_err());
}
#[test]
fn test_parse_public_key_invalid_hex() {
let bad =
"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz";
assert!(parse_public_key(bad).is_err());
}
fn hex_encode(bytes: &[u8]) -> String {
bytes.iter().map(|b| format!("{b:02x}")).collect()
}
}

View file

@ -0,0 +1,21 @@
[package]
name = "pinakes-sync"
edition.workspace = true
version.workspace = true
license.workspace = true
[dependencies]
pinakes-types = { workspace = true }
tokio = { workspace = true }
chrono = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
tracing = { workspace = true }
uuid = { workspace = true }
blake3 = { workspace = true }
[dev-dependencies]
tempfile = { workspace = true }
[lints]
workspace = true

View file

@ -0,0 +1,326 @@
//! Chunked upload handling for large file sync.
use std::path::{Path, PathBuf};
use chrono::Utc;
use pinakes_types::error::{PinakesError, Result};
use tokio::{
fs,
io::{AsyncReadExt, AsyncSeekExt, AsyncWriteExt},
};
use tracing::{debug, info};
use uuid::Uuid;
use super::{ChunkInfo, UploadSession};
/// Manager for chunked uploads.
#[derive(Debug, Clone)]
pub struct ChunkedUploadManager {
temp_dir: PathBuf,
}
impl ChunkedUploadManager {
/// Create a new chunked upload manager.
#[must_use]
pub const fn new(temp_dir: PathBuf) -> Self {
Self { temp_dir }
}
/// Initialize the temp directory.
///
/// # Errors
///
/// Returns an error if the directory cannot be created.
pub async fn init(&self) -> Result<()> {
fs::create_dir_all(&self.temp_dir).await?;
Ok(())
}
/// Get the temp file path for an upload session.
#[must_use]
pub fn temp_path(&self, session_id: Uuid) -> PathBuf {
self.temp_dir.join(format!("{session_id}.upload"))
}
/// Create the temp file for a new upload session.
///
/// # Errors
///
/// Returns an error if the file cannot be created or sized.
pub async fn create_temp_file(&self, session: &UploadSession) -> Result<()> {
let path = self.temp_path(session.id);
// Create a sparse file of the expected size
let file = fs::File::create(&path).await?;
file.set_len(session.expected_size).await?;
debug!(
session_id = %session.id,
size = session.expected_size,
"created temp file for upload"
);
Ok(())
}
/// Write a chunk to the temp file.
///
/// # Errors
///
/// Returns an error if the session file is not found, the chunk index is out
/// of range, the chunk size is wrong, or the write fails.
pub async fn write_chunk(
&self,
session: &UploadSession,
chunk_index: u64,
data: &[u8],
) -> Result<ChunkInfo> {
let path = self.temp_path(session.id);
if !path.exists() {
return Err(PinakesError::UploadSessionNotFound(session.id.to_string()));
}
// Calculate offset
let offset = chunk_index * session.chunk_size;
// Validate chunk
if offset >= session.expected_size {
return Err(PinakesError::ChunkOutOfOrder {
expected: session.chunk_count - 1,
actual: chunk_index,
});
}
// Calculate expected chunk size
let expected_size = if chunk_index == session.chunk_count - 1 {
// Last chunk may be smaller
session.expected_size - offset
} else {
session.chunk_size
};
if data.len() as u64 != expected_size {
return Err(PinakesError::InvalidData(format!(
"chunk {} has wrong size: expected {}, got {}",
chunk_index,
expected_size,
data.len()
)));
}
// Write chunk to file at offset
let mut file = fs::OpenOptions::new().write(true).open(&path).await?;
file.seek(std::io::SeekFrom::Start(offset)).await?;
file.write_all(data).await?;
file.flush().await?;
// Compute chunk hash
let hash = blake3::hash(data).to_hex().to_string();
debug!(
session_id = %session.id,
chunk_index,
offset,
size = data.len(),
"wrote chunk"
);
Ok(ChunkInfo {
upload_id: session.id,
chunk_index,
offset,
size: data.len() as u64,
hash,
received_at: Utc::now(),
})
}
/// Verify and finalize the upload.
///
/// Checks that:
/// 1. All chunks are received
/// 2. File size matches expected
/// 3. Content hash matches expected
///
/// # Errors
///
/// Returns an error if chunks are missing, the file size does not match, the
/// hash does not match, or the file metadata cannot be read.
pub async fn finalize(
&self,
session: &UploadSession,
received_chunks: &[ChunkInfo],
) -> Result<PathBuf> {
let path = self.temp_path(session.id);
// Check all chunks received
if received_chunks.len() as u64 != session.chunk_count {
return Err(PinakesError::InvalidData(format!(
"missing chunks: expected {}, got {}",
session.chunk_count,
received_chunks.len()
)));
}
// Verify chunk indices
let mut indices: Vec<u64> =
received_chunks.iter().map(|c| c.chunk_index).collect();
indices.sort_unstable();
for (i, idx) in indices.iter().enumerate() {
if *idx != i as u64 {
return Err(PinakesError::InvalidData(format!(
"chunk {i} missing or out of order"
)));
}
}
// Verify file size
let metadata = fs::metadata(&path).await?;
if metadata.len() != session.expected_size {
return Err(PinakesError::InvalidData(format!(
"file size mismatch: expected {}, got {}",
session.expected_size,
metadata.len()
)));
}
// Verify content hash
let computed_hash = compute_file_hash(&path).await?;
if computed_hash != session.expected_hash.0 {
return Err(PinakesError::StorageIntegrity(format!(
"hash mismatch: expected {}, computed {}",
session.expected_hash, computed_hash
)));
}
info!(
session_id = %session.id,
hash = %session.expected_hash,
size = session.expected_size,
"finalized chunked upload"
);
Ok(path)
}
/// Cancel an upload and clean up temp file.
///
/// # Errors
///
/// Returns an error if the temp file cannot be removed.
pub async fn cancel(&self, session_id: Uuid) -> Result<()> {
let path = self.temp_path(session_id);
if path.exists() {
fs::remove_file(&path).await?;
debug!(session_id = %session_id, "cancelled upload, removed temp file");
}
Ok(())
}
/// Clean up expired temp files.
///
/// # Errors
///
/// Returns an error if the temp directory cannot be read.
pub async fn cleanup_expired(&self, max_age_hours: u64) -> Result<u64> {
let mut count = 0u64;
let max_age = std::time::Duration::from_secs(max_age_hours * 3600);
let mut entries = fs::read_dir(&self.temp_dir).await?;
while let Some(entry) = entries.next_entry().await? {
let path = entry.path();
if path.extension().is_some_and(|e| e == "upload")
&& let Ok(metadata) = fs::metadata(&path).await
&& let Ok(modified) = metadata.modified()
{
let age = std::time::SystemTime::now()
.duration_since(modified)
.unwrap_or_default();
if age > max_age {
let _ = fs::remove_file(&path).await;
count += 1;
}
}
}
if count > 0 {
info!(count, "cleaned up expired upload temp files");
}
Ok(count)
}
}
/// Compute the BLAKE3 hash of a file.
async fn compute_file_hash(path: &Path) -> Result<String> {
let mut file = fs::File::open(path).await?;
let mut hasher = blake3::Hasher::new();
let mut buf = vec![0u8; 64 * 1024];
loop {
let n = file.read(&mut buf).await?;
if n == 0 {
break;
}
hasher.update(&buf[..n]);
}
Ok(hasher.finalize().to_hex().to_string())
}
#[cfg(test)]
mod tests {
use pinakes_types::model::ContentHash;
use tempfile::tempdir;
use super::*;
use crate::UploadStatus;
#[tokio::test]
async fn test_chunked_upload() {
let dir = tempdir().unwrap();
let manager = ChunkedUploadManager::new(dir.path().to_path_buf());
manager.init().await.unwrap();
// Create test data
let data = b"Hello, World! This is test data for chunked upload.";
let hash = blake3::hash(data).to_hex().to_string();
let chunk_size = 20u64;
let session = UploadSession {
id: Uuid::now_v7(),
device_id: super::super::DeviceId::new(),
target_path: "/test/file.txt".to_string(),
expected_hash: ContentHash::new(hash.clone()),
expected_size: data.len() as u64,
chunk_size,
chunk_count: (data.len() as u64).div_ceil(chunk_size),
status: UploadStatus::InProgress,
created_at: Utc::now(),
expires_at: Utc::now() + chrono::Duration::hours(24),
last_activity: Utc::now(),
};
manager.create_temp_file(&session).await.unwrap();
// Write chunks
let mut chunks = Vec::new();
for i in 0..session.chunk_count {
let start = (i * chunk_size) as usize;
let end = ((i + 1) * chunk_size).min(data.len() as u64) as usize;
let chunk_data = &data[start..end];
let chunk = manager.write_chunk(&session, i, chunk_data).await.unwrap();
chunks.push(chunk);
}
// Finalize
let final_path = manager.finalize(&session, &chunks).await.unwrap();
assert!(final_path.exists());
// Verify content
let content = fs::read(&final_path).await.unwrap();
assert_eq!(&content[..], data);
}
}

View file

@ -0,0 +1,148 @@
//! Conflict detection and resolution for sync.
use pinakes_types::config::ConflictResolution;
use super::DeviceSyncState;
/// Detect if there's a conflict between local and server state.
#[must_use]
pub fn detect_conflict(state: &DeviceSyncState) -> Option<ConflictInfo> {
// If either side has no hash, no conflict possible
let local_hash = state.local_hash.as_ref()?;
let server_hash = state.server_hash.as_ref()?;
// Same hash = no conflict
if local_hash == server_hash {
return None;
}
// Both have different hashes = conflict
Some(ConflictInfo {
path: state.path.clone(),
local_hash: local_hash.clone(),
server_hash: server_hash.clone(),
local_mtime: state.local_mtime,
server_mtime: state.server_mtime,
})
}
/// Information about a detected conflict.
#[derive(Debug, Clone)]
pub struct ConflictInfo {
pub path: String,
pub local_hash: String,
pub server_hash: String,
pub local_mtime: Option<i64>,
pub server_mtime: Option<i64>,
}
/// Result of resolving a conflict.
#[derive(Debug, Clone)]
pub enum ConflictOutcome {
/// Use the server version
UseServer,
/// Use the local version (upload it)
UseLocal,
/// Keep both versions (rename one)
KeepBoth { new_local_path: String },
/// Requires manual intervention
Manual,
}
/// Resolve a conflict based on the configured strategy.
#[must_use]
pub fn resolve_conflict(
conflict: &ConflictInfo,
resolution: ConflictResolution,
) -> ConflictOutcome {
match resolution {
ConflictResolution::ServerWins => ConflictOutcome::UseServer,
ConflictResolution::ClientWins => ConflictOutcome::UseLocal,
ConflictResolution::KeepBoth => {
let new_path =
generate_conflict_path(&conflict.path, &conflict.local_hash);
ConflictOutcome::KeepBoth {
new_local_path: new_path,
}
},
ConflictResolution::Manual => ConflictOutcome::Manual,
}
}
/// Generate a new path for the conflicting local file.
/// Format: filename.conflict-<`short_hash>.ext`
fn generate_conflict_path(original_path: &str, local_hash: &str) -> String {
let short_hash = &local_hash[..8.min(local_hash.len())];
if let Some((base, ext)) = original_path.rsplit_once('.') {
format!("{base}.conflict-{short_hash}.{ext}")
} else {
format!("{original_path}.conflict-{short_hash}")
}
}
/// Automatic conflict resolution based on modification times.
/// Useful when `ConflictResolution` is set to a time-based strategy.
#[must_use]
pub const fn resolve_by_mtime(conflict: &ConflictInfo) -> ConflictOutcome {
match (conflict.local_mtime, conflict.server_mtime) {
(Some(local), Some(server)) => {
if local > server {
ConflictOutcome::UseLocal
} else {
ConflictOutcome::UseServer
}
},
(Some(_), None) => ConflictOutcome::UseLocal,
(None, Some(_)) => ConflictOutcome::UseServer,
(None, None) => ConflictOutcome::UseServer, // Default to server
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::FileSyncStatus;
#[test]
fn test_generate_conflict_path() {
assert_eq!(
generate_conflict_path("/path/to/file.txt", "abc12345"),
"/path/to/file.conflict-abc12345.txt"
);
assert_eq!(
generate_conflict_path("/path/to/file", "abc12345"),
"/path/to/file.conflict-abc12345"
);
}
#[test]
fn test_detect_conflict() {
let state_no_conflict = DeviceSyncState {
device_id: super::super::DeviceId::new(),
path: "/test".to_string(),
local_hash: Some("abc".to_string()),
server_hash: Some("abc".to_string()),
local_mtime: None,
server_mtime: None,
sync_status: FileSyncStatus::Synced,
last_synced_at: None,
conflict_info_json: None,
};
assert!(detect_conflict(&state_no_conflict).is_none());
let state_conflict = DeviceSyncState {
device_id: super::super::DeviceId::new(),
path: "/test".to_string(),
local_hash: Some("abc".to_string()),
server_hash: Some("def".to_string()),
local_mtime: None,
server_mtime: None,
sync_status: FileSyncStatus::Conflict,
last_synced_at: None,
conflict_info_json: None,
};
assert!(detect_conflict(&state_conflict).is_some());
}
}

View file

@ -0,0 +1,7 @@
mod chunked;
mod conflict;
mod models;
pub use chunked::*;
pub use conflict::*;
pub use models::*;

View file

@ -0,0 +1,382 @@
//! Sync domain models.
use std::fmt;
use chrono::{DateTime, Utc};
use pinakes_types::{
config::ConflictResolution,
model::{ContentHash, MediaId, UserId},
};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
/// Unique identifier for a sync device.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct DeviceId(pub Uuid);
impl DeviceId {
#[must_use]
pub fn new() -> Self {
Self(Uuid::now_v7())
}
}
impl Default for DeviceId {
fn default() -> Self {
Self::new()
}
}
impl fmt::Display for DeviceId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
/// Type of sync device.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default,
)]
#[serde(rename_all = "lowercase")]
pub enum DeviceType {
Desktop,
Mobile,
Tablet,
Server,
#[default]
Other,
}
impl fmt::Display for DeviceType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Desktop => write!(f, "desktop"),
Self::Mobile => write!(f, "mobile"),
Self::Tablet => write!(f, "tablet"),
Self::Server => write!(f, "server"),
Self::Other => write!(f, "other"),
}
}
}
impl std::str::FromStr for DeviceType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"desktop" => Ok(Self::Desktop),
"mobile" => Ok(Self::Mobile),
"tablet" => Ok(Self::Tablet),
"server" => Ok(Self::Server),
"other" => Ok(Self::Other),
_ => Err(format!("unknown device type: {s}")),
}
}
}
/// A registered sync device.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncDevice {
pub id: DeviceId,
pub user_id: UserId,
pub name: String,
pub device_type: DeviceType,
pub client_version: String,
pub os_info: Option<String>,
pub last_sync_at: Option<DateTime<Utc>>,
pub last_seen_at: DateTime<Utc>,
pub sync_cursor: Option<i64>,
pub enabled: bool,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
impl SyncDevice {
#[must_use]
pub fn new(
user_id: UserId,
name: String,
device_type: DeviceType,
client_version: String,
) -> Self {
let now = Utc::now();
Self {
id: DeviceId::new(),
user_id,
name,
device_type,
client_version,
os_info: None,
last_sync_at: None,
last_seen_at: now,
sync_cursor: None,
enabled: true,
created_at: now,
updated_at: now,
}
}
}
/// Type of change recorded in the sync log.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SyncChangeType {
Created,
Modified,
Deleted,
Moved,
MetadataUpdated,
}
impl fmt::Display for SyncChangeType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Created => write!(f, "created"),
Self::Modified => write!(f, "modified"),
Self::Deleted => write!(f, "deleted"),
Self::Moved => write!(f, "moved"),
Self::MetadataUpdated => write!(f, "metadata_updated"),
}
}
}
impl std::str::FromStr for SyncChangeType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"created" => Ok(Self::Created),
"modified" => Ok(Self::Modified),
"deleted" => Ok(Self::Deleted),
"moved" => Ok(Self::Moved),
"metadata_updated" => Ok(Self::MetadataUpdated),
_ => Err(format!("unknown sync change type: {s}")),
}
}
}
/// An entry in the sync log tracking a change.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncLogEntry {
pub id: Uuid,
pub sequence: i64,
pub change_type: SyncChangeType,
pub media_id: Option<MediaId>,
pub path: String,
pub content_hash: Option<ContentHash>,
pub file_size: Option<u64>,
pub metadata_json: Option<String>,
pub changed_by_device: Option<DeviceId>,
pub timestamp: DateTime<Utc>,
}
impl SyncLogEntry {
#[must_use]
pub fn new(
change_type: SyncChangeType,
path: String,
media_id: Option<MediaId>,
content_hash: Option<ContentHash>,
) -> Self {
Self {
id: Uuid::now_v7(),
sequence: 0, // Will be assigned by database
change_type,
media_id,
path,
content_hash,
file_size: None,
metadata_json: None,
changed_by_device: None,
timestamp: Utc::now(),
}
}
}
/// Sync status for a file on a device.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FileSyncStatus {
Synced,
PendingUpload,
PendingDownload,
Conflict,
Deleted,
}
impl fmt::Display for FileSyncStatus {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Synced => write!(f, "synced"),
Self::PendingUpload => write!(f, "pending_upload"),
Self::PendingDownload => write!(f, "pending_download"),
Self::Conflict => write!(f, "conflict"),
Self::Deleted => write!(f, "deleted"),
}
}
}
impl std::str::FromStr for FileSyncStatus {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"synced" => Ok(Self::Synced),
"pending_upload" => Ok(Self::PendingUpload),
"pending_download" => Ok(Self::PendingDownload),
"conflict" => Ok(Self::Conflict),
"deleted" => Ok(Self::Deleted),
_ => Err(format!("unknown file sync status: {s}")),
}
}
}
/// Sync state for a specific file on a specific device.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DeviceSyncState {
pub device_id: DeviceId,
pub path: String,
pub local_hash: Option<String>,
pub server_hash: Option<String>,
pub local_mtime: Option<i64>,
pub server_mtime: Option<i64>,
pub sync_status: FileSyncStatus,
pub last_synced_at: Option<DateTime<Utc>>,
pub conflict_info_json: Option<String>,
}
/// A sync conflict that needs resolution.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SyncConflict {
pub id: Uuid,
pub device_id: DeviceId,
pub path: String,
pub local_hash: String,
pub local_mtime: i64,
pub server_hash: String,
pub server_mtime: i64,
pub detected_at: DateTime<Utc>,
pub resolved_at: Option<DateTime<Utc>>,
pub resolution: Option<ConflictResolution>,
}
impl SyncConflict {
#[must_use]
pub fn new(
device_id: DeviceId,
path: String,
local_hash: String,
local_mtime: i64,
server_hash: String,
server_mtime: i64,
) -> Self {
Self {
id: Uuid::now_v7(),
device_id,
path,
local_hash,
local_mtime,
server_hash,
server_mtime,
detected_at: Utc::now(),
resolved_at: None,
resolution: None,
}
}
}
/// Status of an upload session.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum UploadStatus {
Pending,
InProgress,
Completed,
Failed,
Expired,
Cancelled,
}
impl fmt::Display for UploadStatus {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Pending => write!(f, "pending"),
Self::InProgress => write!(f, "in_progress"),
Self::Completed => write!(f, "completed"),
Self::Failed => write!(f, "failed"),
Self::Expired => write!(f, "expired"),
Self::Cancelled => write!(f, "cancelled"),
}
}
}
impl std::str::FromStr for UploadStatus {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"pending" => Ok(Self::Pending),
"in_progress" => Ok(Self::InProgress),
"completed" => Ok(Self::Completed),
"failed" => Ok(Self::Failed),
"expired" => Ok(Self::Expired),
"cancelled" => Ok(Self::Cancelled),
_ => Err(format!("unknown upload status: {s}")),
}
}
}
/// A chunked upload session.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UploadSession {
pub id: Uuid,
pub device_id: DeviceId,
pub target_path: String,
pub expected_hash: ContentHash,
pub expected_size: u64,
pub chunk_size: u64,
pub chunk_count: u64,
pub status: UploadStatus,
pub created_at: DateTime<Utc>,
pub expires_at: DateTime<Utc>,
pub last_activity: DateTime<Utc>,
}
impl UploadSession {
#[must_use]
pub fn new(
device_id: DeviceId,
target_path: String,
expected_hash: ContentHash,
expected_size: u64,
chunk_size: u64,
timeout_hours: u64,
) -> Self {
let now = Utc::now();
let chunk_count = expected_size.div_ceil(chunk_size);
Self {
id: Uuid::now_v7(),
device_id,
target_path,
expected_hash,
expected_size,
chunk_size,
chunk_count,
status: UploadStatus::Pending,
created_at: now,
expires_at: now + chrono::Duration::hours(timeout_hours as i64),
last_activity: now,
}
}
}
/// Information about an uploaded chunk.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ChunkInfo {
pub upload_id: Uuid,
pub chunk_index: u64,
pub offset: u64,
pub size: u64,
pub hash: String,
pub received_at: DateTime<Utc>,
}

View file

@ -0,0 +1,18 @@
[package]
name = "pinakes-types"
edition.workspace = true
version.workspace = true
license.workspace = true
[dependencies]
thiserror = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
chrono = { workspace = true }
uuid = { workspace = true }
rustc-hash = { workspace = true }
toml = { workspace = true }
anyhow = { workspace = true }
[lints]
workspace = true

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,142 @@
use std::path::PathBuf;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum PinakesError {
#[error("IO error: {0}")]
Io(#[from] std::io::Error),
#[error("database error: {0}")]
Database(String),
#[error("migration error: {0}")]
Migration(String),
#[error("configuration error: {0}")]
Config(String),
#[error("media item not found: {0}")]
NotFound(String),
#[error("duplicate content hash: {0}")]
DuplicateHash(String),
#[error("unsupported media type for path: {0}")]
UnsupportedMediaType(PathBuf),
#[error("metadata extraction failed: {0}")]
MetadataExtraction(String),
#[error("thumbnail generation failed: {0}")]
ThumbnailGeneration(String),
#[error("search query parse error: {0}")]
SearchParse(String),
#[error("file not found at path: {0}")]
FileNotFound(PathBuf),
#[error("tag not found: {0}")]
TagNotFound(String),
#[error("collection not found: {0}")]
CollectionNotFound(String),
#[error("invalid operation: {0}")]
InvalidOperation(String),
#[error("invalid data: {0}")]
InvalidData(String),
#[error("authentication error: {0}")]
Authentication(String),
#[error("authorization error: {0}")]
Authorization(String),
#[error("path not allowed: {0}")]
PathNotAllowed(String),
#[error("external API error: {0}")]
External(String),
// Managed Storage errors
#[error("managed storage not enabled")]
ManagedStorageDisabled,
#[error("upload too large: {0} bytes exceeds limit")]
UploadTooLarge(u64),
#[error("blob not found: {0}")]
BlobNotFound(String),
#[error("storage integrity error: {0}")]
StorageIntegrity(String),
// Sync errors
#[error("sync not enabled")]
SyncDisabled,
#[error("device not found: {0}")]
DeviceNotFound(String),
#[error("sync conflict: {0}")]
SyncConflict(String),
#[error("upload session expired: {0}")]
UploadSessionExpired(String),
#[error("upload session not found: {0}")]
UploadSessionNotFound(String),
#[error("chunk out of order: expected {expected}, got {actual}")]
ChunkOutOfOrder { expected: u64, actual: u64 },
// Sharing errors
#[error("share not found: {0}")]
ShareNotFound(String),
#[error("share expired: {0}")]
ShareExpired(String),
#[error("share password required")]
SharePasswordRequired,
#[error("share password invalid")]
SharePasswordInvalid,
#[error("insufficient share permissions")]
InsufficientSharePermissions,
#[error("serialization error: {0}")]
Serialization(String),
#[error("external tool `{tool}` failed: {stderr}")]
ExternalTool { tool: String, stderr: String },
#[error("subtitle track {index} not found in media")]
SubtitleTrackNotFound { index: u32 },
#[error("invalid language code: {0}")]
InvalidLanguageCode(String),
}
impl From<serde_json::Error> for PinakesError {
fn from(e: serde_json::Error) -> Self {
Self::Serialization(e.to_string())
}
}
/// Build a closure that wraps a database error with operation context.
///
/// Usage: `stmt.execute(params).map_err(db_ctx("insert_media", media_id))?;`
pub fn db_ctx<E: std::fmt::Display>(
operation: &str,
entity: impl std::fmt::Display,
) -> impl FnOnce(E) -> PinakesError {
let context = format!("{operation} [{entity}]");
move |e| PinakesError::Database(format!("{context}: {e}"))
}
pub type Result<T> = std::result::Result<T, PinakesError>;

View file

@ -0,0 +1,4 @@
pub mod config;
pub mod error;
pub mod media_type;
pub mod model;

View file

@ -0,0 +1,292 @@
use std::path::Path;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum BuiltinMediaType {
// Audio
Mp3,
Flac,
Ogg,
Wav,
Aac,
Opus,
// Video
Mp4,
Mkv,
Avi,
Webm,
// Documents
Pdf,
Epub,
Djvu,
// Text
Markdown,
PlainText,
// Images
Jpeg,
Png,
Gif,
Webp,
Svg,
Avif,
Tiff,
Bmp,
// RAW Images
Cr2,
Nef,
Arw,
Dng,
Orf,
Rw2,
// HEIC/HEIF
Heic,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MediaCategory {
Audio,
Video,
Document,
Text,
Image,
}
impl BuiltinMediaType {
/// Get the unique, stable ID for this media type.
#[must_use]
pub const fn id(&self) -> &'static str {
match self {
Self::Mp3 => "mp3",
Self::Flac => "flac",
Self::Ogg => "ogg",
Self::Wav => "wav",
Self::Aac => "aac",
Self::Opus => "opus",
Self::Mp4 => "mp4",
Self::Mkv => "mkv",
Self::Avi => "avi",
Self::Webm => "webm",
Self::Pdf => "pdf",
Self::Epub => "epub",
Self::Djvu => "djvu",
Self::Markdown => "markdown",
Self::PlainText => "plaintext",
Self::Jpeg => "jpeg",
Self::Png => "png",
Self::Gif => "gif",
Self::Webp => "webp",
Self::Svg => "svg",
Self::Avif => "avif",
Self::Tiff => "tiff",
Self::Bmp => "bmp",
Self::Cr2 => "cr2",
Self::Nef => "nef",
Self::Arw => "arw",
Self::Dng => "dng",
Self::Orf => "orf",
Self::Rw2 => "rw2",
Self::Heic => "heic",
}
}
/// Get the display name for this media type
#[must_use]
pub fn name(&self) -> String {
match self {
Self::Mp3 => "MP3 Audio".to_string(),
Self::Flac => "FLAC Audio".to_string(),
Self::Ogg => "OGG Audio".to_string(),
Self::Wav => "WAV Audio".to_string(),
Self::Aac => "AAC Audio".to_string(),
Self::Opus => "Opus Audio".to_string(),
Self::Mp4 => "MP4 Video".to_string(),
Self::Mkv => "MKV Video".to_string(),
Self::Avi => "AVI Video".to_string(),
Self::Webm => "WebM Video".to_string(),
Self::Pdf => "PDF Document".to_string(),
Self::Epub => "EPUB eBook".to_string(),
Self::Djvu => "DjVu Document".to_string(),
Self::Markdown => "Markdown".to_string(),
Self::PlainText => "Plain Text".to_string(),
Self::Jpeg => "JPEG Image".to_string(),
Self::Png => "PNG Image".to_string(),
Self::Gif => "GIF Image".to_string(),
Self::Webp => "WebP Image".to_string(),
Self::Svg => "SVG Image".to_string(),
Self::Avif => "AVIF Image".to_string(),
Self::Tiff => "TIFF Image".to_string(),
Self::Bmp => "BMP Image".to_string(),
Self::Cr2 => "Canon RAW (CR2)".to_string(),
Self::Nef => "Nikon RAW (NEF)".to_string(),
Self::Arw => "Sony RAW (ARW)".to_string(),
Self::Dng => "Adobe DNG RAW".to_string(),
Self::Orf => "Olympus RAW (ORF)".to_string(),
Self::Rw2 => "Panasonic RAW (RW2)".to_string(),
Self::Heic => "HEIC Image".to_string(),
}
}
#[must_use]
pub fn from_extension(ext: &str) -> Option<Self> {
match ext.to_ascii_lowercase().as_str() {
"mp3" => Some(Self::Mp3),
"flac" => Some(Self::Flac),
"ogg" | "oga" => Some(Self::Ogg),
"wav" => Some(Self::Wav),
"aac" | "m4a" => Some(Self::Aac),
"opus" => Some(Self::Opus),
"mp4" | "m4v" => Some(Self::Mp4),
"mkv" => Some(Self::Mkv),
"avi" => Some(Self::Avi),
"webm" => Some(Self::Webm),
"pdf" => Some(Self::Pdf),
"epub" => Some(Self::Epub),
"djvu" => Some(Self::Djvu),
"md" | "markdown" => Some(Self::Markdown),
"txt" | "text" => Some(Self::PlainText),
"jpg" | "jpeg" => Some(Self::Jpeg),
"png" => Some(Self::Png),
"gif" => Some(Self::Gif),
"webp" => Some(Self::Webp),
"svg" => Some(Self::Svg),
"avif" => Some(Self::Avif),
"tiff" | "tif" => Some(Self::Tiff),
"bmp" => Some(Self::Bmp),
"cr2" => Some(Self::Cr2),
"nef" => Some(Self::Nef),
"arw" => Some(Self::Arw),
"dng" => Some(Self::Dng),
"orf" => Some(Self::Orf),
"rw2" => Some(Self::Rw2),
"heic" | "heif" => Some(Self::Heic),
_ => None,
}
}
pub fn from_path(path: &Path) -> Option<Self> {
path
.extension()
.and_then(|e| e.to_str())
.and_then(Self::from_extension)
}
#[must_use]
pub const fn mime_type(&self) -> &'static str {
match self {
Self::Mp3 => "audio/mpeg",
Self::Flac => "audio/flac",
Self::Ogg => "audio/ogg",
Self::Wav => "audio/wav",
Self::Aac => "audio/aac",
Self::Opus => "audio/opus",
Self::Mp4 => "video/mp4",
Self::Mkv => "video/x-matroska",
Self::Avi => "video/x-msvideo",
Self::Webm => "video/webm",
Self::Pdf => "application/pdf",
Self::Epub => "application/epub+zip",
Self::Djvu => "image/vnd.djvu",
Self::Markdown => "text/markdown",
Self::PlainText => "text/plain",
Self::Jpeg => "image/jpeg",
Self::Png => "image/png",
Self::Gif => "image/gif",
Self::Webp => "image/webp",
Self::Svg => "image/svg+xml",
Self::Avif => "image/avif",
Self::Tiff => "image/tiff",
Self::Bmp => "image/bmp",
Self::Cr2 => "image/x-canon-cr2",
Self::Nef => "image/x-nikon-nef",
Self::Arw => "image/x-sony-arw",
Self::Dng => "image/x-adobe-dng",
Self::Orf => "image/x-olympus-orf",
Self::Rw2 => "image/x-panasonic-rw2",
Self::Heic => "image/heic",
}
}
#[must_use]
pub const fn category(&self) -> MediaCategory {
match self {
Self::Mp3
| Self::Flac
| Self::Ogg
| Self::Wav
| Self::Aac
| Self::Opus => MediaCategory::Audio,
Self::Mp4 | Self::Mkv | Self::Avi | Self::Webm => MediaCategory::Video,
Self::Pdf | Self::Epub | Self::Djvu => MediaCategory::Document,
Self::Markdown | Self::PlainText => MediaCategory::Text,
Self::Jpeg
| Self::Png
| Self::Gif
| Self::Webp
| Self::Svg
| Self::Avif
| Self::Tiff
| Self::Bmp
| Self::Cr2
| Self::Nef
| Self::Arw
| Self::Dng
| Self::Orf
| Self::Rw2
| Self::Heic => MediaCategory::Image,
}
}
#[must_use]
pub const fn extensions(&self) -> &'static [&'static str] {
match self {
Self::Mp3 => &["mp3"],
Self::Flac => &["flac"],
Self::Ogg => &["ogg", "oga"],
Self::Wav => &["wav"],
Self::Aac => &["aac", "m4a"],
Self::Opus => &["opus"],
Self::Mp4 => &["mp4", "m4v"],
Self::Mkv => &["mkv"],
Self::Avi => &["avi"],
Self::Webm => &["webm"],
Self::Pdf => &["pdf"],
Self::Epub => &["epub"],
Self::Djvu => &["djvu"],
Self::Markdown => &["md", "markdown"],
Self::PlainText => &["txt", "text"],
Self::Jpeg => &["jpg", "jpeg"],
Self::Png => &["png"],
Self::Gif => &["gif"],
Self::Webp => &["webp"],
Self::Svg => &["svg"],
Self::Avif => &["avif"],
Self::Tiff => &["tiff", "tif"],
Self::Bmp => &["bmp"],
Self::Cr2 => &["cr2"],
Self::Nef => &["nef"],
Self::Arw => &["arw"],
Self::Dng => &["dng"],
Self::Orf => &["orf"],
Self::Rw2 => &["rw2"],
Self::Heic => &["heic", "heif"],
}
}
/// Returns true if this is a RAW image format.
#[must_use]
pub const fn is_raw(&self) -> bool {
matches!(
self,
Self::Cr2 | Self::Nef | Self::Arw | Self::Dng | Self::Orf | Self::Rw2
)
}
}

View file

@ -0,0 +1,281 @@
//! Media types
//!
//! Supports both
//! built-in media types and plugin-registered custom types.
use std::path::Path;
use serde::{Deserialize, Serialize};
pub mod builtin;
pub mod registry;
pub use builtin::{BuiltinMediaType, MediaCategory};
pub use registry::{MediaTypeDescriptor, MediaTypeRegistry};
/// Media type identifier, can be either built-in or custom
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(untagged)]
pub enum MediaType {
/// Built-in media type (backward compatible)
Builtin(BuiltinMediaType),
/// Custom media type from a plugin
Custom(String),
}
impl MediaType {
/// Create a new custom media type
pub fn custom(id: impl Into<String>) -> Self {
Self::Custom(id.into())
}
/// Get the type ID as a string
#[must_use]
pub fn id(&self) -> String {
match self {
Self::Builtin(b) => b.id().to_string(),
Self::Custom(id) => id.clone(),
}
}
/// Get the display name for this media type
/// For custom types without a registry, returns the ID as the name
#[must_use]
pub fn name(&self) -> String {
match self {
Self::Builtin(b) => b.name(),
Self::Custom(id) => id.clone(),
}
}
/// Get the display name for this media type with registry support
#[must_use]
pub fn name_with_registry(&self, registry: &MediaTypeRegistry) -> String {
match self {
Self::Builtin(b) => b.name(),
Self::Custom(id) => {
registry
.get(id)
.map_or_else(|| id.clone(), |d| d.name.clone())
},
}
}
/// Get the category for this media type
/// For custom types without a registry, returns [`MediaCategory::Document`]
/// as default
#[must_use]
pub const fn category(&self) -> MediaCategory {
match self {
Self::Builtin(b) => b.category(),
Self::Custom(_) => MediaCategory::Document,
}
}
/// Get the category for this media type with registry support
#[must_use]
pub fn category_with_registry(
&self,
registry: &MediaTypeRegistry,
) -> MediaCategory {
match self {
Self::Builtin(b) => b.category(),
Self::Custom(id) => {
registry
.get(id)
.and_then(|d| d.category)
.unwrap_or(MediaCategory::Document)
},
}
}
/// Get the MIME type
/// For custom types without a registry, returns "application/octet-stream"
#[must_use]
pub fn mime_type(&self) -> String {
match self {
Self::Builtin(b) => b.mime_type().to_string(),
Self::Custom(_) => "application/octet-stream".to_string(),
}
}
/// Get the MIME type with registry support
#[must_use]
pub fn mime_type_with_registry(
&self,
registry: &MediaTypeRegistry,
) -> String {
match self {
Self::Builtin(b) => b.mime_type().to_string(),
Self::Custom(id) => {
registry
.get(id)
.and_then(|d| d.mime_types.first().cloned())
.unwrap_or_else(|| "application/octet-stream".to_string())
},
}
}
/// Get file extensions
/// For custom types without a registry, returns an empty vec
#[must_use]
pub fn extensions(&self) -> Vec<String> {
match self {
Self::Builtin(b) => {
b.extensions()
.iter()
.map(std::string::ToString::to_string)
.collect()
},
Self::Custom(_) => vec![],
}
}
/// Get file extensions with registry support
#[must_use]
pub fn extensions_with_registry(
&self,
registry: &MediaTypeRegistry,
) -> Vec<String> {
match self {
Self::Builtin(b) => {
b.extensions()
.iter()
.map(std::string::ToString::to_string)
.collect()
},
Self::Custom(id) => {
registry
.get(id)
.map(|d| d.extensions.clone())
.unwrap_or_default()
},
}
}
/// Check if this is a RAW image format
#[must_use]
pub const fn is_raw(&self) -> bool {
match self {
Self::Builtin(b) => b.is_raw(),
Self::Custom(_) => false,
}
}
/// Resolve a media type from file extension (built-in types only)
/// Use `from_extension_with_registry` for custom types
pub fn from_extension(ext: &str) -> Option<Self> {
BuiltinMediaType::from_extension(ext).map(Self::Builtin)
}
/// Resolve a media type from file extension with registry (includes custom
/// types)
#[must_use]
pub fn from_extension_with_registry(
ext: &str,
registry: &MediaTypeRegistry,
) -> Option<Self> {
// Try built-in types first
if let Some(builtin) = BuiltinMediaType::from_extension(ext) {
return Some(Self::Builtin(builtin));
}
// Try registered custom types
registry
.get_by_extension(ext)
.map(|desc| Self::Custom(desc.id.clone()))
}
/// Resolve a media type from file path (built-in types only)
/// Use `from_path_with_registry` for custom types
pub fn from_path(path: &Path) -> Option<Self> {
path
.extension()
.and_then(|e| e.to_str())
.and_then(Self::from_extension)
}
/// Resolve a media type from file path with registry (includes custom types)
#[must_use]
pub fn from_path_with_registry(
path: &Path,
registry: &MediaTypeRegistry,
) -> Option<Self> {
path
.extension()
.and_then(|e| e.to_str())
.and_then(|ext| Self::from_extension_with_registry(ext, registry))
}
}
// Implement `From<BuiltinMediaType>` for easier conversion
impl From<BuiltinMediaType> for MediaType {
fn from(builtin: BuiltinMediaType) -> Self {
Self::Builtin(builtin)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_builtin_media_type() {
let mt = MediaType::Builtin(BuiltinMediaType::Mp3);
assert_eq!(mt.id(), "mp3");
assert_eq!(mt.mime_type(), "audio/mpeg");
assert_eq!(mt.category(), MediaCategory::Audio);
}
#[test]
fn test_custom_media_type() {
let mut registry = MediaTypeRegistry::new();
let descriptor = MediaTypeDescriptor {
id: "heif".to_string(),
name: "HEIF Image".to_string(),
category: Some(MediaCategory::Image),
extensions: vec!["heif".to_string()],
mime_types: vec!["image/heif".to_string()],
plugin_id: Some("heif-plugin".to_string()),
};
registry.register(descriptor).unwrap();
let mt = MediaType::custom("heif");
assert_eq!(mt.id(), "heif");
assert_eq!(mt.mime_type_with_registry(&registry), "image/heif");
assert_eq!(mt.category_with_registry(&registry), MediaCategory::Image);
}
#[test]
fn test_from_extension_builtin() {
let registry = MediaTypeRegistry::new();
let mt = MediaType::from_extension_with_registry("mp3", &registry);
assert!(mt.is_some());
assert_eq!(mt.unwrap(), MediaType::Builtin(BuiltinMediaType::Mp3));
}
#[test]
fn test_from_extension_custom() {
let mut registry = MediaTypeRegistry::new();
let descriptor = MediaTypeDescriptor {
id: "customformat".to_string(),
name: "Custom Format".to_string(),
category: Some(MediaCategory::Image),
extensions: vec!["xyz".to_string()],
mime_types: vec!["application/x-custom".to_string()],
plugin_id: Some("custom-plugin".to_string()),
};
registry.register(descriptor).unwrap();
let mt = MediaType::from_extension_with_registry("xyz", &registry);
assert!(mt.is_some());
assert_eq!(mt.unwrap(), MediaType::custom("customformat"));
}
}

View file

@ -0,0 +1,297 @@
//! Media type registry for managing both built-in and custom media types
use anyhow::{Result, anyhow};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use super::MediaCategory;
/// Descriptor for a media type (built-in or custom)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MediaTypeDescriptor {
/// Unique identifier
pub id: String,
/// Display name
pub name: String,
/// Category
pub category: Option<MediaCategory>,
/// File extensions
pub extensions: Vec<String>,
/// MIME types
pub mime_types: Vec<String>,
/// Plugin that registered this type (None for built-in types)
pub plugin_id: Option<String>,
}
/// Registry for media types
#[derive(Debug, Clone)]
pub struct MediaTypeRegistry {
/// Map of media type ID to descriptor
types: FxHashMap<String, MediaTypeDescriptor>,
/// Map of extension to media type ID
extension_map: FxHashMap<String, String>,
}
impl MediaTypeRegistry {
/// Create a new empty registry
#[must_use]
pub fn new() -> Self {
Self {
types: FxHashMap::default(),
extension_map: FxHashMap::default(),
}
}
/// Register a new media type
pub fn register(&mut self, descriptor: MediaTypeDescriptor) -> Result<()> {
// Check if ID is already registered
if self.types.contains_key(&descriptor.id) {
return Err(anyhow!("Media type already registered: {}", descriptor.id));
}
// Register extensions
for ext in &descriptor.extensions {
let ext_lower = ext.to_lowercase();
if self.extension_map.contains_key(&ext_lower) {
// Extension already registered - this is OK, we'll use the first one
// In a more sophisticated system, we might track multiple types per
// extension
continue;
}
self.extension_map.insert(ext_lower, descriptor.id.clone());
}
// Register the type
self.types.insert(descriptor.id.clone(), descriptor);
Ok(())
}
/// Unregister a media type
pub fn unregister(&mut self, id: &str) -> Result<()> {
let descriptor = self
.types
.remove(id)
.ok_or_else(|| anyhow!("Media type not found: {id}"))?;
// Remove extensions
for ext in &descriptor.extensions {
let ext_lower = ext.to_lowercase();
if self.extension_map.get(&ext_lower) == Some(&descriptor.id) {
self.extension_map.remove(&ext_lower);
}
}
Ok(())
}
/// Get a media type descriptor by ID
#[must_use]
pub fn get(&self, id: &str) -> Option<&MediaTypeDescriptor> {
self.types.get(id)
}
/// Get a media type by file extension
#[must_use]
pub fn get_by_extension(&self, ext: &str) -> Option<&MediaTypeDescriptor> {
let ext_lower = ext.to_lowercase();
self
.extension_map
.get(&ext_lower)
.and_then(|id| self.types.get(id))
}
/// List all registered media types
#[must_use]
pub fn list_all(&self) -> Vec<&MediaTypeDescriptor> {
self.types.values().collect()
}
/// List media types from a specific plugin
#[must_use]
pub fn list_by_plugin(&self, plugin_id: &str) -> Vec<&MediaTypeDescriptor> {
self
.types
.values()
.filter(|d| d.plugin_id.as_deref() == Some(plugin_id))
.collect()
}
/// List built-in media types (`plugin_id` is None)
#[must_use]
pub fn list_builtin(&self) -> Vec<&MediaTypeDescriptor> {
self
.types
.values()
.filter(|d| d.plugin_id.is_none())
.collect()
}
/// Get count of registered types
#[must_use]
pub fn count(&self) -> usize {
self.types.len()
}
/// Check if a media type is registered
#[must_use]
pub fn contains(&self, id: &str) -> bool {
self.types.contains_key(id)
}
/// Unregister all types from a specific plugin
pub fn unregister_plugin(&mut self, plugin_id: &str) -> Result<usize> {
let type_ids: Vec<String> = self
.types
.values()
.filter(|d| d.plugin_id.as_deref() == Some(plugin_id))
.map(|d| d.id.clone())
.collect();
let count = type_ids.len();
for id in type_ids {
self.unregister(&id)?;
}
Ok(count)
}
}
impl Default for MediaTypeRegistry {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
fn create_test_descriptor(id: &str, ext: &str) -> MediaTypeDescriptor {
MediaTypeDescriptor {
id: id.to_string(),
name: format!("{id} Type"),
category: Some(MediaCategory::Document),
extensions: vec![ext.to_string()],
mime_types: vec![format!("application/{}", id)],
plugin_id: Some("test-plugin".to_string()),
}
}
#[test]
fn test_register_and_get() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor).unwrap();
let retrieved = registry.get("test").unwrap();
assert_eq!(retrieved.id, "test");
assert_eq!(retrieved.name, "test Type");
}
#[test]
fn test_register_duplicate() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor.clone()).unwrap();
let result = registry.register(descriptor);
assert!(result.is_err());
}
#[test]
fn test_get_by_extension() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor).unwrap();
let retrieved = registry.get_by_extension("tst").unwrap();
assert_eq!(retrieved.id, "test");
// Test case insensitivity
let retrieved = registry.get_by_extension("TST").unwrap();
assert_eq!(retrieved.id, "test");
}
#[test]
fn test_unregister() {
let mut registry = MediaTypeRegistry::new();
let descriptor = create_test_descriptor("test", "tst");
registry.register(descriptor).unwrap();
assert!(registry.contains("test"));
registry.unregister("test").unwrap();
assert!(!registry.contains("test"));
// Extension should also be removed
assert!(registry.get_by_extension("tst").is_none());
}
#[test]
fn test_list_by_plugin() {
let mut registry = MediaTypeRegistry::new();
let desc1 = MediaTypeDescriptor {
id: "type1".to_string(),
name: "Type 1".to_string(),
category: Some(MediaCategory::Document),
extensions: vec!["t1".to_string()],
mime_types: vec!["application/type1".to_string()],
plugin_id: Some("plugin1".to_string()),
};
let desc2 = MediaTypeDescriptor {
id: "type2".to_string(),
name: "Type 2".to_string(),
category: Some(MediaCategory::Document),
extensions: vec!["t2".to_string()],
mime_types: vec!["application/type2".to_string()],
plugin_id: Some("plugin2".to_string()),
};
registry.register(desc1).unwrap();
registry.register(desc2).unwrap();
let plugin1_types = registry.list_by_plugin("plugin1");
assert_eq!(plugin1_types.len(), 1);
assert_eq!(plugin1_types[0].id, "type1");
let plugin2_types = registry.list_by_plugin("plugin2");
assert_eq!(plugin2_types.len(), 1);
assert_eq!(plugin2_types[0].id, "type2");
}
#[test]
fn test_unregister_plugin() {
let mut registry = MediaTypeRegistry::new();
for i in 1..=3 {
let desc = MediaTypeDescriptor {
id: format!("type{i}"),
name: format!("Type {i}"),
category: Some(MediaCategory::Document),
extensions: vec![format!("t{}", i)],
mime_types: vec![format!("application/type{}", i)],
plugin_id: Some("test-plugin".to_string()),
};
registry.register(desc).unwrap();
}
assert_eq!(registry.count(), 3);
let removed = registry.unregister_plugin("test-plugin").unwrap();
assert_eq!(removed, 3);
assert_eq!(registry.count(), 0);
}
}

View file

@ -0,0 +1,688 @@
use std::{fmt, path::PathBuf};
use chrono::{DateTime, Utc};
use rustc_hash::FxHashMap;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::media_type::MediaType;
/// Unique identifier for a user account.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct UserId(pub Uuid);
impl UserId {
#[must_use]
pub fn new() -> Self {
Self(Uuid::now_v7())
}
}
impl Default for UserId {
fn default() -> Self {
Self::new()
}
}
impl fmt::Display for UserId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl From<Uuid> for UserId {
fn from(id: Uuid) -> Self {
Self(id)
}
}
/// Unique identifier for a media item.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct MediaId(pub Uuid);
impl MediaId {
/// Creates a new media ID using `UUIDv7`.
#[must_use]
pub fn new() -> Self {
Self(Uuid::now_v7())
}
}
impl fmt::Display for MediaId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl Default for MediaId {
fn default() -> Self {
Self(uuid::Uuid::nil())
}
}
/// BLAKE3 content hash for deduplication.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ContentHash(pub String);
impl ContentHash {
/// Creates a new content hash from a hex string.
#[must_use]
pub const fn new(hex: String) -> Self {
Self(hex)
}
}
impl fmt::Display for ContentHash {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
/// Storage mode for media items
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize,
)]
#[serde(rename_all = "lowercase")]
pub enum StorageMode {
/// File exists on disk, referenced by path
#[default]
External,
/// File is stored in managed content-addressable storage
Managed,
}
impl fmt::Display for StorageMode {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::External => write!(f, "external"),
Self::Managed => write!(f, "managed"),
}
}
}
impl std::str::FromStr for StorageMode {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"external" => Ok(Self::External),
"managed" => Ok(Self::Managed),
_ => Err(format!("unknown storage mode: {s}")),
}
}
}
/// A blob stored in managed storage (content-addressable)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManagedBlob {
pub content_hash: ContentHash,
pub file_size: u64,
pub mime_type: String,
pub reference_count: u32,
pub stored_at: DateTime<Utc>,
pub last_verified: Option<DateTime<Utc>>,
}
/// Result of uploading a file to managed storage
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UploadResult {
pub media_id: MediaId,
pub content_hash: ContentHash,
pub was_duplicate: bool,
pub file_size: u64,
}
/// Statistics about managed storage
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ManagedStorageStats {
pub total_blobs: u64,
pub total_size_bytes: u64,
pub unique_size_bytes: u64,
pub deduplication_ratio: f64,
pub managed_media_count: u64,
pub orphaned_blobs: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MediaItem {
pub id: MediaId,
pub path: PathBuf,
pub file_name: String,
pub media_type: MediaType,
pub content_hash: ContentHash,
pub file_size: u64,
pub title: Option<String>,
pub artist: Option<String>,
pub album: Option<String>,
pub genre: Option<String>,
pub year: Option<i32>,
pub duration_secs: Option<f64>,
pub description: Option<String>,
pub thumbnail_path: Option<PathBuf>,
pub custom_fields: FxHashMap<String, CustomField>,
/// File modification time (Unix timestamp in seconds), used for incremental
/// scanning
pub file_mtime: Option<i64>,
// Photo-specific metadata
pub date_taken: Option<DateTime<Utc>>,
pub latitude: Option<f64>,
pub longitude: Option<f64>,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
pub rating: Option<i32>,
pub perceptual_hash: Option<String>,
// Managed storage fields
/// How the file is stored (external on disk or managed in
/// content-addressable storage)
#[serde(default)]
pub storage_mode: StorageMode,
/// Original filename for uploaded files (preserved separately from
/// `file_name`)
pub original_filename: Option<String>,
/// When the file was uploaded to managed storage
pub uploaded_at: Option<DateTime<Utc>>,
/// Storage key for looking up the blob (usually same as `content_hash`)
pub storage_key: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
/// Soft delete timestamp. If set, the item is in the trash.
pub deleted_at: Option<DateTime<Utc>>,
/// When markdown links were last extracted from this file.
pub links_extracted_at: Option<DateTime<Utc>>,
}
/// A custom field attached to a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomField {
pub field_type: CustomFieldType,
pub value: String,
}
/// Type of custom field value.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CustomFieldType {
Text,
Number,
Date,
Boolean,
}
impl CustomFieldType {
#[must_use]
pub const fn as_str(&self) -> &'static str {
match self {
Self::Text => "text",
Self::Number => "number",
Self::Date => "date",
Self::Boolean => "boolean",
}
}
}
impl std::fmt::Display for CustomFieldType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
/// A tag that can be applied to media items.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tag {
pub id: Uuid,
pub name: String,
pub parent_id: Option<Uuid>,
pub created_at: DateTime<Utc>,
}
/// A collection of media items.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collection {
pub id: Uuid,
pub name: String,
pub description: Option<String>,
pub kind: CollectionKind,
pub filter_query: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
/// Kind of collection.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CollectionKind {
Manual,
Virtual,
}
impl CollectionKind {
#[must_use]
pub const fn as_str(&self) -> &'static str {
match self {
Self::Manual => "manual",
Self::Virtual => "virtual",
}
}
}
impl std::fmt::Display for CollectionKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
/// A member of a collection with position tracking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionMember {
pub collection_id: Uuid,
pub media_id: MediaId,
pub position: i32,
pub added_at: DateTime<Utc>,
}
/// An audit trail entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEntry {
pub id: Uuid,
pub media_id: Option<MediaId>,
pub action: AuditAction,
pub details: Option<String>,
pub timestamp: DateTime<Utc>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditAction {
// Media actions
Imported,
Updated,
Deleted,
Tagged,
Untagged,
AddedToCollection,
RemovedFromCollection,
Opened,
Scanned,
// Authentication actions
LoginSuccess,
LoginFailed,
Logout,
SessionExpired,
// Authorization actions
PermissionDenied,
RoleChanged,
LibraryAccessGranted,
LibraryAccessRevoked,
// User management
UserCreated,
UserUpdated,
UserDeleted,
// Plugin actions
PluginInstalled,
PluginUninstalled,
PluginEnabled,
PluginDisabled,
// Configuration actions
ConfigChanged,
RootDirectoryAdded,
RootDirectoryRemoved,
// Social/Sharing actions
ShareLinkCreated,
ShareLinkAccessed,
// System actions
DatabaseVacuumed,
DatabaseCleared,
ExportCompleted,
IntegrityCheckCompleted,
}
impl fmt::Display for AuditAction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
// Media actions
Self::Imported => "imported",
Self::Updated => "updated",
Self::Deleted => "deleted",
Self::Tagged => "tagged",
Self::Untagged => "untagged",
Self::AddedToCollection => "added_to_collection",
Self::RemovedFromCollection => "removed_from_collection",
Self::Opened => "opened",
Self::Scanned => "scanned",
// Authentication actions
Self::LoginSuccess => "login_success",
Self::LoginFailed => "login_failed",
Self::Logout => "logout",
Self::SessionExpired => "session_expired",
// Authorization actions
Self::PermissionDenied => "permission_denied",
Self::RoleChanged => "role_changed",
Self::LibraryAccessGranted => "library_access_granted",
Self::LibraryAccessRevoked => "library_access_revoked",
// User management
Self::UserCreated => "user_created",
Self::UserUpdated => "user_updated",
Self::UserDeleted => "user_deleted",
// Plugin actions
Self::PluginInstalled => "plugin_installed",
Self::PluginUninstalled => "plugin_uninstalled",
Self::PluginEnabled => "plugin_enabled",
Self::PluginDisabled => "plugin_disabled",
// Configuration actions
Self::ConfigChanged => "config_changed",
Self::RootDirectoryAdded => "root_directory_added",
Self::RootDirectoryRemoved => "root_directory_removed",
// Social/Sharing actions
Self::ShareLinkCreated => "share_link_created",
Self::ShareLinkAccessed => "share_link_accessed",
// System actions
Self::DatabaseVacuumed => "database_vacuumed",
Self::DatabaseCleared => "database_cleared",
Self::ExportCompleted => "export_completed",
Self::IntegrityCheckCompleted => "integrity_check_completed",
};
write!(f, "{s}")
}
}
/// Pagination parameters for list queries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Pagination {
pub offset: u64,
pub limit: u64,
pub sort: Option<String>,
}
impl Pagination {
/// Creates a new pagination instance.
#[must_use]
pub const fn new(offset: u64, limit: u64, sort: Option<String>) -> Self {
Self {
offset,
limit,
sort,
}
}
}
impl Default for Pagination {
fn default() -> Self {
Self {
offset: 0,
limit: 50,
sort: None,
}
}
}
/// A saved search query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SavedSearch {
pub id: Uuid,
pub name: String,
pub query: String,
pub sort_order: Option<String>,
pub created_at: DateTime<Utc>,
}
// Book Management Types
/// Metadata for book-type media.
///
/// Used both as a DB record (with populated `media_id`, `created_at`,
/// `updated_at`) and as an extraction result (with placeholder values for
/// those fields when the record has not yet been persisted).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BookMetadata {
pub media_id: MediaId,
pub isbn: Option<String>,
pub isbn13: Option<String>,
pub publisher: Option<String>,
pub language: Option<String>,
pub page_count: Option<i32>,
pub publication_date: Option<chrono::NaiveDate>,
pub series_name: Option<String>,
pub series_index: Option<f64>,
pub format: Option<String>,
pub authors: Vec<AuthorInfo>,
pub identifiers: FxHashMap<String, Vec<String>>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
impl Default for BookMetadata {
fn default() -> Self {
let now = Utc::now();
Self {
media_id: MediaId(uuid::Uuid::nil()),
isbn: None,
isbn13: None,
publisher: None,
language: None,
page_count: None,
publication_date: None,
series_name: None,
series_index: None,
format: None,
authors: Vec::new(),
identifiers: FxHashMap::default(),
created_at: now,
updated_at: now,
}
}
}
/// Information about a book author.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AuthorInfo {
pub name: String,
pub role: String,
pub file_as: Option<String>,
pub position: i32,
}
impl AuthorInfo {
/// Creates a new author with the given name.
#[must_use]
pub fn new(name: String) -> Self {
Self {
name,
role: "author".to_string(),
file_as: None,
position: 0,
}
}
/// Sets the author's role.
#[must_use]
pub fn with_role(mut self, role: String) -> Self {
self.role = role;
self
}
#[must_use]
pub fn with_file_as(mut self, file_as: String) -> Self {
self.file_as = Some(file_as);
self
}
#[must_use]
pub const fn with_position(mut self, position: i32) -> Self {
self.position = position;
self
}
}
/// Reading progress for a book.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadingProgress {
pub media_id: MediaId,
pub user_id: Uuid,
pub current_page: i32,
pub total_pages: Option<i32>,
pub progress_percent: f64,
pub last_read_at: DateTime<Utc>,
}
impl ReadingProgress {
/// Creates a new reading progress entry.
#[must_use]
pub fn new(
media_id: MediaId,
user_id: Uuid,
current_page: i32,
total_pages: Option<i32>,
) -> Self {
let progress_percent = total_pages.map_or(0.0, |total| {
if total > 0 {
(f64::from(current_page) / f64::from(total) * 100.0).min(100.0)
} else {
0.0
}
});
Self {
media_id,
user_id,
current_page,
total_pages,
progress_percent,
last_read_at: Utc::now(),
}
}
}
/// Reading status for a book.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReadingStatus {
ToRead,
Reading,
Completed,
Abandoned,
}
impl fmt::Display for ReadingStatus {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::ToRead => write!(f, "to_read"),
Self::Reading => write!(f, "reading"),
Self::Completed => write!(f, "completed"),
Self::Abandoned => write!(f, "abandoned"),
}
}
}
/// Type of markdown link
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LinkType {
/// Wikilink: [[target]] or [[target|display]]
Wikilink,
/// Markdown link: [text](path)
MarkdownLink,
/// Embed: ![[target]]
Embed,
}
impl fmt::Display for LinkType {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Wikilink => write!(f, "wikilink"),
Self::MarkdownLink => write!(f, "markdown_link"),
Self::Embed => write!(f, "embed"),
}
}
}
impl std::str::FromStr for LinkType {
type Err = String;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_lowercase().as_str() {
"wikilink" => Ok(Self::Wikilink),
"markdown_link" => Ok(Self::MarkdownLink),
"embed" => Ok(Self::Embed),
_ => Err(format!("unknown link type: {s}")),
}
}
}
/// A markdown link extracted from a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MarkdownLink {
pub id: Uuid,
pub source_media_id: MediaId,
/// Raw link target as written in the source (wikilink name or path)
pub target_path: String,
/// Resolved target `media_id` (None if unresolved)
pub target_media_id: Option<MediaId>,
pub link_type: LinkType,
/// Display text for the link
pub link_text: Option<String>,
/// Line number in source file (1-indexed)
pub line_number: Option<i32>,
/// Surrounding text for backlink preview
pub context: Option<String>,
pub created_at: DateTime<Utc>,
}
/// Information about a backlink (incoming link).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BacklinkInfo {
pub link_id: Uuid,
pub source_id: MediaId,
pub source_title: Option<String>,
pub source_path: String,
pub link_text: Option<String>,
pub line_number: Option<i32>,
pub context: Option<String>,
pub link_type: LinkType,
}
/// Graph data for visualization.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphData {
pub nodes: Vec<GraphNode>,
pub edges: Vec<GraphEdge>,
}
/// A node in the graph visualization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphNode {
pub id: String,
pub label: String,
pub title: Option<String>,
pub media_type: String,
/// Number of outgoing links from this node
pub link_count: u32,
/// Number of incoming links to this node
pub backlink_count: u32,
}
/// An edge (link) in the graph visualization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphEdge {
pub source: String,
pub target: String,
pub link_type: LinkType,
}