pinakes/crates/pinakes-core/src/model.rs
NotAShelf d5be5026a7
pinakes-core: fix isbn regex, csv quoting, document extraction, and enrichment accuracy
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I974959e74d2b5b5591437daa0f29291a6a6a6964
2026-03-08 00:43:23 +03:00

648 lines
17 KiB
Rust

use std::{collections::HashMap, fmt, path::PathBuf};
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use crate::media_type::MediaType;
/// Unique identifier for a media item.
///
/// Newtype wrapper around a [`Uuid`]; the wrapped value is public and
/// reachable as `.0`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct MediaId(pub Uuid);
impl MediaId {
/// Creates a new media ID using `UUIDv7`.
#[must_use]
pub fn new() -> Self {
Self(Uuid::now_v7())
}
}
impl fmt::Display for MediaId {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{}", self.0)
}
}
impl Default for MediaId {
fn default() -> Self {
Self(uuid::Uuid::nil())
}
}
/// BLAKE3 content hash for deduplication.
///
/// Newtype wrapper around a `String` (a hex string according to
/// [`ContentHash::new`]); the wrapped value is public and reachable as `.0`.
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct ContentHash(pub String);
impl ContentHash {
/// Creates a new content hash from a hex string.
#[must_use]
pub const fn new(hex: String) -> Self {
Self(hex)
}
}
impl fmt::Display for ContentHash {
    /// Writes the wrapped hash string verbatim; caller-supplied width/fill
    /// flags are not applied.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(self.0.as_str())
    }
}
/// Storage mode for media items.
///
/// Serialized by serde with lowercase variant names; `External` is the
/// `Default` variant.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize,
)]
#[serde(rename_all = "lowercase")]
pub enum StorageMode {
/// File exists on disk, referenced by path
#[default]
External,
/// File is stored in managed content-addressable storage
Managed,
}
impl fmt::Display for StorageMode {
    /// Writes the lowercase name of the mode: `external` or `managed`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::External => "external",
            Self::Managed => "managed",
        };
        f.write_str(label)
    }
}
impl std::str::FromStr for StorageMode {
    type Err = String;

    /// Parses a storage mode name, ignoring letter case.
    ///
    /// # Errors
    ///
    /// Returns `unknown storage mode: <input>` (with the input as given) when
    /// the lowercased input is neither `external` nor `managed`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let normalized = s.to_lowercase();
        if normalized == "external" {
            Ok(Self::External)
        } else if normalized == "managed" {
            Ok(Self::Managed)
        } else {
            Err(format!("unknown storage mode: {s}"))
        }
    }
}
/// A blob stored in managed storage (content-addressable)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ManagedBlob {
/// Content hash of the blob.
pub content_hash: ContentHash,
/// Size of the blob; presumably in bytes (TODO confirm).
pub file_size: u64,
/// MIME type, kept as a plain string.
pub mime_type: String,
/// NOTE(review): presumably the number of media items that reference this
/// blob; confirm against the storage backend.
pub reference_count: u32,
/// UTC timestamp; presumably when the blob was stored.
pub stored_at: DateTime<Utc>,
/// UTC timestamp of the last verification, if any. Presumably `None` means
/// the blob has never been verified (TODO confirm).
pub last_verified: Option<DateTime<Utc>>,
}
/// Result of uploading a file to managed storage
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct UploadResult {
/// Identifier of the media item associated with the upload.
pub media_id: MediaId,
/// Content hash reported for the uploaded data.
pub content_hash: ContentHash,
/// NOTE(review): presumably `true` when the uploaded content already existed
/// in managed storage; confirm against the upload code.
pub was_duplicate: bool,
/// Size of the uploaded file; presumably in bytes (TODO confirm).
pub file_size: u64,
}
/// Statistics about managed storage
///
/// Derives `Default`, so a default value has every counter set to zero.
/// NOTE(review): how each figure is computed is not visible in this file;
/// see the storage backend that fills this struct.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ManagedStorageStats {
pub total_blobs: u64,
pub total_size_bytes: u64,
pub unique_size_bytes: u64,
/// NOTE(review): presumably derived from `total_size_bytes` and
/// `unique_size_bytes`; exact formula to be confirmed.
pub deduplication_ratio: f64,
pub managed_media_count: u64,
pub orphaned_blobs: u64,
}
/// A media item record together with its metadata.
///
/// Groups file identity (`id`, `path`, `file_name`, `content_hash`),
/// descriptive metadata, photo-specific fields, managed-storage fields and
/// lifecycle timestamps. Most metadata fields are optional.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MediaItem {
pub id: MediaId,
pub path: PathBuf,
pub file_name: String,
pub media_type: MediaType,
pub content_hash: ContentHash,
/// Size of the file; presumably in bytes (TODO confirm).
pub file_size: u64,
pub title: Option<String>,
pub artist: Option<String>,
pub album: Option<String>,
pub genre: Option<String>,
pub year: Option<i32>,
pub duration_secs: Option<f64>,
pub description: Option<String>,
pub thumbnail_path: Option<PathBuf>,
/// Custom fields attached to this item, keyed by a string (presumably the
/// field name).
pub custom_fields: HashMap<String, CustomField>,
/// File modification time (Unix timestamp in seconds), used for incremental
/// scanning
pub file_mtime: Option<i64>,
// Photo-specific metadata
pub date_taken: Option<DateTime<Utc>>,
pub latitude: Option<f64>,
pub longitude: Option<f64>,
pub camera_make: Option<String>,
pub camera_model: Option<String>,
/// NOTE(review): the valid rating range is not defined in this file.
pub rating: Option<i32>,
pub perceptual_hash: Option<String>,
// Managed storage fields
/// How the file is stored (external on disk or managed in
/// content-addressable storage). Falls back to `StorageMode::default()` when
/// the field is missing during deserialization.
#[serde(default)]
pub storage_mode: StorageMode,
/// Original filename for uploaded files (preserved separately from
/// `file_name`)
pub original_filename: Option<String>,
/// When the file was uploaded to managed storage
pub uploaded_at: Option<DateTime<Utc>>,
/// Storage key for looking up the blob (usually same as `content_hash`)
pub storage_key: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
/// Soft delete timestamp. If set, the item is in the trash.
pub deleted_at: Option<DateTime<Utc>>,
/// When markdown links were last extracted from this file.
pub links_extracted_at: Option<DateTime<Utc>>,
}
/// A custom field attached to a media item.
///
/// The value is always carried as a string; `field_type` records which kind
/// of value it is meant to be.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CustomField {
/// Declared type of `value`.
pub field_type: CustomFieldType,
/// The value in string form. NOTE(review): no parsing or validation against
/// `field_type` is visible in this file.
pub value: String,
}
/// Type of custom field value.
///
/// Serialized by serde with lowercase variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CustomFieldType {
Text,
Number,
Date,
Boolean,
}
impl CustomFieldType {
#[must_use]
pub const fn as_str(&self) -> &'static str {
match self {
Self::Text => "text",
Self::Number => "number",
Self::Date => "date",
Self::Boolean => "boolean",
}
}
}
impl std::fmt::Display for CustomFieldType {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
/// A tag that can be applied to media items.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Tag {
pub id: Uuid,
pub name: String,
/// Optional reference to another tag. NOTE(review): presumably the parent
/// in a tag hierarchy, with `None` for top-level tags; confirm.
pub parent_id: Option<Uuid>,
pub created_at: DateTime<Utc>,
}
/// A collection of media items.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Collection {
pub id: Uuid,
pub name: String,
pub description: Option<String>,
/// Whether the collection is manual or virtual.
pub kind: CollectionKind,
/// Optional query string. NOTE(review): presumably the filter that defines
/// the contents of a virtual collection; confirm against the code that
/// evaluates it.
pub filter_query: Option<String>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
/// Kind of collection.
///
/// Serialized by serde with lowercase variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum CollectionKind {
Manual,
Virtual,
}
impl CollectionKind {
#[must_use]
pub const fn as_str(&self) -> &'static str {
match self {
Self::Manual => "manual",
Self::Virtual => "virtual",
}
}
}
impl std::fmt::Display for CollectionKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
/// A member of a collection with position tracking.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CollectionMember {
/// Identifier of the collection this entry belongs to.
pub collection_id: Uuid,
/// Identifier of the media item that is a member.
pub media_id: MediaId,
/// Position of the item within the collection. NOTE(review): whether this
/// is 0- or 1-based is not visible in this file.
pub position: i32,
pub added_at: DateTime<Utc>,
}
/// An audit trail entry.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuditEntry {
pub id: Uuid,
/// Media item the entry refers to, if any. Optional because the action set
/// also includes non-media actions (authentication, plugins, configuration,
/// system).
pub media_id: Option<MediaId>,
/// What kind of action was recorded.
pub action: AuditAction,
/// Optional free-form details string.
pub details: Option<String>,
pub timestamp: DateTime<Utc>,
}
/// Kind of action recorded in an [`AuditEntry`].
///
/// Serialized by serde with `snake_case` variant names (for example
/// `AddedToCollection` becomes `added_to_collection`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum AuditAction {
// Media actions
Imported,
Updated,
Deleted,
Tagged,
Untagged,
AddedToCollection,
RemovedFromCollection,
Opened,
Scanned,
// Authentication actions
LoginSuccess,
LoginFailed,
Logout,
SessionExpired,
// Authorization actions
PermissionDenied,
RoleChanged,
LibraryAccessGranted,
LibraryAccessRevoked,
// User management
UserCreated,
UserUpdated,
UserDeleted,
// Plugin actions
PluginInstalled,
PluginUninstalled,
PluginEnabled,
PluginDisabled,
// Configuration actions
ConfigChanged,
RootDirectoryAdded,
RootDirectoryRemoved,
// Social/Sharing actions
ShareLinkCreated,
ShareLinkAccessed,
// System actions
DatabaseVacuumed,
DatabaseCleared,
ExportCompleted,
IntegrityCheckCompleted,
}
impl fmt::Display for AuditAction {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let s = match self {
// Media actions
Self::Imported => "imported",
Self::Updated => "updated",
Self::Deleted => "deleted",
Self::Tagged => "tagged",
Self::Untagged => "untagged",
Self::AddedToCollection => "added_to_collection",
Self::RemovedFromCollection => "removed_from_collection",
Self::Opened => "opened",
Self::Scanned => "scanned",
// Authentication actions
Self::LoginSuccess => "login_success",
Self::LoginFailed => "login_failed",
Self::Logout => "logout",
Self::SessionExpired => "session_expired",
// Authorization actions
Self::PermissionDenied => "permission_denied",
Self::RoleChanged => "role_changed",
Self::LibraryAccessGranted => "library_access_granted",
Self::LibraryAccessRevoked => "library_access_revoked",
// User management
Self::UserCreated => "user_created",
Self::UserUpdated => "user_updated",
Self::UserDeleted => "user_deleted",
// Plugin actions
Self::PluginInstalled => "plugin_installed",
Self::PluginUninstalled => "plugin_uninstalled",
Self::PluginEnabled => "plugin_enabled",
Self::PluginDisabled => "plugin_disabled",
// Configuration actions
Self::ConfigChanged => "config_changed",
Self::RootDirectoryAdded => "root_directory_added",
Self::RootDirectoryRemoved => "root_directory_removed",
// Social/Sharing actions
Self::ShareLinkCreated => "share_link_created",
Self::ShareLinkAccessed => "share_link_accessed",
// System actions
Self::DatabaseVacuumed => "database_vacuumed",
Self::DatabaseCleared => "database_cleared",
Self::ExportCompleted => "export_completed",
Self::IntegrityCheckCompleted => "integrity_check_completed",
};
write!(f, "{s}")
}
}
/// Pagination parameters for list queries.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Pagination {
/// Offset into the result set (0 in the `Default` value).
pub offset: u64,
/// Page size limit (50 in the `Default` value).
pub limit: u64,
/// Optional sort specification string. NOTE(review): the accepted format is
/// not defined in this file.
pub sort: Option<String>,
}
impl Pagination {
/// Creates a new pagination instance.
#[must_use]
pub const fn new(offset: u64, limit: u64, sort: Option<String>) -> Self {
Self {
offset,
limit,
sort,
}
}
}
impl Default for Pagination {
fn default() -> Self {
Self {
offset: 0,
limit: 50,
sort: None,
}
}
}
/// A saved search query.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SavedSearch {
pub id: Uuid,
pub name: String,
/// The search query text. NOTE(review): the query syntax is not defined in
/// this file.
pub query: String,
/// Optional sort order string stored with the search.
pub sort_order: Option<String>,
pub created_at: DateTime<Utc>,
}
// Book Management Types
/// Metadata for book-type media.
///
/// Carries the same descriptive fields as [`ExtractedBookMetadata`] plus the
/// owning `media_id` and `created_at`/`updated_at` timestamps.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BookMetadata {
/// Media item this metadata belongs to.
pub media_id: MediaId,
pub isbn: Option<String>,
pub isbn13: Option<String>,
pub publisher: Option<String>,
pub language: Option<String>,
pub page_count: Option<i32>,
pub publication_date: Option<chrono::NaiveDate>,
pub series_name: Option<String>,
/// Index within the series; a float, so fractional values are
/// representable.
pub series_index: Option<f64>,
pub format: Option<String>,
pub authors: Vec<AuthorInfo>,
/// Additional identifiers: each string key maps to a list of values.
/// NOTE(review): presumably keyed by identifier scheme; confirm.
pub identifiers: HashMap<String, Vec<String>>,
pub created_at: DateTime<Utc>,
pub updated_at: DateTime<Utc>,
}
/// Information about a book author.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct AuthorInfo {
/// Author name as given to [`AuthorInfo::new`].
pub name: String,
/// Role string; `"author"` when built with [`AuthorInfo::new`].
pub role: String,
/// Optional "file as" string; `None` when built with [`AuthorInfo::new`].
/// NOTE(review): presumably the sortable form of the name; confirm.
pub file_as: Option<String>,
/// Position value; `0` when built with [`AuthorInfo::new`].
pub position: i32,
}
impl AuthorInfo {
/// Creates a new author with the given name.
#[must_use]
pub fn new(name: String) -> Self {
Self {
name,
role: "author".to_string(),
file_as: None,
position: 0,
}
}
/// Sets the author's role.
#[must_use]
pub fn with_role(mut self, role: String) -> Self {
self.role = role;
self
}
#[must_use]
pub fn with_file_as(mut self, file_as: String) -> Self {
self.file_as = Some(file_as);
self
}
#[must_use]
pub const fn with_position(mut self, position: i32) -> Self {
self.position = position;
self
}
}
/// Book metadata extracted from files (without database-specific fields)
///
/// Derives `Default`: every optional field starts as `None` and the
/// collections start empty.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ExtractedBookMetadata {
pub isbn: Option<String>,
pub isbn13: Option<String>,
pub publisher: Option<String>,
pub language: Option<String>,
pub page_count: Option<i32>,
pub publication_date: Option<chrono::NaiveDate>,
pub series_name: Option<String>,
/// Index within the series; a float, so fractional values are
/// representable.
pub series_index: Option<f64>,
pub format: Option<String>,
pub authors: Vec<AuthorInfo>,
/// Additional identifiers: each string key maps to a list of values.
/// NOTE(review): presumably keyed by identifier scheme; confirm.
pub identifiers: HashMap<String, Vec<String>>,
}
/// Reading progress for a book.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadingProgress {
/// Book (media item) this progress refers to.
pub media_id: MediaId,
/// User the progress belongs to.
pub user_id: Uuid,
pub current_page: i32,
/// Total page count, if known.
pub total_pages: Option<i32>,
/// Progress as a percentage. `ReadingProgress::new` derives it from
/// `current_page` and `total_pages`, caps it at 100 and uses 0 when the
/// total is unknown or not positive.
pub progress_percent: f64,
/// Set to the current UTC time by `ReadingProgress::new`.
pub last_read_at: DateTime<Utc>,
}
impl ReadingProgress {
    /// Creates a new reading progress entry stamped with the current UTC time.
    ///
    /// `progress_percent` is `current_page / total_pages * 100`, clamped to
    /// the range `0.0..=100.0`. It is `0.0` when `total_pages` is `None` or
    /// not positive.
    #[must_use]
    pub fn new(
        media_id: MediaId,
        user_id: Uuid,
        current_page: i32,
        total_pages: Option<i32>,
    ) -> Self {
        let progress_percent = match total_pages {
            // Clamp on both sides: a page past the end must not exceed 100 %,
            // and a negative page number must not yield a negative percentage.
            Some(total) if total > 0 => {
                (f64::from(current_page) / f64::from(total) * 100.0).clamp(0.0, 100.0)
            },
            _ => 0.0,
        };

        Self {
            media_id,
            user_id,
            current_page,
            total_pages,
            progress_percent,
            last_read_at: Utc::now(),
        }
    }
}
/// Reading status for a book.
///
/// Serialized by serde with `snake_case` variant names (for example `ToRead`
/// becomes `to_read`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReadingStatus {
ToRead,
Reading,
Completed,
Abandoned,
}
impl fmt::Display for ReadingStatus {
    /// Writes the `snake_case` name of the status: `to_read`, `reading`,
    /// `completed` or `abandoned`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::ToRead => "to_read",
            Self::Reading => "reading",
            Self::Completed => "completed",
            Self::Abandoned => "abandoned",
        };
        f.write_str(label)
    }
}
/// Type of markdown link
///
/// Serialized by serde with `snake_case` variant names (for example
/// `MarkdownLink` becomes `markdown_link`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum LinkType {
/// Wikilink: [[target]] or [[target|display]]
Wikilink,
/// Markdown link: [text](path)
MarkdownLink,
/// Embed: ![[target]]
Embed,
}
impl fmt::Display for LinkType {
    /// Writes the `snake_case` name of the link type: `wikilink`,
    /// `markdown_link` or `embed`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self {
            Self::Wikilink => "wikilink",
            Self::MarkdownLink => "markdown_link",
            Self::Embed => "embed",
        };
        f.write_str(label)
    }
}
impl std::str::FromStr for LinkType {
    type Err = String;

    /// Parses a link type name, ignoring letter case.
    ///
    /// # Errors
    ///
    /// Returns `unknown link type: <input>` (with the input as given) when the
    /// lowercased input is not `wikilink`, `markdown_link` or `embed`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let known = [
            ("wikilink", Self::Wikilink),
            ("markdown_link", Self::MarkdownLink),
            ("embed", Self::Embed),
        ];
        let lowered = s.to_lowercase();

        known
            .iter()
            .find(|(name, _)| *name == lowered)
            .map(|&(_, kind)| kind)
            .ok_or_else(|| format!("unknown link type: {s}"))
    }
}
/// A markdown link extracted from a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MarkdownLink {
pub id: Uuid,
/// Media item the link originates from.
pub source_media_id: MediaId,
/// Raw link target as written in the source (wikilink name or path)
pub target_path: String,
/// Resolved target `media_id` (None if unresolved)
pub target_media_id: Option<MediaId>,
/// Syntactic form of the link (wikilink, markdown link or embed).
pub link_type: LinkType,
/// Display text for the link
pub link_text: Option<String>,
/// Line number in source file (1-indexed)
pub line_number: Option<i32>,
/// Surrounding text for backlink preview
pub context: Option<String>,
pub created_at: DateTime<Utc>,
}
/// Information about a backlink (incoming link).
///
/// NOTE(review): the fields look like a link's data combined with the title
/// and path of its source item; confirm against the query that builds this.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BacklinkInfo {
pub link_id: Uuid,
/// Media item the link comes from.
pub source_id: MediaId,
pub source_title: Option<String>,
/// Path of the source item, carried as a plain string here.
pub source_path: String,
pub link_text: Option<String>,
pub line_number: Option<i32>,
pub context: Option<String>,
pub link_type: LinkType,
}
/// Graph data for visualization.
///
/// Derives `Default`, which yields a graph with no nodes and no edges.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct GraphData {
/// Nodes of the graph.
pub nodes: Vec<GraphNode>,
/// Edges (links) between nodes.
pub edges: Vec<GraphEdge>,
}
/// A node in the graph visualization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphNode {
/// Node identifier as a string. NOTE(review): presumably the value that
/// `GraphEdge::source` / `GraphEdge::target` refer to; confirm.
pub id: String,
pub label: String,
pub title: Option<String>,
/// Media type carried as a plain string rather than a `MediaType` value.
pub media_type: String,
/// Number of outgoing links from this node
pub link_count: u32,
/// Number of incoming links to this node
pub backlink_count: u32,
}
/// An edge (link) in the graph visualization.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GraphEdge {
/// Source endpoint as a string. NOTE(review): presumably a `GraphNode::id`;
/// confirm.
pub source: String,
/// Target endpoint as a string. NOTE(review): presumably a `GraphNode::id`;
/// confirm.
pub target: String,
/// Syntactic form of the underlying link.
pub link_type: LinkType,
}