diff --git a/Cargo.lock b/Cargo.lock
index 18005af..37d570a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4825,6 +4825,7 @@ dependencies = [
  "postgres-native-tls",
  "postgres-types",
  "refinery",
+ "regex",
  "reqwest",
  "rusqlite",
  "serde",
diff --git a/Cargo.toml b/Cargo.toml
index 1c81955..6d40221 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -116,6 +116,7 @@ argon2 = { version = "0.5.3", features = ["std"] }
 
 # Misc
 mime_guess = "2.0.5"
+regex = "1.11"
 
 # WASM runtime for plugins
 wasmtime = { version = "30.0.2", features = ["component-model"] }
diff --git a/crates/pinakes-core/Cargo.toml b/crates/pinakes-core/Cargo.toml
index f809097..3c98760 100644
--- a/crates/pinakes-core/Cargo.toml
+++ b/crates/pinakes-core/Cargo.toml
@@ -37,6 +37,7 @@ image = { workspace = true }
 tokio-util = { workspace = true }
 reqwest = { workspace = true }
 argon2 = { workspace = true }
+regex = { workspace = true }
 moka = { version = "0.12", features = ["future"] }
 
 # Plugin system
diff --git a/crates/pinakes-core/src/books.rs b/crates/pinakes-core/src/books.rs
new file mode 100644
index 0000000..ce44ba2
--- /dev/null
+++ b/crates/pinakes-core/src/books.rs
@@ -0,0 +1,187 @@
+use crate::error::{PinakesError, Result};
+
+/// Normalize an ISBN (ISBN-10 or ISBN-13, with or without separators) to
+/// the canonical 13-digit string used for lookups.
+///
+/// Both checksums are verified, so a mistyped ISBN-10 is rejected rather
+/// than being silently converted into a checksum-"valid" ISBN-13.
+///
+/// # Errors
+/// Returns `PinakesError::InvalidData` when the cleaned input is not 10 or
+/// 13 characters long, or when its check digit does not verify.
+pub fn normalize_isbn(isbn: &str) -> Result<String> {
+    // Drop hyphens, spaces, and anything else that is not a digit or the
+    // 'X' check character used by ISBN-10.
+    let clean: String = isbn
+        .chars()
+        .filter(|c| c.is_ascii_digit() || *c == 'X' || *c == 'x')
+        .collect();
+
+    match clean.len() {
+        10 => {
+            if is_valid_isbn10(&clean) {
+                isbn10_to_isbn13(&clean)
+            } else {
+                Err(PinakesError::InvalidData(format!(
+                    "Invalid ISBN-10 checksum: {}",
+                    isbn
+                )))
+            }
+        }
+        13 => {
+            if is_valid_isbn13(&clean) {
+                Ok(clean)
+            } else {
+                Err(PinakesError::InvalidData(format!(
+                    "Invalid ISBN-13 checksum: {}",
+                    isbn
+                )))
+            }
+        }
+        _ => Err(PinakesError::InvalidData(format!(
+            "Invalid ISBN length: {}",
+            isbn
+        ))),
+    }
+}
+
+/// Validate the ISBN-10 check digit: weighted sum (weights 10..=1) mod 11,
+/// where 'X' stands for 10 and is legal only in the final position.
+/// Any other non-digit character makes the string invalid.
+fn is_valid_isbn10(isbn10: &str) -> bool {
+    if isbn10.len() != 10 {
+        return false;
+    }
+    let mut sum: u32 = 0;
+    for (i, c) in isbn10.chars().enumerate() {
+        let value = match c {
+            '0'..='9' => c as u32 - '0' as u32,
+            // 'X' may only appear as the check digit itself.
+            'X' | 'x' if i == 9 => 10,
+            _ => return false,
+        };
+        sum += value * (10 - i as u32);
+    }
+    sum % 11 == 0
+}
+
+/// Convert a (pre-validated) ISBN-10 to ISBN-13 by prefixing "978" to the
+/// nine data digits and recomputing the check digit; the old ISBN-10 check
+/// digit is discarded.
+fn isbn10_to_isbn13(isbn10: &str) -> Result<String> {
+    if isbn10.len() != 10 {
+        return Err(PinakesError::InvalidData(format!(
+            "ISBN-10 must be 10 characters: {}",
+            isbn10
+        )));
+    }
+
+    let mut isbn13 = format!("978{}", &isbn10[..9]);
+
+    let check_digit = calculate_isbn13_check_digit(&isbn13)?;
+    isbn13.push_str(&check_digit.to_string());
+
+    Ok(isbn13)
+}
+
+/// Calculate the ISBN-13 check digit: digits weighted 1,3,1,3,... summed
+/// mod 10, check digit = (10 - sum) mod 10.
+///
+/// # Errors
+/// Returns `PinakesError::InvalidData` unless the input is exactly 12 ASCII
+/// digits (previously non-digits were silently skipped by `filter_map`,
+/// which could produce a checksum over fewer than 12 digits).
+fn calculate_isbn13_check_digit(isbn_without_check: &str) -> Result<u32> {
+    if isbn_without_check.len() != 12
+        || !isbn_without_check.chars().all(|c| c.is_ascii_digit())
+    {
+        return Err(PinakesError::InvalidData(
+            "ISBN-13 without check digit must be 12 digits".to_string(),
+        ));
+    }
+
+    let sum: u32 = isbn_without_check
+        .chars()
+        .enumerate()
+        .map(|(i, c)| {
+            let d = c.to_digit(10).expect("validated as ASCII digits above");
+            if i % 2 == 0 { d } else { d * 3 }
+        })
+        .sum();
+
+    Ok((10 - (sum % 10)) % 10)
+}
+
+/// Validate an ISBN-13 checksum. Any non-digit character makes the string
+/// invalid — previously non-digits were silently dropped, so a 13-character
+/// string containing 'X' could checksum over only 12 digits and pass.
+fn is_valid_isbn13(isbn13: &str) -> bool {
+    if isbn13.len() != 13 || !isbn13.chars().all(|c| c.is_ascii_digit()) {
+        return false;
+    }
+
+    let sum: u32 = isbn13
+        .chars()
+        .enumerate()
+        .map(|(i, c)| {
+            let d = c.to_digit(10).expect("validated as ASCII digits above");
+            if i % 2 == 0 { d } else { d * 3 }
+        })
+        .sum();
+
+    sum % 10 == 0
+}
+
+/// Extract the first ISBN found in free text (labelled "ISBN…" or bare
+/// hyphenated) and return it normalized to ISBN-13. Candidates whose
+/// checksum does not verify are skipped.
+pub fn extract_isbn_from_text(text: &str) -> Option<String> {
+    use regex::Regex;
+    use std::sync::LazyLock;
+
+    // Compiled once on first use; recompiling six regexes per call is
+    // needlessly expensive. Ordered from most to least specific. A failure
+    // to compile a hard-coded pattern is a programming bug, hence `expect`.
+    static PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
+        [
+            // ISBN followed by colon or "is" with hyphens (most specific)
+            r"ISBN(?:-13)?(?:\s+is|:)?\s*(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)",
+            r"ISBN(?:-10)?(?:\s+is|:)?\s*(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])",
+            // ISBN with just whitespace
+            r"ISBN(?:-13)?\s+(\d{13})",
+            r"ISBN(?:-10)?\s+(\d{9}[\dXx])",
+            // Bare ISBN-13 with hyphens (in case "ISBN" is missing)
+            r"\b(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)\b",
+            // Bare ISBN-10 with hyphens
+            r"\b(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])\b",
+        ]
+        .iter()
+        .map(|p| Regex::new(p).expect("ISBN extraction pattern must compile"))
+        .collect()
+    });
+
+    for pattern in PATTERNS.iter() {
+        if let Some(isbn) = pattern.captures(text).and_then(|c| c.get(1)) {
+            // Only accept candidates whose checksum verifies.
+            if let Ok(normalized) = normalize_isbn(isbn.as_str()) {
+                return Some(normalized);
+            }
+        }
+    }
+
+    None
+}
+
+/// Parse an author name into "Last, First" form for sorting.
+/// A name already containing a comma is assumed to be pre-sorted.
+pub fn parse_author_file_as(name: &str) -> String {
+    if name.contains(',') {
+        return name.to_string();
+    }
+
+    let parts: Vec<&str> = name.split_whitespace().collect();
+
+    match parts.len() {
+        0 => String::new(),
+        1 => parts[0].to_string(),
+        _ => {
+            // Last token is the surname; everything before it is given names.
+            let surname = parts.last().unwrap();
+            let given_names = parts[..parts.len() - 1].join(" ");
+            format!("{}, {}", surname, given_names)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_normalize_isbn10() {
+        assert_eq!(normalize_isbn("0-306-40615-2").unwrap(), "9780306406157");
+        assert_eq!(normalize_isbn("0306406152").unwrap(), "9780306406157");
+    }
+
+    #[test]
+    fn test_normalize_isbn13() {
+        assert_eq!(
+            normalize_isbn("978-0-306-40615-7").unwrap(),
+            "9780306406157"
+        );
+        assert_eq!(normalize_isbn("9780306406157").unwrap(), "9780306406157");
+    }
+
+    #[test]
+    fn test_invalid_isbn() {
+        assert!(normalize_isbn("123").is_err());
+        assert!(normalize_isbn("123456789012345").is_err());
+        // Bad check digits must be rejected, not converted or accepted.
+        assert!(normalize_isbn("0306406153").is_err());
+        assert!(normalize_isbn("9780306406150").is_err());
+        // 'X' is never legal inside an ISBN-13.
+        assert!(normalize_isbn("978030640615X").is_err());
+    }
+
+    #[test]
+    fn test_extract_isbn() {
+        let text = "This book's ISBN is 978-0-306-40615-7 and was published in 2020.";
+        assert_eq!(
+            extract_isbn_from_text(text),
+            Some("9780306406157".to_string())
+        );
+
+        let text2 = "ISBN-10: 0-306-40615-2";
+        assert_eq!(
+            extract_isbn_from_text(text2),
+            Some("9780306406157".to_string())
+        );
+
+        let text3 = "No ISBN here";
+        assert_eq!(extract_isbn_from_text(text3), None);
+    }
+
+    #[test]
+    fn test_parse_author_file_as() {
+        assert_eq!(parse_author_file_as("J.K. Rowling"), "Rowling, J.K.");
+        assert_eq!(parse_author_file_as("Neil Gaiman"), "Gaiman, Neil");
+        assert_eq!(parse_author_file_as("Rowling, J.K."), "Rowling, J.K.");
+        assert_eq!(parse_author_file_as("Prince"), "Prince");
+    }
+}
diff --git a/crates/pinakes-core/src/error.rs b/crates/pinakes-core/src/error.rs
index 67a39c9..5e03a13 100644
--- a/crates/pinakes-core/src/error.rs
+++ b/crates/pinakes-core/src/error.rs
@@ -43,6 +43,9 @@ pub enum PinakesError {
     #[error("invalid operation: {0}")]
     InvalidOperation(String),
 
+    #[error("invalid data: {0}")]
+    InvalidData(String),
+
     #[error("authentication error: {0}")]
     Authentication(String),
diff --git a/crates/pinakes-core/src/lib.rs b/crates/pinakes-core/src/lib.rs
index ff98fe2..1db2c28 100644
--- a/crates/pinakes-core/src/lib.rs
+++ b/crates/pinakes-core/src/lib.rs
@@ -1,5 +1,6 @@
 pub mod analytics;
 pub mod audit;
+pub mod books;
 pub mod cache;
 pub mod collections;
 pub mod config;
diff --git a/crates/pinakes-core/src/model.rs b/crates/pinakes-core/src/model.rs
index 01624a4..1688c05 100644
--- a/crates/pinakes-core/src/model.rs
+++ b/crates/pinakes-core/src/model.rs
@@ -269,3 +269,115 @@ pub struct SavedSearch {
     pub sort_order: Option<String>,
     pub created_at: DateTime<Utc>,
 }
+
+// Book Management Types
+
+/// Book-specific metadata supplementing a generic media item (see the V12
+/// `book_metadata` migration for the backing columns).
+/// NOTE(review): generic parameters were reconstructed from the migration
+/// column types after transit mangling — confirm against the repository layer.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BookMetadata {
+    pub media_id: MediaId,
+    pub isbn: Option<String>,
+    /// Normalized ISBN-13 used for lookups.
+    pub isbn13: Option<String>,
+    pub publisher: Option<String>,
+    /// ISO 639-1 language code.
+    pub language: Option<String>,
+    pub page_count: Option<i32>,
+    pub publication_date: Option<chrono::NaiveDate>,
+    pub series_name: Option<String>,
+    /// Fractional series positions (1.5 etc.) are supported.
+    pub series_index: Option<f64>,
+    /// 'epub', 'pdf', 'mobi', 'azw3'.
+    pub format: Option<String>,
+    pub authors: Vec<AuthorInfo>,
+    /// identifier_type (isbn, asin, doi, ...) -> identifier values.
+    pub identifiers: HashMap<String, Vec<String>>,
+    pub created_at: DateTime<Utc>,
+    pub updated_at: DateTime<Utc>,
+}
+
+/// One contributor credit on a book (author, translator, editor, ...).
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct AuthorInfo {
+    pub name: String,
+    /// Contributor role; defaults to "author".
+    pub role: String,
+    /// "Last, First" sort key, when known.
+    pub file_as: Option<String>,
+    /// Display ordering among a book's contributors.
+    pub position: i32,
+}
+
+impl AuthorInfo {
+    /// Create a plain "author" credit; refine with the `with_*` builders.
+    pub fn new(name: String) -> Self {
+        Self {
+            name,
+            role: "author".to_string(),
+            file_as: None,
+            position: 0,
+        }
+    }
+
+    pub fn with_role(mut self, role: String) -> Self {
+        self.role = role;
+        self
+    }
+
+    pub fn with_file_as(mut self, file_as: String) -> Self {
+        self.file_as = Some(file_as);
+        self
+    }
+
+    pub fn with_position(mut self, position: i32) -> Self {
+        self.position = position;
+        self
+    }
+}
+
+/// A user's reading position within a book.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct ReadingProgress {
+    pub media_id: MediaId,
+    pub user_id: Uuid,
+    pub current_page: i32,
+    pub total_pages: Option<i32>,
+    /// Derived percentage in [0, 100]; 0 when total_pages is unknown or 0.
+    pub progress_percent: f64,
+    pub last_read_at: DateTime<Utc>,
+}
+
+impl ReadingProgress {
+    /// Build a progress record, deriving `progress_percent` from the page
+    /// counts and stamping `last_read_at` with the current time.
+    pub fn new(
+        media_id: MediaId,
+        user_id: Uuid,
+        current_page: i32,
+        total_pages: Option<i32>,
+    ) -> Self {
+        let progress_percent = match total_pages {
+            // Clamp both ends so out-of-range page numbers can't yield a
+            // negative or >100% value (the old code only capped at 100).
+            Some(total) if total > 0 => {
+                (current_page as f64 / total as f64 * 100.0).clamp(0.0, 100.0)
+            }
+            _ => 0.0,
+        };
+
+        Self {
+            media_id,
+            user_id,
+            current_page,
+            total_pages,
+            progress_percent,
+            last_read_at: Utc::now(),
+        }
+    }
+}
+
+/// Shelf state of a book for a given user.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum ReadingStatus {
+    ToRead,
+    Reading,
+    Completed,
+    Abandoned,
+}
+
+impl fmt::Display for ReadingStatus {
+    // Must stay in sync with the serde `snake_case` renaming above.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::ToRead => write!(f, "to_read"),
+            Self::Reading => write!(f, "reading"),
+            Self::Completed => write!(f, "completed"),
+            Self::Abandoned => write!(f, "abandoned"),
+        }
+    }
+}
diff --git a/migrations/postgres/V12__book_management.sql b/migrations/postgres/V12__book_management.sql
new file mode 100644
index 0000000..2452032
--- /dev/null
+++ b/migrations/postgres/V12__book_management.sql
@@ -0,0 +1,60 @@
+-- V12: Book Management Schema (PostgreSQL)
+-- Adds comprehensive book metadata tracking, authors, and identifiers
+
+-- Book metadata (supplements media_items for EPUB/PDF/MOBI)
+CREATE TABLE book_metadata (
+    media_id UUID PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE,
+    isbn TEXT,
+    isbn13 TEXT,                       -- Normalized ISBN-13 for lookups
+    publisher TEXT,
+    language TEXT,                     -- ISO 639-1 code
+    page_count INTEGER,
+    publication_date DATE,
+    series_name TEXT,
+    series_index DOUBLE PRECISION,     -- Supports 1.5, etc.
+    format TEXT,                       -- 'epub', 'pdf', 'mobi', 'azw3'
+    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
+    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
+);
+
+CREATE INDEX idx_book_isbn13 ON book_metadata(isbn13);
+CREATE INDEX idx_book_series ON book_metadata(series_name, series_index);
+CREATE INDEX idx_book_publisher ON book_metadata(publisher);
+CREATE INDEX idx_book_language ON book_metadata(language);
+
+-- Multiple authors per book (many-to-many)
+CREATE TABLE book_authors (
+    media_id UUID NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
+    author_name TEXT NOT NULL,
+    author_sort TEXT,                  -- "Last, First" for sorting
+    role TEXT NOT NULL DEFAULT 'author', -- author, translator, editor, illustrator
+    position INTEGER NOT NULL DEFAULT 0,
+    PRIMARY KEY (media_id, author_name, role)
+);
+
+CREATE INDEX idx_book_authors_name ON book_authors(author_name);
+CREATE INDEX idx_book_authors_sort ON book_authors(author_sort);
+
+-- Multiple identifiers (ISBN variants, ASIN, DOI, etc.)
+CREATE TABLE book_identifiers (
+    media_id UUID NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
+    identifier_type TEXT NOT NULL,     -- isbn, isbn13, asin, doi, lccn, oclc
+    identifier_value TEXT NOT NULL,
+    PRIMARY KEY (media_id, identifier_type, identifier_value)
+);
+
+CREATE INDEX idx_book_identifiers ON book_identifiers(identifier_type, identifier_value);
+
+-- Trigger to keep updated_at current on book_metadata changes.
+-- (Function and trigger share a name; legal in PostgreSQL since they live
+-- in different namespaces.)
+CREATE OR REPLACE FUNCTION update_book_metadata_timestamp()
+RETURNS TRIGGER AS $$
+BEGIN
+    NEW.updated_at = NOW();
+    RETURN NEW;
+END;
+$$ LANGUAGE plpgsql;
+
+CREATE TRIGGER update_book_metadata_timestamp
+    BEFORE UPDATE ON book_metadata
+    FOR EACH ROW
+    EXECUTE FUNCTION update_book_metadata_timestamp();
diff --git a/migrations/sqlite/V12__book_management.sql b/migrations/sqlite/V12__book_management.sql
new file mode 100644
index 0000000..9823b87
--- /dev/null
+++ b/migrations/sqlite/V12__book_management.sql
@@ -0,0 +1,54 @@
+-- V12: Book Management Schema
+-- Adds comprehensive book metadata tracking, authors, and identifiers
+
+-- Book metadata (supplements media_items for EPUB/PDF/MOBI)
+CREATE TABLE book_metadata (
+    media_id TEXT PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE,
+    isbn TEXT,
+    isbn13 TEXT,                       -- Normalized ISBN-13 for lookups
+    publisher TEXT,
+    language TEXT,                     -- ISO 639-1 code
+    page_count INTEGER,
+    publication_date TEXT,             -- ISO 8601 date string
+    series_name TEXT,
+    series_index REAL,                 -- Supports 1.5, etc.
+    format TEXT,                       -- 'epub', 'pdf', 'mobi', 'azw3'
+    created_at TEXT NOT NULL DEFAULT (datetime('now')),
+    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
+) STRICT;
+
+CREATE INDEX idx_book_isbn13 ON book_metadata(isbn13);
+CREATE INDEX idx_book_series ON book_metadata(series_name, series_index);
+CREATE INDEX idx_book_publisher ON book_metadata(publisher);
+CREATE INDEX idx_book_language ON book_metadata(language);
+
+-- Multiple authors per book (many-to-many)
+CREATE TABLE book_authors (
+    media_id TEXT NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
+    author_name TEXT NOT NULL,
+    author_sort TEXT,                  -- "Last, First" for sorting
+    role TEXT NOT NULL DEFAULT 'author', -- author, translator, editor, illustrator
+    position INTEGER NOT NULL DEFAULT 0,
+    PRIMARY KEY (media_id, author_name, role)
+) STRICT;
+
+CREATE INDEX idx_book_authors_name ON book_authors(author_name);
+CREATE INDEX idx_book_authors_sort ON book_authors(author_sort);
+
+-- Multiple identifiers (ISBN variants, ASIN, DOI, etc.)
+CREATE TABLE book_identifiers (
+    media_id TEXT NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
+    identifier_type TEXT NOT NULL,     -- isbn, isbn13, asin, doi, lccn, oclc
+    identifier_value TEXT NOT NULL,
+    PRIMARY KEY (media_id, identifier_type, identifier_value)
+) STRICT;
+
+CREATE INDEX idx_book_identifiers ON book_identifiers(identifier_type, identifier_value);
+
+-- Trigger to keep updated_at current on book_metadata changes.
+-- The WHEN guard skips rows whose updated_at was already changed by the
+-- triggering UPDATE (preserving explicit timestamps) and prevents the
+-- trigger's own UPDATE from re-firing under PRAGMA recursive_triggers.
+CREATE TRIGGER update_book_metadata_timestamp
+    AFTER UPDATE ON book_metadata
+    FOR EACH ROW
+    WHEN NEW.updated_at = OLD.updated_at
+BEGIN
+    UPDATE book_metadata SET updated_at = datetime('now') WHERE media_id = NEW.media_id;
+END;