pinakes-core: book management foundation

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I379005c29a79a637a8e1fc3709907cd36a6a6964
This commit is contained in:
raf 2026-02-04 22:53:47 +03:00
commit bda36ac152
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
9 changed files with 420 additions and 0 deletions

1
Cargo.lock generated
View file

@ -4825,6 +4825,7 @@ dependencies = [
"postgres-native-tls",
"postgres-types",
"refinery",
"regex",
"reqwest",
"rusqlite",
"serde",

View file

@ -116,6 +116,7 @@ argon2 = { version = "0.5.3", features = ["std"] }
# Misc
mime_guess = "2.0.5"
regex = "1.11"
# WASM runtime for plugins
wasmtime = { version = "30.0.2", features = ["component-model"] }

View file

@ -37,6 +37,7 @@ image = { workspace = true }
tokio-util = { workspace = true }
reqwest = { workspace = true }
argon2 = { workspace = true }
regex = { workspace = true }
moka = { version = "0.12", features = ["future"] }
# Plugin system

View file

@ -0,0 +1,187 @@
use crate::error::{PinakesError, Result};
/// Normalize ISBN to ISBN-13 format
///
/// Strips separators, then either validates a 13-character input's checksum
/// or converts a 10-character input via [`isbn10_to_isbn13`]. Any other
/// cleaned length is rejected.
pub fn normalize_isbn(isbn: &str) -> Result<String> {
    // Drop hyphens, spaces, and anything else that is not a digit or the
    // ISBN-10 check character 'X'/'x'.
    let clean: String = isbn
        .chars()
        .filter(|c| c.is_ascii_digit() || matches!(c, 'X' | 'x'))
        .collect();
    if clean.len() == 10 {
        return isbn10_to_isbn13(&clean);
    }
    if clean.len() == 13 {
        return if is_valid_isbn13(&clean) {
            Ok(clean)
        } else {
            Err(PinakesError::InvalidData(format!(
                "Invalid ISBN-13 checksum: {}",
                isbn
            )))
        };
    }
    Err(PinakesError::InvalidData(format!(
        "Invalid ISBN length: {}",
        isbn
    )))
}
/// Convert ISBN-10 to ISBN-13
///
/// Validates the ISBN-10 check digit before converting, so a corrupted
/// ISBN-10 is rejected instead of being silently laundered into a
/// syntactically valid (but wrong) ISBN-13. The validation also guarantees
/// the input is ASCII, making the byte slice below panic-free.
fn isbn10_to_isbn13(isbn10: &str) -> Result<String> {
    if isbn10.len() != 10 {
        return Err(PinakesError::InvalidData(format!(
            "ISBN-10 must be 10 characters: {}",
            isbn10
        )));
    }
    if !is_valid_isbn10(isbn10) {
        return Err(PinakesError::InvalidData(format!(
            "Invalid ISBN-10 checksum: {}",
            isbn10
        )));
    }
    // Drop the ISBN-10 check digit, add the 978 prefix, and recompute the
    // check digit under the ISBN-13 scheme.
    let mut isbn13 = format!("978{}", &isbn10[..9]);
    let check_digit = calculate_isbn13_check_digit(&isbn13)?;
    isbn13.push_str(&check_digit.to_string());
    Ok(isbn13)
}

/// Validate an ISBN-10 checksum: sum of digit[i] * (10 - i) must be 0 mod 11.
/// 'X'/'x' (value 10) is only legal as the final check character; any other
/// non-digit makes the ISBN invalid.
fn is_valid_isbn10(isbn10: &str) -> bool {
    if isbn10.len() != 10 {
        return false;
    }
    let mut sum: u32 = 0;
    for (i, c) in isbn10.chars().enumerate() {
        let value = match c {
            'X' | 'x' if i == 9 => 10,
            _ => match c.to_digit(10) {
                Some(d) => d,
                None => return false,
            },
        };
        sum += value * (10 - i as u32);
    }
    sum % 11 == 0
}
/// Calculate ISBN-13 check digit
///
/// `isbn_without_check` must be exactly the 12 leading digits. Any non-digit
/// character is now an error; previously non-digits were silently skipped by
/// `filter_map`, producing a checksum computed over fewer than 12 digits.
fn calculate_isbn13_check_digit(isbn_without_check: &str) -> Result<u32> {
    if isbn_without_check.len() != 12 {
        return Err(PinakesError::InvalidData(
            "ISBN-13 without check digit must be 12 digits".to_string(),
        ));
    }
    let mut sum: u32 = 0;
    for (i, c) in isbn_without_check.chars().enumerate() {
        let Some(d) = c.to_digit(10) else {
            return Err(PinakesError::InvalidData(format!(
                "ISBN-13 must contain only digits: {}",
                isbn_without_check
            )));
        };
        // ISBN-13 weights alternate 1, 3, 1, 3, ... across the digits.
        sum += if i % 2 == 0 { d } else { d * 3 };
    }
    Ok((10 - (sum % 10)) % 10)
}
/// Validate ISBN-13 checksum
///
/// Requires exactly 13 ASCII digits. Any non-digit character makes the ISBN
/// invalid; previously non-digits were skipped by `filter_map`, so a
/// malformed string containing 'X' could still checksum to 0 mod 10 and pass
/// (e.g. "978X306406157").
fn is_valid_isbn13(isbn13: &str) -> bool {
    if isbn13.len() != 13 {
        return false;
    }
    let mut sum: u32 = 0;
    for (i, c) in isbn13.chars().enumerate() {
        match c.to_digit(10) {
            // Weights alternate 1, 3, 1, 3, ... across the 13 digits.
            Some(d) => sum += if i % 2 == 0 { d } else { d * 3 },
            None => return false,
        }
    }
    sum % 10 == 0
}
/// Extract ISBN from text (searches for ISBN-10 or ISBN-13 patterns)
///
/// Patterns are tried from most to least specific; the first candidate whose
/// capture normalizes to a valid ISBN-13 wins. Returns `None` when nothing in
/// `text` both matches a pattern and passes checksum validation.
pub fn extract_isbn_from_text(text: &str) -> Option<String> {
    use regex::Regex;
    use std::sync::LazyLock;

    // Compile the patterns once per process instead of on every call —
    // regex compilation is far more expensive than matching.
    static PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
        [
            // ISBN followed by colon or "is" with hyphens (most specific)
            r"ISBN(?:-13)?(?:\s+is|:)?\s*(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)",
            r"ISBN(?:-10)?(?:\s+is|:)?\s*(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])",
            // ISBN with just whitespace
            r"ISBN(?:-13)?\s+(\d{13})",
            r"ISBN(?:-10)?\s+(\d{9}[\dXx])",
            // Bare ISBN-13 with hyphens (in case "ISBN" is missing)
            r"\b(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)\b",
            // Bare ISBN-10 with hyphens
            r"\b(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])\b",
        ]
        .iter()
        .map(|p| Regex::new(p).expect("hard-coded ISBN pattern must compile"))
        .collect()
    });

    for pattern in PATTERNS.iter() {
        // Only capture group 1 (the ISBN itself, without the "ISBN" label)
        // is of interest.
        if let Some(candidate) = pattern.captures(text).and_then(|c| c.get(1)) {
            // A candidate that matches the shape but fails checksum
            // validation is skipped, falling through to the next pattern.
            if let Ok(normalized) = normalize_isbn(candidate.as_str()) {
                return Some(normalized);
            }
        }
    }
    None
}
/// Parse author name into "Last, First" format for sorting
///
/// A name that already contains a comma is assumed to be pre-sorted and is
/// returned unchanged. Otherwise the final whitespace-separated word is
/// treated as the surname and everything before it as the given names.
pub fn parse_author_file_as(name: &str) -> String {
    // Already in "Last, First" form — pass through untouched.
    if name.contains(',') {
        return name.to_string();
    }
    let words: Vec<&str> = name.split_whitespace().collect();
    match words.split_last() {
        // Empty or whitespace-only input.
        None => String::new(),
        // Single-word name (mononym): nothing to reorder.
        Some((only, [])) => (*only).to_string(),
        // Final word is the surname; the rest are given names.
        Some((surname, given)) => format!("{}, {}", surname, given.join(" ")),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ISBN-10 inputs, hyphenated or bare, normalize to the same ISBN-13.
    #[test]
    fn test_normalize_isbn10() {
        assert_eq!(normalize_isbn("0-306-40615-2").unwrap(), "9780306406157");
        assert_eq!(normalize_isbn("0306406152").unwrap(), "9780306406157");
    }

    // ISBN-13 inputs are returned with separators stripped.
    #[test]
    fn test_normalize_isbn13() {
        assert_eq!(
            normalize_isbn("978-0-306-40615-7").unwrap(),
            "9780306406157"
        );
        assert_eq!(normalize_isbn("9780306406157").unwrap(), "9780306406157");
    }

    // Inputs that are neither 10 nor 13 characters after cleaning error out.
    #[test]
    fn test_invalid_isbn() {
        assert!(normalize_isbn("123").is_err());
        assert!(normalize_isbn("123456789012345").is_err());
    }

    // Extraction handles labelled ISBN-13 and ISBN-10 forms, normalizing
    // both to ISBN-13; text with no ISBN yields None.
    #[test]
    fn test_extract_isbn() {
        let text = "This book's ISBN is 978-0-306-40615-7 and was published in 2020.";
        assert_eq!(
            extract_isbn_from_text(text),
            Some("9780306406157".to_string())
        );
        let text2 = "ISBN-10: 0-306-40615-2";
        assert_eq!(
            extract_isbn_from_text(text2),
            Some("9780306406157".to_string())
        );
        let text3 = "No ISBN here";
        assert_eq!(extract_isbn_from_text(text3), None);
    }

    // "First Last" flips to "Last, First"; names already containing a comma
    // and single-word names pass through unchanged.
    #[test]
    fn test_parse_author_file_as() {
        assert_eq!(parse_author_file_as("J.K. Rowling"), "Rowling, J.K.");
        assert_eq!(parse_author_file_as("Neil Gaiman"), "Gaiman, Neil");
        assert_eq!(parse_author_file_as("Rowling, J.K."), "Rowling, J.K.");
        assert_eq!(parse_author_file_as("Prince"), "Prince");
    }
}

View file

@ -43,6 +43,9 @@ pub enum PinakesError {
#[error("invalid operation: {0}")]
InvalidOperation(String),
#[error("invalid data: {0}")]
InvalidData(String),
#[error("authentication error: {0}")]
Authentication(String),

View file

@ -1,5 +1,6 @@
pub mod analytics;
pub mod audit;
pub mod books;
pub mod cache;
pub mod collections;
pub mod config;

View file

@ -269,3 +269,115 @@ pub struct SavedSearch {
pub sort_order: Option<String>,
pub created_at: DateTime<Utc>,
}
// Book Management Types

/// Book-specific metadata supplementing a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BookMetadata {
    // Media item this metadata belongs to.
    pub media_id: MediaId,
    // ISBN as originally provided (may be ISBN-10).
    pub isbn: Option<String>,
    // Normalized ISBN-13 used for lookups (see the migration's isbn13 column).
    pub isbn13: Option<String>,
    pub publisher: Option<String>,
    // Language code — ISO 639-1 per the migration's column comment.
    pub language: Option<String>,
    pub page_count: Option<i32>,
    pub publication_date: Option<chrono::NaiveDate>,
    // Series name plus fractional position within it (e.g. 1.5),
    // mirroring the DOUBLE PRECISION series_index column.
    pub series_name: Option<String>,
    pub series_index: Option<f64>,
    // File format, e.g. 'epub', 'pdf', 'mobi', 'azw3' per the migration.
    pub format: Option<String>,
    // Contributor credits; presumably ordered by AuthorInfo::position —
    // TODO confirm against the query layer.
    pub authors: Vec<AuthorInfo>,
    // Additional identifiers keyed by type (isbn, asin, doi, lccn, oclc per
    // the book_identifiers table), multiple values allowed per type.
    pub identifiers: HashMap<String, Vec<String>>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}
/// A single contributor credit on a book.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct AuthorInfo {
    pub name: String,
    pub role: String,
    pub file_as: Option<String>,
    pub position: i32,
}

impl AuthorInfo {
    /// Create a contributor with the default role "author", no explicit
    /// sort name, and position 0; refine with the `with_*` builders.
    pub fn new(name: String) -> Self {
        Self {
            name,
            role: "author".to_string(),
            file_as: None,
            position: 0,
        }
    }

    /// Replace the contributor role.
    pub fn with_role(self, role: String) -> Self {
        Self { role, ..self }
    }

    /// Set an explicit "Last, First" sort name.
    pub fn with_file_as(self, file_as: String) -> Self {
        Self {
            file_as: Some(file_as),
            ..self
        }
    }

    /// Set the ordering position among the book's contributors.
    pub fn with_position(self, position: i32) -> Self {
        Self { position, ..self }
    }
}
/// A user's position within a book.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadingProgress {
    pub media_id: MediaId,
    pub user_id: Uuid,
    pub current_page: i32,
    // None when the total is unknown (e.g. not yet extracted from the file).
    pub total_pages: Option<i32>,
    // Derived completion percentage, always within [0, 100].
    pub progress_percent: f64,
    pub last_read_at: DateTime<Utc>,
}

impl ReadingProgress {
    /// Build a progress record, deriving `progress_percent` from the page
    /// counts and stamping `last_read_at` with the current time.
    ///
    /// The percentage is clamped to [0, 100] so out-of-range inputs (a
    /// negative current page, or a current page past the end) cannot produce
    /// nonsensical values; it falls back to 0 when the total is unknown or
    /// non-positive. (The original only capped the upper bound with
    /// `.min(100.0)`, letting a negative `current_page` yield a negative
    /// percentage.)
    pub fn new(
        media_id: MediaId,
        user_id: Uuid,
        current_page: i32,
        total_pages: Option<i32>,
    ) -> Self {
        let progress_percent = match total_pages {
            Some(total) if total > 0 => {
                (current_page as f64 / total as f64 * 100.0).clamp(0.0, 100.0)
            }
            _ => 0.0,
        };
        Self {
            media_id,
            user_id,
            current_page,
            total_pages,
            progress_percent,
            last_read_at: Utc::now(),
        }
    }
}
/// Lifecycle state of a user's relationship with a book.
///
/// Serialized as snake_case by serde; `Display` emits the same labels so the
/// two textual representations stay in sync (e.g. `ToRead` -> "to_read").
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReadingStatus {
    ToRead,
    Reading,
    Completed,
    Abandoned,
}

impl fmt::Display for ReadingStatus {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Keep these labels identical to the serde snake_case names above.
        let label = match self {
            Self::ToRead => "to_read",
            Self::Reading => "reading",
            Self::Completed => "completed",
            Self::Abandoned => "abandoned",
        };
        f.write_str(label)
    }
}

View file

@ -0,0 +1,60 @@
-- V12: Book Management Schema (PostgreSQL)
-- Adds comprehensive book metadata tracking, authors, and identifiers

-- Book metadata (supplements media_items for EPUB/PDF/MOBI)
-- One row per media item; removed automatically when the item is deleted
-- via ON DELETE CASCADE.
CREATE TABLE book_metadata (
    media_id UUID PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE,
    isbn TEXT,
    isbn13 TEXT, -- Normalized ISBN-13 for lookups
    publisher TEXT,
    language TEXT, -- ISO 639-1 code
    page_count INTEGER,
    publication_date DATE,
    series_name TEXT,
    series_index DOUBLE PRECISION, -- Supports 1.5, etc.
    format TEXT, -- 'epub', 'pdf', 'mobi', 'azw3'
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_book_isbn13 ON book_metadata(isbn13);
-- Composite index serves both series lookups and in-series ordering.
CREATE INDEX idx_book_series ON book_metadata(series_name, series_index);
CREATE INDEX idx_book_publisher ON book_metadata(publisher);
CREATE INDEX idx_book_language ON book_metadata(language);

-- Multiple authors per book (many-to-many)
-- The composite primary key lets the same person appear once per role on a
-- given book (e.g. both author and illustrator).
CREATE TABLE book_authors (
    media_id UUID NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
    author_name TEXT NOT NULL,
    author_sort TEXT, -- "Last, First" for sorting
    role TEXT NOT NULL DEFAULT 'author', -- author, translator, editor, illustrator
    position INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (media_id, author_name, role)
);
CREATE INDEX idx_book_authors_name ON book_authors(author_name);
CREATE INDEX idx_book_authors_sort ON book_authors(author_sort);

-- Multiple identifiers (ISBN variants, ASIN, DOI, etc.)
-- The composite key allows several values of the same identifier type.
CREATE TABLE book_identifiers (
    media_id UUID NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
    identifier_type TEXT NOT NULL, -- isbn, isbn13, asin, doi, lccn, oclc
    identifier_value TEXT NOT NULL,
    PRIMARY KEY (media_id, identifier_type, identifier_value)
);
CREATE INDEX idx_book_identifiers ON book_identifiers(identifier_type, identifier_value);

-- Trigger to update updated_at on book_metadata changes
CREATE OR REPLACE FUNCTION update_book_metadata_timestamp()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- The trigger shares its name with the function; legal in PostgreSQL since
-- triggers and functions live in separate namespaces.
CREATE TRIGGER update_book_metadata_timestamp
BEFORE UPDATE ON book_metadata
FOR EACH ROW
EXECUTE FUNCTION update_book_metadata_timestamp();

View file

@ -0,0 +1,54 @@
-- V12: Book Management Schema
-- Adds comprehensive book metadata tracking, authors, and identifiers
-- SQLite variant: TEXT ids/dates, REAL for fractional series positions, and
-- STRICT tables so column type affinities are enforced.

-- Book metadata (supplements media_items for EPUB/PDF/MOBI)
-- One row per media item; removed automatically via ON DELETE CASCADE
-- (requires PRAGMA foreign_keys=ON in the connection).
CREATE TABLE book_metadata (
    media_id TEXT PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE,
    isbn TEXT,
    isbn13 TEXT, -- Normalized ISBN-13 for lookups
    publisher TEXT,
    language TEXT, -- ISO 639-1 code
    page_count INTEGER,
    publication_date TEXT, -- ISO 8601 date string
    series_name TEXT,
    series_index REAL, -- Supports 1.5, etc.
    format TEXT, -- 'epub', 'pdf', 'mobi', 'azw3'
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
) STRICT;
CREATE INDEX idx_book_isbn13 ON book_metadata(isbn13);
-- Composite index serves both series lookups and in-series ordering.
CREATE INDEX idx_book_series ON book_metadata(series_name, series_index);
CREATE INDEX idx_book_publisher ON book_metadata(publisher);
CREATE INDEX idx_book_language ON book_metadata(language);

-- Multiple authors per book (many-to-many)
-- Composite primary key lets the same person appear once per role.
CREATE TABLE book_authors (
    media_id TEXT NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
    author_name TEXT NOT NULL,
    author_sort TEXT, -- "Last, First" for sorting
    role TEXT NOT NULL DEFAULT 'author', -- author, translator, editor, illustrator
    position INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (media_id, author_name, role)
) STRICT;
CREATE INDEX idx_book_authors_name ON book_authors(author_name);
CREATE INDEX idx_book_authors_sort ON book_authors(author_sort);

-- Multiple identifiers (ISBN variants, ASIN, DOI, etc.)
CREATE TABLE book_identifiers (
    media_id TEXT NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
    identifier_type TEXT NOT NULL, -- isbn, isbn13, asin, doi, lccn, oclc
    identifier_value TEXT NOT NULL,
    PRIMARY KEY (media_id, identifier_type, identifier_value)
) STRICT;
CREATE INDEX idx_book_identifiers ON book_identifiers(identifier_type, identifier_value);

-- Trigger to update updated_at on book_metadata changes
-- NOTE(review): this AFTER UPDATE trigger issues an UPDATE on the same
-- table; it relies on SQLite's default PRAGMA recursive_triggers=OFF to
-- avoid re-firing itself — confirm connections never enable it.
CREATE TRIGGER update_book_metadata_timestamp
AFTER UPDATE ON book_metadata
FOR EACH ROW
BEGIN
UPDATE book_metadata SET updated_at = datetime('now') WHERE media_id = NEW.media_id;
END;