pinakes-core: book management foundation

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I379005c29a79a637a8e1fc3709907cd36a6a6964
This commit is contained in:
raf 2026-02-04 22:53:47 +03:00
commit bda36ac152
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
9 changed files with 420 additions and 0 deletions

1
Cargo.lock generated
View file

@ -4825,6 +4825,7 @@ dependencies = [
"postgres-native-tls",
"postgres-types",
"refinery",
"regex",
"reqwest",
"rusqlite",
"serde",

View file

@ -116,6 +116,7 @@ argon2 = { version = "0.5.3", features = ["std"] }
# Misc
mime_guess = "2.0.5"
regex = "1.11"
# WASM runtime for plugins
wasmtime = { version = "30.0.2", features = ["component-model"] }

View file

@ -37,6 +37,7 @@ image = { workspace = true }
tokio-util = { workspace = true }
reqwest = { workspace = true }
argon2 = { workspace = true }
regex = { workspace = true }
moka = { version = "0.12", features = ["future"] }
# Plugin system

View file

@ -0,0 +1,187 @@
use crate::error::{PinakesError, Result};
/// Normalize ISBN to ISBN-13 format
///
/// Strips separators, then either validates a 13-character input's checksum
/// or converts a 10-character input via [`isbn10_to_isbn13`]. Any other
/// cleaned length is rejected.
pub fn normalize_isbn(isbn: &str) -> Result<String> {
    // Drop hyphens, spaces, and anything else that is not a digit or the
    // ISBN-10 check character 'X'/'x'.
    let clean: String = isbn
        .chars()
        .filter(|c| c.is_ascii_digit() || matches!(c, 'X' | 'x'))
        .collect();
    if clean.len() == 10 {
        return isbn10_to_isbn13(&clean);
    }
    if clean.len() == 13 {
        return if is_valid_isbn13(&clean) {
            Ok(clean)
        } else {
            Err(PinakesError::InvalidData(format!(
                "Invalid ISBN-13 checksum: {}",
                isbn
            )))
        };
    }
    Err(PinakesError::InvalidData(format!(
        "Invalid ISBN length: {}",
        isbn
    )))
}
/// Convert ISBN-10 to ISBN-13
///
/// Validates the ISBN-10 check digit before converting, so a corrupted
/// ISBN-10 is rejected instead of being silently laundered into a
/// syntactically valid (but wrong) ISBN-13. The validation also guarantees
/// the input is ASCII, making the byte slice below panic-free.
fn isbn10_to_isbn13(isbn10: &str) -> Result<String> {
    if isbn10.len() != 10 {
        return Err(PinakesError::InvalidData(format!(
            "ISBN-10 must be 10 characters: {}",
            isbn10
        )));
    }
    if !is_valid_isbn10(isbn10) {
        return Err(PinakesError::InvalidData(format!(
            "Invalid ISBN-10 checksum: {}",
            isbn10
        )));
    }
    // Drop the ISBN-10 check digit, add the 978 prefix, and recompute the
    // check digit under the ISBN-13 scheme.
    let mut isbn13 = format!("978{}", &isbn10[..9]);
    let check_digit = calculate_isbn13_check_digit(&isbn13)?;
    isbn13.push_str(&check_digit.to_string());
    Ok(isbn13)
}

/// Validate an ISBN-10 checksum: sum of digit[i] * (10 - i) must be 0 mod 11.
/// 'X'/'x' (value 10) is only legal as the final check character; any other
/// non-digit makes the ISBN invalid.
fn is_valid_isbn10(isbn10: &str) -> bool {
    if isbn10.len() != 10 {
        return false;
    }
    let mut sum: u32 = 0;
    for (i, c) in isbn10.chars().enumerate() {
        let value = match c {
            'X' | 'x' if i == 9 => 10,
            _ => match c.to_digit(10) {
                Some(d) => d,
                None => return false,
            },
        };
        sum += value * (10 - i as u32);
    }
    sum % 11 == 0
}
/// Calculate ISBN-13 check digit
///
/// `isbn_without_check` must be exactly the 12 leading digits. Any non-digit
/// character is now an error; previously non-digits were silently skipped by
/// `filter_map`, producing a checksum computed over fewer than 12 digits.
fn calculate_isbn13_check_digit(isbn_without_check: &str) -> Result<u32> {
    if isbn_without_check.len() != 12 {
        return Err(PinakesError::InvalidData(
            "ISBN-13 without check digit must be 12 digits".to_string(),
        ));
    }
    let mut sum: u32 = 0;
    for (i, c) in isbn_without_check.chars().enumerate() {
        let Some(d) = c.to_digit(10) else {
            return Err(PinakesError::InvalidData(format!(
                "ISBN-13 must contain only digits: {}",
                isbn_without_check
            )));
        };
        // ISBN-13 weights alternate 1, 3, 1, 3, ... across the digits.
        sum += if i % 2 == 0 { d } else { d * 3 };
    }
    Ok((10 - (sum % 10)) % 10)
}
/// Validate ISBN-13 checksum
///
/// Requires exactly 13 ASCII digits. Any non-digit character makes the ISBN
/// invalid; previously non-digits were skipped by `filter_map`, so a
/// malformed string containing 'X' could still checksum to 0 mod 10 and pass
/// (e.g. "978X306406157").
fn is_valid_isbn13(isbn13: &str) -> bool {
    if isbn13.len() != 13 {
        return false;
    }
    let mut sum: u32 = 0;
    for (i, c) in isbn13.chars().enumerate() {
        match c.to_digit(10) {
            // Weights alternate 1, 3, 1, 3, ... across the 13 digits.
            Some(d) => sum += if i % 2 == 0 { d } else { d * 3 },
            None => return false,
        }
    }
    sum % 10 == 0
}
/// Extract ISBN from text (searches for ISBN-10 or ISBN-13 patterns)
///
/// Patterns are tried from most to least specific; the first candidate whose
/// capture normalizes to a valid ISBN-13 wins. Returns `None` when nothing in
/// `text` both matches a pattern and passes checksum validation.
pub fn extract_isbn_from_text(text: &str) -> Option<String> {
    use regex::Regex;
    use std::sync::LazyLock;

    // Compile the patterns once per process instead of on every call —
    // regex compilation is far more expensive than matching.
    static PATTERNS: LazyLock<Vec<Regex>> = LazyLock::new(|| {
        [
            // ISBN followed by colon or "is" with hyphens (most specific)
            r"ISBN(?:-13)?(?:\s+is|:)?\s*(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)",
            r"ISBN(?:-10)?(?:\s+is|:)?\s*(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])",
            // ISBN with just whitespace
            r"ISBN(?:-13)?\s+(\d{13})",
            r"ISBN(?:-10)?\s+(\d{9}[\dXx])",
            // Bare ISBN-13 with hyphens (in case "ISBN" is missing)
            r"\b(\d{3}-\d{1,5}-\d{1,7}-\d{1,7}-\d)\b",
            // Bare ISBN-10 with hyphens
            r"\b(\d{1,5}-\d{1,7}-\d{1,7}-[\dXx])\b",
        ]
        .iter()
        .map(|p| Regex::new(p).expect("hard-coded ISBN pattern must compile"))
        .collect()
    });

    for pattern in PATTERNS.iter() {
        // Only capture group 1 (the ISBN itself, without the "ISBN" label)
        // is of interest.
        if let Some(candidate) = pattern.captures(text).and_then(|c| c.get(1)) {
            // A candidate that matches the shape but fails checksum
            // validation is skipped, falling through to the next pattern.
            if let Ok(normalized) = normalize_isbn(candidate.as_str()) {
                return Some(normalized);
            }
        }
    }
    None
}
/// Parse author name into "Last, First" format for sorting
///
/// A name that already contains a comma is assumed to be pre-sorted and is
/// returned unchanged. Otherwise the final whitespace-separated word is
/// treated as the surname and everything before it as the given names.
pub fn parse_author_file_as(name: &str) -> String {
    // Already in "Last, First" form — pass through untouched.
    if name.contains(',') {
        return name.to_string();
    }
    let words: Vec<&str> = name.split_whitespace().collect();
    match words.split_last() {
        // Empty or whitespace-only input.
        None => String::new(),
        // Single-word name (mononym): nothing to reorder.
        Some((only, [])) => (*only).to_string(),
        // Final word is the surname; the rest are given names.
        Some((surname, given)) => format!("{}, {}", surname, given.join(" ")),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ISBN-10 inputs, hyphenated or bare, normalize to the same ISBN-13.
    #[test]
    fn test_normalize_isbn10() {
        assert_eq!(normalize_isbn("0-306-40615-2").unwrap(), "9780306406157");
        assert_eq!(normalize_isbn("0306406152").unwrap(), "9780306406157");
    }

    // ISBN-13 inputs are returned with separators stripped.
    #[test]
    fn test_normalize_isbn13() {
        assert_eq!(
            normalize_isbn("978-0-306-40615-7").unwrap(),
            "9780306406157"
        );
        assert_eq!(normalize_isbn("9780306406157").unwrap(), "9780306406157");
    }

    // Inputs that are neither 10 nor 13 characters after cleaning error out.
    #[test]
    fn test_invalid_isbn() {
        assert!(normalize_isbn("123").is_err());
        assert!(normalize_isbn("123456789012345").is_err());
    }

    // Extraction handles labelled ISBN-13 and ISBN-10 forms, normalizing
    // both to ISBN-13; text with no ISBN yields None.
    #[test]
    fn test_extract_isbn() {
        let text = "This book's ISBN is 978-0-306-40615-7 and was published in 2020.";
        assert_eq!(
            extract_isbn_from_text(text),
            Some("9780306406157".to_string())
        );
        let text2 = "ISBN-10: 0-306-40615-2";
        assert_eq!(
            extract_isbn_from_text(text2),
            Some("9780306406157".to_string())
        );
        let text3 = "No ISBN here";
        assert_eq!(extract_isbn_from_text(text3), None);
    }

    // "First Last" flips to "Last, First"; names already containing a comma
    // and single-word names pass through unchanged.
    #[test]
    fn test_parse_author_file_as() {
        assert_eq!(parse_author_file_as("J.K. Rowling"), "Rowling, J.K.");
        assert_eq!(parse_author_file_as("Neil Gaiman"), "Gaiman, Neil");
        assert_eq!(parse_author_file_as("Rowling, J.K."), "Rowling, J.K.");
        assert_eq!(parse_author_file_as("Prince"), "Prince");
    }
}

View file

@ -43,6 +43,9 @@ pub enum PinakesError {
#[error("invalid operation: {0}")]
InvalidOperation(String),
#[error("invalid data: {0}")]
InvalidData(String),
#[error("authentication error: {0}")]
Authentication(String),

View file

@ -1,5 +1,6 @@
pub mod analytics;
pub mod audit;
pub mod books;
pub mod cache;
pub mod collections;
pub mod config;

View file

@ -269,3 +269,115 @@ pub struct SavedSearch {
pub sort_order: Option<String>,
pub created_at: DateTime<Utc>,
}
// Book Management Types

/// Book-specific metadata supplementing a media item.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct BookMetadata {
    // Media item this metadata belongs to.
    pub media_id: MediaId,
    // ISBN as originally provided (may be ISBN-10).
    pub isbn: Option<String>,
    // Normalized ISBN-13 used for lookups (see the migration's isbn13 column).
    pub isbn13: Option<String>,
    pub publisher: Option<String>,
    // Language code — ISO 639-1 per the migration's column comment.
    pub language: Option<String>,
    pub page_count: Option<i32>,
    pub publication_date: Option<chrono::NaiveDate>,
    // Series name plus fractional position within it (e.g. 1.5),
    // mirroring the DOUBLE PRECISION series_index column.
    pub series_name: Option<String>,
    pub series_index: Option<f64>,
    // File format, e.g. 'epub', 'pdf', 'mobi', 'azw3' per the migration.
    pub format: Option<String>,
    // Contributor credits; presumably ordered by AuthorInfo::position —
    // TODO confirm against the query layer.
    pub authors: Vec<AuthorInfo>,
    // Additional identifiers keyed by type (isbn, asin, doi, lccn, oclc per
    // the book_identifiers table), multiple values allowed per type.
    pub identifiers: HashMap<String, Vec<String>>,
    pub created_at: DateTime<Utc>,
    pub updated_at: DateTime<Utc>,
}
/// A single contributor credit on a book.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct AuthorInfo {
    pub name: String,
    pub role: String,
    pub file_as: Option<String>,
    pub position: i32,
}

impl AuthorInfo {
    /// Create a contributor with the default role "author", no explicit
    /// sort name, and position 0; refine with the `with_*` builders.
    pub fn new(name: String) -> Self {
        Self {
            name,
            role: "author".to_string(),
            file_as: None,
            position: 0,
        }
    }

    /// Replace the contributor role.
    pub fn with_role(self, role: String) -> Self {
        Self { role, ..self }
    }

    /// Set an explicit "Last, First" sort name.
    pub fn with_file_as(self, file_as: String) -> Self {
        Self {
            file_as: Some(file_as),
            ..self
        }
    }

    /// Set the ordering position among the book's contributors.
    pub fn with_position(self, position: i32) -> Self {
        Self { position, ..self }
    }
}
/// A user's position within a book.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ReadingProgress {
    pub media_id: MediaId,
    pub user_id: Uuid,
    pub current_page: i32,
    // None when the total is unknown (e.g. not yet extracted from the file).
    pub total_pages: Option<i32>,
    // Derived completion percentage, always within [0, 100].
    pub progress_percent: f64,
    pub last_read_at: DateTime<Utc>,
}

impl ReadingProgress {
    /// Build a progress record, deriving `progress_percent` from the page
    /// counts and stamping `last_read_at` with the current time.
    ///
    /// The percentage is clamped to [0, 100] so out-of-range inputs (a
    /// negative current page, or a current page past the end) cannot produce
    /// nonsensical values; it falls back to 0 when the total is unknown or
    /// non-positive. (The original only capped the upper bound with
    /// `.min(100.0)`, letting a negative `current_page` yield a negative
    /// percentage.)
    pub fn new(
        media_id: MediaId,
        user_id: Uuid,
        current_page: i32,
        total_pages: Option<i32>,
    ) -> Self {
        let progress_percent = match total_pages {
            Some(total) if total > 0 => {
                (current_page as f64 / total as f64 * 100.0).clamp(0.0, 100.0)
            }
            _ => 0.0,
        };
        Self {
            media_id,
            user_id,
            current_page,
            total_pages,
            progress_percent,
            last_read_at: Utc::now(),
        }
    }
}
/// Lifecycle state of a user's relationship with a book.
///
/// Serialized as snake_case by serde; `Display` emits the same labels so the
/// two textual representations stay in sync (e.g. `ToRead` -> "to_read").
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ReadingStatus {
    ToRead,
    Reading,
    Completed,
    Abandoned,
}

impl fmt::Display for ReadingStatus {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Keep these labels identical to the serde snake_case names above.
        let label = match self {
            Self::ToRead => "to_read",
            Self::Reading => "reading",
            Self::Completed => "completed",
            Self::Abandoned => "abandoned",
        };
        f.write_str(label)
    }
}

View file

@ -0,0 +1,60 @@
-- V12: Book Management Schema (PostgreSQL)
-- Adds comprehensive book metadata tracking, authors, and identifiers

-- Book metadata (supplements media_items for EPUB/PDF/MOBI)
-- One row per media item; removed automatically when the item is deleted
-- via ON DELETE CASCADE.
CREATE TABLE book_metadata (
    media_id UUID PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE,
    isbn TEXT,
    isbn13 TEXT, -- Normalized ISBN-13 for lookups
    publisher TEXT,
    language TEXT, -- ISO 639-1 code
    page_count INTEGER,
    publication_date DATE,
    series_name TEXT,
    series_index DOUBLE PRECISION, -- Supports 1.5, etc.
    format TEXT, -- 'epub', 'pdf', 'mobi', 'azw3'
    created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
);
CREATE INDEX idx_book_isbn13 ON book_metadata(isbn13);
-- Composite index serves both series lookups and in-series ordering.
CREATE INDEX idx_book_series ON book_metadata(series_name, series_index);
CREATE INDEX idx_book_publisher ON book_metadata(publisher);
CREATE INDEX idx_book_language ON book_metadata(language);

-- Multiple authors per book (many-to-many)
-- The composite primary key lets the same person appear once per role on a
-- given book (e.g. both author and illustrator).
CREATE TABLE book_authors (
    media_id UUID NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
    author_name TEXT NOT NULL,
    author_sort TEXT, -- "Last, First" for sorting
    role TEXT NOT NULL DEFAULT 'author', -- author, translator, editor, illustrator
    position INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (media_id, author_name, role)
);
CREATE INDEX idx_book_authors_name ON book_authors(author_name);
CREATE INDEX idx_book_authors_sort ON book_authors(author_sort);

-- Multiple identifiers (ISBN variants, ASIN, DOI, etc.)
-- The composite key allows several values of the same identifier type.
CREATE TABLE book_identifiers (
    media_id UUID NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
    identifier_type TEXT NOT NULL, -- isbn, isbn13, asin, doi, lccn, oclc
    identifier_value TEXT NOT NULL,
    PRIMARY KEY (media_id, identifier_type, identifier_value)
);
CREATE INDEX idx_book_identifiers ON book_identifiers(identifier_type, identifier_value);

-- Trigger to update updated_at on book_metadata changes
CREATE OR REPLACE FUNCTION update_book_metadata_timestamp()
RETURNS TRIGGER AS $$
BEGIN
    NEW.updated_at = NOW();
    RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- The trigger shares its name with the function; legal in PostgreSQL since
-- triggers and functions live in separate namespaces.
CREATE TRIGGER update_book_metadata_timestamp
BEFORE UPDATE ON book_metadata
FOR EACH ROW
EXECUTE FUNCTION update_book_metadata_timestamp();

View file

@ -0,0 +1,54 @@
-- V12: Book Management Schema
-- Adds comprehensive book metadata tracking, authors, and identifiers
-- SQLite variant: TEXT ids/dates, REAL for fractional series positions, and
-- STRICT tables so column type affinities are enforced.

-- Book metadata (supplements media_items for EPUB/PDF/MOBI)
-- One row per media item; removed automatically via ON DELETE CASCADE
-- (requires PRAGMA foreign_keys=ON in the connection).
CREATE TABLE book_metadata (
    media_id TEXT PRIMARY KEY REFERENCES media_items(id) ON DELETE CASCADE,
    isbn TEXT,
    isbn13 TEXT, -- Normalized ISBN-13 for lookups
    publisher TEXT,
    language TEXT, -- ISO 639-1 code
    page_count INTEGER,
    publication_date TEXT, -- ISO 8601 date string
    series_name TEXT,
    series_index REAL, -- Supports 1.5, etc.
    format TEXT, -- 'epub', 'pdf', 'mobi', 'azw3'
    created_at TEXT NOT NULL DEFAULT (datetime('now')),
    updated_at TEXT NOT NULL DEFAULT (datetime('now'))
) STRICT;
CREATE INDEX idx_book_isbn13 ON book_metadata(isbn13);
-- Composite index serves both series lookups and in-series ordering.
CREATE INDEX idx_book_series ON book_metadata(series_name, series_index);
CREATE INDEX idx_book_publisher ON book_metadata(publisher);
CREATE INDEX idx_book_language ON book_metadata(language);

-- Multiple authors per book (many-to-many)
-- Composite primary key lets the same person appear once per role.
CREATE TABLE book_authors (
    media_id TEXT NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
    author_name TEXT NOT NULL,
    author_sort TEXT, -- "Last, First" for sorting
    role TEXT NOT NULL DEFAULT 'author', -- author, translator, editor, illustrator
    position INTEGER NOT NULL DEFAULT 0,
    PRIMARY KEY (media_id, author_name, role)
) STRICT;
CREATE INDEX idx_book_authors_name ON book_authors(author_name);
CREATE INDEX idx_book_authors_sort ON book_authors(author_sort);

-- Multiple identifiers (ISBN variants, ASIN, DOI, etc.)
CREATE TABLE book_identifiers (
    media_id TEXT NOT NULL REFERENCES media_items(id) ON DELETE CASCADE,
    identifier_type TEXT NOT NULL, -- isbn, isbn13, asin, doi, lccn, oclc
    identifier_value TEXT NOT NULL,
    PRIMARY KEY (media_id, identifier_type, identifier_value)
) STRICT;
CREATE INDEX idx_book_identifiers ON book_identifiers(identifier_type, identifier_value);

-- Trigger to update updated_at on book_metadata changes
-- NOTE(review): this AFTER UPDATE trigger issues an UPDATE on the same
-- table; it relies on SQLite's default PRAGMA recursive_triggers=OFF to
-- avoid re-firing itself — confirm connections never enable it.
CREATE TRIGGER update_book_metadata_timestamp
AFTER UPDATE ON book_metadata
FOR EACH ROW
BEGIN
UPDATE book_metadata SET updated_at = datetime('now') WHERE media_id = NEW.media_id;
END;