various: remove dead code; fix skipped tests

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I9100489be899f9e9fbd32f6aca3080196a6a6964
This commit is contained in:
raf 2026-02-05 00:18:02 +03:00
commit cfdc3d0622
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
18 changed files with 1445 additions and 28 deletions

View file

@ -0,0 +1,233 @@
use chrono::Utc;
use uuid::Uuid;
use crate::error::{PinakesError, Result};
use crate::model::MediaItem;
use super::googlebooks::GoogleBooksClient;
use super::openlibrary::OpenLibraryClient;
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
/// Book enricher that tries OpenLibrary first, then falls back to Google Books.
///
/// Holds one HTTP client per backend; both are created in `BookEnricher::new`.
pub struct BookEnricher {
/// Client for the OpenLibrary backend, which is consulted first.
openlibrary: OpenLibraryClient,
/// Client for the Google Books backend, used as the fallback.
googlebooks: GoogleBooksClient,
}
impl BookEnricher {
pub fn new(google_api_key: Option<String>) -> Self {
Self {
openlibrary: OpenLibraryClient::new(),
googlebooks: GoogleBooksClient::new(google_api_key),
}
}
/// Try to enrich from OpenLibrary first
pub async fn try_openlibrary(&self, isbn: &str) -> Result<Option<ExternalMetadata>> {
match self.openlibrary.fetch_by_isbn(isbn).await {
Ok(book) => {
let metadata_json = serde_json::to_string(&book)
.map_err(|e| PinakesError::External(format!("Failed to serialize metadata: {}", e)))?;
Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()), // Will be set by caller
source: EnrichmentSourceType::OpenLibrary,
external_id: None,
metadata_json,
confidence: calculate_openlibrary_confidence(&book),
last_updated: Utc::now(),
}))
}
Err(_) => Ok(None),
}
}
/// Try to enrich from Google Books
pub async fn try_googlebooks(&self, isbn: &str) -> Result<Option<ExternalMetadata>> {
match self.googlebooks.fetch_by_isbn(isbn).await {
Ok(books) if !books.is_empty() => {
let book = &books[0];
let metadata_json = serde_json::to_string(book)
.map_err(|e| PinakesError::External(format!("Failed to serialize metadata: {}", e)))?;
Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()), // Will be set by caller
source: EnrichmentSourceType::GoogleBooks,
external_id: Some(book.id.clone()),
metadata_json,
confidence: calculate_googlebooks_confidence(&book.volume_info),
last_updated: Utc::now(),
}))
}
_ => Ok(None),
}
}
/// Try to enrich by searching with title and author
pub async fn enrich_by_search(
&self,
title: &str,
author: Option<&str>,
) -> Result<Option<ExternalMetadata>> {
// Try OpenLibrary search first
if let Ok(results) = self.openlibrary.search(title, author).await
&& let Some(result) = results.first()
{
let metadata_json = serde_json::to_string(result)
.map_err(|e| PinakesError::External(format!("Failed to serialize metadata: {}", e)))?;
return Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()),
source: EnrichmentSourceType::OpenLibrary,
external_id: result.key.clone(),
metadata_json,
confidence: 0.6, // Lower confidence for search results
last_updated: Utc::now(),
}));
}
// Fall back to Google Books
if let Ok(results) = self.googlebooks.search(title, author).await
&& let Some(book) = results.first()
{
let metadata_json = serde_json::to_string(book)
.map_err(|e| PinakesError::External(format!("Failed to serialize metadata: {}", e)))?;
return Ok(Some(ExternalMetadata {
id: Uuid::new_v4(),
media_id: crate::model::MediaId(Uuid::nil()),
source: EnrichmentSourceType::GoogleBooks,
external_id: Some(book.id.clone()),
metadata_json,
confidence: 0.6,
last_updated: Utc::now(),
}));
}
Ok(None)
}
}
#[async_trait::async_trait]
impl MetadataEnricher for BookEnricher {
    /// Reports OpenLibrary, the source this enricher prefers.
    fn source(&self) -> EnrichmentSourceType {
        EnrichmentSourceType::OpenLibrary
    }

    /// Enriches `item` with book metadata.
    ///
    /// If the title contains an ISBN, OpenLibrary and then Google Books are
    /// queried by ISBN. Otherwise, or when neither lookup matches, a
    /// title/author search is performed with `item.artist` as the author.
    /// Every returned record has its `media_id` set to `item.id`.
    ///
    /// Returns `Ok(None)` when the item has no title or nothing matched.
    ///
    /// # Errors
    ///
    /// Propagates errors returned by the lookup helpers.
    async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
        // Without a title there is nothing to scan for an ISBN or to search with.
        let Some(ref title) = item.title else {
            return Ok(None);
        };

        // Prefer an exact ISBN lookup when the title embeds one.
        if let Some(isbn) = crate::books::extract_isbn_from_text(title) {
            if let Some(mut metadata) = self.try_openlibrary(&isbn).await? {
                metadata.media_id = item.id;
                return Ok(Some(metadata));
            }
            if let Some(mut metadata) = self.try_googlebooks(&isbn).await? {
                metadata.media_id = item.id;
                return Ok(Some(metadata));
            }
        }

        // Fall back to a title/author search. The search helper leaves the
        // nil placeholder in `media_id`, so attach the real id here just as
        // the ISBN paths do.
        let author = item.artist.as_deref();
        let mut found = self.enrich_by_search(title, author).await?;
        if let Some(metadata) = found.as_mut() {
            metadata.media_id = item.id;
        }
        Ok(found)
    }
}
/// Scores how complete an OpenLibrary record is.
///
/// Starts at 0.5 and adds a bonus for each populated field: title, authors,
/// description and covers add 0.1 each; publishers and publish date add 0.05
/// each. The result is capped at 1.0.
pub fn calculate_openlibrary_confidence(book: &super::openlibrary::OpenLibraryBook) -> f64 {
    // (field populated?, bonus), listed in the order the bonuses are summed.
    let bonuses: [(bool, f64); 6] = [
        (book.title.is_some(), 0.1),
        (!book.authors.is_empty(), 0.1),
        (!book.publishers.is_empty(), 0.05),
        (book.publish_date.is_some(), 0.05),
        (book.description.is_some(), 0.1),
        (!book.covers.is_empty(), 0.1),
    ];

    let total = bonuses
        .iter()
        .filter(|(populated, _)| *populated)
        .fold(0.5_f64, |running, (_, bonus)| running + bonus);

    total.min(1.0)
}
/// Scores how complete a Google Books volume record is.
///
/// Starts at 0.5 and adds a bonus for each populated field: title, authors,
/// description and image links add 0.1 each; publisher and published date add
/// 0.05 each. The result is capped at 1.0.
pub fn calculate_googlebooks_confidence(info: &super::googlebooks::VolumeInfo) -> f64 {
    // (field populated?, bonus), listed in the order the bonuses are summed.
    let bonuses: [(bool, f64); 6] = [
        (info.title.is_some(), 0.1),
        (!info.authors.is_empty(), 0.1),
        (info.publisher.is_some(), 0.05),
        (info.published_date.is_some(), 0.05),
        (info.description.is_some(), 0.1),
        (info.image_links.is_some(), 0.1),
    ];

    let total = bonuses
        .iter()
        .filter(|(populated, _)| *populated)
        .fold(0.5_f64, |running, (_, bonus)| running + bonus);

    total.min(1.0)
}
#[cfg(test)]
mod tests {
    use super::super::googlebooks::VolumeInfo;
    use super::super::openlibrary::OpenLibraryBook;
    use super::*;

    /// An OpenLibrary record with only a title scores base plus title bonus.
    #[test]
    fn test_openlibrary_confidence_calculation() {
        let title_only = OpenLibraryBook {
            title: Some(String::from("Test Book")),
            subtitle: None,
            description: None,
            publish_date: None,
            number_of_pages: None,
            authors: Vec::new(),
            publishers: Vec::new(),
            subjects: Vec::new(),
            covers: Vec::new(),
            isbn_10: Vec::new(),
            isbn_13: Vec::new(),
            series: Vec::new(),
            languages: Vec::new(),
        };

        // 0.5 base + 0.1 for title
        assert_eq!(calculate_openlibrary_confidence(&title_only), 0.6);
    }

    /// A Google Books volume with only a title scores base plus title bonus.
    #[test]
    fn test_googlebooks_confidence_calculation() {
        let title_only = VolumeInfo {
            title: Some(String::from("Test Book")),
            ..VolumeInfo::default()
        };

        // 0.5 base + 0.1 for title
        assert_eq!(calculate_googlebooks_confidence(&title_only), 0.6);
    }
}

View file

@ -0,0 +1,283 @@
use serde::{Deserialize, Serialize};
use crate::error::{PinakesError, Result};
/// Google Books API client for book metadata enrichment
///
/// Wraps a reusable HTTP client together with an optional API key.
pub struct GoogleBooksClient {
/// HTTP client shared by all requests made through this value.
client: reqwest::Client,
/// Optional API key; when present it is sent as the `key` query parameter.
api_key: Option<String>,
}
impl GoogleBooksClient {
    /// Creates a client with a 10 second request timeout and the
    /// `Pinakes/1.0` user agent.
    ///
    /// `api_key`, when present, is sent as the `key` query parameter on
    /// volume lookups and searches.
    ///
    /// # Panics
    ///
    /// Panics if the underlying HTTP client cannot be built.
    pub fn new(api_key: Option<String>) -> Self {
        Self {
            client: reqwest::Client::builder()
                .user_agent("Pinakes/1.0")
                .timeout(std::time::Duration::from_secs(10))
                .build()
                .expect("Failed to build HTTP client"),
            api_key,
        }
    }

    /// Appends `&key=<api key>` to `url` when a key is configured.
    ///
    /// The key is percent-encoded so that it cannot alter the query string.
    fn append_api_key(&self, url: &mut String) {
        if let Some(key) = self.api_key.as_deref() {
            url.push_str("&key=");
            url.push_str(&urlencoding::encode(key));
        }
    }

    /// Fetch book metadata by ISBN
    ///
    /// Returns the volumes listed in the response, which may be empty.
    ///
    /// # Errors
    ///
    /// Returns `PinakesError::External` when the request fails, the server
    /// answers with a non-success status, or the body cannot be parsed.
    pub async fn fetch_by_isbn(&self, isbn: &str) -> Result<Vec<GoogleBook>> {
        // Percent-encode the ISBN, as `search` does for its inputs, so that a
        // stray `&` or `#` in the argument cannot inject extra parameters.
        let mut url = format!(
            "https://www.googleapis.com/books/v1/volumes?q=isbn:{}",
            urlencoding::encode(isbn)
        );
        self.append_api_key(&mut url);

        // NOTE(review): the API key travels in the URL; confirm whether the
        // request error text can include the URL before logging these errors.
        let response = self
            .client
            .get(&url)
            .send()
            .await
            .map_err(|e| PinakesError::External(format!("Google Books request failed: {}", e)))?;

        if !response.status().is_success() {
            return Err(PinakesError::External(format!(
                "Google Books returned status: {}",
                response.status()
            )));
        }

        let volumes: GoogleBooksResponse = response.json().await.map_err(|e| {
            PinakesError::External(format!("Failed to parse Google Books response: {}", e))
        })?;

        Ok(volumes.items)
    }

    /// Search for books by title and author
    ///
    /// Builds an `intitle:` query, adds `inauthor:` when `author` is given,
    /// and requests at most five results.
    ///
    /// # Errors
    ///
    /// Returns `PinakesError::External` when the request fails, the server
    /// answers with a non-success status, or the body cannot be parsed.
    pub async fn search(&self, title: &str, author: Option<&str>) -> Result<Vec<GoogleBook>> {
        let mut query = format!("intitle:{}", urlencoding::encode(title));
        if let Some(author) = author {
            query.push_str("+inauthor:");
            query.push_str(&urlencoding::encode(author));
        }

        let mut url = format!(
            "https://www.googleapis.com/books/v1/volumes?q={}&maxResults=5",
            query
        );
        self.append_api_key(&mut url);

        let response = self
            .client
            .get(&url)
            .send()
            .await
            .map_err(|e| PinakesError::External(format!("Google Books search failed: {}", e)))?;

        if !response.status().is_success() {
            return Err(PinakesError::External(format!(
                "Google Books search returned status: {}",
                response.status()
            )));
        }

        let volumes: GoogleBooksResponse = response.json().await.map_err(|e| {
            PinakesError::External(format!("Failed to parse search results: {}", e))
        })?;

        Ok(volumes.items)
    }

    /// Download cover image from Google Books
    ///
    /// Rewrites `&zoom=1` to `&zoom=2` and drops `&edge=curl` from
    /// `image_link` before downloading, then returns the raw response bytes.
    ///
    /// # Errors
    ///
    /// Returns `PinakesError::External` when the request fails, the server
    /// answers with a non-success status, or the body cannot be read.
    pub async fn fetch_cover(&self, image_link: &str) -> Result<Vec<u8>> {
        // Replace thumbnail link with higher resolution if possible
        let high_res_link = image_link
            .replace("&zoom=1", "&zoom=2")
            .replace("&edge=curl", "");

        let response = self
            .client
            .get(&high_res_link)
            .send()
            .await
            .map_err(|e| PinakesError::External(format!("Cover download failed: {}", e)))?;

        if !response.status().is_success() {
            return Err(PinakesError::External(format!(
                "Cover download returned status: {}",
                response.status()
            )));
        }

        response
            .bytes()
            .await
            .map(|b| b.to_vec())
            .map_err(|e| PinakesError::External(format!("Failed to read cover data: {}", e)))
    }
}
/// Top-level body of a Google Books volumes response.
///
/// The API spells multi-word keys in camelCase, so `total_items` also accepts
/// `totalItems` on input. Serialized output keeps the snake_case field name.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GoogleBooksResponse {
    /// Returned volumes; empty when the response has no `items` key.
    #[serde(default)]
    pub items: Vec<GoogleBook>,
    /// Match count reported by the API; 0 when the key is absent.
    #[serde(default, alias = "totalItems")]
    pub total_items: i32,
}
/// A single volume entry from a Google Books response.
///
/// The API sends the nested record under the camelCase key `volumeInfo`, so
/// that spelling is accepted on input as well. Without it the field would
/// always fall back to its default. Serialized output keeps `volume_info`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct GoogleBook {
    /// Volume identifier assigned by Google Books.
    pub id: String,
    /// Bibliographic details; defaults to an empty record when absent.
    #[serde(default, alias = "volumeInfo")]
    pub volume_info: VolumeInfo,
}
/// Bibliographic details of a Google Books volume.
///
/// Every field is optional on input. Multi-word fields also accept the
/// camelCase key the API uses (for example `publishedDate`), so they are no
/// longer silently left at their defaults. Serialized output keeps the
/// snake_case field names.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct VolumeInfo {
    /// Volume title.
    #[serde(default)]
    pub title: Option<String>,
    /// Volume subtitle.
    #[serde(default)]
    pub subtitle: Option<String>,
    /// Author names; empty when none are listed.
    #[serde(default)]
    pub authors: Vec<String>,
    /// Publisher name.
    #[serde(default)]
    pub publisher: Option<String>,
    /// Publication date as the string sent by the API.
    #[serde(default, alias = "publishedDate")]
    pub published_date: Option<String>,
    /// Free-text description.
    #[serde(default)]
    pub description: Option<String>,
    /// Number of pages.
    #[serde(default, alias = "pageCount")]
    pub page_count: Option<i32>,
    /// Category labels; empty when none are listed.
    #[serde(default)]
    pub categories: Vec<String>,
    /// Average user rating.
    #[serde(default, alias = "averageRating")]
    pub average_rating: Option<f64>,
    /// Number of ratings behind `average_rating`.
    #[serde(default, alias = "ratingsCount")]
    pub ratings_count: Option<i32>,
    /// Cover image links in the sizes the API provided.
    #[serde(default, alias = "imageLinks")]
    pub image_links: Option<ImageLinks>,
    /// Language code as sent by the API.
    #[serde(default)]
    pub language: Option<String>,
    /// Industry identifiers such as ISBN-10 and ISBN-13.
    #[serde(default, alias = "industryIdentifiers")]
    pub industry_identifiers: Vec<IndustryIdentifier>,
    /// Main category label.
    #[serde(default, alias = "mainCategory")]
    pub main_category: Option<String>,
}
/// Cover image URLs of a Google Books volume, one per available size.
///
/// `small_thumbnail` and `extra_large` also accept the camelCase keys the API
/// uses (`smallThumbnail`, `extraLarge`). Serialized output keeps the
/// snake_case field names.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageLinks {
    /// Smallest thumbnail link.
    #[serde(default, alias = "smallThumbnail")]
    pub small_thumbnail: Option<String>,
    /// Thumbnail link.
    #[serde(default)]
    pub thumbnail: Option<String>,
    /// Small image link.
    #[serde(default)]
    pub small: Option<String>,
    /// Medium image link.
    #[serde(default)]
    pub medium: Option<String>,
    /// Large image link.
    #[serde(default)]
    pub large: Option<String>,
    /// Largest image link.
    #[serde(default, alias = "extraLarge")]
    pub extra_large: Option<String>,
}
impl ImageLinks {
    /// Returns the first populated link, checking sizes from `extra_large`
    /// down to `small_thumbnail`, or `None` when no link is set.
    pub fn best_link(&self) -> Option<&String> {
        // Candidates ordered from highest to lowest preference.
        let by_preference = [
            &self.extra_large,
            &self.large,
            &self.medium,
            &self.small,
            &self.thumbnail,
            &self.small_thumbnail,
        ];

        by_preference.iter().copied().find_map(Option::as_ref)
    }
}
/// An identifier entry attached to a Google Books volume.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct IndustryIdentifier {
/// Identifier kind, read from and written to the JSON key `type`.
/// The helper methods compare it against `"ISBN_13"` and `"ISBN_10"`.
#[serde(rename = "type")]
pub identifier_type: String,
/// The identifier value itself.
pub identifier: String,
}
impl IndustryIdentifier {
    /// Returns `true` when the identifier type is exactly `"ISBN_13"`.
    pub fn is_isbn13(&self) -> bool {
        matches!(self.identifier_type.as_str(), "ISBN_13")
    }

    /// Returns `true` when the identifier type is exactly `"ISBN_10"`.
    pub fn is_isbn10(&self) -> bool {
        matches!(self.identifier_type.as_str(), "ISBN_10")
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The constructor stores the API key exactly as given.
    #[test]
    fn test_googlebooks_client_creation() {
        let keyless = GoogleBooksClient::new(None);
        assert!(keyless.api_key.is_none());

        let keyed = GoogleBooksClient::new(Some(String::from("test-key")));
        assert_eq!(keyed.api_key.as_deref(), Some("test-key"));
    }

    /// With no extra-large link, the large link is the best one.
    #[test]
    fn test_image_links_best_link() {
        let links = ImageLinks {
            extra_large: None,
            large: Some(String::from("large.jpg")),
            medium: Some(String::from("medium.jpg")),
            small: None,
            thumbnail: Some(String::from("thumb.jpg")),
            small_thumbnail: Some(String::from("small.jpg")),
        };

        assert_eq!(links.best_link().map(String::as_str), Some("large.jpg"));
    }

    /// Each identifier matches only its own type check.
    #[test]
    fn test_industry_identifier_type_checks() {
        let thirteen = IndustryIdentifier {
            identifier_type: String::from("ISBN_13"),
            identifier: String::from("9780123456789"),
        };
        assert!(thirteen.is_isbn13());
        assert!(!thirteen.is_isbn10());

        let ten = IndustryIdentifier {
            identifier_type: String::from("ISBN_10"),
            identifier: String::from("0123456789"),
        };
        assert!(!ten.is_isbn13());
        assert!(ten.is_isbn10());
    }
}

View file

@ -1,7 +1,10 @@
//! Metadata enrichment from external sources.
pub mod books;
pub mod googlebooks;
pub mod lastfm;
pub mod musicbrainz;
pub mod openlibrary;
pub mod tmdb;
use chrono::{DateTime, Utc};
@ -32,6 +35,10 @@ pub enum EnrichmentSourceType {
Tmdb,
#[serde(rename = "lastfm")]
LastFm,
#[serde(rename = "openlibrary")]
OpenLibrary,
#[serde(rename = "googlebooks")]
GoogleBooks,
}
impl std::fmt::Display for EnrichmentSourceType {
@ -40,6 +47,8 @@ impl std::fmt::Display for EnrichmentSourceType {
Self::MusicBrainz => "musicbrainz",
Self::Tmdb => "tmdb",
Self::LastFm => "lastfm",
Self::OpenLibrary => "openlibrary",
Self::GoogleBooks => "googlebooks",
};
write!(f, "{s}")
}
@ -53,6 +62,8 @@ impl std::str::FromStr for EnrichmentSourceType {
"musicbrainz" => Ok(Self::MusicBrainz),
"tmdb" => Ok(Self::Tmdb),
"lastfm" => Ok(Self::LastFm),
"openlibrary" => Ok(Self::OpenLibrary),
"googlebooks" => Ok(Self::GoogleBooks),
_ => Err(format!("unknown enrichment source: {s}")),
}
}

View file

@ -0,0 +1,283 @@
use serde::{Deserialize, Serialize};
use crate::error::{PinakesError, Result};
/// OpenLibrary API client for book metadata enrichment
///
/// Wraps a reusable HTTP client together with the API base URL.
pub struct OpenLibraryClient {
/// HTTP client shared by all requests made through this value.
client: reqwest::Client,
/// Base URL for ISBN lookups and searches; set to
/// `https://openlibrary.org` by `new`.
base_url: String,
}
impl Default for OpenLibraryClient {
fn default() -> Self {
Self::new()
}
}
impl OpenLibraryClient {
    /// Creates a client for `https://openlibrary.org` with a 10 second
    /// request timeout and the `Pinakes/1.0` user agent.
    ///
    /// # Panics
    ///
    /// Panics if the underlying HTTP client cannot be built.
    pub fn new() -> Self {
        Self {
            client: reqwest::Client::builder()
                .user_agent("Pinakes/1.0")
                .timeout(std::time::Duration::from_secs(10))
                .build()
                .expect("Failed to build HTTP client"),
            base_url: "https://openlibrary.org".to_string(),
        }
    }

    /// Fetch book metadata by ISBN
    ///
    /// # Errors
    ///
    /// Returns `PinakesError::External` when the request fails, the server
    /// answers with a non-success status, or the body cannot be parsed.
    pub async fn fetch_by_isbn(&self, isbn: &str) -> Result<OpenLibraryBook> {
        // Percent-encode the ISBN so that characters such as `/`, `?` or `#`
        // in the argument cannot change the request path.
        let url = format!(
            "{}/isbn/{}.json",
            self.base_url,
            urlencoding::encode(isbn)
        );

        let response = self
            .client
            .get(&url)
            .send()
            .await
            .map_err(|e| PinakesError::External(format!("OpenLibrary request failed: {}", e)))?;

        if !response.status().is_success() {
            return Err(PinakesError::External(format!(
                "OpenLibrary returned status: {}",
                response.status()
            )));
        }

        response.json::<OpenLibraryBook>().await.map_err(|e| {
            PinakesError::External(format!("Failed to parse OpenLibrary response: {}", e))
        })
    }

    /// Search for books by title and author
    ///
    /// Requests at most five results and returns the `docs` array of the
    /// response.
    ///
    /// # Errors
    ///
    /// Returns `PinakesError::External` when the request fails, the server
    /// answers with a non-success status, or the body cannot be parsed.
    pub async fn search(
        &self,
        title: &str,
        author: Option<&str>,
    ) -> Result<Vec<OpenLibrarySearchResult>> {
        let mut url = format!(
            "{}/search.json?title={}",
            self.base_url,
            urlencoding::encode(title)
        );
        if let Some(author) = author {
            url.push_str("&author=");
            url.push_str(&urlencoding::encode(author));
        }
        url.push_str("&limit=5");

        let response = self
            .client
            .get(&url)
            .send()
            .await
            .map_err(|e| PinakesError::External(format!("OpenLibrary search failed: {}", e)))?;

        if !response.status().is_success() {
            return Err(PinakesError::External(format!(
                "OpenLibrary search returned status: {}",
                response.status()
            )));
        }

        let search_response: OpenLibrarySearchResponse = response.json().await.map_err(|e| {
            PinakesError::External(format!("Failed to parse search results: {}", e))
        })?;

        Ok(search_response.docs)
    }

    /// Fetch cover image by cover ID
    ///
    /// # Errors
    ///
    /// Returns `PinakesError::External` when the request fails, the server
    /// answers with a non-success status, or the body cannot be read.
    pub async fn fetch_cover(&self, cover_id: i64, size: CoverSize) -> Result<Vec<u8>> {
        let url = format!(
            "https://covers.openlibrary.org/b/id/{}-{}.jpg",
            cover_id,
            Self::size_code(size)
        );
        self.download_cover(&url).await
    }

    /// Fetch cover by ISBN
    ///
    /// # Errors
    ///
    /// Returns `PinakesError::External` when the request fails, the server
    /// answers with a non-success status, or the body cannot be read.
    pub async fn fetch_cover_by_isbn(&self, isbn: &str, size: CoverSize) -> Result<Vec<u8>> {
        // The ISBN becomes a path segment, so it is percent-encoded.
        let url = format!(
            "https://covers.openlibrary.org/b/isbn/{}-{}.jpg",
            urlencoding::encode(isbn),
            Self::size_code(size)
        );
        self.download_cover(&url).await
    }

    /// Maps a cover size to the one-letter suffix used in cover URLs.
    fn size_code(size: CoverSize) -> &'static str {
        match size {
            CoverSize::Small => "S",
            CoverSize::Medium => "M",
            CoverSize::Large => "L",
        }
    }

    /// Downloads `url` and returns the response body as raw bytes.
    async fn download_cover(&self, url: &str) -> Result<Vec<u8>> {
        let response = self
            .client
            .get(url)
            .send()
            .await
            .map_err(|e| PinakesError::External(format!("Cover download failed: {}", e)))?;

        if !response.status().is_success() {
            return Err(PinakesError::External(format!(
                "Cover download returned status: {}",
                response.status()
            )));
        }

        response
            .bytes()
            .await
            .map(|b| b.to_vec())
            .map_err(|e| PinakesError::External(format!("Failed to read cover data: {}", e)))
    }
}
/// Cover size selector; the cover fetch methods map each variant to a
/// one-letter suffix (`S`, `M`, `L`) in the cover URL.
// NOTE(review): the dimensions noted below are not verified against the
// cover service; confirm before relying on them.
#[derive(Debug, Clone, Copy)]
pub enum CoverSize {
Small, // 256x256
Medium, // 600x800
Large, // Original
}
/// Book record parsed from the response of an OpenLibrary ISBN lookup.
///
/// Every field falls back to its default (`None` or an empty list) when the
/// corresponding key is missing from the response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenLibraryBook {
/// Book title.
#[serde(default)]
pub title: Option<String>,
/// Book subtitle.
#[serde(default)]
pub subtitle: Option<String>,
/// References to author records (keys only, not names).
#[serde(default)]
pub authors: Vec<AuthorRef>,
/// Publisher names.
#[serde(default)]
pub publishers: Vec<String>,
/// Publication date as the string sent by the API.
#[serde(default)]
pub publish_date: Option<String>,
/// Page count.
#[serde(default)]
pub number_of_pages: Option<i32>,
/// Subject labels.
#[serde(default)]
pub subjects: Vec<String>,
/// Numeric cover ids.
// NOTE(review): presumably the ids accepted by `fetch_cover` - confirm.
#[serde(default)]
pub covers: Vec<i64>,
/// ISBN-10 values listed for the book.
#[serde(default)]
pub isbn_10: Vec<String>,
/// ISBN-13 values listed for the book.
#[serde(default)]
pub isbn_13: Vec<String>,
/// Series names.
#[serde(default)]
pub series: Vec<String>,
/// Description, accepted either as a plain string or as an object with a
/// `value` key.
#[serde(default)]
pub description: Option<StringOrObject>,
/// References to language records (keys only).
#[serde(default)]
pub languages: Vec<LanguageRef>,
}
/// Reference to an author record, carrying only its key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AuthorRef {
/// Key of the referenced author record.
pub key: String,
}
/// Reference to a language record, carrying only its key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LanguageRef {
/// Key of the referenced language record.
pub key: String,
}
/// A text value that may arrive either as a bare JSON string or as an object
/// holding the text under a `value` key.
///
/// The enum is untagged, so the variants are tried in declaration order
/// during deserialization.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
pub enum StringOrObject {
/// The text was given directly as a JSON string.
String(String),
/// The text was wrapped in an object with a `value` key.
Object { value: String },
}
impl StringOrObject {
    /// Borrows the contained text, whichever variant holds it.
    pub fn as_str(&self) -> &str {
        // Both variants carry exactly one `String`, so a single or-pattern
        // covers the whole enum.
        let (Self::String(text) | Self::Object { value: text }) = self;
        text.as_str()
    }
}
/// Top-level body of an OpenLibrary search response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenLibrarySearchResponse {
/// Search hits; empty when the response has no `docs` key.
#[serde(default)]
pub docs: Vec<OpenLibrarySearchResult>,
/// Match count read from the `num_found` key; 0 when the key is absent.
#[serde(default)]
pub num_found: i32,
}
/// A single hit from an OpenLibrary search.
///
/// Every field falls back to its default (`None` or an empty list) when the
/// corresponding key is missing from the response.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OpenLibrarySearchResult {
/// Key of the matched record, if the hit carries one.
#[serde(default)]
pub key: Option<String>,
/// Title of the matched record.
#[serde(default)]
pub title: Option<String>,
/// Author names.
#[serde(default)]
pub author_name: Vec<String>,
/// Year of first publication.
#[serde(default)]
pub first_publish_year: Option<i32>,
/// Publisher names.
#[serde(default)]
pub publisher: Vec<String>,
/// ISBN values listed for the hit.
#[serde(default)]
pub isbn: Vec<String>,
/// Numeric cover id.
// NOTE(review): presumably an id accepted by `fetch_cover` - confirm.
#[serde(default)]
pub cover_i: Option<i64>,
/// Subject labels.
#[serde(default)]
pub subject: Vec<String>,
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly built client targets the public OpenLibrary host.
    #[tokio::test]
    async fn test_openlibrary_client_creation() {
        let fresh = OpenLibraryClient::new();
        assert_eq!(fresh.base_url.as_str(), "https://openlibrary.org");
    }

    /// Both accepted JSON shapes yield the same text through `as_str`.
    #[test]
    fn test_string_or_object_parsing() {
        let cases = [
            (r#""Simple description""#, "Simple description"),
            (r#"{"value": "Object description"}"#, "Object description"),
        ];

        for (raw_json, expected_text) in cases {
            let parsed: StringOrObject = serde_json::from_str(raw_json).unwrap();
            assert_eq!(parsed.as_str(), expected_text);
        }
    }
}