various: simplify code; work on security and performance
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I9a5114addcab5fbff430ab2b919b83466a6a6964
This commit is contained in:
parent
016841b200
commit
c4adc4e3e0
75 changed files with 12921 additions and 358 deletions
109
crates/pinakes-core/src/enrichment/lastfm.rs
Normal file
109
crates/pinakes-core/src/enrichment/lastfm.rs
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
//! Last.fm metadata enrichment for audio files.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use chrono::Utc;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::{PinakesError, Result};
|
||||
use crate::model::MediaItem;
|
||||
|
||||
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
|
||||
|
||||
pub struct LastFmEnricher {
|
||||
client: reqwest::Client,
|
||||
api_key: String,
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
impl LastFmEnricher {
|
||||
pub fn new(api_key: String) -> Self {
|
||||
Self {
|
||||
client: reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.connect_timeout(Duration::from_secs(5))
|
||||
.build()
|
||||
.expect("failed to build HTTP client with configured timeouts"),
|
||||
api_key,
|
||||
base_url: "https://ws.audioscrobbler.com/2.0".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MetadataEnricher for LastFmEnricher {
|
||||
fn source(&self) -> EnrichmentSourceType {
|
||||
EnrichmentSourceType::LastFm
|
||||
}
|
||||
|
||||
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
|
||||
let artist = match &item.artist {
|
||||
Some(a) if !a.is_empty() => a,
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
let title = match &item.title {
|
||||
Some(t) if !t.is_empty() => t,
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
let url = format!("{}/", self.base_url);
|
||||
|
||||
let resp = self
|
||||
.client
|
||||
.get(&url)
|
||||
.query(&[
|
||||
("method", "track.getInfo"),
|
||||
("api_key", self.api_key.as_str()),
|
||||
("artist", artist.as_str()),
|
||||
("track", title.as_str()),
|
||||
("format", "json"),
|
||||
])
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
PinakesError::MetadataExtraction(format!("Last.fm request failed: {e}"))
|
||||
})?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
PinakesError::MetadataExtraction(format!("Last.fm response read failed: {e}"))
|
||||
})?;
|
||||
|
||||
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
||||
PinakesError::MetadataExtraction(format!("Last.fm JSON parse failed: {e}"))
|
||||
})?;
|
||||
|
||||
// Check for error response
|
||||
if json.get("error").is_some() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let track = match json.get("track") {
|
||||
Some(t) => t,
|
||||
None => return Ok(None),
|
||||
};
|
||||
|
||||
let mbid = track.get("mbid").and_then(|m| m.as_str()).map(String::from);
|
||||
let listeners = track
|
||||
.get("listeners")
|
||||
.and_then(|l| l.as_str())
|
||||
.and_then(|l| l.parse::<f64>().ok())
|
||||
.unwrap_or(0.0);
|
||||
// Normalize listeners to confidence (arbitrary scale)
|
||||
let confidence = (listeners / 1_000_000.0).min(1.0);
|
||||
|
||||
Ok(Some(ExternalMetadata {
|
||||
id: Uuid::now_v7(),
|
||||
media_id: item.id,
|
||||
source: EnrichmentSourceType::LastFm,
|
||||
external_id: mbid,
|
||||
metadata_json: body,
|
||||
confidence,
|
||||
last_updated: Utc::now(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
66
crates/pinakes-core/src/enrichment/mod.rs
Normal file
66
crates/pinakes-core/src/enrichment/mod.rs
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
//! Metadata enrichment from external sources.
|
||||
|
||||
pub mod lastfm;
|
||||
pub mod musicbrainz;
|
||||
pub mod tmdb;
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::Result;
|
||||
use crate::model::{MediaId, MediaItem};
|
||||
|
||||
/// Externally-sourced metadata for a media item.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct ExternalMetadata {
|
||||
pub id: Uuid,
|
||||
pub media_id: MediaId,
|
||||
pub source: EnrichmentSourceType,
|
||||
pub external_id: Option<String>,
|
||||
pub metadata_json: String,
|
||||
pub confidence: f64,
|
||||
pub last_updated: DateTime<Utc>,
|
||||
}
|
||||
|
||||
/// Supported enrichment data sources.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub enum EnrichmentSourceType {
|
||||
#[serde(rename = "musicbrainz")]
|
||||
MusicBrainz,
|
||||
#[serde(rename = "tmdb")]
|
||||
Tmdb,
|
||||
#[serde(rename = "lastfm")]
|
||||
LastFm,
|
||||
}
|
||||
|
||||
impl std::fmt::Display for EnrichmentSourceType {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let s = match self {
|
||||
Self::MusicBrainz => "musicbrainz",
|
||||
Self::Tmdb => "tmdb",
|
||||
Self::LastFm => "lastfm",
|
||||
};
|
||||
write!(f, "{s}")
|
||||
}
|
||||
}
|
||||
|
||||
impl std::str::FromStr for EnrichmentSourceType {
|
||||
type Err = String;
|
||||
|
||||
fn from_str(s: &str) -> std::result::Result<Self, Self::Err> {
|
||||
match s {
|
||||
"musicbrainz" => Ok(Self::MusicBrainz),
|
||||
"tmdb" => Ok(Self::Tmdb),
|
||||
"lastfm" => Ok(Self::LastFm),
|
||||
_ => Err(format!("unknown enrichment source: {s}")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Trait for metadata enrichment providers.
|
||||
#[async_trait::async_trait]
|
||||
pub trait MetadataEnricher: Send + Sync {
|
||||
fn source(&self) -> EnrichmentSourceType;
|
||||
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>>;
|
||||
}
|
||||
134
crates/pinakes-core/src/enrichment/musicbrainz.rs
Normal file
134
crates/pinakes-core/src/enrichment/musicbrainz.rs
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
//! MusicBrainz metadata enrichment for audio files.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use chrono::Utc;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::{PinakesError, Result};
|
||||
use crate::model::MediaItem;
|
||||
|
||||
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
|
||||
|
||||
pub struct MusicBrainzEnricher {
|
||||
client: reqwest::Client,
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
impl Default for MusicBrainzEnricher {
|
||||
fn default() -> Self {
|
||||
Self::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl MusicBrainzEnricher {
|
||||
pub fn new() -> Self {
|
||||
Self {
|
||||
client: reqwest::Client::builder()
|
||||
.user_agent("Pinakes/0.1 (https://github.com/notashelf/pinakes)")
|
||||
.timeout(Duration::from_secs(10))
|
||||
.connect_timeout(Duration::from_secs(5))
|
||||
.build()
|
||||
.expect("failed to build HTTP client with configured timeouts"),
|
||||
base_url: "https://musicbrainz.org/ws/2".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `s` with a backslash inserted before every character that has
/// special meaning in Lucene query syntax.
///
/// All other characters, including whitespace and non-ASCII text, are copied
/// through unchanged.
fn escape_lucene_query(s: &str) -> String {
    // Worst case every character needs an escape, doubling the length.
    s.chars()
        .fold(String::with_capacity(s.len() * 2), |mut out, ch| {
            let needs_escape = matches!(
                ch,
                '+' | '-'
                    | '&'
                    | '|'
                    | '!'
                    | '('
                    | ')'
                    | '{'
                    | '}'
                    | '['
                    | ']'
                    | '^'
                    | '"'
                    | '~'
                    | '*'
                    | '?'
                    | ':'
                    | '\\'
                    | '/'
            );
            if needs_escape {
                out.push('\\');
            }
            out.push(ch);
            out
        })
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MetadataEnricher for MusicBrainzEnricher {
|
||||
fn source(&self) -> EnrichmentSourceType {
|
||||
EnrichmentSourceType::MusicBrainz
|
||||
}
|
||||
|
||||
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
|
||||
let title = match &item.title {
|
||||
Some(t) if !t.is_empty() => t,
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
let mut query = format!("recording:{}", escape_lucene_query(title));
|
||||
if let Some(ref artist) = item.artist {
|
||||
query.push_str(&format!(" AND artist:{}", escape_lucene_query(artist)));
|
||||
}
|
||||
|
||||
let url = format!("{}/recording/", self.base_url);
|
||||
|
||||
let resp = self
|
||||
.client
|
||||
.get(&url)
|
||||
.query(&[
|
||||
("query", &query),
|
||||
("fmt", &"json".to_string()),
|
||||
("limit", &"1".to_string()),
|
||||
])
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
PinakesError::MetadataExtraction(format!("MusicBrainz request failed: {e}"))
|
||||
})?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
if status == reqwest::StatusCode::TOO_MANY_REQUESTS
|
||||
|| status == reqwest::StatusCode::SERVICE_UNAVAILABLE
|
||||
{
|
||||
return Err(PinakesError::MetadataExtraction(format!(
|
||||
"MusicBrainz rate limited (HTTP {})",
|
||||
status.as_u16()
|
||||
)));
|
||||
}
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
PinakesError::MetadataExtraction(format!("MusicBrainz response read failed: {e}"))
|
||||
})?;
|
||||
|
||||
// Parse to check if we got results
|
||||
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
||||
PinakesError::MetadataExtraction(format!("MusicBrainz JSON parse failed: {e}"))
|
||||
})?;
|
||||
|
||||
let recordings = json.get("recordings").and_then(|r| r.as_array());
|
||||
if recordings.is_none_or(|r| r.is_empty()) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let recording = &recordings.unwrap()[0];
|
||||
let external_id = recording
|
||||
.get("id")
|
||||
.and_then(|id| id.as_str())
|
||||
.map(String::from);
|
||||
let score = recording
|
||||
.get("score")
|
||||
.and_then(|s| s.as_f64())
|
||||
.unwrap_or(0.0)
|
||||
/ 100.0;
|
||||
|
||||
Ok(Some(ExternalMetadata {
|
||||
id: Uuid::now_v7(),
|
||||
media_id: item.id,
|
||||
source: EnrichmentSourceType::MusicBrainz,
|
||||
external_id,
|
||||
metadata_json: body,
|
||||
confidence: score,
|
||||
last_updated: Utc::now(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
109
crates/pinakes-core/src/enrichment/tmdb.rs
Normal file
109
crates/pinakes-core/src/enrichment/tmdb.rs
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
//! TMDB (The Movie Database) metadata enrichment for video files.
|
||||
|
||||
use std::time::Duration;
|
||||
|
||||
use chrono::Utc;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::error::{PinakesError, Result};
|
||||
use crate::model::MediaItem;
|
||||
|
||||
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
|
||||
|
||||
pub struct TmdbEnricher {
|
||||
client: reqwest::Client,
|
||||
api_key: String,
|
||||
base_url: String,
|
||||
}
|
||||
|
||||
impl TmdbEnricher {
|
||||
pub fn new(api_key: String) -> Self {
|
||||
Self {
|
||||
client: reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.connect_timeout(Duration::from_secs(5))
|
||||
.build()
|
||||
.expect("failed to build HTTP client with configured timeouts"),
|
||||
api_key,
|
||||
base_url: "https://api.themoviedb.org/3".to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl MetadataEnricher for TmdbEnricher {
|
||||
fn source(&self) -> EnrichmentSourceType {
|
||||
EnrichmentSourceType::Tmdb
|
||||
}
|
||||
|
||||
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
|
||||
let title = match &item.title {
|
||||
Some(t) if !t.is_empty() => t,
|
||||
_ => return Ok(None),
|
||||
};
|
||||
|
||||
let url = format!("{}/search/movie", self.base_url);
|
||||
|
||||
let resp = self
|
||||
.client
|
||||
.get(&url)
|
||||
.query(&[
|
||||
("api_key", &self.api_key),
|
||||
("query", &title.to_string()),
|
||||
("page", &"1".to_string()),
|
||||
])
|
||||
.send()
|
||||
.await
|
||||
.map_err(|e| PinakesError::MetadataExtraction(format!("TMDB request failed: {e}")))?;
|
||||
|
||||
if !resp.status().is_success() {
|
||||
let status = resp.status();
|
||||
if status == reqwest::StatusCode::UNAUTHORIZED {
|
||||
return Err(PinakesError::MetadataExtraction(
|
||||
"TMDB API key is invalid (401)".into(),
|
||||
));
|
||||
}
|
||||
if status == reqwest::StatusCode::TOO_MANY_REQUESTS {
|
||||
tracing::warn!("TMDB rate limit exceeded (429)");
|
||||
return Ok(None);
|
||||
}
|
||||
tracing::debug!(status = %status, "TMDB search returned non-success status");
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let body = resp.text().await.map_err(|e| {
|
||||
PinakesError::MetadataExtraction(format!("TMDB response read failed: {e}"))
|
||||
})?;
|
||||
|
||||
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
||||
PinakesError::MetadataExtraction(format!("TMDB JSON parse failed: {e}"))
|
||||
})?;
|
||||
|
||||
let results = json.get("results").and_then(|r| r.as_array());
|
||||
if results.is_none_or(|r| r.is_empty()) {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let movie = &results.unwrap()[0];
|
||||
let external_id = match movie.get("id").and_then(|id| id.as_i64()) {
|
||||
Some(id) => id.to_string(),
|
||||
None => return Ok(None),
|
||||
};
|
||||
let popularity = movie
|
||||
.get("popularity")
|
||||
.and_then(|p| p.as_f64())
|
||||
.unwrap_or(0.0);
|
||||
// Normalize popularity to 0-1 range (TMDB popularity can be very high)
|
||||
let confidence = (popularity / 100.0).min(1.0);
|
||||
|
||||
Ok(Some(ExternalMetadata {
|
||||
id: Uuid::now_v7(),
|
||||
media_id: item.id,
|
||||
source: EnrichmentSourceType::Tmdb,
|
||||
external_id: Some(external_id),
|
||||
metadata_json: body,
|
||||
confidence,
|
||||
last_updated: Utc::now(),
|
||||
}))
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue