pinakes/crates/pinakes-core/src/enrichment/musicbrainz.rs
NotAShelf b8ff35acea
various: inherit workspace lints in all crates; eliminate unwrap()
throughout

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Id8de9d65139ec4cf4cdeaee14c8c95b06a6a6964
2026-03-08 00:43:16 +03:00

144 lines
3.6 KiB
Rust

//! MusicBrainz metadata enrichment for audio files.
use std::time::Duration;
use chrono::Utc;
use uuid::Uuid;
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
use crate::{
error::{PinakesError, Result},
model::MediaItem,
};
/// Metadata enricher that looks up recordings through the MusicBrainz web
/// service.
#[derive(Debug)]
pub struct MusicBrainzEnricher {
    /// HTTP client used for every request this enricher sends.
    client: reqwest::Client,
    /// Root URL of the web service; endpoint paths such as `/recording/` are
    /// appended to it.
    base_url: String,
}
impl Default for MusicBrainzEnricher {
    /// Builds the enricher exactly as [`MusicBrainzEnricher::new`] does.
    fn default() -> Self {
        Self::new()
    }
}
impl MusicBrainzEnricher {
    /// Creates an enricher pointed at `https://musicbrainz.org/ws/2`.
    ///
    /// The HTTP client sends a Pinakes user agent and is configured with a
    /// 10-second overall request timeout and a 5-second connect timeout.
    ///
    /// # Panics
    ///
    /// Panics if the `reqwest` client cannot be built.
    pub fn new() -> Self {
        let http = reqwest::Client::builder()
            .connect_timeout(Duration::from_secs(5))
            .timeout(Duration::from_secs(10))
            .user_agent("Pinakes/0.1 (https://github.com/notashelf/pinakes)")
            .build()
            .expect("failed to build HTTP client with configured timeouts");

        Self {
            base_url: String::from("https://musicbrainz.org/ws/2"),
            client: http,
        }
    }
}
/// Returns `s` with a backslash inserted before every character that the
/// Lucene query syntax treats as special.
///
/// The escaped set is `+ - & | ! ( ) { } [ ] ^ " ~ * ? : \ /`; every other
/// character (including whitespace) is copied through unchanged.
fn escape_lucene_query(s: &str) -> String {
    // Worst case every character gains a one-byte backslash prefix.
    let buffer = String::with_capacity(s.len() * 2);

    s.chars().fold(buffer, |mut out, ch| {
        let needs_escape = matches!(
            ch,
            '+' | '-'
                | '&'
                | '|'
                | '!'
                | '('
                | ')'
                | '{'
                | '}'
                | '['
                | ']'
                | '^'
                | '"'
                | '~'
                | '*'
                | '?'
                | ':'
                | '\\'
                | '/'
        );
        if needs_escape {
            out.push('\\');
        }
        out.push(ch);
        out
    })
}
#[async_trait::async_trait]
impl MetadataEnricher for MusicBrainzEnricher {
    /// Reports MusicBrainz as the source of the metadata produced here.
    fn source(&self) -> EnrichmentSourceType {
        EnrichmentSourceType::MusicBrainz
    }

    /// Searches MusicBrainz for the recording that best matches `item`.
    ///
    /// The search is built from the item's title and, when present, its
    /// artist. Each value is trimmed, backslash-escaped and wrapped in double
    /// quotes so the whole value stays bound to its field.
    ///
    /// Returns `Ok(None)` when the item has no non-blank title, when the
    /// service answers with a non-success status other than 429/503, or when
    /// the response contains no recordings. Otherwise the first recording is
    /// returned along with the raw response body; `confidence` is that
    /// recording's `score` divided by 100 (0 when the field is absent).
    ///
    /// # Errors
    ///
    /// Returns [`PinakesError::MetadataExtraction`] if the request cannot be
    /// sent, if the service responds with HTTP 429 or 503, if the response
    /// body cannot be read, or if the body is not valid JSON.
    async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
        // Without a title there is nothing to search for.
        let Some(title) = item
            .title
            .as_deref()
            .map(str::trim)
            .filter(|t| !t.is_empty())
        else {
            return Ok(None);
        };

        // A blank artist is treated as absent; emitting an empty `artist:`
        // clause would make the query malformed.
        let artist = item
            .artist
            .as_deref()
            .map(str::trim)
            .filter(|a| !a.is_empty());

        // Quote each value: in Lucene syntax an unquoted multi-word value only
        // applies the field to its first word, and bare AND/OR/NOT inside a
        // title would be read as operators.
        let mut query = format!("recording:\"{}\"", escape_lucene_query(title));
        if let Some(artist) = artist {
            query.push_str(" AND artist:\"");
            query.push_str(&escape_lucene_query(artist));
            query.push('"');
        }

        let url = format!("{}/recording/", self.base_url);
        let resp = self
            .client
            .get(&url)
            .query(&[("query", query.as_str()), ("fmt", "json"), ("limit", "1")])
            .send()
            .await
            .map_err(|e| {
                PinakesError::MetadataExtraction(format!(
                    "MusicBrainz request failed: {e}"
                ))
            })?;

        let status = resp.status();
        if !status.is_success() {
            // 429 and 503 are surfaced as errors; every other failure status
            // is reported as "no metadata found".
            if status == reqwest::StatusCode::TOO_MANY_REQUESTS
                || status == reqwest::StatusCode::SERVICE_UNAVAILABLE
            {
                return Err(PinakesError::MetadataExtraction(format!(
                    "MusicBrainz rate limited (HTTP {})",
                    status.as_u16()
                )));
            }
            return Ok(None);
        }

        // The body is read as text because it is stored verbatim in
        // `metadata_json` below.
        let body = resp.text().await.map_err(|e| {
            PinakesError::MetadataExtraction(format!(
                "MusicBrainz response read failed: {e}"
            ))
        })?;

        // Parse only to find out whether any recording was returned.
        let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
            PinakesError::MetadataExtraction(format!(
                "MusicBrainz JSON parse failed: {e}"
            ))
        })?;

        // A missing, non-array or empty `recordings` field means no match.
        let Some(recording) = json
            .get("recordings")
            .and_then(|r| r.as_array())
            .and_then(|list| list.first())
        else {
            return Ok(None);
        };

        let external_id = recording
            .get("id")
            .and_then(|id| id.as_str())
            .map(String::from);

        // The score is divided by 100 to produce the stored confidence.
        let score = recording
            .get("score")
            .and_then(|s| s.as_f64())
            .unwrap_or(0.0)
            / 100.0;

        Ok(Some(ExternalMetadata {
            id: Uuid::now_v7(),
            media_id: item.id,
            source: EnrichmentSourceType::MusicBrainz,
            external_id,
            metadata_json: body,
            confidence: score,
            last_updated: Utc::now(),
        }))
    }
}