throughout Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Id8de9d65139ec4cf4cdeaee14c8c95b06a6a6964
144 lines
3.6 KiB
Rust
144 lines
3.6 KiB
Rust
//! MusicBrainz metadata enrichment for audio files.
|
|
|
|
use std::time::Duration;
|
|
|
|
use chrono::Utc;
|
|
use uuid::Uuid;
|
|
|
|
use super::{EnrichmentSourceType, ExternalMetadata, MetadataEnricher};
|
|
use crate::{
|
|
error::{PinakesError, Result},
|
|
model::MediaItem,
|
|
};
|
|
|
|
pub struct MusicBrainzEnricher {
|
|
client: reqwest::Client,
|
|
base_url: String,
|
|
}
|
|
|
|
impl Default for MusicBrainzEnricher {
|
|
fn default() -> Self {
|
|
Self::new()
|
|
}
|
|
}
|
|
|
|
impl MusicBrainzEnricher {
|
|
pub fn new() -> Self {
|
|
Self {
|
|
client: reqwest::Client::builder()
|
|
.user_agent("Pinakes/0.1 (https://github.com/notashelf/pinakes)")
|
|
.timeout(Duration::from_secs(10))
|
|
.connect_timeout(Duration::from_secs(5))
|
|
.build()
|
|
.expect("failed to build HTTP client with configured timeouts"),
|
|
base_url: "https://musicbrainz.org/ws/2".to_string(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns `s` with a backslash inserted before every query-syntax
/// metacharacter, so the text can be embedded in a search query literally.
///
/// The escaped set is `+ - & | ! ( ) { } [ ] ^ " ~ * ? : \ /`; every other
/// character (including whitespace and non-ASCII text) is copied unchanged.
fn escape_lucene_query(s: &str) -> String {
    // Worst case: every character gains a leading backslash.
    s.chars()
        .fold(String::with_capacity(s.len() * 2), |mut out, ch| {
            let needs_escape = matches!(
                ch,
                '+' | '-'
                    | '&'
                    | '|'
                    | '!'
                    | '('
                    | ')'
                    | '{'
                    | '}'
                    | '['
                    | ']'
                    | '^'
                    | '"'
                    | '~'
                    | '*'
                    | '?'
                    | ':'
                    | '\\'
                    | '/'
            );
            if needs_escape {
                out.push('\\');
            }
            out.push(ch);
            out
        })
}
|
|
|
|
#[async_trait::async_trait]
|
|
impl MetadataEnricher for MusicBrainzEnricher {
|
|
fn source(&self) -> EnrichmentSourceType {
|
|
EnrichmentSourceType::MusicBrainz
|
|
}
|
|
|
|
async fn enrich(&self, item: &MediaItem) -> Result<Option<ExternalMetadata>> {
|
|
let title = match &item.title {
|
|
Some(t) if !t.is_empty() => t,
|
|
_ => return Ok(None),
|
|
};
|
|
|
|
let mut query = format!("recording:{}", escape_lucene_query(title));
|
|
if let Some(ref artist) = item.artist {
|
|
query.push_str(&format!(" AND artist:{}", escape_lucene_query(artist)));
|
|
}
|
|
|
|
let url = format!("{}/recording/", self.base_url);
|
|
|
|
let resp = self
|
|
.client
|
|
.get(&url)
|
|
.query(&[
|
|
("query", &query),
|
|
("fmt", &"json".to_string()),
|
|
("limit", &"1".to_string()),
|
|
])
|
|
.send()
|
|
.await
|
|
.map_err(|e| {
|
|
PinakesError::MetadataExtraction(format!(
|
|
"MusicBrainz request failed: {e}"
|
|
))
|
|
})?;
|
|
|
|
if !resp.status().is_success() {
|
|
let status = resp.status();
|
|
if status == reqwest::StatusCode::TOO_MANY_REQUESTS
|
|
|| status == reqwest::StatusCode::SERVICE_UNAVAILABLE
|
|
{
|
|
return Err(PinakesError::MetadataExtraction(format!(
|
|
"MusicBrainz rate limited (HTTP {})",
|
|
status.as_u16()
|
|
)));
|
|
}
|
|
return Ok(None);
|
|
}
|
|
|
|
let body = resp.text().await.map_err(|e| {
|
|
PinakesError::MetadataExtraction(format!(
|
|
"MusicBrainz response read failed: {e}"
|
|
))
|
|
})?;
|
|
|
|
// Parse to check if we got results
|
|
let json: serde_json::Value = serde_json::from_str(&body).map_err(|e| {
|
|
PinakesError::MetadataExtraction(format!(
|
|
"MusicBrainz JSON parse failed: {e}"
|
|
))
|
|
})?;
|
|
|
|
let recordings = json.get("recordings").and_then(|r| r.as_array());
|
|
if recordings.is_none_or(|r| r.is_empty()) {
|
|
return Ok(None);
|
|
}
|
|
|
|
let Some(recordings) = recordings else {
|
|
return Ok(None);
|
|
};
|
|
let recording = &recordings[0];
|
|
let external_id = recording
|
|
.get("id")
|
|
.and_then(|id| id.as_str())
|
|
.map(String::from);
|
|
let score = recording
|
|
.get("score")
|
|
.and_then(|s| s.as_f64())
|
|
.unwrap_or(0.0)
|
|
/ 100.0;
|
|
|
|
Ok(Some(ExternalMetadata {
|
|
id: Uuid::now_v7(),
|
|
media_id: item.id,
|
|
source: EnrichmentSourceType::MusicBrainz,
|
|
external_id,
|
|
metadata_json: body,
|
|
confidence: score,
|
|
last_updated: Utc::now(),
|
|
}))
|
|
}
|
|
}
|