//! Subtitle management for video media items. use std::path::{Path, PathBuf}; use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use uuid::Uuid; use crate::model::MediaId; /// A subtitle track associated with a media item. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Subtitle { pub id: Uuid, pub media_id: MediaId, pub language: Option, pub format: SubtitleFormat, pub file_path: Option, pub is_embedded: bool, pub track_index: Option, pub offset_ms: i64, pub created_at: DateTime, } /// Supported subtitle formats. #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] pub enum SubtitleFormat { Srt, Vtt, Ass, Ssa, Pgs, } impl SubtitleFormat { /// Returns the MIME type for this subtitle format. pub const fn mime_type(self) -> &'static str { match self { Self::Srt => "application/x-subrip", Self::Vtt => "text/vtt", Self::Ass | Self::Ssa => "text/plain; charset=utf-8", Self::Pgs => "application/octet-stream", } } /// Returns true if this format is binary (not UTF-8 text). pub const fn is_binary(self) -> bool { matches!(self, Self::Pgs) } } impl std::fmt::Display for SubtitleFormat { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let s = match self { Self::Srt => "srt", Self::Vtt => "vtt", Self::Ass => "ass", Self::Ssa => "ssa", Self::Pgs => "pgs", }; write!(f, "{s}") } } impl std::str::FromStr for SubtitleFormat { type Err = String; fn from_str(s: &str) -> std::result::Result { match s { "srt" => Ok(Self::Srt), "vtt" => Ok(Self::Vtt), "ass" => Ok(Self::Ass), "ssa" => Ok(Self::Ssa), "pgs" => Ok(Self::Pgs), _ => Err(format!("unknown subtitle format: {s}")), } } } use crate::error::{PinakesError, Result}; /// Information about a subtitle track embedded in a media container. #[derive(Debug, Clone, PartialEq, Eq)] pub struct SubtitleTrackInfo { /// Zero-based index among subtitle streams, as reported by ffprobe. 
pub index: u32, /// BCP 47 language code extracted from stream tags, if present. pub language: Option, /// Subtitle format derived from the codec name. pub format: SubtitleFormat, /// Human-readable title from stream tags, if present. pub title: Option, } /// Detects the subtitle format from a file extension. /// /// Returns `None` if the extension is unrecognised or absent. pub fn detect_format(path: &Path) -> Option { match path.extension()?.to_str()?.to_lowercase().as_str() { "srt" => Some(SubtitleFormat::Srt), "vtt" => Some(SubtitleFormat::Vtt), "ass" => Some(SubtitleFormat::Ass), "ssa" => Some(SubtitleFormat::Ssa), "pgs" | "sup" => Some(SubtitleFormat::Pgs), _ => None, } } /// Validates a BCP 47 language code. /// /// Accepts a primary tag of 2-3 letters followed by zero or more /// hyphen-separated subtags of 2-8 alphanumeric characters each. /// Examples: `en`, `en-US`, `zh-Hant`, `zh-Hant-TW`. pub fn validate_language_code(lang: &str) -> bool { static RE: std::sync::LazyLock = std::sync::LazyLock::new(|| { #[expect(clippy::expect_used)] regex::Regex::new(r"^[A-Za-z]{2,3}(-[A-Za-z0-9]{2,8})*$") .expect("valid regex pattern") }); RE.is_match(lang) } /// Lists subtitle tracks embedded in a media file using ffprobe. /// /// Returns an empty vec if the file has no subtitle streams. /// /// # Errors /// /// Returns `PinakesError::ExternalTool` if ffprobe is not available or /// produces an error exit code. 
pub async fn list_embedded_tracks( media_path: &Path, ) -> Result> { let output = tokio::process::Command::new("ffprobe") .args([ "-v", "quiet", "-print_format", "json", "-show_streams", "-select_streams", "s", ]) .arg(media_path) .output() .await .map_err(|e| { PinakesError::ExternalTool { tool: "ffprobe".into(), stderr: e.to_string(), } })?; if !output.status.success() { let stderr = String::from_utf8_lossy(&output.stderr).into_owned(); return Err(PinakesError::ExternalTool { tool: "ffprobe".into(), stderr, }); } let json: serde_json::Value = serde_json::from_slice(&output.stdout) .map_err(|e| { PinakesError::ExternalTool { tool: "ffprobe".into(), stderr: format!("failed to parse output: {e}"), } })?; let streams = match json.get("streams").and_then(|s| s.as_array()) { Some(s) => s, None => return Ok(vec![]), }; let mut tracks = Vec::new(); for (idx, stream) in streams.iter().enumerate() { let codec_name = stream .get("codec_name") .and_then(|v| v.as_str()) .unwrap_or(""); let format = match codec_name { "subrip" => SubtitleFormat::Srt, "webvtt" => SubtitleFormat::Vtt, "ass" | "ssa" => SubtitleFormat::Ass, "hdmv_pgs_subtitle" | "pgssub" => SubtitleFormat::Pgs, _ => continue, // skip unknown codec }; let tags = stream.get("tags"); let language = tags .and_then(|t| t.get("language")) .and_then(|v| v.as_str()) .map(str::to_owned); let title = tags .and_then(|t| t.get("title")) .and_then(|v| v.as_str()) .map(str::to_owned); tracks.push(SubtitleTrackInfo { index: idx as u32, language, format, title, }); } Ok(tracks) } /// Extracts an embedded subtitle track from a media file using ffmpeg. /// /// The caller must ensure the output directory exists before calling this /// function. The output format is determined by the file extension of /// `output_path`. /// /// # Errors /// /// Returns `PinakesError::ExternalTool` if ffmpeg is not available or exits /// with a non-zero status. 
pub async fn extract_embedded_track(
    media_path: &Path,
    track_index: u32,
    output_path: &Path,
) -> Result<()> {
    // Selects the `track_index`-th subtitle stream of the first input.
    let stream_selector = format!("0:s:{track_index}");

    let output = tokio::process::Command::new("ffmpeg")
        // `error` rather than `quiet`: with `quiet` ffmpeg prints nothing at
        // all, so the stderr reported on failure would always be empty.
        .args(["-v", "error", "-i"])
        .arg(media_path)
        // `-y` overwrites an existing output file without prompting.
        .args(["-map", stream_selector.as_str(), "-y"])
        .arg(output_path)
        .output()
        .await
        .map_err(|e| PinakesError::ExternalTool {
            tool: "ffmpeg".into(),
            stderr: e.to_string(),
        })?;

    if !output.status.success() {
        return Err(PinakesError::ExternalTool {
            tool: "ffmpeg".into(),
            stderr: String::from_utf8_lossy(&output.stderr).into_owned(),
        });
    }

    Ok(())
}

#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::{SubtitleFormat, detect_format, validate_language_code};

    #[test]
    fn test_detect_format_srt() {
        assert_eq!(
            detect_format(Path::new("track.srt")),
            Some(SubtitleFormat::Srt)
        );
    }

    #[test]
    fn test_detect_format_vtt() {
        assert_eq!(
            detect_format(Path::new("track.vtt")),
            Some(SubtitleFormat::Vtt)
        );
    }

    #[test]
    fn test_detect_format_ass() {
        assert_eq!(
            detect_format(Path::new("track.ass")),
            Some(SubtitleFormat::Ass)
        );
    }

    #[test]
    fn test_detect_format_ssa() {
        assert_eq!(
            detect_format(Path::new("track.ssa")),
            Some(SubtitleFormat::Ssa)
        );
    }

    #[test]
    fn test_detect_format_pgs() {
        assert_eq!(
            detect_format(Path::new("track.pgs")),
            Some(SubtitleFormat::Pgs)
        );
    }

    #[test]
    fn test_detect_format_sup() {
        assert_eq!(
            detect_format(Path::new("track.sup")),
            Some(SubtitleFormat::Pgs)
        );
    }

    #[test]
    fn test_detect_format_unknown() {
        assert_eq!(detect_format(Path::new("track.xyz")), None);
    }

    #[test]
    fn test_detect_format_no_extension() {
        assert_eq!(detect_format(Path::new("track")), None);
    }

    #[test]
    fn test_detect_format_case_insensitive() {
        assert_eq!(
            detect_format(Path::new("track.SRT")),
            Some(SubtitleFormat::Srt)
        );
        assert_eq!(
            detect_format(Path::new("track.VTT")),
            Some(SubtitleFormat::Vtt)
        );
    }

    #[test]
    fn test_validate_language_code_simple() {
        assert!(validate_language_code("en"));
    }

    #[test]
    fn test_validate_language_code_with_region() {
        assert!(validate_language_code("en-US"));
    }

    #[test]
    fn test_validate_language_code_script() {
        assert!(validate_language_code("zh-Hant"));
    }

    #[test]
    fn test_validate_language_code_full() {
        assert!(validate_language_code("zh-Hant-TW"));
    }

    #[test]
    fn test_validate_language_code_empty() {
        assert!(!validate_language_code(""));
    }

    #[test]
    fn test_validate_language_code_primary_too_long() {
        assert!(!validate_language_code("toolong-tag-over-3-chars"));
    }

    #[test]
    fn test_validate_language_code_underscore_separator() {
        assert!(!validate_language_code("en_US"));
    }

    #[test]
    fn test_validate_language_code_subtag_too_short() {
        assert!(!validate_language_code("en-a"));
    }

    #[test]
    fn test_validate_language_code_three_letter_primary() {
        assert!(validate_language_code("eng"));
    }
}