pinakes-core: initial subtitle management
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Id2f9b87b1cc903462539ab8ea47099696a6a6964
This commit is contained in:
parent
6233b46f70
commit
349b51e76c
1 changed file with 313 additions and 2 deletions
|
|
@ -1,6 +1,6 @@
|
|||
//! Subtitle management for video media items.
|
||||
|
||||
use std::path::PathBuf;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use chrono::{DateTime, Utc};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
|
@ -17,7 +17,7 @@ pub struct Subtitle {
|
|||
pub format: SubtitleFormat,
|
||||
pub file_path: Option<PathBuf>,
|
||||
pub is_embedded: bool,
|
||||
pub track_index: Option<usize>,
|
||||
pub track_index: Option<u32>,
|
||||
pub offset_ms: i64,
|
||||
pub created_at: DateTime<Utc>,
|
||||
}
|
||||
|
|
@ -33,6 +33,23 @@ pub enum SubtitleFormat {
|
|||
Pgs,
|
||||
}
|
||||
|
||||
impl SubtitleFormat {
|
||||
/// Returns the MIME type for this subtitle format.
|
||||
pub const fn mime_type(self) -> &'static str {
|
||||
match self {
|
||||
Self::Srt => "application/x-subrip",
|
||||
Self::Vtt => "text/vtt",
|
||||
Self::Ass | Self::Ssa => "text/plain; charset=utf-8",
|
||||
Self::Pgs => "application/octet-stream",
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this format is binary (not UTF-8 text).
|
||||
pub const fn is_binary(self) -> bool {
|
||||
matches!(self, Self::Pgs)
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for SubtitleFormat {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let s = match self {
|
||||
|
|
@ -60,3 +77,297 @@ impl std::str::FromStr for SubtitleFormat {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
use crate::error::{PinakesError, Result};
|
||||
|
||||
/// Information about a subtitle track embedded in a media container.
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct SubtitleTrackInfo {
|
||||
/// Zero-based index among subtitle streams, as reported by ffprobe.
|
||||
pub index: u32,
|
||||
/// BCP 47 language code extracted from stream tags, if present.
|
||||
pub language: Option<String>,
|
||||
/// Subtitle format derived from the codec name.
|
||||
pub format: SubtitleFormat,
|
||||
/// Human-readable title from stream tags, if present.
|
||||
pub title: Option<String>,
|
||||
}
|
||||
|
||||
/// Detects the subtitle format from a file extension.
|
||||
///
|
||||
/// Returns `None` if the extension is unrecognised or absent.
|
||||
pub fn detect_format(path: &Path) -> Option<SubtitleFormat> {
|
||||
match path.extension()?.to_str()?.to_lowercase().as_str() {
|
||||
"srt" => Some(SubtitleFormat::Srt),
|
||||
"vtt" => Some(SubtitleFormat::Vtt),
|
||||
"ass" => Some(SubtitleFormat::Ass),
|
||||
"ssa" => Some(SubtitleFormat::Ssa),
|
||||
"pgs" | "sup" => Some(SubtitleFormat::Pgs),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Checks whether `lang` has the shape of a simple BCP 47 language tag.
///
/// The accepted grammar is a primary subtag of 2-3 ASCII letters followed by
/// zero or more hyphen-separated subtags of 2-8 ASCII alphanumeric characters
/// each. Examples: `en`, `eng`, `en-US`, `zh-Hant`, `zh-Hant-TW`.
///
/// This is a shape check only; subtags are not looked up in any registry.
/// The empty string, underscore separators (`en_US`), empty subtags (`en-`,
/// `en--US`) and single-character subtags (`en-a`) are all rejected.
pub fn validate_language_code(lang: &str) -> bool {
    let mut subtags = lang.split('-');

    // `split` always yields at least one item, so an empty input shows up
    // here as an empty primary subtag and fails the length check.
    //
    // Byte length equals character count whenever the ASCII check passes, and
    // any non-ASCII byte fails that check, so comparing `len()` is sufficient.
    let primary_ok = subtags.next().is_some_and(|primary| {
        (2..=3).contains(&primary.len())
            && primary.bytes().all(|b| b.is_ascii_alphabetic())
    });

    primary_ok
        && subtags.all(|subtag| {
            (2..=8).contains(&subtag.len())
                && subtag.bytes().all(|b| b.is_ascii_alphanumeric())
        })
}
|
||||
|
||||
/// Lists subtitle tracks embedded in a media file using ffprobe.
|
||||
///
|
||||
/// Returns an empty vec if the file has no subtitle streams.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `PinakesError::ExternalTool` if ffprobe is not available or
|
||||
/// produces an error exit code.
|
||||
pub async fn list_embedded_tracks(
|
||||
media_path: &Path,
|
||||
) -> Result<Vec<SubtitleTrackInfo>> {
|
||||
let output = tokio::process::Command::new("ffprobe")
|
||||
.args([
|
||||
"-v",
|
||||
"quiet",
|
||||
"-print_format",
|
||||
"json",
|
||||
"-show_streams",
|
||||
"-select_streams",
|
||||
"s",
|
||||
])
|
||||
.arg(media_path)
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
PinakesError::ExternalTool {
|
||||
tool: "ffprobe".into(),
|
||||
stderr: e.to_string(),
|
||||
}
|
||||
})?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
|
||||
return Err(PinakesError::ExternalTool {
|
||||
tool: "ffprobe".into(),
|
||||
stderr,
|
||||
});
|
||||
}
|
||||
|
||||
let json: serde_json::Value = serde_json::from_slice(&output.stdout)
|
||||
.map_err(|e| {
|
||||
PinakesError::ExternalTool {
|
||||
tool: "ffprobe".into(),
|
||||
stderr: format!("failed to parse output: {e}"),
|
||||
}
|
||||
})?;
|
||||
|
||||
let streams = match json.get("streams").and_then(|s| s.as_array()) {
|
||||
Some(s) => s,
|
||||
None => return Ok(vec![]),
|
||||
};
|
||||
|
||||
let mut tracks = Vec::new();
|
||||
for (idx, stream) in streams.iter().enumerate() {
|
||||
let codec_name = stream
|
||||
.get("codec_name")
|
||||
.and_then(|v| v.as_str())
|
||||
.unwrap_or("");
|
||||
|
||||
let format = match codec_name {
|
||||
"subrip" => SubtitleFormat::Srt,
|
||||
"webvtt" => SubtitleFormat::Vtt,
|
||||
"ass" | "ssa" => SubtitleFormat::Ass,
|
||||
"hdmv_pgs_subtitle" | "pgssub" => SubtitleFormat::Pgs,
|
||||
_ => continue, // skip unknown codec
|
||||
};
|
||||
|
||||
let tags = stream.get("tags");
|
||||
let language = tags
|
||||
.and_then(|t| t.get("language"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(str::to_owned);
|
||||
let title = tags
|
||||
.and_then(|t| t.get("title"))
|
||||
.and_then(|v| v.as_str())
|
||||
.map(str::to_owned);
|
||||
|
||||
tracks.push(SubtitleTrackInfo {
|
||||
index: idx as u32,
|
||||
language,
|
||||
format,
|
||||
title,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(tracks)
|
||||
}
|
||||
|
||||
/// Extracts an embedded subtitle track from a media file using ffmpeg.
|
||||
///
|
||||
/// The caller must ensure the output directory exists before calling this
|
||||
/// function. The output format is determined by the file extension of
|
||||
/// `output_path`.
|
||||
///
|
||||
/// # Errors
|
||||
///
|
||||
/// Returns `PinakesError::ExternalTool` if ffmpeg is not available or exits
|
||||
/// with a non-zero status.
|
||||
pub async fn extract_embedded_track(
|
||||
media_path: &Path,
|
||||
track_index: u32,
|
||||
output_path: &Path,
|
||||
) -> Result<()> {
|
||||
let output = tokio::process::Command::new("ffmpeg")
|
||||
.args(["-v", "quiet", "-i"])
|
||||
.arg(media_path)
|
||||
.args(["-map", &format!("0:s:{track_index}"), "-y"])
|
||||
.arg(output_path)
|
||||
.output()
|
||||
.await
|
||||
.map_err(|e| {
|
||||
PinakesError::ExternalTool {
|
||||
tool: "ffmpeg".into(),
|
||||
stderr: e.to_string(),
|
||||
}
|
||||
})?;
|
||||
|
||||
if !output.status.success() {
|
||||
let stderr = String::from_utf8_lossy(&output.stderr).into_owned();
|
||||
return Err(PinakesError::ExternalTool {
|
||||
tool: "ffmpeg".into(),
|
||||
stderr,
|
||||
});
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;

    use super::{SubtitleFormat, detect_format, validate_language_code};

    /// Runs `detect_format` on a bare file name.
    fn detect(file_name: &str) -> Option<SubtitleFormat> {
        detect_format(Path::new(file_name))
    }

    #[test]
    fn test_detect_format_srt() {
        assert_eq!(detect("track.srt"), Some(SubtitleFormat::Srt));
    }

    #[test]
    fn test_detect_format_vtt() {
        assert_eq!(detect("track.vtt"), Some(SubtitleFormat::Vtt));
    }

    #[test]
    fn test_detect_format_ass() {
        assert_eq!(detect("track.ass"), Some(SubtitleFormat::Ass));
    }

    #[test]
    fn test_detect_format_ssa() {
        assert_eq!(detect("track.ssa"), Some(SubtitleFormat::Ssa));
    }

    #[test]
    fn test_detect_format_pgs() {
        assert_eq!(detect("track.pgs"), Some(SubtitleFormat::Pgs));
    }

    #[test]
    fn test_detect_format_sup() {
        // `.sup` is an alternative extension for PGS.
        assert_eq!(detect("track.sup"), Some(SubtitleFormat::Pgs));
    }

    #[test]
    fn test_detect_format_unknown() {
        assert!(detect("track.xyz").is_none());
    }

    #[test]
    fn test_detect_format_no_extension() {
        assert!(detect("track").is_none());
    }

    #[test]
    fn test_detect_format_case_insensitive() {
        let cases = [
            ("track.SRT", SubtitleFormat::Srt),
            ("track.VTT", SubtitleFormat::Vtt),
        ];
        for (file_name, expected) in cases {
            assert_eq!(detect(file_name), Some(expected));
        }
    }

    #[test]
    fn test_validate_language_code_simple() {
        assert!(
            validate_language_code("en"),
            "two-letter primary tag should be accepted"
        );
    }

    #[test]
    fn test_validate_language_code_with_region() {
        assert!(
            validate_language_code("en-US"),
            "primary tag with region should be accepted"
        );
    }

    #[test]
    fn test_validate_language_code_script() {
        assert!(
            validate_language_code("zh-Hant"),
            "primary tag with script should be accepted"
        );
    }

    #[test]
    fn test_validate_language_code_full() {
        assert!(
            validate_language_code("zh-Hant-TW"),
            "primary tag with script and region should be accepted"
        );
    }

    #[test]
    fn test_validate_language_code_empty() {
        assert!(
            !validate_language_code(""),
            "empty string should be rejected"
        );
    }

    #[test]
    fn test_validate_language_code_primary_too_long() {
        assert!(
            !validate_language_code("toolong-tag-over-3-chars"),
            "primary tag longer than three letters should be rejected"
        );
    }

    #[test]
    fn test_validate_language_code_underscore_separator() {
        assert!(
            !validate_language_code("en_US"),
            "underscore separator should be rejected"
        );
    }

    #[test]
    fn test_validate_language_code_subtag_too_short() {
        assert!(
            !validate_language_code("en-a"),
            "single-character subtag should be rejected"
        );
    }

    #[test]
    fn test_validate_language_code_three_letter_primary() {
        assert!(
            validate_language_code("eng"),
            "three-letter primary tag should be accepted"
        );
    }
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue