examples: add WASM plugin examples
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Id4b791396ab37827caced2c8cc03ec356a6a6964
This commit is contained in:
parent
011e8edb28
commit
934fcba8ca
16 changed files with 1720 additions and 0 deletions
BIN
examples/plugins/subtitle-detector/Cargo.lock
generated
Normal file
BIN
examples/plugins/subtitle-detector/Cargo.lock
generated
Normal file
Binary file not shown.
15
examples/plugins/subtitle-detector/Cargo.toml
Normal file
15
examples/plugins/subtitle-detector/Cargo.toml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "subtitle-detector"
|
||||
version = "1.0.0"
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
dlmalloc = { version = "0.2", features = ["global"] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "s"
|
||||
lto = true
|
||||
strip = true
|
||||
18
examples/plugins/subtitle-detector/plugin.toml
Normal file
18
examples/plugins/subtitle-detector/plugin.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[plugin]
|
||||
name = "subtitle-detector"
|
||||
version = "1.0.0"
|
||||
api_version = "1.0"
|
||||
description = "Registers SRT, VTT, and ASS subtitle formats and extracts language and entry count metadata"
|
||||
kind = ["media_type", "metadata_extractor"]
|
||||
priority = 500
|
||||
|
||||
[plugin.binary]
|
||||
wasm = "subtitle_detector.wasm"
|
||||
|
||||
[capabilities]
|
||||
network = false
|
||||
|
||||
[capabilities.filesystem]
|
||||
# Users must add their media root directories here. Example:
|
||||
# read = ["/home/user/media", "/mnt/nas/subtitles"]
|
||||
read = []
|
||||
345
examples/plugins/subtitle-detector/src/lib.rs
Normal file
345
examples/plugins/subtitle-detector/src/lib.rs
Normal file
|
|
@ -0,0 +1,345 @@
|
|||
//! Subtitle-detector plugin for Pinakes.
|
||||
//!
|
||||
//! Registers SRT, VTT, and ASS/SSA subtitle file formats and extracts
|
||||
//! language code and entry count metadata from them.
|
||||
//!
|
||||
//! Registered media types:
|
||||
//! - `subtitle-srt`: extensions `["srt"]`, mime `["text/x-subrip"]`
|
||||
//! - `subtitle-vtt`: extensions `["vtt"]`, mime `["text/vtt"]`
|
||||
//! - `subtitle-ass`: extensions `["ass","ssa"]`, mime `["text/x-ass"]`
|
||||
//!
|
||||
//! Language detection uses filename conventions: `movie.en.srt` -> `en`.
|
||||
//!
|
||||
//! The `filesystem.read` capability in `plugin.toml` must be configured
|
||||
//! to include the directories containing subtitle files.
|
||||
//!
|
||||
//! Build with:
|
||||
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
|
||||
|
||||
#![no_std]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::{format, vec::Vec};
|
||||
use core::alloc::Layout;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
|
||||
|
||||
/// Panic handler required because the crate is built with `#![no_std]`.
///
/// The panic information is ignored; the handler executes the WebAssembly
/// `unreachable` instruction, so a panic ends in a trap rather than unwinding.
#[panic_handler]
fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
    core::arch::wasm32::unreachable()
}
|
||||
|
||||
// Host functions provided by the runtime.
//
// Every pointer and length crosses the boundary as an `i32`; this file always
// passes the address and length of one of its own buffers.
// NOTE(review): the descriptions below reflect how this file calls each
// function, not a host-side specification -- confirm against the runtime.
unsafe extern "C" {
    /// Called with the address and length of the response bytes for the
    /// current call.
    fn host_set_result(ptr: i32, len: i32);
    /// Called with a numeric level and the address/length of a UTF-8 message.
    fn host_log(level: i32, ptr: i32, len: i32);
    /// Called with the address/length of a path string. This file treats a
    /// negative return value as failure and a non-negative one as the size of
    /// the file in bytes.
    fn host_read_file(path_ptr: i32, path_len: i32) -> i32;
    /// Called with a destination address and capacity. This file treats a
    /// positive return value as the number of bytes written to the
    /// destination.
    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
}
|
||||
|
||||
fn set_response(json: &[u8]) {
|
||||
unsafe {
|
||||
host_set_result(json.as_ptr() as i32, json.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
fn log_info(msg: &str) {
|
||||
unsafe {
|
||||
host_log(2, msg.as_ptr() as i32, msg.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copy the request payload located at `ptr..ptr + len` into an owned buffer.
///
/// Returns an empty vector when `ptr` is zero or negative, or when `len` is
/// not positive.
///
/// # Safety
///
/// When `ptr > 0` and `len > 0`, the range `ptr..ptr + len` must be readable,
/// initialised memory for the duration of the call.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    // Address 0 is rejected along with negative values: building a slice from
    // a null pointer is undefined behaviour, and this plugin's own `alloc`
    // export returns 0 when asked for a non-positive size.
    if ptr <= 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: `ptr` is non-null and `len` is positive (checked above); the
    // caller guarantees the range is readable.
    let bytes = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    bytes.to_vec()
}
|
||||
|
||||
/// Extract the string value stored under `key` in a JSON object.
///
/// This is a minimal scanner, not a JSON parser. It looks for `"key"`
/// followed by `:` and a string literal, and returns the literal's contents
/// as a slice of `json`.
///
/// * The returned text is **raw**: escape sequences such as `\"` or `\\` are
///   left undecoded, because the result borrows from the input.
/// * An occurrence of `"key"` that is not followed by `:` (for example, the
///   same text appearing as a value) is skipped and the search continues.
/// * Returns `None` if `json` is not UTF-8, the key is absent, its value is
///   not a string, or the string is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let text = core::str::from_utf8(json).ok()?;
    let pattern = format!("\"{}\"", key);

    let mut search_from = 0usize;
    while let Some(offset) = text[search_from..].find(&pattern) {
        let after_key = search_from + offset + pattern.len();
        search_from = after_key;

        // Not followed by a colon: this occurrence is a value, not a key.
        let Some(after_colon) = text[after_key..].trim_start().strip_prefix(':') else {
            continue;
        };
        // The key exists but its value is not a string literal.
        let value = after_colon.trim_start().strip_prefix('"')?;

        // Find the closing quote, stepping over backslash-escaped characters
        // so that `\"` inside the value does not end it early.
        let mut escaped = false;
        for (idx, byte) in value.bytes().enumerate() {
            if escaped {
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                return Some(&value[..idx]);
            }
        }
        // Unterminated string literal.
        return None;
    }
    None
}
|
||||
|
||||
/// Escape a string for safe inclusion in a JSON string value.
///
/// `"` and `\` are backslash-escaped. Control characters below U+0020 use
/// their short form where JSON defines one (`\b`, `\f`, `\n`, `\r`, `\t`) and
/// `\u00XX` otherwise, since JSON forbids them unescaped inside strings.
/// All other characters are copied through unchanged.
fn json_escape(s: &str) -> alloc::string::String {
    const HEX: &[u8; 16] = b"0123456789abcdef";

    let mut out = alloc::string::String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            '\u{08}' => out.push_str("\\b"),
            '\u{0c}' => out.push_str("\\f"),
            c if (c as u32) < 0x20 => {
                // Remaining control characters have no short escape.
                let code = c as usize;
                out.push_str("\\u00");
                out.push(HEX[code >> 4] as char);
                out.push(HEX[code & 0x0f] as char);
            }
            _ => out.push(c),
        }
    }
    out
}
|
||||
|
||||
// Size limit (512 KiB) for subtitle files: a file at or above this size is
// reported as too large instead of being scanned for entries.
|
||||
const MAX_FILE_BYTES: usize = 512 * 1024;
|
||||
|
||||
/// Subtitle format variants recognised by this plugin.
///
/// The derives make the value cheap to copy and allow it to be compared and
/// debug-printed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SubtitleFormat {
    /// SubRip (`.srt`).
    Srt,
    /// WebVTT (`.vtt`).
    Vtt,
    /// Advanced SubStation Alpha / SubStation Alpha (`.ass`, `.ssa`).
    Ass,
}
|
||||
|
||||
/// Determine subtitle format from file path extension.
|
||||
fn detect_format(path: &str) -> Option<SubtitleFormat> {
|
||||
let lower = path.to_ascii_lowercase();
|
||||
if lower.ends_with(".srt") {
|
||||
Some(SubtitleFormat::Srt)
|
||||
} else if lower.ends_with(".vtt") {
|
||||
Some(SubtitleFormat::Vtt)
|
||||
} else if lower.ends_with(".ass") || lower.ends_with(".ssa") {
|
||||
Some(SubtitleFormat::Ass)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Look for a language tag in a subtitle filename.
///
/// The tag is the dot-separated segment immediately before the extension
/// (`movie.en.srt` -> `en`, `film.fra.vtt` -> `fra`). It is accepted only if
/// it is two or three ASCII letters long. Directory components (separated by
/// `/`) are ignored. The tag is returned exactly as written in the path.
fn detect_language(path: &str) -> Option<&str> {
    // Keep only the final path component.
    let file_name = match path.rsplit_once('/') {
        Some((_, tail)) => tail,
        None => path,
    };
    // Drop the extension, if there is one.
    let stem = file_name
        .rsplit_once('.')
        .map_or(file_name, |(head, _)| head);
    // Whatever follows the last remaining dot is the candidate tag.
    let (_, code) = stem.rsplit_once('.')?;

    let plausible =
        matches!(code.len(), 2 | 3) && code.bytes().all(|byte| byte.is_ascii_alphabetic());
    plausible.then_some(code)
}
|
||||
|
||||
/// Count occurrences of the byte sequence `-->` in `data`.
///
/// The sequence cannot overlap with itself, so checking every 3-byte window
/// gives the same result as a scan that skips past each match.
fn count_arrow_markers(data: &[u8]) -> usize {
    data.windows(3).filter(|window| *window == b"-->").count()
}
|
||||
|
||||
/// Count the lines of an ASS/SSA file that begin with `Dialogue:`.
///
/// Lines are delimited by `\n` only; the marker must be the very first bytes
/// of a line, so indented or mid-line occurrences are not counted.
fn count_ass_dialogues(data: &[u8]) -> usize {
    data.split(|&byte| byte == b'\n')
        .filter(|line| line.starts_with(b"Dialogue:"))
        .count()
}
|
||||
|
||||
/// Exported allocator entry point: reserve `size` bytes with alignment 1.
///
/// Returns `0` when `size` is not positive, `-1` when the layout is invalid
/// or the allocation fails, and otherwise the block's address as an `i32`.
#[unsafe(no_mangle)]
pub extern "C" fn alloc(size: i32) -> i32 {
    if size <= 0 {
        return 0;
    }
    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
        return -1;
    };
    // SAFETY: `layout` has a non-zero size because `size > 0` was checked.
    let block = unsafe { alloc::alloc::alloc(layout) };
    if block.is_null() { -1 } else { block as i32 }
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn initialize() -> i32 {
|
||||
log_info("subtitle-detector initialized");
|
||||
0
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn shutdown() -> i32 {
|
||||
log_info("subtitle-detector shutdown");
|
||||
0
|
||||
}
|
||||
|
||||
/// Returns the media type definitions provided by this plugin.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn supported_media_types(_ptr: i32, _len: i32) {
|
||||
let response = br#"[
|
||||
{"id":"subtitle-srt","name":"SubRip Subtitle","category":"document","extensions":["srt"],"mime_types":["text/x-subrip"]},
|
||||
{"id":"subtitle-vtt","name":"WebVTT Subtitle","category":"document","extensions":["vtt"],"mime_types":["text/vtt"]},
|
||||
{"id":"subtitle-ass","name":"Advanced SubStation Alpha Subtitle","category":"document","extensions":["ass","ssa"],"mime_types":["text/x-ass"]}
|
||||
]"#;
|
||||
set_response(response);
|
||||
}
|
||||
|
||||
/// Check whether this plugin can handle a given path.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn can_handle(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
let path = json_get_str(&req, "path").unwrap_or("");
|
||||
let can = detect_format(path).is_some();
|
||||
if can {
|
||||
set_response(br#"{"can_handle":true}"#);
|
||||
} else {
|
||||
set_response(br#"{"can_handle":false}"#);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the media type IDs this extractor supports.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
|
||||
set_response(br#"["subtitle-srt","subtitle-vtt","subtitle-ass"]"#);
|
||||
}
|
||||
|
||||
/// Extract metadata from a subtitle file.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
let path = match json_get_str(&req, "path") {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
set_response(br#"{"extra":{"error":"missing path"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let format = match detect_format(path) {
|
||||
Some(f) => f,
|
||||
None => {
|
||||
set_response(br#"{"extra":{"error":"unsupported format"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let format_str = match format {
|
||||
SubtitleFormat::Srt => "srt",
|
||||
SubtitleFormat::Vtt => "vtt",
|
||||
SubtitleFormat::Ass => "ass",
|
||||
};
|
||||
|
||||
let language = detect_language(path);
|
||||
|
||||
// Load file contents
|
||||
let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
|
||||
if file_size < 0 {
|
||||
// Return what we have without entry count
|
||||
let lang_field = language
|
||||
.map(|l| format!(r#","language":"{}""#, json_escape(l)))
|
||||
.unwrap_or_default();
|
||||
let resp = format!(
|
||||
r#"{{"extra":{{"format":"{}"{}}}}}"#,
|
||||
format_str, lang_field,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
return;
|
||||
}
|
||||
|
||||
if file_size as usize >= MAX_FILE_BYTES {
|
||||
let lang_field = language
|
||||
.map(|l| format!(r#","language":"{}""#, json_escape(l)))
|
||||
.unwrap_or_default();
|
||||
let resp = format!(
|
||||
r#"{{"extra":{{"format":"{}","too_large":"true"{}}}}}"#,
|
||||
format_str, lang_field,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
return;
|
||||
}
|
||||
|
||||
let buf_size = file_size as usize;
|
||||
let entry_count = if buf_size == 0 {
|
||||
0usize
|
||||
} else {
|
||||
let layout = match Layout::from_size_align(buf_size, 1) {
|
||||
Ok(l) => l,
|
||||
Err(_) => {
|
||||
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
let buf_ptr = unsafe { alloc::alloc::alloc(layout) };
|
||||
if buf_ptr.is_null() {
|
||||
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) };
|
||||
let count = if copied > 0 {
|
||||
let data = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) };
|
||||
match format_str {
|
||||
"srt" => count_arrow_markers(data),
|
||||
"vtt" => count_arrow_markers(data),
|
||||
_ => count_ass_dialogues(data),
|
||||
}
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
|
||||
count
|
||||
};
|
||||
|
||||
let msg = format!(
|
||||
"subtitle-detector: format={}, entries={}, path={}",
|
||||
format_str, entry_count, path,
|
||||
);
|
||||
log_info(&msg);
|
||||
|
||||
let lang_field = language
|
||||
.map(|l| format!(r#","language":"{}""#, json_escape(l)))
|
||||
.unwrap_or_default();
|
||||
|
||||
let resp = format!(
|
||||
r#"{{"extra":{{"format":"{}","entry_count":"{}"{}}}}}"#,
|
||||
format_str, entry_count, lang_field,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue