examples: add WASM plugin examples

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Id4b791396ab37827caced2c8cc03ec356a6a6964
This commit is contained in:
raf 2026-05-20 21:52:21 +03:00
commit 934fcba8ca
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
16 changed files with 1720 additions and 0 deletions

Binary file not shown.

View file

@ -0,0 +1,15 @@
[package]
name = "subtitle-detector"
version = "1.0.0"
edition = "2024"
[lib]
crate-type = ["cdylib"]
[dependencies]
dlmalloc = { version = "0.2", features = ["global"] }
[profile.release]
opt-level = "s"
lto = true
strip = true

View file

@ -0,0 +1,18 @@
[plugin]
name = "subtitle-detector"
version = "1.0.0"
api_version = "1.0"
description = "Registers SRT, VTT, and ASS subtitle formats and extracts language and entry count metadata"
kind = ["media_type", "metadata_extractor"]
priority = 500
[plugin.binary]
wasm = "subtitle_detector.wasm"
[capabilities]
network = false
[capabilities.filesystem]
# Users must add their media root directories here. Example:
# read = ["/home/user/media", "/mnt/nas/subtitles"]
read = []

View file

@ -0,0 +1,345 @@
//! Subtitle-detector plugin for Pinakes.
//!
//! Registers SRT, VTT, and ASS/SSA subtitle file formats and extracts
//! language code and entry count metadata from them.
//!
//! Registered media types:
//! - `subtitle-srt`: extensions `["srt"]`, mime `["text/x-subrip"]`
//! - `subtitle-vtt`: extensions `["vtt"]`, mime `["text/vtt"]`
//! - `subtitle-ass`: extensions `["ass","ssa"]`, mime `["text/x-ass"]`
//!
//! Language detection uses filename conventions: `movie.en.srt` -> `en`.
//!
//! The `filesystem.read` capability in `plugin.toml` must be configured
//! to include the directories containing subtitle files.
//!
//! Build with:
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
#![no_std]
extern crate alloc;
use alloc::{format, vec::Vec};
use core::alloc::Layout;
// Register dlmalloc as the global allocator: the crate is `#![no_std]`, so the
// `alloc` crate types used below (`Vec`, `String`, `format!`) need one supplied.
#[global_allocator]
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
/// Panic handler required by the `no_std` build.
///
/// The panic payload is ignored; the handler diverges by invoking the wasm32
/// `unreachable` intrinsic.
#[panic_handler]
fn panic_handler(_panic: &core::panic::PanicInfo) -> ! {
    core::arch::wasm32::unreachable()
}
// Host functions provided by the runtime.
//
// Every pointer and length crosses the boundary as an `i32`. The notes below
// describe how this file uses each import; the authoritative contract lives in
// the host — NOTE(review): confirm against the host ABI documentation.
unsafe extern "C" {
// Called by `set_response` with the address and byte length of the response.
fn host_set_result(ptr: i32, len: i32);
// Called by `log_info` with level 2 and the address/length of the message text.
fn host_log(level: i32, ptr: i32, len: i32);
// Called with the address/length of a path string. `extract_metadata` treats a
// negative return as "could not read" and a non-negative one as the file size
// in bytes.
fn host_read_file(path_ptr: i32, path_len: i32) -> i32;
// Called with a destination buffer address and its length. `extract_metadata`
// treats a positive return as the number of bytes written to that buffer.
fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
}
/// Hand `json` to the host as this call's result.
///
/// The slice's address and length are narrowed to `i32` before being passed to
/// `host_set_result`.
fn set_response(json: &[u8]) {
    let (addr, size) = (json.as_ptr() as i32, json.len() as i32);
    // SAFETY: `addr`/`size` describe `json`, which stays borrowed (and therefore
    // alive) for the whole duration of the host call.
    unsafe {
        host_set_result(addr, size);
    }
}
/// Send `msg` to the host logger.
///
/// Always logs at level `2` (presumably the host's "info" level, going by this
/// function's name — confirm against the host).
fn log_info(msg: &str) {
    const LEVEL: i32 = 2;
    let (addr, size) = (msg.as_ptr() as i32, msg.len() as i32);
    // SAFETY: `addr`/`size` describe `msg`, which stays borrowed for the whole
    // duration of the host call.
    unsafe {
        host_log(LEVEL, addr, size);
    }
}
/// Copy the request bytes the host placed at `ptr..ptr + len` into an owned
/// buffer.
///
/// Returns an empty vector when `ptr` is zero or negative, or when `len` is zero
/// or negative. Address 0 is rejected because `slice::from_raw_parts` must never
/// be given a null pointer, and this plugin's own `alloc` export uses 0 only as
/// its "nothing allocated" return value.
///
/// # Safety
///
/// For a positive `ptr` and `len`, the caller must guarantee that the range
/// `ptr..ptr + len` is readable, initialized memory for the duration of the
/// call.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    if ptr <= 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: `ptr` is non-null and `len` is positive (checked above); the
    // caller guarantees the range is readable and initialized.
    let request = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    request.to_vec()
}
/// Extract a string value from a JSON object for a given key.
///
/// This is a lightweight textual scan, not a full JSON parser:
/// - Every occurrence of `"key"` is tried in order; an occurrence that is not
///   followed by `:` (for example the same text appearing as a string *value*)
///   is skipped rather than aborting the lookup.
/// - The value must be a JSON string; if the first `"key":` is followed by any
///   other kind of value, `None` is returned.
/// - Backslash escapes inside the value are skipped over when looking for the
///   closing quote, so `\"` no longer truncates the value. The returned slice
///   borrows from `json` and is therefore the *raw* text between the quotes;
///   escape sequences are not decoded.
///
/// Returns `None` if `json` is not valid UTF-8, the key is absent, or the string
/// value is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let json_str = core::str::from_utf8(json).ok()?;
    let pattern = format!("\"{}\"", key);
    for (pos, _) in json_str.match_indices(pattern.as_str()) {
        let after_key = json_str[pos + pattern.len()..].trim_start();
        let Some(after_colon) = after_key.strip_prefix(':') else {
            // Not a key position (e.g. the text matched inside a value).
            continue;
        };
        let value = after_colon.trim_start().strip_prefix('"')?;
        // Scan bytes: `"` and `\` are ASCII, so they can never be part of a
        // multi-byte UTF-8 sequence and any index we slice at is a boundary.
        let bytes = value.as_bytes();
        let mut i = 0;
        while i < bytes.len() {
            match bytes[i] {
                b'\\' => i += 2, // skip the escaped character as well
                b'"' => return Some(&value[..i]),
                _ => i += 1,
            }
        }
        return None; // unterminated string
    }
    None
}
/// Escape a string for safe inclusion in a JSON string value.
///
/// `"` and `\` are backslash-escaped, `\n`, `\r` and `\t` use their short
/// forms, and every other control character below U+0020 is written as a
/// `\u00XX` escape, because JSON forbids raw control characters inside strings.
/// All other characters are copied through unchanged.
fn json_escape(s: &str) -> alloc::string::String {
    const HEX: &[u8; 16] = b"0123456789abcdef";
    let mut out = alloc::string::String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if (c as u32) < 0x20 => {
                // Remaining C0 controls: the code point fits in two hex digits.
                let code = c as usize;
                out.push_str("\\u00");
                out.push(HEX[code >> 4] as char);
                out.push(HEX[code & 0x0f] as char);
            }
            _ => out.push(c),
        }
    }
    out
}
// 512 KB content read limit for subtitle files.
// `extract_metadata` compares the reported size with `>=`, so a file of exactly
// this many bytes is already reported as too large.
const MAX_FILE_BYTES: usize = 512 * 1024;
/// Subtitle format variants.
///
/// The enum is field-less, so it derives the cheap standard traits: values can
/// be copied, compared and printed in diagnostics.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SubtitleFormat {
    /// SubRip (`.srt`).
    Srt,
    /// WebVTT (`.vtt`).
    Vtt,
    /// Advanced SubStation Alpha / SubStation Alpha (`.ass`, `.ssa`).
    Ass,
}
/// Determine subtitle format from file path extension.
///
/// The comparison is ASCII case-insensitive and looks only at the end of
/// `path`: `.srt` → SubRip, `.vtt` → WebVTT, `.ass`/`.ssa` → ASS. Any other
/// ending yields `None`.
fn detect_format(path: &str) -> Option<SubtitleFormat> {
    let bytes = path.as_bytes();
    // Compare the trailing bytes directly; only ASCII letters are case-folded.
    let has_suffix = |suffix: &[u8]| {
        bytes.len() >= suffix.len()
            && bytes[bytes.len() - suffix.len()..].eq_ignore_ascii_case(suffix)
    };

    if has_suffix(b".srt") {
        return Some(SubtitleFormat::Srt);
    }
    if has_suffix(b".vtt") {
        return Some(SubtitleFormat::Vtt);
    }
    if has_suffix(b".ass") || has_suffix(b".ssa") {
        return Some(SubtitleFormat::Ass);
    }
    None
}
/// Try to detect a 2-3 letter language code from a filename stem.
///
/// Only the component after the last `/` is considered. Its final extension is
/// dropped, and the dot-separated segment that then ends the stem is returned
/// when it consists of exactly two or three ASCII letters — e.g.
/// `movie.en.srt` yields `en` and `film.fra.vtt` yields `fra`. The code is
/// returned exactly as written in the path (no case normalization).
fn detect_language(path: &str) -> Option<&str> {
    let filename = match path.rfind('/') {
        Some(slash) => &path[slash + 1..],
        None => path,
    };
    // Drop the final extension, if any.
    let stem = filename
        .rsplit_once('.')
        .map_or(filename, |(head, _extension)| head);
    // The language code, if present, is the last dot-separated piece of the stem.
    let (_, candidate) = stem.rsplit_once('.')?;
    let plausible = matches!(candidate.len(), 2 | 3)
        && candidate.bytes().all(|b| b.is_ascii_alphabetic());
    plausible.then_some(candidate)
}
/// Count `-->` occurrences in a byte slice.
///
/// The three-byte marker cannot overlap with itself, so counting every
/// three-byte window equal to `-->` gives the number of occurrences.
fn count_arrow_markers(data: &[u8]) -> usize {
    data.windows(3)
        .filter(|window| matches!(*window, [b'-', b'-', b'>']))
        .count()
}
/// Count `Dialogue:` lines in an ASS/SSA file.
///
/// A line is whatever lies between `\n` bytes (or the start/end of `data`); it
/// is counted only when it begins with `Dialogue:` at its very first byte, so
/// indented lines or mentions later in a line are ignored.
fn count_ass_dialogues(data: &[u8]) -> usize {
    const EVENT_PREFIX: &[u8] = b"Dialogue:";
    data.split(|&byte| byte == b'\n')
        .filter(|line| line.starts_with(EVENT_PREFIX))
        .count()
}
/// Exported allocation entry point: reserve `size` bytes (alignment 1) from the
/// global allocator and return the address as an `i32`.
///
/// Returns `0` when `size` is zero or negative, and `-1` when the layout is
/// invalid or the allocator returns null.
#[unsafe(no_mangle)]
pub extern "C" fn alloc(size: i32) -> i32 {
    if size <= 0 {
        return 0;
    }
    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
        return -1;
    };
    // SAFETY: `layout` has a non-zero size because `size > 0` was checked above.
    let block = unsafe { alloc::alloc::alloc(layout) };
    if block.is_null() { -1 } else { block as i32 }
}
/// Exported `initialize` entry point.
///
/// Logs a start-up message through the host and returns `0` unconditionally
/// (presumably the host's success code — confirm against the host ABI).
#[unsafe(no_mangle)]
pub extern "C" fn initialize() -> i32 {
    const STATUS: i32 = 0;
    log_info("subtitle-detector initialized");
    STATUS
}
/// Exported `shutdown` entry point.
///
/// Logs a shutdown message through the host and returns `0` unconditionally
/// (presumably the host's success code — confirm against the host ABI).
#[unsafe(no_mangle)]
pub extern "C" fn shutdown() -> i32 {
    const STATUS: i32 = 0;
    log_info("subtitle-detector shutdown");
    STATUS
}
/// Returns the media type definitions provided by this plugin.
///
/// The request arguments are ignored; the response is a fixed JSON array with
/// one entry each for SRT, WebVTT and ASS/SSA.
#[unsafe(no_mangle)]
pub extern "C" fn supported_media_types(_ptr: i32, _len: i32) {
    const MEDIA_TYPES_JSON: &[u8] = br#"[
{"id":"subtitle-srt","name":"SubRip Subtitle","category":"document","extensions":["srt"],"mime_types":["text/x-subrip"]},
{"id":"subtitle-vtt","name":"WebVTT Subtitle","category":"document","extensions":["vtt"],"mime_types":["text/vtt"]},
{"id":"subtitle-ass","name":"Advanced SubStation Alpha Subtitle","category":"document","extensions":["ass","ssa"],"mime_types":["text/x-ass"]}
]"#;
    set_response(MEDIA_TYPES_JSON);
}
/// Check whether this plugin can handle a given path.
#[unsafe(no_mangle)]
pub extern "C" fn can_handle(ptr: i32, len: i32) {
let req = unsafe { read_request(ptr, len) };
let path = json_get_str(&req, "path").unwrap_or("");
let can = detect_format(path).is_some();
if can {
set_response(br#"{"can_handle":true}"#);
} else {
set_response(br#"{"can_handle":false}"#);
}
}
/// Returns the media type IDs this extractor supports.
///
/// The request arguments are ignored; the response is a fixed JSON array of the
/// three subtitle media type IDs.
#[unsafe(no_mangle)]
pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
    const TYPE_IDS_JSON: &[u8] = br#"["subtitle-srt","subtitle-vtt","subtitle-ass"]"#;
    set_response(TYPE_IDS_JSON);
}
/// Extract metadata from a subtitle file.
///
/// The request at `ptr`/`len` is JSON; its `"path"` string selects the file.
/// The response handed to `set_response` is a JSON object `{"extra":{...}}`
/// whose values are all strings:
///
/// - `error`: the path is missing, the extension is unsupported, or the read
///   buffer could not be allocated;
/// - `format`: `srt`, `vtt` or `ass`;
/// - `language`: present only when the filename carries a 2-3 letter code;
/// - `too_large`: the host reported a size of `MAX_FILE_BYTES` or more;
/// - `entry_count`: `-->` markers (SRT/VTT) or `Dialogue:` lines (ASS/SSA).
///
/// When the host cannot read the file (negative size), only `format` and the
/// optional `language` are returned.
#[unsafe(no_mangle)]
pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
    // SAFETY: `ptr`/`len` are forwarded unchanged from the host, which is
    // expected to describe the request buffer it wrote.
    let req = unsafe { read_request(ptr, len) };
    let Some(path) = json_get_str(&req, "path") else {
        set_response(br#"{"extra":{"error":"missing path"}}"#);
        return;
    };
    let Some(format) = detect_format(path) else {
        set_response(br#"{"extra":{"error":"unsupported format"}}"#);
        return;
    };
    let format_str = match format {
        SubtitleFormat::Srt => "srt",
        SubtitleFormat::Vtt => "vtt",
        SubtitleFormat::Ass => "ass",
    };
    // Built once: every success response appends this (possibly empty) fragment
    // after its own fields.
    let lang_field = detect_language(path)
        .map(|l| format!(r#","language":"{}""#, json_escape(l)))
        .unwrap_or_default();

    // Load file contents
    // SAFETY: the pointer/length pair describes `path`, which is alive for the
    // duration of the call.
    let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
    if file_size < 0 {
        // Return what we have without entry count
        let resp = format!(
            r#"{{"extra":{{"format":"{}"{}}}}}"#,
            format_str, lang_field,
        );
        set_response(resp.as_bytes());
        return;
    }
    let buf_size = file_size as usize;
    if buf_size >= MAX_FILE_BYTES {
        let resp = format!(
            r#"{{"extra":{{"format":"{}","too_large":"true"{}}}}}"#,
            format_str, lang_field,
        );
        set_response(resp.as_bytes());
        return;
    }

    let entry_count = if buf_size == 0 {
        0usize
    } else {
        // A `Vec` owns the buffer so it is released on every path; the fallible
        // reserve keeps the "alloc failed" response instead of aborting.
        let mut buf: Vec<u8> = Vec::new();
        if buf.try_reserve_exact(buf_size).is_err() {
            set_response(br#"{"extra":{"error":"alloc failed"}}"#);
            return;
        }
        // SAFETY: `buf` has capacity for at least `buf_size` (== `file_size`)
        // bytes, which is the destination length passed to the host.
        let copied = unsafe { host_get_buffer(buf.as_mut_ptr() as i32, file_size) };
        if copied > 0 {
            // Never trust the host to stay within the requested length.
            let filled = (copied as usize).min(buf_size);
            // SAFETY: `filled <= buf_size <= buf.capacity()`, and the host
            // reported writing at least `filled` bytes into the buffer.
            unsafe { buf.set_len(filled) };
            match format {
                SubtitleFormat::Srt | SubtitleFormat::Vtt => count_arrow_markers(&buf),
                SubtitleFormat::Ass => count_ass_dialogues(&buf),
            }
        } else {
            0
        }
    };

    let msg = format!(
        "subtitle-detector: format={}, entries={}, path={}",
        format_str, entry_count, path,
    );
    log_info(&msg);
    let resp = format!(
        r#"{{"extra":{{"format":"{}","entry_count":"{}"{}}}}}"#,
        format_str, entry_count, lang_field,
    );
    set_response(resp.as_bytes());
}