From 934fcba8ca7fd55d0d98660c66b8d975b60209e4 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Wed, 20 May 2026 21:52:21 +0300 Subject: [PATCH] examples: add WASM plugin examples Signed-off-by: NotAShelf Change-Id: Id4b791396ab37827caced2c8cc03ec356a6a6964 --- examples/plugins/auto-tagger/Cargo.lock | Bin 0 -> 1243 bytes examples/plugins/auto-tagger/Cargo.toml | 15 + examples/plugins/auto-tagger/plugin.toml | 13 + examples/plugins/auto-tagger/src/lib.rs | 303 +++++++ examples/plugins/cbz-comics/Cargo.lock | Bin 0 -> 1673 bytes examples/plugins/cbz-comics/Cargo.toml | 18 + examples/plugins/cbz-comics/plugin.toml | 20 + examples/plugins/cbz-comics/src/lib.rs | 742 ++++++++++++++++++ examples/plugins/subtitle-detector/Cargo.lock | Bin 0 -> 1249 bytes examples/plugins/subtitle-detector/Cargo.toml | 15 + .../plugins/subtitle-detector/plugin.toml | 18 + examples/plugins/subtitle-detector/src/lib.rs | 345 ++++++++ examples/plugins/text-enrichment/Cargo.lock | Bin 0 -> 1247 bytes examples/plugins/text-enrichment/Cargo.toml | 15 + examples/plugins/text-enrichment/plugin.toml | 18 + examples/plugins/text-enrichment/src/lib.rs | 198 +++++ 16 files changed, 1720 insertions(+) create mode 100644 examples/plugins/auto-tagger/Cargo.lock create mode 100644 examples/plugins/auto-tagger/Cargo.toml create mode 100644 examples/plugins/auto-tagger/plugin.toml create mode 100644 examples/plugins/auto-tagger/src/lib.rs create mode 100644 examples/plugins/cbz-comics/Cargo.lock create mode 100644 examples/plugins/cbz-comics/Cargo.toml create mode 100644 examples/plugins/cbz-comics/plugin.toml create mode 100644 examples/plugins/cbz-comics/src/lib.rs create mode 100644 examples/plugins/subtitle-detector/Cargo.lock create mode 100644 examples/plugins/subtitle-detector/Cargo.toml create mode 100644 examples/plugins/subtitle-detector/plugin.toml create mode 100644 examples/plugins/subtitle-detector/src/lib.rs create mode 100644 examples/plugins/text-enrichment/Cargo.lock create mode 100644 
examples/plugins/text-enrichment/Cargo.toml create mode 100644 examples/plugins/text-enrichment/plugin.toml create mode 100644 examples/plugins/text-enrichment/src/lib.rs diff --git a/examples/plugins/auto-tagger/Cargo.lock b/examples/plugins/auto-tagger/Cargo.lock new file mode 100644 index 0000000000000000000000000000000000000000..a398a38e786445303fe56a29563e3e3c5e5ffa39 GIT binary patch literal 1243 zcmb`HO>5jR5Qgvm6~Z|->(%#1p-||l_ufJdBWcDf)@ujb+jReZ<Ivs{I>A~J48w(e?Qz5zR?IA3*f*H_scY`o_jPL>r+SPlj=5gb zG_w9DR(%jf;R zFYDnPyEL8l^Lbf!1C0GXZN`_bOuJH5{A!zYz;s;Br%ifLij$9$NGMX;Xdbo3pe>Rz zLI^L7lP)Tsm6EAz}OynX7N8TU?@3U7jX^ut* zusUWRAj%Y|UVs%UW~$O9py3W>sCiz&^C$fKN@^%^qr}?hn%?xh?3T;&F}0WOcb@&N zO-B>0w${i<)>alFpvd3@7g5Dvr1ZfO7wD9YkPSJh2puSpHKq{q2h{#r9Ljinvw1J% zZVZmN=T)ghdJaltvcgp|x^9`vnRL*?f|EXSnW73(5GhJmm+uXQ?SJ?U*_VT{ literal 0 HcmV?d00001 diff --git a/examples/plugins/auto-tagger/Cargo.toml b/examples/plugins/auto-tagger/Cargo.toml new file mode 100644 index 0000000..cdcee85 --- /dev/null +++ b/examples/plugins/auto-tagger/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "auto-tagger" +version = "1.0.0" +edition = "2024" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +dlmalloc = { version = "0.2", features = ["global"] } + +[profile.release] +opt-level = "s" +lto = true +strip = true diff --git a/examples/plugins/auto-tagger/plugin.toml b/examples/plugins/auto-tagger/plugin.toml new file mode 100644 index 0000000..24354f4 --- /dev/null +++ b/examples/plugins/auto-tagger/plugin.toml @@ -0,0 +1,13 @@ +[plugin] +name = "auto-tagger" +version = "1.0.0" +api_version = "1.0" +description = "Listens for MediaImported events and emits AutoTagSuggested events based on path pattern rules" +kind = ["event_handler"] +priority = 500 + +[plugin.binary] +wasm = "auto_tagger.wasm" + +[capabilities] +network = false diff --git a/examples/plugins/auto-tagger/src/lib.rs b/examples/plugins/auto-tagger/src/lib.rs new file mode 100644 index 0000000..2f30527 --- /dev/null +++ 
b/examples/plugins/auto-tagger/src/lib.rs @@ -0,0 +1,303 @@ +//! Auto-tagger plugin for Pinakes. +//! +//! Listens for `MediaImported` events and, based on configurable path pattern +//! rules, emits `AutoTagSuggested` events. Rules map path substrings to tag +//! names. +//! +//! Configuration key `rules` expects a JSON array of objects: +//! `[{"pattern": "/music/", "tag": "music"}, ...]` +//! +//! If no config is present, built-in defaults are used: +//! - `/music/` -> `music` +//! - `/photos/` -> `photo` +//! - `/videos/` -> `video` +//! - `/books/` -> `book` +//! - `/documents/` -> `document` +//! +//! Build with: +//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release + +#![no_std] + +extern crate alloc; + +use alloc::{format, string::String, vec, vec::Vec}; +use core::alloc::Layout; + +#[global_allocator] +static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc; + +#[panic_handler] +fn panic_handler(_info: &core::panic::PanicInfo) -> ! { + core::arch::wasm32::unreachable() +} + +// Host functions provided by the runtime +unsafe extern "C" { + fn host_set_result(ptr: i32, len: i32); + fn host_log(level: i32, ptr: i32, len: i32); + fn host_emit_event(type_ptr: i32, type_len: i32, payload_ptr: i32, payload_len: i32) -> i32; + fn host_get_config(key_ptr: i32, key_len: i32) -> i32; + fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32; +} + +fn set_response(json: &[u8]) { + unsafe { + host_set_result(json.as_ptr() as i32, json.len() as i32); + } +} + +fn log_info(msg: &str) { + unsafe { + host_log(2, msg.as_ptr() as i32, msg.len() as i32); + } +} + +unsafe fn read_request(ptr: i32, len: i32) -> Vec { + if ptr < 0 || len <= 0 { + return Vec::new(); + } + let slice = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) }; + slice.to_vec() +} + +/// Extract a string value from a JSON object for a given key. 
+fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> { + let json_str = core::str::from_utf8(json).ok()?; + let pattern = format!("\"{}\"", key); + let key_pos = json_str.find(&pattern)?; + let after_key = &json_str[key_pos + pattern.len()..]; + let after_colon = after_key.trim_start().strip_prefix(':')?; + let after_colon = after_colon.trim_start(); + + if after_colon.starts_with('"') { + let value_start = 1; + let value_end = after_colon[value_start..].find('"')?; + Some(&after_colon[value_start..value_start + value_end]) + } else { + None + } +} + +/// A single tagging rule: match `pattern` in path -> apply `tag`. +struct Rule { + pattern: String, + tag: String, +} + +/// Default rules used when no `rules` config key is present. +fn default_rules() -> Vec { + vec![ + Rule { pattern: String::from("/music/"), tag: String::from("music") }, + Rule { pattern: String::from("/photos/"), tag: String::from("photo") }, + Rule { pattern: String::from("/videos/"), tag: String::from("video") }, + Rule { pattern: String::from("/books/"), tag: String::from("book") }, + Rule { pattern: String::from("/documents/"), tag: String::from("document") }, + ] +} + +/// Parse the `rules` JSON array from the config buffer. +/// Expected format: `[{"pattern":"...","tag":"..."},...]` +/// Returns an empty vec on any parse failure (falls back to defaults). +fn parse_rules_json(data: &[u8]) -> Vec { + let text = match core::str::from_utf8(data) { + Ok(s) => s, + Err(_) => return Vec::new(), + }; + + let mut rules = Vec::new(); + // Walk through occurrences of "pattern" keys inside object literals. 
+ let mut search = text; + while let Some(p_pos) = search.find("\"pattern\"") { + let after_p = &search[p_pos + 9..]; + let after_colon = match after_p.trim_start().strip_prefix(':') { + Some(s) => s.trim_start(), + None => { + search = &search[p_pos + 1..]; + continue; + } + }; + let pattern = if after_colon.starts_with('"') { + let inner = &after_colon[1..]; + match inner.find('"') { + Some(end) => String::from(&inner[..end]), + None => { + search = &search[p_pos + 1..]; + continue; + } + } + } else { + search = &search[p_pos + 1..]; + continue; + }; + + // Now search for "tag" after the current pattern position. + let remaining = &search[p_pos..]; + let tag = if let Some(t_pos) = remaining.find("\"tag\"") { + let after_t = &remaining[t_pos + 5..]; + let after_colon_t = match after_t.trim_start().strip_prefix(':') { + Some(s) => s.trim_start(), + None => { + search = &search[p_pos + 1..]; + continue; + } + }; + if after_colon_t.starts_with('"') { + let inner = &after_colon_t[1..]; + match inner.find('"') { + Some(end) => String::from(&inner[..end]), + None => { + search = &search[p_pos + 1..]; + continue; + } + } + } else { + search = &search[p_pos + 1..]; + continue; + } + } else { + search = &search[p_pos + 1..]; + continue; + }; + + rules.push(Rule { pattern, tag }); + search = &search[p_pos + 1..]; + } + + rules +} + +/// Load rules from config, falling back to defaults. 
+fn load_rules() -> Vec { + let key = b"rules"; + let size = unsafe { host_get_config(key.as_ptr() as i32, key.len() as i32) }; + if size <= 0 { + return default_rules(); + } + + let buf_size = size as usize; + let layout = match Layout::from_size_align(buf_size, 1) { + Ok(l) => l, + Err(_) => return default_rules(), + }; + let ptr = unsafe { alloc::alloc::alloc(layout) }; + if ptr.is_null() { + return default_rules(); + } + + let copied = unsafe { host_get_buffer(ptr as i32, size) }; + if copied <= 0 { + unsafe { alloc::alloc::dealloc(ptr, layout) }; + return default_rules(); + } + + let data = unsafe { core::slice::from_raw_parts(ptr, copied as usize) }; + let rules = parse_rules_json(data); + unsafe { alloc::alloc::dealloc(ptr, layout) }; + + if rules.is_empty() { + default_rules() + } else { + rules + } +} + +/// Escape a string for safe inclusion in a JSON string value. +fn json_escape(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for c in s.chars() { + match c { + '"' => out.push_str("\\\""), + '\\' => out.push_str("\\\\"), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + _ => out.push(c), + } + } + out +} + +#[unsafe(no_mangle)] +pub extern "C" fn alloc(size: i32) -> i32 { + if size <= 0 { + return 0; + } + unsafe { + let layout = match Layout::from_size_align(size as usize, 1) { + Ok(l) => l, + Err(_) => return -1, + }; + let ptr = alloc::alloc::alloc(layout); + if ptr.is_null() { + return -1; + } + ptr as i32 + } +} + +#[unsafe(no_mangle)] +pub extern "C" fn initialize() -> i32 { + log_info("auto-tagger initialized"); + 0 +} + +#[unsafe(no_mangle)] +pub extern "C" fn shutdown() -> i32 { + log_info("auto-tagger shutdown"); + 0 +} + +/// Returns the event types this handler is interested in. 
+#[unsafe(no_mangle)] +pub extern "C" fn interested_events(_ptr: i32, _len: i32) { + set_response(br#"["MediaImported"]"#); +} + +/// Handle a `MediaImported` event: check path against rules and emit tag events. +#[unsafe(no_mangle)] +pub extern "C" fn handle_event(ptr: i32, len: i32) { + let req = unsafe { read_request(ptr, len) }; + + let media_id = json_get_str(&req, "media_id").unwrap_or(""); + // The payload is nested; attempt to extract `path` from the top-level + // request or from a nested `payload` object. + let path = json_get_str(&req, "path").unwrap_or(""); + + let rules = load_rules(); + let mut matched_count = 0u32; + + for rule in &rules { + if !path.is_empty() && path.contains(rule.pattern.as_str()) { + let event_type = b"AutoTagSuggested"; + let payload = format!( + r#"{{"media_id":"{}","tag":"{}"}}"#, + json_escape(media_id), + json_escape(&rule.tag), + ); + unsafe { + host_emit_event( + event_type.as_ptr() as i32, + event_type.len() as i32, + payload.as_ptr() as i32, + payload.len() as i32, + ); + } + matched_count += 1; + } + } + + if matched_count > 0 { + let msg = format!( + "auto-tagger: matched {} rule(s) for path: {}", + matched_count, + path, + ); + log_info(&msg); + } else { + let msg = format!("auto-tagger: no rules matched for path: {}", path); + log_info(&msg); + } + + set_response(b"{}"); +} diff --git a/examples/plugins/cbz-comics/Cargo.lock b/examples/plugins/cbz-comics/Cargo.lock new file mode 100644 index 0000000000000000000000000000000000000000..06ebc6a5682394a23901e3baa3f4937e0af6b2a5 GIT binary patch literal 1673 zcmb`HO>f&U42JLi6^47<;!2c6NdpE9*lG9Oiek_QnFw|4!gkh=Uq7W?hZak2IT(T@ z!s7crB(K_^yRx)g25RdX4(qfJYe``kp4x9)8fgwIrcJ5oJa$dm zQMy|W`{OeTgp}S`&naY|Ksu+{B<)aUiPGvUNFbQw2Z(G06FTpd79kRc?sWDkK5uFk zB`qHm$f`heO{V*!Gh-^W)Ff`2w#&o5x;mxqbKUIASe}1Pk0n#x-=2Abx0}NM{a#<# zAg`2Q7lBzSR@!J0wI;8TSQ#m$17pEP<+4)X>Zz~RX+UXYHB(exC-MH%`yR09p_l5? 
z0@y?$leFv%k#{aT1xX7s+6UAzJ0Bt>&-EgpR55c+9Re9)34HbQ1g`XI{t?zt;w!a> zT3*w`vRR&%^VD9V|I_R)EjpTr8qq*x#LB{Zk~ugpWK_``0Oyf~B&#rnY}kTgvOxu? zHhG`VPQrr?Fbk<}lKSqN|d literal 0 HcmV?d00001 diff --git a/examples/plugins/cbz-comics/Cargo.toml b/examples/plugins/cbz-comics/Cargo.toml new file mode 100644 index 0000000..23319ed --- /dev/null +++ b/examples/plugins/cbz-comics/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "cbz-comics" +version = "1.0.0" +edition = "2024" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +dlmalloc = { version = "0.2", features = ["global"] } +miniz_oxide = { version = "0.8", default-features = false, features = [ + "with-alloc", +] } + +[profile.release] +opt-level = "s" +lto = true +strip = true diff --git a/examples/plugins/cbz-comics/plugin.toml b/examples/plugins/cbz-comics/plugin.toml new file mode 100644 index 0000000..e2f6e74 --- /dev/null +++ b/examples/plugins/cbz-comics/plugin.toml @@ -0,0 +1,20 @@ +[plugin] +name = "cbz-comics" +version = "1.0.0" +api_version = "1.0" +description = "Supports CBZ (Comic Book ZIP) and CBR files with metadata extraction and thumbnail generation" +kind = ["media_type", "metadata_extractor", "thumbnail_generator"] +priority = 500 + +[plugin.binary] +wasm = "cbz_comics.wasm" + +[capabilities] +network = false + +[capabilities.filesystem] +# Users must add their media root directories here. Example: +# read = ["/home/user/comics"] +# write = ["/home/user/.cache/pinakes/thumbnails"] +read = [] +write = [] diff --git a/examples/plugins/cbz-comics/src/lib.rs b/examples/plugins/cbz-comics/src/lib.rs new file mode 100644 index 0000000..98d8f7b --- /dev/null +++ b/examples/plugins/cbz-comics/src/lib.rs @@ -0,0 +1,742 @@ +//! CBZ/CBR comics plugin for Pinakes. +//! +//! Registers comic book ZIP (`cbz`) and RAR (`cbr`) media types, extracts +//! metadata from CBZ archives (including `ComicInfo.xml` when present), and +//! generates thumbnails from the cover image. +//! +//! 
CBR is registered as a media type but metadata extraction is limited to +//! format detection only (RAR parsing is not implemented). +//! +//! ZIP parsing is implemented from scratch without external ZIP crates to keep +//! the WASM binary small. +//! +//! The `filesystem.read` and `filesystem.write` capabilities in `plugin.toml` +//! must be configured for the directories containing comic files and the +//! thumbnail output directory respectively. +//! +//! Build with: +//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release + +#![no_std] + +extern crate alloc; + +use alloc::{format, string::{String, ToString}, vec, vec::Vec}; +use core::alloc::Layout; + +#[global_allocator] +static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc; + +#[panic_handler] +fn panic_handler(_info: &core::panic::PanicInfo) -> ! { + core::arch::wasm32::unreachable() +} + +// Host functions provided by the runtime +unsafe extern "C" { + fn host_set_result(ptr: i32, len: i32); + fn host_log(level: i32, ptr: i32, len: i32); + fn host_read_file(path_ptr: i32, path_len: i32) -> i32; + fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32; + fn host_write_file(path_ptr: i32, path_len: i32, data_ptr: i32, data_len: i32) -> i32; +} + +fn set_response(json: &[u8]) { + unsafe { + host_set_result(json.as_ptr() as i32, json.len() as i32); + } +} + +fn log_info(msg: &str) { + unsafe { + host_log(2, msg.as_ptr() as i32, msg.len() as i32); + } +} + +unsafe fn read_request(ptr: i32, len: i32) -> Vec { + if ptr < 0 || len <= 0 { + return Vec::new(); + } + let slice = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) }; + slice.to_vec() +} + +/// Extract a string value from a JSON object for a given key. 
+fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> { + let json_str = core::str::from_utf8(json).ok()?; + let pattern = format!("\"{}\"", key); + let key_pos = json_str.find(&pattern)?; + let after_key = &json_str[key_pos + pattern.len()..]; + let after_colon = after_key.trim_start().strip_prefix(':')?; + let after_colon = after_colon.trim_start(); + + if after_colon.starts_with('"') { + let value_start = 1; + let value_end = after_colon[value_start..].find('"')?; + Some(&after_colon[value_start..value_start + value_end]) + } else { + None + } +} + +/// Escape a string for safe inclusion in a JSON string value. +fn json_escape(s: &str) -> String { + let mut out = String::with_capacity(s.len()); + for c in s.chars() { + match c { + '"' => out.push_str("\\\""), + '\\' => out.push_str("\\\\"), + '\n' => out.push_str("\\n"), + '\r' => out.push_str("\\r"), + '\t' => out.push_str("\\t"), + _ => out.push(c), + } + } + out +} + +// 20 MB content read limit for comic archives +const MAX_FILE_BYTES: usize = 20 * 1024 * 1024; + +// ZIP signatures (little-endian u32) +const SIG_LOCAL_FILE: u32 = 0x04034b50; +const SIG_CENTRAL_DIR: u32 = 0x02014b50; +const SIG_EOCD: u32 = 0x06054b50; + +// Compression methods +const COMPRESS_STORE: u16 = 0; +const COMPRESS_DEFLATE: u16 = 8; + +/// Read a little-endian u16 from a byte slice at the given offset. +/// Returns `None` if out of bounds. +fn read_u16_le(data: &[u8], offset: usize) -> Option { + let b0 = *data.get(offset)? as u16; + let b1 = *data.get(offset + 1)? as u16; + Some(b0 | (b1 << 8)) +} + +/// Read a little-endian u32 from a byte slice at the given offset. +/// Returns `None` if out of bounds. +fn read_u32_le(data: &[u8], offset: usize) -> Option { + let b0 = *data.get(offset)? as u32; + let b1 = *data.get(offset + 1)? as u32; + let b2 = *data.get(offset + 2)? as u32; + let b3 = *data.get(offset + 3)? 
as u32; + Some(b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)) +} + +/// Read a big-endian u16 from a byte slice at the given offset. +fn read_u16_be(data: &[u8], offset: usize) -> Option { + let b0 = *data.get(offset)? as u16; + let b1 = *data.get(offset + 1)? as u16; + Some((b0 << 8) | b1) +} + +/// Read a big-endian u32 from a byte slice at the given offset. +fn read_u32_be(data: &[u8], offset: usize) -> Option { + let b0 = *data.get(offset)? as u32; + let b1 = *data.get(offset + 1)? as u32; + let b2 = *data.get(offset + 2)? as u32; + let b3 = *data.get(offset + 3)? as u32; + Some((b0 << 24) | (b1 << 16) | (b2 << 8) | b3) +} + +/// A parsed central directory entry from a ZIP archive. +struct ZipEntry { + name: String, + compression: u16, + compressed_size: u32, + local_offset: u32, +} + +/// Find the End of Central Directory record offset by scanning backwards. +fn find_eocd(data: &[u8]) -> Option { + if data.len() < 22 { + return None; + } + // Scan backwards for the EOCD signature. The maximum comment size is + // 65535 bytes, so we only need to scan that far from the end. + let scan_start = if data.len() > 22 + 65535 { + data.len() - 22 - 65535 + } else { + 0 + }; + let mut i = data.len() - 22; + loop { + if read_u32_le(data, i) == Some(SIG_EOCD) { + return Some(i); + } + if i == scan_start { + break; + } + i -= 1; + } + None +} + +/// Parse all central directory entries from a ZIP archive. 
+fn parse_central_directory(data: &[u8]) -> Vec { + let mut entries = Vec::new(); + + let eocd_offset = match find_eocd(data) { + Some(o) => o, + None => return entries, + }; + + // EOCD layout (offsets relative to EOCD start): + // 0: signature (4) + // 4: disk number (2) + // 6: start disk (2) + // 8: entries on disk (2) + // 10: total entries (2) + // 12: central dir size (4) + // 16: central dir offset (4) + // 20: comment length (2) + let cd_offset = match read_u32_le(data, eocd_offset + 16) { + Some(o) => o as usize, + None => return entries, + }; + let total_entries = match read_u16_le(data, eocd_offset + 10) { + Some(n) => n as usize, + None => return entries, + }; + + let mut pos = cd_offset; + for _ in 0..total_entries { + if pos + 46 > data.len() { + break; + } + if read_u32_le(data, pos) != Some(SIG_CENTRAL_DIR) { + break; + } + + // Central directory entry layout: + // 0: signature (4) + // 4: version made by (2) + // 6: version needed (2) + // 8: flags (2) + // 10: compression (2) + // 12: mod time (2) + // 14: mod date (2) + // 16: crc32 (4) + // 20: compressed size (4) + // 24: uncompressed size (4) + // 28: filename length (2) + // 30: extra field length (2) + // 32: file comment length (2) + // 34: disk start (2) + // 36: internal attrs (2) + // 38: external attrs (4) + // 42: local header offset (4) + // 46: filename... + let compression = match read_u16_le(data, pos + 10) { Some(v) => v, None => break }; + let compressed_size = match read_u32_le(data, pos + 20) { Some(v) => v, None => break }; + // uncompressed_size at pos+24 is intentionally not stored; size comes from decompressor output. 
+ let fname_len = match read_u16_le(data, pos + 28) { Some(v) => v as usize, None => break }; + let extra_len = match read_u16_le(data, pos + 30) { Some(v) => v as usize, None => break }; + let comment_len = match read_u16_le(data, pos + 32) { Some(v) => v as usize, None => break }; + let local_offset = match read_u32_le(data, pos + 42) { Some(v) => v, None => break }; + + let fname_start = pos + 46; + let fname_end = fname_start + fname_len; + if fname_end > data.len() { + break; + } + + let name = core::str::from_utf8(&data[fname_start..fname_end]) + .unwrap_or("") + .to_ascii_lowercase(); + + entries.push(ZipEntry { + name, + compression, + compressed_size, + local_offset, + }); + + pos = fname_end + extra_len + comment_len; + } + + entries +} + +/// Read raw bytes for a local file entry (the actual compressed/stored data). +/// Returns a slice into `data` containing the compressed bytes. +fn local_file_data<'a>(data: &'a [u8], entry: &ZipEntry) -> Option<&'a [u8]> { + let off = entry.local_offset as usize; + if off + 30 > data.len() { + return None; + } + if read_u32_le(data, off) != Some(SIG_LOCAL_FILE) { + return None; + } + + // Local file header layout: + // 0: signature (4) + // 4: version needed (2) + // 6: flags (2) + // 8: compression (2) + // 10: mod time (2) + // 12: mod date (2) + // 14: crc32 (4) + // 18: compressed size (4) + // 22: uncompressed size (4) + // 26: filename length (2) + // 28: extra length (2) + // 30: filename... + let fname_len = read_u16_le(data, off + 26)? as usize; + let extra_len = read_u16_le(data, off + 28)? as usize; + let data_start = off + 30 + fname_len + extra_len; + let data_end = data_start + entry.compressed_size as usize; + if data_end > data.len() { + return None; + } + Some(&data[data_start..data_end]) +} + +/// Decompress a stored (STORE) or deflated (DEFLATE) entry. +/// Returns the uncompressed bytes. 
+fn decompress_entry(data: &[u8], entry: &ZipEntry) -> Option> { + let raw = local_file_data(data, entry)?; + match entry.compression { + COMPRESS_STORE => Some(raw.to_vec()), + COMPRESS_DEFLATE => { + miniz_oxide::inflate::decompress_to_vec(raw).ok() + } + _ => None, + } +} + +/// Returns true if a filename has an image extension. +fn is_image_filename(name: &str) -> bool { + name.ends_with(".jpg") + || name.ends_with(".jpeg") + || name.ends_with(".png") + || name.ends_with(".webp") +} + +/// Extract a simple XML element value using substring search. +/// Looks for `value` and returns the inner text. +fn xml_get_text<'a>(xml: &'a str, tag: &str) -> Option<&'a str> { + let open = format!("<{}>", tag); + let close = format!("", tag); + let start = xml.find(&open)?; + let after_open = &xml[start + open.len()..]; + let end = after_open.find(&close)?; + Some(&after_open[..end]) +} + +/// Metadata extracted from a ComicInfo.xml file. +struct ComicInfo { + title: Option, + series: Option, + issue_number: Option, + writer: Option, + page_count: Option, + language: Option, + genre: Option, + summary: Option, +} + +/// Parse key fields from a ComicInfo.xml byte slice. 
+fn parse_comic_info(data: &[u8]) -> ComicInfo { + let text = core::str::from_utf8(data).unwrap_or(""); + ComicInfo { + title: xml_get_text(text, "Title") .map(|s| s.trim().to_ascii_lowercase()).filter(|s| !s.is_empty()).map(|s| { + // Re-capitalize first letter for title + let mut c = s.chars(); + match c.next() { + None => String::new(), + Some(f) => f.to_uppercase().collect::() + c.as_str(), + } + }), + series: xml_get_text(text, "Series") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), + issue_number: xml_get_text(text, "Number") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), + writer: xml_get_text(text, "Writer") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), + page_count: xml_get_text(text, "PageCount") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), + language: xml_get_text(text, "LanguageISO").map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), + genre: xml_get_text(text, "Genre") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), + summary: xml_get_text(text, "Summary") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), + } +} + +/// Image dimension and format information. +struct ImageInfo { + width: u32, + height: u32, + format: &'static str, +} + +/// Parse image dimensions and detect format from raw image bytes. +fn parse_image_info(data: &[u8]) -> Option { + if data.len() < 4 { + return None; + } + // JPEG: starts with 0xFF 0xD8 + if data[0] == 0xFF && data[1] == 0xD8 { + // Scan for SOF0 (0xFF 0xC0) or SOF2 (0xFF 0xC2) marker + let mut i = 2usize; + while i + 8 < data.len() { + if data[i] == 0xFF { + let marker = data[i + 1]; + if marker == 0xC0 || marker == 0xC2 { + // SOF marker layout: + // 0: 0xFF + // 1: marker + // 2-3: segment length (big-endian) + // 4: precision + // 5-6: height (big-endian u16) + // 7-8: width (big-endian u16) + let height = read_u16_be(data, i + 5)? as u32; + let width = read_u16_be(data, i + 7)? 
as u32; + return Some(ImageInfo { width, height, format: "jpeg" }); + } else if marker == 0xFF { + // Padding byte + i += 1; + continue; + } else if marker == 0xD8 || marker == 0xD9 { + // SOI / EOI - no length field + i += 2; + continue; + } else { + // Skip segment: length at i+2 (includes the 2 length bytes) + if let Some(seg_len) = read_u16_be(data, i + 2) { + i += 2 + seg_len as usize; + } else { + break; + } + } + } else { + i += 1; + } + } + // Return a JPEG without dimensions if SOF not found + return Some(ImageInfo { width: 0, height: 0, format: "jpeg" }); + } + + // PNG: starts with 0x89 0x50 0x4E 0x47 ('PNG') + if data.len() >= 24 && data[0] == 0x89 && data[1] == 0x50 && data[2] == 0x4E && data[3] == 0x47 { + // IHDR chunk: width at bytes 16-19, height at bytes 20-23 (big-endian u32) + let width = read_u32_be(data, 16)?; + let height = read_u32_be(data, 20)?; + return Some(ImageInfo { width, height, format: "png" }); + } + + // WebP: RIFF....WEBP + if data.len() >= 12 + && &data[0..4] == b"RIFF" + && &data[8..12] == b"WEBP" + { + return Some(ImageInfo { width: 0, height: 0, format: "webp" }); + } + + None +} + +/// Load a CBZ archive into memory. Returns the raw bytes or an error string. 
+fn load_cbz_file(path: &str) -> Result, &'static str> { + let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) }; + if file_size < 0 { + return Err("read failed"); + } + if file_size as usize >= MAX_FILE_BYTES { + return Err("too large"); + } + let buf_size = file_size as usize; + if buf_size == 0 { + return Ok(Vec::new()); + } + + let layout = Layout::from_size_align(buf_size, 1).map_err(|_| "alloc failed")?; + let buf_ptr = unsafe { alloc::alloc::alloc(layout) }; + if buf_ptr.is_null() { + return Err("alloc failed"); + } + + let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) }; + if copied <= 0 { + unsafe { alloc::alloc::dealloc(buf_ptr, layout) }; + return Err("buffer copy failed"); + } + + let data = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) }.to_vec(); + unsafe { alloc::alloc::dealloc(buf_ptr, layout) }; + Ok(data) +} + +#[unsafe(no_mangle)] +pub extern "C" fn alloc(size: i32) -> i32 { + if size <= 0 { + return 0; + } + unsafe { + let layout = match Layout::from_size_align(size as usize, 1) { + Ok(l) => l, + Err(_) => return -1, + }; + let ptr = alloc::alloc::alloc(layout); + if ptr.is_null() { + return -1; + } + ptr as i32 + } +} + +#[unsafe(no_mangle)] +pub extern "C" fn initialize() -> i32 { + log_info("cbz-comics initialized"); + 0 +} + +#[unsafe(no_mangle)] +pub extern "C" fn shutdown() -> i32 { + log_info("cbz-comics shutdown"); + 0 +} + +/// Returns the comic media type definitions. +#[unsafe(no_mangle)] +pub extern "C" fn supported_media_types(_ptr: i32, _len: i32) { + let response = br#"[ +{"id":"comic-cbz","name":"Comic Book ZIP","category":"document","extensions":["cbz"],"mime_types":["application/vnd.comicbook+zip"]}, +{"id":"comic-cbr","name":"Comic Book RAR","category":"document","extensions":["cbr"],"mime_types":["application/vnd.comicbook-rar"]} +]"#; + set_response(response); +} + +/// Check whether this plugin can handle a given path. 
+#[unsafe(no_mangle)] +pub extern "C" fn can_handle(ptr: i32, len: i32) { + let req = unsafe { read_request(ptr, len) }; + let path = json_get_str(&req, "path").unwrap_or("").to_ascii_lowercase(); + let can = path.ends_with(".cbz") || path.ends_with(".cbr"); + if can { + set_response(br#"{"can_handle":true}"#); + } else { + set_response(br#"{"can_handle":false}"#); + } +} + +/// Returns the media type IDs this extractor supports. +#[unsafe(no_mangle)] +pub extern "C" fn supported_types(_ptr: i32, _len: i32) { + set_response(br#"["comic-cbz","comic-cbr"]"#); +} + +/// Extract metadata from a CBZ or CBR file. +#[unsafe(no_mangle)] +pub extern "C" fn extract_metadata(ptr: i32, len: i32) { + let req = unsafe { read_request(ptr, len) }; + let path = match json_get_str(&req, "path") { + Some(p) => p, + None => { + set_response(br#"{"extra":{"error":"missing path"}}"#); + return; + } + }; + + let lower = path.to_ascii_lowercase(); + + // CBR: register the type but do not attempt to parse RAR. + if lower.ends_with(".cbr") { + set_response(br#"{"extra":{"format":"cbr","note":"cbr-unsupported"}}"#); + return; + } + + // Load CBZ archive + let data = match load_cbz_file(path) { + Ok(d) => d, + Err("too large") => { + set_response(br#"{"extra":{"format":"cbz","too_large":"true"}}"#); + return; + } + Err(e) => { + let resp = format!(r#"{{"extra":{{"format":"cbz","error":"{}"}}}}"#, e); + set_response(resp.as_bytes()); + return; + } + }; + + let entries = parse_central_directory(&data); + + // Count image files as page count. + let image_count = entries.iter().filter(|e| is_image_filename(&e.name)).count(); + + // Look for ComicInfo.xml (case-insensitive). + let comic_info_entry = entries.iter().find(|e| { + let n = e.name.as_str(); + n == "comicinfo.xml" || n.ends_with("/comicinfo.xml") + }); + + let info = if let Some(entry) = comic_info_entry { + // Only decompress STORE entries here for simplicity; skip DEFLATE ones. 
+ if entry.compression == COMPRESS_STORE || entry.compression == COMPRESS_DEFLATE { + if let Some(xml_bytes) = decompress_entry(&data, entry) { + Some(parse_comic_info(&xml_bytes)) + } else { + None + } + } else { + None + } + } else { + None + }; + + let msg = format!( + "cbz-comics: {} entries, {} images, ComicInfo.xml={}", + entries.len(), + image_count, + info.is_some(), + ); + log_info(&msg); + + // Build response JSON + let mut extra_pairs: Vec<(&str, String)> = vec![ + ("format", String::from("cbz")), + ]; + + let page_count_str; + if let Some(ref ci) = info { + if let Some(ref pc) = ci.page_count { + page_count_str = pc.clone(); + extra_pairs.push(("page_count", page_count_str.clone())); + } else { + page_count_str = format!("{}", image_count); + extra_pairs.push(("page_count", page_count_str.clone())); + } + if let Some(ref s) = ci.series { extra_pairs.push(("series", s.clone())) } + if let Some(ref n) = ci.issue_number { extra_pairs.push(("issue_number", n.clone())) } + if let Some(ref l) = ci.language { extra_pairs.push(("language", l.clone())) } + } else { + page_count_str = format!("{}", image_count); + extra_pairs.push(("page_count", page_count_str.clone())); + } + + // Build extra JSON object + let mut extra_json = String::from("{"); + for (i, (k, v)) in extra_pairs.iter().enumerate() { + if i > 0 { extra_json.push(','); } + extra_json.push('"'); + extra_json.push_str(k); + extra_json.push_str("\":\""); + extra_json.push_str(&json_escape(v)); + extra_json.push('"'); + } + extra_json.push('}'); + + let title_field = info.as_ref() + .and_then(|ci| ci.title.as_ref()) + .map(|t| format!(r#","title":"{}""#, json_escape(t))) + .unwrap_or_default(); + + let artist_field = info.as_ref() + .and_then(|ci| ci.writer.as_ref()) + .map(|w| format!(r#","artist":"{}""#, json_escape(w))) + .unwrap_or_default(); + + let genre_field = info.as_ref() + .and_then(|ci| ci.genre.as_ref()) + .map(|g| format!(r#","genre":"{}""#, json_escape(g))) + .unwrap_or_default(); + + 
let desc_field = info.as_ref() + .and_then(|ci| ci.summary.as_ref()) + .map(|s| format!(r#","description":"{}""#, json_escape(s))) + .unwrap_or_default(); + + let resp = format!( + r#"{{"extra":{}{}{}{}{}}}"#, + extra_json, title_field, artist_field, genre_field, desc_field, + ); + set_response(resp.as_bytes()); +} + +/// Generate a thumbnail from the cover image of a CBZ archive. +#[unsafe(no_mangle)] +pub extern "C" fn generate_thumbnail(ptr: i32, len: i32) { + let req = unsafe { read_request(ptr, len) }; + let source_path = match json_get_str(&req, "source_path") { + Some(p) => p, + None => { + set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); + return; + } + }; + let output_path = match json_get_str(&req, "output_path") { + Some(p) => p, + None => { + set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); + return; + } + }; + + let lower = source_path.to_ascii_lowercase(); + if !lower.ends_with(".cbz") { + set_response(br#"{"path":"","width":0,"height":0,"format":"unknown"}"#); + return; + } + + let data = match load_cbz_file(source_path) { + Ok(d) => d, + Err(_) => { + set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); + return; + } + }; + + let entries = parse_central_directory(&data); + + // Find alphabetically first image file for the cover. 
+ let mut image_entries: Vec<&ZipEntry> = entries.iter().filter(|e| is_image_filename(&e.name)).collect(); + image_entries.sort_by(|a, b| a.name.as_str().cmp(b.name.as_str())); + + let cover = match image_entries.first() { + Some(e) => e, + None => { + set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); + return; + } + }; + + let image_bytes = match decompress_entry(&data, cover) { + Some(b) => b, + None => { + set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); + return; + } + }; + + let info = parse_image_info(&image_bytes).unwrap_or(ImageInfo { + width: 0, height: 0, format: "jpeg", + }); + + // Write thumbnail bytes to output path + let write_result = unsafe { + host_write_file( + output_path.as_ptr() as i32, + output_path.len() as i32, + image_bytes.as_ptr() as i32, + image_bytes.len() as i32, + ) + }; + if write_result < 0 { + set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); + return; + } + + let msg = format!( + "cbz-comics: thumbnail {}x{} {} written to {}", + info.width, info.height, info.format, output_path, + ); + log_info(&msg); + + let resp = format!( + r#"{{"path":"{}","width":{},"height":{},"format":"{}"}}"#, + json_escape(output_path), + info.width, + info.height, + info.format, + ); + set_response(resp.as_bytes()); +} diff --git a/examples/plugins/subtitle-detector/Cargo.lock b/examples/plugins/subtitle-detector/Cargo.lock new file mode 100644 index 0000000000000000000000000000000000000000..dda81f78a90e755efd0a764341c3f865622eac62 GIT binary patch literal 1249 zcmb`GOK;mS5QOjg6@qhYEtAW4fC2@2>b_{_=A_bfnCdl6 zb9?gR>W3CnE_v*`<}c1mp2qe^E1Tx&>D9*<-{bRhGx`(O(_N%~pK14LaK}5=$z8Kd z>l`mLGxm8Y^ZDDcl-K3)aOiV6uAz(5=`gQL*$;l~4{;N@ba~q6D(By>iARht%X->A z9RR(~{kWs*s_8L%V@{Vf}qIKNIBmu}7o{-R)q$L`bn3sg=3p(!}wgOy7 zta%4__TZ5V1K>goNykKtOt8!go=9_3Gnci$CMA+ia_tS+z#s!u3 literal 0 HcmV?d00001 diff --git a/examples/plugins/subtitle-detector/Cargo.toml b/examples/plugins/subtitle-detector/Cargo.toml new file mode 
100644 index 0000000..2b7d8ec --- /dev/null +++ b/examples/plugins/subtitle-detector/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "subtitle-detector" +version = "1.0.0" +edition = "2024" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +dlmalloc = { version = "0.2", features = ["global"] } + +[profile.release] +opt-level = "s" +lto = true +strip = true diff --git a/examples/plugins/subtitle-detector/plugin.toml b/examples/plugins/subtitle-detector/plugin.toml new file mode 100644 index 0000000..d836b75 --- /dev/null +++ b/examples/plugins/subtitle-detector/plugin.toml @@ -0,0 +1,18 @@ +[plugin] +name = "subtitle-detector" +version = "1.0.0" +api_version = "1.0" +description = "Registers SRT, VTT, and ASS subtitle formats and extracts language and entry count metadata" +kind = ["media_type", "metadata_extractor"] +priority = 500 + +[plugin.binary] +wasm = "subtitle_detector.wasm" + +[capabilities] +network = false + +[capabilities.filesystem] +# Users must add their media root directories here. Example: +# read = ["/home/user/media", "/mnt/nas/subtitles"] +read = [] diff --git a/examples/plugins/subtitle-detector/src/lib.rs b/examples/plugins/subtitle-detector/src/lib.rs new file mode 100644 index 0000000..bfab5e7 --- /dev/null +++ b/examples/plugins/subtitle-detector/src/lib.rs @@ -0,0 +1,345 @@
//! Subtitle-detector plugin for Pinakes.
//!
//! Registers SRT, VTT, and ASS/SSA subtitle file formats and extracts
//! language code and entry count metadata from them.
//!
//! Registered media types:
//! - `subtitle-srt`: extensions `["srt"]`, mime `["text/x-subrip"]`
//! - `subtitle-vtt`: extensions `["vtt"]`, mime `["text/vtt"]`
//! - `subtitle-ass`: extensions `["ass","ssa"]`, mime `["text/x-ass"]`
//!
//! Language detection uses filename conventions: `movie.en.srt` -> `en`.
//!
//! The `filesystem.read` capability in `plugin.toml` must be configured
//! to include the directories containing subtitle files.
//!
//! Build with:
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release

#![no_std]

extern crate alloc;

use alloc::{format, vec::Vec};
use core::alloc::Layout;

#[global_allocator]
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;

/// Traps the WASM instance on panic; there is no unwinding in this build.
#[panic_handler]
fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
    core::arch::wasm32::unreachable()
}

// Host functions provided by the runtime
unsafe extern "C" {
    fn host_set_result(ptr: i32, len: i32);
    fn host_log(level: i32, ptr: i32, len: i32);
    fn host_read_file(path_ptr: i32, path_len: i32) -> i32;
    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
}

/// Hands `json` to the host as the result of the current call.
fn set_response(json: &[u8]) {
    // SAFETY: the pointer/length pair describes `json`, which is live for the
    // duration of the call.
    unsafe {
        host_set_result(json.as_ptr() as i32, json.len() as i32);
    }
}

/// Sends `msg` to the host log at level 2.
fn log_info(msg: &str) {
    // SAFETY: the pointer/length pair describes `msg`, which is live for the
    // duration of the call.
    unsafe {
        host_log(2, msg.as_ptr() as i32, msg.len() as i32);
    }
}

/// Copies the request bytes at `ptr..ptr + len` into an owned buffer.
///
/// Returns an empty vector when `ptr` is negative or `len` is not positive.
///
/// # Safety
///
/// For any other input, `ptr` and `len` must describe `len` readable bytes in
/// this module's linear memory.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    if ptr < 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: guaranteed by the caller, see `# Safety`.
    let slice = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    slice.to_vec()
}

/// Extract a string value from a JSON object for a given key.
///
/// This is a minimal scanner, not a JSON parser: it looks for the first
/// occurrence of `"key"`, expects a `:` and a string after it, and returns
/// the text between the quotes. Backslash escapes are stepped over when
/// looking for the closing quote, so an escaped `\"` does not end the value.
/// The returned slice is the raw text and still contains those escapes.
///
/// Returns `None` when the input is not UTF-8, the key is absent, the value
/// is not a string, or the string is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let json_str = core::str::from_utf8(json).ok()?;
    let pattern = format!("\"{}\"", key);
    let key_pos = json_str.find(&pattern)?;
    let after_key = &json_str[key_pos + pattern.len()..];
    let after_colon = after_key.trim_start().strip_prefix(':')?.trim_start();
    let value = after_colon.strip_prefix('"')?;

    // Only ever slice at a `"` byte, which is always a UTF-8 char boundary,
    // even if skipping an escape lands inside a multi-byte character.
    let bytes = value.as_bytes();
    let mut i = 0usize;
    while i < bytes.len() {
        match bytes[i] {
            b'"' => return Some(&value[..i]),
            b'\\' => i += 2,
            _ => i += 1,
        }
    }
    None
}

/// Escape a string for safe inclusion in a JSON string value.
///
/// Produces the body of a JSON string literal (without the surrounding
/// quotes): `"` and `\` are backslash-escaped, newline, carriage return and
/// tab use their short escapes, and every other control character below
/// U+0020 is written as `\u00XX`, since JSON does not allow those raw.
/// All other characters are copied through unchanged.
fn json_escape(s: &str) -> alloc::string::String {
    const HEX: &[u8; 16] = b"0123456789abcdef";

    let mut out = alloc::string::String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if (c as u32) < 0x20 => {
                let code = c as usize;
                out.push_str("\\u00");
                out.push(HEX[code >> 4] as char);
                out.push(HEX[code & 0x0f] as char);
            }
            _ => out.push(c),
        }
    }
    out
}

// 512 KB content read limit for subtitle files
const MAX_FILE_BYTES: usize = 512 * 1024;

/// Subtitle format variants.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SubtitleFormat {
    Srt,
    Vtt,
    Ass,
}

/// Determine subtitle format from file path extension.
///
/// The extension is whatever follows the last `.` in `path`, compared
/// ASCII case-insensitively: `srt`, `vtt`, and `ass`/`ssa` are recognised,
/// anything else (including a path without a dot) yields `None`.
fn detect_format(path: &str) -> Option<SubtitleFormat> {
    let (_, ext) = path.rsplit_once('.')?;
    if ext.eq_ignore_ascii_case("srt") {
        Some(SubtitleFormat::Srt)
    } else if ext.eq_ignore_ascii_case("vtt") {
        Some(SubtitleFormat::Vtt)
    } else if ext.eq_ignore_ascii_case("ass") || ext.eq_ignore_ascii_case("ssa") {
        Some(SubtitleFormat::Ass)
    } else {
        None
    }
}

/// Try to detect a 2-3 letter language code from a filename stem.
/// Matches patterns like `movie.en.srt` or `film.fra.vtt`.
/// Returns the code if found.
///
/// Only the last `/`-separated component of `path` is inspected. The code is
/// returned exactly as written in the filename (no case folding) and is not
/// checked against any list of real language codes.
fn detect_language(path: &str) -> Option<&str> {
    // Get the filename component
    let filename = path.rsplit('/').next().unwrap_or(path);
    // Strip the final extension
    let stem = filename.rfind('.').map_or(filename, |dot| &filename[..dot]);
    // Check for another dot-separated segment at the end of the stem
    let (_, candidate) = stem.rsplit_once('.')?;
    let looks_like_code = matches!(candidate.len(), 2 | 3)
        && candidate.bytes().all(|b| b.is_ascii_alphabetic());
    looks_like_code.then_some(candidate)
}

/// Count `-->` occurrences in a byte slice.
///
/// `-->` cannot overlap with itself, so counting every matching 3-byte
/// window gives the number of non-overlapping occurrences.
fn count_arrow_markers(data: &[u8]) -> usize {
    const ARROW: &[u8] = b"-->";
    data.windows(ARROW.len()).filter(|w| *w == ARROW).count()
}

// Count `Dialogue:` lines in an ASS/SSA file.
+fn count_ass_dialogues(data: &[u8]) -> usize { + let mut count = 0usize; + let needle = b"Dialogue:"; + let mut i = 0usize; + // Count only at line starts (after newline or at file start) + let mut at_line_start = true; + while i < data.len() { + if at_line_start && data[i..].starts_with(needle) { + count += 1; + i += needle.len(); + at_line_start = false; + } else { + if data[i] == b'\n' { + at_line_start = true; + } else { + at_line_start = false; + } + i += 1; + } + } + count +} + +#[unsafe(no_mangle)] +pub extern "C" fn alloc(size: i32) -> i32 { + if size <= 0 { + return 0; + } + unsafe { + let layout = match Layout::from_size_align(size as usize, 1) { + Ok(l) => l, + Err(_) => return -1, + }; + let ptr = alloc::alloc::alloc(layout); + if ptr.is_null() { + return -1; + } + ptr as i32 + } +} + +#[unsafe(no_mangle)] +pub extern "C" fn initialize() -> i32 { + log_info("subtitle-detector initialized"); + 0 +} + +#[unsafe(no_mangle)] +pub extern "C" fn shutdown() -> i32 { + log_info("subtitle-detector shutdown"); + 0 +} + +/// Returns the media type definitions provided by this plugin. +#[unsafe(no_mangle)] +pub extern "C" fn supported_media_types(_ptr: i32, _len: i32) { + let response = br#"[ +{"id":"subtitle-srt","name":"SubRip Subtitle","category":"document","extensions":["srt"],"mime_types":["text/x-subrip"]}, +{"id":"subtitle-vtt","name":"WebVTT Subtitle","category":"document","extensions":["vtt"],"mime_types":["text/vtt"]}, +{"id":"subtitle-ass","name":"Advanced SubStation Alpha Subtitle","category":"document","extensions":["ass","ssa"],"mime_types":["text/x-ass"]} +]"#; + set_response(response); +} + +/// Check whether this plugin can handle a given path. 
+#[unsafe(no_mangle)] +pub extern "C" fn can_handle(ptr: i32, len: i32) { + let req = unsafe { read_request(ptr, len) }; + let path = json_get_str(&req, "path").unwrap_or(""); + let can = detect_format(path).is_some(); + if can { + set_response(br#"{"can_handle":true}"#); + } else { + set_response(br#"{"can_handle":false}"#); + } +} + +/// Returns the media type IDs this extractor supports. +#[unsafe(no_mangle)] +pub extern "C" fn supported_types(_ptr: i32, _len: i32) { + set_response(br#"["subtitle-srt","subtitle-vtt","subtitle-ass"]"#); +} + +/// Extract metadata from a subtitle file. +#[unsafe(no_mangle)] +pub extern "C" fn extract_metadata(ptr: i32, len: i32) { + let req = unsafe { read_request(ptr, len) }; + let path = match json_get_str(&req, "path") { + Some(p) => p, + None => { + set_response(br#"{"extra":{"error":"missing path"}}"#); + return; + } + }; + + let format = match detect_format(path) { + Some(f) => f, + None => { + set_response(br#"{"extra":{"error":"unsupported format"}}"#); + return; + } + }; + + let format_str = match format { + SubtitleFormat::Srt => "srt", + SubtitleFormat::Vtt => "vtt", + SubtitleFormat::Ass => "ass", + }; + + let language = detect_language(path); + + // Load file contents + let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) }; + if file_size < 0 { + // Return what we have without entry count + let lang_field = language + .map(|l| format!(r#","language":"{}""#, json_escape(l))) + .unwrap_or_default(); + let resp = format!( + r#"{{"extra":{{"format":"{}"{}}}}}"#, + format_str, lang_field, + ); + set_response(resp.as_bytes()); + return; + } + + if file_size as usize >= MAX_FILE_BYTES { + let lang_field = language + .map(|l| format!(r#","language":"{}""#, json_escape(l))) + .unwrap_or_default(); + let resp = format!( + r#"{{"extra":{{"format":"{}","too_large":"true"{}}}}}"#, + format_str, lang_field, + ); + set_response(resp.as_bytes()); + return; + } + + let buf_size = file_size as usize; + let 
entry_count = if buf_size == 0 { + 0usize + } else { + let layout = match Layout::from_size_align(buf_size, 1) { + Ok(l) => l, + Err(_) => { + set_response(br#"{"extra":{"error":"alloc failed"}}"#); + return; + } + }; + let buf_ptr = unsafe { alloc::alloc::alloc(layout) }; + if buf_ptr.is_null() { + set_response(br#"{"extra":{"error":"alloc failed"}}"#); + return; + } + + let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) }; + let count = if copied > 0 { + let data = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) }; + match format_str { + "srt" => count_arrow_markers(data), + "vtt" => count_arrow_markers(data), + _ => count_ass_dialogues(data), + } + } else { + 0 + }; + + unsafe { alloc::alloc::dealloc(buf_ptr, layout) }; + count + }; + + let msg = format!( + "subtitle-detector: format={}, entries={}, path={}", + format_str, entry_count, path, + ); + log_info(&msg); + + let lang_field = language + .map(|l| format!(r#","language":"{}""#, json_escape(l))) + .unwrap_or_default(); + + let resp = format!( + r#"{{"extra":{{"format":"{}","entry_count":"{}"{}}}}}"#, + format_str, entry_count, lang_field, + ); + set_response(resp.as_bytes()); +} diff --git a/examples/plugins/text-enrichment/Cargo.lock b/examples/plugins/text-enrichment/Cargo.lock new file mode 100644 index 0000000000000000000000000000000000000000..0697c2420c2dddeded216402223496ec7c273411 GIT binary patch literal 1247 zcmb`G%Wm5+5JmU;3c*>nmN|U0K!E~Xb>Cf(1s^jq5j_HlZsNZ$WgtNeI6JRMkppS& zIirX6?=i0}oOIbQoW{m zX-7Yu{Lo^`B~N|V{KI9<^VI%qWz#%9zWDg;dwhCoCO=|5-9_s6nRcHFchIp;?wWNz zE%8#bV4v5roWC7Qd0BrP4t*}iQ|RJ69+uNu_Jg1LL)@BNyFBl6ZRc&*#3RP%^)zmu zaY8V%ffIosA(do7N_0{q2?YR_E5l6?HVMIPJ#|)tO z%?JGNbvxt`ciZN*(wkq`{d!*O^V99zm+H^XE}8AULkAVK)LQXCT2~j&AtY}dgCK%a zoLi@f0UDu$Pl^l|gvJZcD<Nj>i$FawTsYK6VqL%?oRc$UBB0=@HyD`-72`&~k GSN{PV^M!B# literal 0 HcmV?d00001 diff --git a/examples/plugins/text-enrichment/Cargo.toml b/examples/plugins/text-enrichment/Cargo.toml new file mode 100644 index 
0000000..d073c1f --- /dev/null +++ b/examples/plugins/text-enrichment/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "text-enrichment" +version = "1.0.0" +edition = "2024" + +[lib] +crate-type = ["cdylib"] + +[dependencies] +dlmalloc = { version = "0.2", features = ["global"] } + +[profile.release] +opt-level = "s" +lto = true +strip = true diff --git a/examples/plugins/text-enrichment/plugin.toml b/examples/plugins/text-enrichment/plugin.toml new file mode 100644 index 0000000..f451a3e --- /dev/null +++ b/examples/plugins/text-enrichment/plugin.toml @@ -0,0 +1,18 @@ +[plugin] +name = "text-enrichment" +version = "1.0.0" +api_version = "1.0" +description = "Enriches plain text files with word count, line count, character count, and estimated reading time" +kind = ["metadata_extractor"] +priority = 500 + +[plugin.binary] +wasm = "text_enrichment.wasm" + +[capabilities] +network = false + +[capabilities.filesystem] +# Users must add their media root directories here. Example: +# read = ["/home/user/media", "/mnt/nas/texts"] +read = [] diff --git a/examples/plugins/text-enrichment/src/lib.rs b/examples/plugins/text-enrichment/src/lib.rs new file mode 100644 index 0000000..f6b248d --- /dev/null +++ b/examples/plugins/text-enrichment/src/lib.rs @@ -0,0 +1,198 @@
//! Text-enrichment plugin for Pinakes.
//!
//! Extracts word count, line count, byte count, and estimated reading
//! time from plain text (`.txt`) files.
//!
//! The `filesystem.read` capability list in `plugin.toml` must be configured
//! to include the directories where text files live.
//!
//! Build with:
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release

#![no_std]

extern crate alloc;

use alloc::{format, vec::Vec};
use core::alloc::Layout;

#[global_allocator]
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;

/// Traps the WASM instance on panic; there is no unwinding in this build.
#[panic_handler]
fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
    core::arch::wasm32::unreachable()
}

// Host functions provided by the runtime
unsafe extern "C" {
    fn host_set_result(ptr: i32, len: i32);
    fn host_log(level: i32, ptr: i32, len: i32);
    fn host_read_file(path_ptr: i32, path_len: i32) -> i32;
    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
}

/// Hands `json` to the host as the result of the current call.
fn set_response(json: &[u8]) {
    // SAFETY: the pointer/length pair describes `json`, which is live for the
    // duration of the call.
    unsafe {
        host_set_result(json.as_ptr() as i32, json.len() as i32);
    }
}

/// Sends `msg` to the host log at level 2.
fn log_info(msg: &str) {
    // SAFETY: the pointer/length pair describes `msg`, which is live for the
    // duration of the call.
    unsafe {
        host_log(2, msg.as_ptr() as i32, msg.len() as i32);
    }
}

/// Copies the request bytes at `ptr..ptr + len` into an owned buffer.
///
/// Returns an empty vector when `ptr` is negative or `len` is not positive.
///
/// # Safety
///
/// For any other input, `ptr` and `len` must describe `len` readable bytes in
/// this module's linear memory.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    if ptr < 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: guaranteed by the caller, see `# Safety`.
    let slice = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    slice.to_vec()
}

/// Extract a string value from a JSON object for a given key.
///
/// This is a minimal scanner, not a JSON parser: it looks for the first
/// occurrence of `"key"`, expects a `:` and a string after it, and returns
/// the text between the quotes. Backslash escapes are stepped over when
/// looking for the closing quote, so an escaped `\"` does not end the value.
/// The returned slice is the raw text and still contains those escapes.
///
/// Returns `None` when the input is not UTF-8, the key is absent, the value
/// is not a string, or the string is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let json_str = core::str::from_utf8(json).ok()?;
    let pattern = format!("\"{}\"", key);
    let key_pos = json_str.find(&pattern)?;
    let after_key = &json_str[key_pos + pattern.len()..];
    let after_colon = after_key.trim_start().strip_prefix(':')?.trim_start();
    let value = after_colon.strip_prefix('"')?;

    // Only ever slice at a `"` byte, which is always a UTF-8 char boundary,
    // even if skipping an escape lands inside a multi-byte character.
    let bytes = value.as_bytes();
    let mut i = 0usize;
    while i < bytes.len() {
        match bytes[i] {
            b'"' => return Some(&value[..i]),
            b'\\' => i += 2,
            _ => i += 1,
        }
    }
    None
}

// 5 MB content read limit; files of exactly this size or larger are skipped.
const MAX_FILE_BYTES: usize = 5 * 1024 * 1024;

/// Allocates `size` bytes (alignment 1) with the global allocator and returns
/// the address as an `i32`.
///
/// Returns `0` when `size` is zero or negative and `-1` when the layout is
/// invalid or the allocation fails.
#[unsafe(no_mangle)]
pub extern "C" fn alloc(size: i32) -> i32 {
    if size <= 0 {
        return 0;
    }
    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
        return -1;
    };
    // SAFETY: `layout` has a non-zero size because `size > 0` was checked above.
    let ptr = unsafe { alloc::alloc::alloc(layout) };
    if ptr.is_null() {
        return -1;
    }
    ptr as i32
}

/// Logs that the plugin was initialized and reports success (`0`).
#[unsafe(no_mangle)]
pub extern "C" fn initialize() -> i32 {
    log_info("text-enrichment initialized");
    0
}

/// Logs that the plugin is shutting down and reports success (`0`).
#[unsafe(no_mangle)]
pub extern "C" fn shutdown() -> i32 {
    log_info("text-enrichment shutdown");
    0
}
+/// Returns the media types this extractor supports. +#[unsafe(no_mangle)] +pub extern "C" fn supported_types(_ptr: i32, _len: i32) { + set_response(br#"["text"]"#); +} + +/// Extract text statistics from a plain text file. +#[unsafe(no_mangle)] +pub extern "C" fn extract_metadata(ptr: i32, len: i32) { + let req = unsafe { read_request(ptr, len) }; + let path = match json_get_str(&req, "path") { + Some(p) => p, + None => { + set_response(br#"{"extra":{"error":"missing path"}}"#); + return; + } + }; + + // Ask the host to load the file into the exchange buffer. + let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) }; + if file_size < 0 { + set_response(br#"{"extra":{"error":"read failed"}}"#); + return; + } + + if file_size as usize >= MAX_FILE_BYTES { + set_response(br#"{"extra":{"too_large":"true"}}"#); + return; + } + + let buf_size = file_size as usize; + if buf_size == 0 { + let resp = r#"{"extra":{"word_count":"0","line_count":"0","byte_count":"0","reading_minutes":"0"}}"#; + set_response(resp.as_bytes()); + return; + } + + let layout = match Layout::from_size_align(buf_size, 1) { + Ok(l) => l, + Err(_) => { + set_response(br#"{"extra":{"error":"alloc failed"}}"#); + return; + } + }; + let buf_ptr = unsafe { alloc::alloc::alloc(layout) }; + if buf_ptr.is_null() { + set_response(br#"{"extra":{"error":"alloc failed"}}"#); + return; + } + + let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) }; + if copied <= 0 { + unsafe { alloc::alloc::dealloc(buf_ptr, layout) }; + set_response(br#"{"extra":{"error":"buffer copy failed"}}"#); + return; + } + + let content = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) }; + + let byte_count = content.len(); + let line_count = content.iter().filter(|&&b| b == b'\n').count() + + if content.last().map_or(true, |&b| b != b'\n') { 1 } else { 0 }; + + // Count words: transitions from whitespace to non-whitespace. 
+ let mut word_count = 0usize; + let mut in_word = false; + for &b in content { + let is_ws = b == b' ' || b == b'\t' || b == b'\n' || b == b'\r'; + if !is_ws && !in_word { + word_count += 1; + in_word = true; + } else if is_ws { + in_word = false; + } + } + + // Estimate reading time at 200 words per minute, rounding up. + let reading_minutes = (word_count + 199) / 200; + + unsafe { alloc::alloc::dealloc(buf_ptr, layout) }; + + let msg = format!( + "text-enrichment: {} words, {} lines, {} chars", + word_count, line_count, byte_count + ); + log_info(&msg); + + let resp = format!( + r#"{{"extra":{{"word_count":"{}","line_count":"{}","byte_count":"{}","reading_minutes":"{}"}}}}"#, + word_count, line_count, byte_count, reading_minutes, + ); + set_response(resp.as_bytes()); +}