From 934fcba8ca7fd55d0d98660c66b8d975b60209e4 Mon Sep 17 00:00:00 2001
From: NotAShelf <raf@notashelf.dev>
Date: Wed, 20 May 2026 21:52:21 +0300
Subject: [PATCH] examples: add WASM plugin examples

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Id4b791396ab37827caced2c8cc03ec356a6a6964
---
 examples/plugins/auto-tagger/Cargo.lock       | Bin 0 -> 1243 bytes
 examples/plugins/auto-tagger/Cargo.toml       |  15 +
 examples/plugins/auto-tagger/plugin.toml      |  13 +
 examples/plugins/auto-tagger/src/lib.rs       | 303 +++++++
 examples/plugins/cbz-comics/Cargo.lock        | Bin 0 -> 1673 bytes
 examples/plugins/cbz-comics/Cargo.toml        |  18 +
 examples/plugins/cbz-comics/plugin.toml       |  20 +
 examples/plugins/cbz-comics/src/lib.rs        | 742 ++++++++++++++++++
 examples/plugins/subtitle-detector/Cargo.lock | Bin 0 -> 1249 bytes
 examples/plugins/subtitle-detector/Cargo.toml |  15 +
 .../plugins/subtitle-detector/plugin.toml     |  18 +
 examples/plugins/subtitle-detector/src/lib.rs | 345 ++++++++
 examples/plugins/text-enrichment/Cargo.lock   | Bin 0 -> 1247 bytes
 examples/plugins/text-enrichment/Cargo.toml   |  15 +
 examples/plugins/text-enrichment/plugin.toml  |  18 +
 examples/plugins/text-enrichment/src/lib.rs   | 198 +++++
 16 files changed, 1720 insertions(+)
 create mode 100644 examples/plugins/auto-tagger/Cargo.lock
 create mode 100644 examples/plugins/auto-tagger/Cargo.toml
 create mode 100644 examples/plugins/auto-tagger/plugin.toml
 create mode 100644 examples/plugins/auto-tagger/src/lib.rs
 create mode 100644 examples/plugins/cbz-comics/Cargo.lock
 create mode 100644 examples/plugins/cbz-comics/Cargo.toml
 create mode 100644 examples/plugins/cbz-comics/plugin.toml
 create mode 100644 examples/plugins/cbz-comics/src/lib.rs
 create mode 100644 examples/plugins/subtitle-detector/Cargo.lock
 create mode 100644 examples/plugins/subtitle-detector/Cargo.toml
 create mode 100644 examples/plugins/subtitle-detector/plugin.toml
 create mode 100644 examples/plugins/subtitle-detector/src/lib.rs
 create mode 100644 examples/plugins/text-enrichment/Cargo.lock
 create mode 100644 examples/plugins/text-enrichment/Cargo.toml
 create mode 100644 examples/plugins/text-enrichment/plugin.toml
 create mode 100644 examples/plugins/text-enrichment/src/lib.rs

diff --git a/examples/plugins/auto-tagger/Cargo.lock b/examples/plugins/auto-tagger/Cargo.lock
new file mode 100644
index 0000000000000000000000000000000000000000..a398a38e786445303fe56a29563e3e3c5e5ffa39
GIT binary patch
literal 1243
zcmb`HO>5jR5Qgvm6~Z|->(%#1p-||l_ufJdBWcDf)@ujb+jReZ<<ORdZ0KcAg3&N~
z-}f1G$9^2j!l(?$>Ivs{I>A~J48w(e?Qz5zR?IA3*f*H_scY`o_jPL>r+SPlj=5gb
zG_w<oXBZgfvX-&$nx8l?Wg6KRrkm#B;R(_adVG9rMmS-uw%cR775X0M_EmQ)I^OZN
z$@sJlA5+1lwm&qi&BLh*n^Jq<JYJufsNWTOH%_<BGM(r2^6rd%S=RaT>9DR(%jf;R
zFYDnPyEL8l^Lbf!1C0GXZN`_bOuJH5{A!zYz;s;Br%ifLij$9$NGMX;Xdbo3pe>Rz
zLI^L7lP)Tsm6E<zgSSFUD~yoBbJ1Q`|Igp+ICv*Jp>Az}OynX7N8TU?@3U7jX^ut*
zusUWRAj%Y|UVs%UW~$O9py3W>sCiz&^C$fKN@^%^qr}?hn%?xh?3T;&F}0WOcb@&N
zO-B>0w${i<)>alFpvd3@7g5Dvr1ZfO7wD9YkPSJh2puSpHKq{q2h{#r9Ljinvw1J%
zZVZmN=T)gh<H#8)vIQ$qS%M&}ipd87DQhVyIFXSsxNJ2#o;hej^B)fzrnexh=;Vz(
z01D-#3^B!QQzlL>dJaltvcgp|x^9`vnRL*?f|EXSnW73(5GhJmm+uXQ?SJ?U*_VT{

literal 0
HcmV?d00001

diff --git a/examples/plugins/auto-tagger/Cargo.toml b/examples/plugins/auto-tagger/Cargo.toml
new file mode 100644
index 0000000..cdcee85
--- /dev/null
+++ b/examples/plugins/auto-tagger/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "auto-tagger"
+version = "1.0.0"
+edition = "2024"
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+dlmalloc = { version = "0.2", features = ["global"] }
+
+[profile.release]
+opt-level = "s"
+lto = true
+strip = true
diff --git a/examples/plugins/auto-tagger/plugin.toml b/examples/plugins/auto-tagger/plugin.toml
new file mode 100644
index 0000000..24354f4
--- /dev/null
+++ b/examples/plugins/auto-tagger/plugin.toml
@@ -0,0 +1,13 @@
+[plugin]
+name = "auto-tagger"
+version = "1.0.0"
+api_version = "1.0"
+description = "Listens for MediaImported events and emits AutoTagSuggested events based on path pattern rules"
+kind = ["event_handler"]
+priority = 500
+
+[plugin.binary]
+wasm = "auto_tagger.wasm"
+
+[capabilities]
+network = false
diff --git a/examples/plugins/auto-tagger/src/lib.rs b/examples/plugins/auto-tagger/src/lib.rs
new file mode 100644
index 0000000..2f30527
--- /dev/null
+++ b/examples/plugins/auto-tagger/src/lib.rs
@@ -0,0 +1,303 @@
+//! Auto-tagger plugin for Pinakes.
+//!
+//! Listens for `MediaImported` events and, based on configurable path pattern
+//! rules, emits `AutoTagSuggested` events. Rules map path substrings to tag
+//! names.
+//!
+//! Configuration key `rules` expects a JSON array of objects:
+//!   `[{"pattern": "/music/", "tag": "music"}, ...]`
+//!
+//! If no config is present, or it yields no usable rules, built-in defaults are used:
+//!   - `/music/`     -> `music`
+//!   - `/photos/`    -> `photo`
+//!   - `/videos/`    -> `video`
+//!   - `/books/`     -> `book`
+//!   - `/documents/` -> `document`
+//!
+//! Build with:
+//!   RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
+
+#![no_std]
+
+extern crate alloc;
+
+use alloc::{format, string::String, vec, vec::Vec};
+use core::alloc::Layout;
+
+#[global_allocator]
+static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
+
+#[panic_handler]
+fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
+    core::arch::wasm32::unreachable() // executes the wasm `unreachable` instruction; the panic info is discarded
+}
+
+// Host functions provided by the runtime. Buffers are passed as (pointer, length) pairs of `i32`.
+unsafe extern "C" {
+    fn host_set_result(ptr: i32, len: i32);
+    fn host_log(level: i32, ptr: i32, len: i32);
+    fn host_emit_event(type_ptr: i32, type_len: i32, payload_ptr: i32, payload_len: i32) -> i32;
+    fn host_get_config(key_ptr: i32, key_len: i32) -> i32;
+    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
+}
+
+fn set_response(json: &[u8]) {
+    let (ptr, len) = (json.as_ptr() as i32, json.len() as i32);
+    // SAFETY: `ptr`/`len` describe the `json` slice, which stays borrowed for the whole call.
+    unsafe { host_set_result(ptr, len) };
+}
+
+fn log_info(msg: &str) {
+    let (ptr, len) = (msg.as_ptr() as i32, msg.len() as i32);
+    // `2` is the level value this plugin passes for its informational messages.
+    unsafe { host_log(2, ptr, len) };
+}
+
+/// Copy the request bytes at `ptr..ptr+len` into an owned buffer; the caller guarantees they are readable.
+unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
+    if ptr <= 0 || len <= 0 {
+        return Vec::new(); // null/negative pointer or non-positive length: nothing to read
+    }
+    unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) }.to_vec()
+}
+
+/// Extract the raw (still-escaped) string value stored under `key` in a JSON object.
+/// Backslash escapes are honoured when locating the closing quote; non-strings yield `None`.
+fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
+    let text = core::str::from_utf8(json).ok()?;
+    let needle = format!("\"{}\"", key);
+    let after_key = &text[text.find(&needle)? + needle.len()..];
+    let value = after_key.trim_start().strip_prefix(':')?.trim_start().strip_prefix('"')?;
+    let mut escaped = false;
+    for (idx, byte) in value.bytes().enumerate() {
+        match byte {
+            b'"' if !escaped => return Some(&value[..idx]),
+            b'\\' if !escaped => escaped = true,
+            _ => escaped = false,
+        }
+    }
+    None
+}
+
+/// A single tagging rule: match `pattern` in path -> apply `tag`.
+struct Rule {
+    pattern: String, // substring searched for in the imported media's path
+    tag:     String, // tag name placed in the emitted `AutoTagSuggested` payload
+}
+
+/// Built-in rule set returned whenever the `rules` config key provides nothing usable.
+fn default_rules() -> Vec<Rule> {
+    let pairs = vec![
+        ("/music/", "music"), ("/photos/", "photo"), ("/videos/", "video"),
+        ("/books/", "book"), ("/documents/", "document"),
+    ];
+    // Each (path substring, tag) pair becomes one owned rule; order is preserved.
+    let build = |(pattern, tag): (&str, &str)| Rule { pattern: pattern.into(), tag: tag.into() };
+    pairs.into_iter().map(build).collect()
+}
+
+/// Parse the `rules` JSON array from the config buffer.
+/// Expected format: `[{"pattern":"...","tag":"..."},...]`
+/// Every `{ ... }` object is examined on its own, so a `pattern` is only ever
+/// paired with the `tag` from the same object, whichever key comes first.
+/// Objects lacking either key, or holding a non-string value for it, are
+/// skipped. Scanning stops at the first unterminated object or string.
+/// This is a small scanner, not a full JSON parser: nested objects are not
+/// supported and escape sequences inside values are kept verbatim (they are
+/// only honoured when looking for the closing quote).
+/// Returns an empty vec on any parse failure (falls back to defaults).
+fn parse_rules_json(data: &[u8]) -> Vec<Rule> {
+    /// Offset of the first unescaped `"` in `s` (the opening quote is already consumed).
+    fn string_end(s: &str) -> Option<usize> {
+        let mut escaped = false;
+        for (idx, byte) in s.bytes().enumerate() {
+            match byte {
+                b'"' if !escaped => return Some(idx),
+                b'\\' if !escaped => escaped = true,
+                _ => escaped = false,
+            }
+        }
+        None
+    }
+
+    /// Offset of the `}` that closes an object body, ignoring braces that
+    /// appear inside string literals.
+    fn object_end(body: &str) -> Option<usize> {
+        let mut pos = 0;
+        loop {
+            let at = pos + body[pos..].find(['"', '}'])?;
+            if body.as_bytes()[at] == b'}' {
+                return Some(at);
+            }
+            // Jump past the whole string literal, closing quote included.
+            pos = at + 1 + string_end(&body[at + 1..])? + 1;
+        }
+    }
+
+    /// String value stored under the quoted `key` within one object body.
+    fn field<'a>(object: &'a str, key: &str) -> Option<&'a str> {
+        let mut haystack = object;
+        while let Some(pos) = haystack.find(key) {
+            haystack = &haystack[pos + key.len()..];
+            // Only an occurrence followed by `:` is a key; others are values.
+            if let Some(after_colon) = haystack.trim_start().strip_prefix(':') {
+                let value = after_colon.trim_start().strip_prefix('"')?;
+                return Some(&value[..string_end(value)?]);
+            }
+        }
+        None
+    }
+
+    let Ok(mut rest) = core::str::from_utf8(data) else {
+        return Vec::new();
+    };
+    let mut rules = Vec::new();
+    while let Some(open) = rest.find('{') {
+        let body = &rest[open + 1..];
+        let Some(close) = object_end(body) else {
+            break; // unterminated object: keep what was parsed so far
+        };
+        let object = &body[..close];
+        if let (Some(pattern), Some(tag)) = (field(object, "\"pattern\""), field(object, "\"tag\"")) {
+            rules.push(Rule { pattern: String::from(pattern), tag: String::from(tag) });
+        }
+        rest = &body[close + 1..];
+    }
+
+    rules
+}
+
+/// Load rules from config, falling back to defaults.
+///
+/// Asks the host for the `rules` config value, copies it into a local buffer
+/// and parses it. Defaults are returned when the key is absent, the buffer
+/// cannot be allocated or filled, or parsing yields no rules.
+fn load_rules() -> Vec<Rule> {
+    let key = b"rules";
+    // A positive return value is taken to be the byte size of the value the
+    // host has staged for `host_get_buffer` (assumed from how it is used
+    // here; confirm against the host API).
+    let size = unsafe { host_get_config(key.as_ptr() as i32, key.len() as i32) };
+    if size <= 0 {
+        return default_rules();
+    }
+    let buf_size = size as usize;
+
+    // Zero-initialised owned buffer: released automatically on every return
+    // path and never exposes uninitialised bytes if the host copies less.
+    let mut buf: Vec<u8> = Vec::new();
+    if buf.try_reserve_exact(buf_size).is_err() {
+        return default_rules();
+    }
+    buf.resize(buf_size, 0);
+
+    let copied = unsafe { host_get_buffer(buf.as_mut_ptr() as i32, size) };
+    if copied <= 0 {
+        return default_rules();
+    }
+    // Never trust the host-reported length beyond what was allocated.
+    let used = (copied as usize).min(buf_size);
+
+    let rules = parse_rules_json(&buf[..used]);
+    if rules.is_empty() { default_rules() } else { rules }
+}
+
+/// Escape a string for a JSON string value; other control characters become `\u00XX`.
+fn json_escape(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    for c in s.chars() {
+        match c {
+            '"' | '\\' => out.extend(['\\', c]),
+            '\n' => out.push_str("\\n"),
+            '\r' => out.push_str("\\r"),
+            '\t' => out.push_str("\\t"),
+            c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
+            _ => out.push(c),
+        }
+    }
+    out
+}
+
+/// Exported allocation entry point (presumably called by the host to stage data; confirm).
+/// Returns the buffer address, `0` for a non-positive `size`, or `-1` on failure.
+#[unsafe(no_mangle)]
+pub extern "C" fn alloc(size: i32) -> i32 {
+    if size <= 0 {
+        return 0;
+    }
+    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
+        return -1;
+    };
+    // SAFETY: `layout` has a non-zero size because `size > 0` was checked above.
+    let ptr = unsafe { alloc::alloc::alloc(layout) };
+    if ptr.is_null() {
+        return -1;
+    }
+    ptr as i32
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn initialize() -> i32 {
+    log_info("auto-tagger initialized"); // the only start-up work is this log line
+    0 // always 0 (presumably "success" to the host; confirm against the plugin API)
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn shutdown() -> i32 {
+    log_info("auto-tagger shutdown"); // nothing is released here; only a log line is written
+    0 // always 0 (presumably "success" to the host; confirm against the plugin API)
+}
+
+/// Publishes the JSON array of subscribed event types (`MediaImported` only) as the call result; arguments are unused.
+#[unsafe(no_mangle)]
+pub extern "C" fn interested_events(_ptr: i32, _len: i32) {
+    set_response(br#"["MediaImported"]"#);
+}
+
+/// Handle a `MediaImported` event: check path against rules and emit tag events.
+///
+/// Reads the JSON request at `ptr..ptr+len`, emits one `AutoTagSuggested`
+/// event per rule whose pattern is a substring of the request's `path`, logs a
+/// summary, and always answers the host with an empty JSON object.
+#[unsafe(no_mangle)]
+pub extern "C" fn handle_event(ptr: i32, len: i32) {
+    const EVENT_TYPE: &[u8] = b"AutoTagSuggested";
+
+    let req = unsafe { read_request(ptr, len) };
+
+    // `json_get_str` looks at the first occurrence of the quoted key anywhere
+    // in the request text, so a field inside a nested `payload` object is
+    // found the same way as a top-level one; no structural parsing is done.
+    let media_id = json_get_str(&req, "media_id").unwrap_or("");
+    let path = json_get_str(&req, "path").unwrap_or("");
+
+    // Without a path nothing can match, so skip the config round-trip.
+    let rules = if path.is_empty() { Vec::new() } else { load_rules() };
+
+    let mut matched_count = 0u32;
+    for rule in rules.iter().filter(|rule| path.contains(rule.pattern.as_str())) {
+        let payload = format!(
+            r#"{{"media_id":"{}","tag":"{}"}}"#,
+            json_escape(media_id),
+            json_escape(&rule.tag),
+        );
+        // The host's return code is not inspected: emitting is best-effort.
+        unsafe {
+            host_emit_event(
+                EVENT_TYPE.as_ptr() as i32,
+                EVENT_TYPE.len() as i32,
+                payload.as_ptr() as i32,
+                payload.len() as i32,
+            );
+        }
+        matched_count += 1;
+    }
+
+    let msg = if matched_count > 0 {
+        format!("auto-tagger: matched {} rule(s) for path: {}", matched_count, path)
+    } else {
+        format!("auto-tagger: no rules matched for path: {}", path)
+    };
+    log_info(&msg);
+    set_response(b"{}");
+}
diff --git a/examples/plugins/cbz-comics/Cargo.lock b/examples/plugins/cbz-comics/Cargo.lock
new file mode 100644
index 0000000000000000000000000000000000000000..06ebc6a5682394a23901e3baa3f4937e0af6b2a5
GIT binary patch
literal 1673
zcmb`HO>f&U42JLi6^47<;!2c6NdpE9*lG9Oiek_QnFw|4!gkh=Uq7W?hZak2IT(T@
z!s7crB(K_^yRx)g25RdX4(qfJYe``kp4x9)8fgwI<u*RG-^09}`sS+ru^vOmsV?J6
zW3Jac&FwynhcL91%UZ^5-~6U|Dbv_~ZFSS!+}wwB7q)bJ+l*mP^|T9lpc%S%fgSXs
zmtC_=hdI5(%(N}bIzN5dt?T{r<@$PC*4-iYY1&`Uhh^OiVccG)W0Ff>rcJ5oJa$dm
zQMy|W`{OeTgp}S`&naY|Ksu+{B<)aUiPGvUNFbQw2Z(G06FTpd79kRc?sWDkK5uFk
zB`qHm$f`heO{V*!Gh-^W)Ff`2w#&o5x;mxqbKUIASe}1Pk0n#x-=2Abx0}NM{a#<#
zAg`2Q7lBzSR@!J0wI;8TSQ#m$17pEP<+4)X>Zz~RX+UXYHB(exC-MH%`yR09p_l5?
z0@y?$leFv%k#{aT1xX7s+6UAzJ0Bt>&-EgpR55c+9Re9)34HbQ1g`XI{t?zt;w!a>
zT3*w`vRR&%^VD9V|I_R)EjpTr8qq*x#LB{Zk~ugpWK_``0Oyf~B&#rnY}kTgvOxu?
zHhG`VP<vmPA4Ry{UznTZU@<^)$W~NL5dboxN5fHNRDeE_az?u7REok_Q+wDMLouR~
zc#_-SsdBcd-WU#Ly!+5vFQs{5g)E$?D1eM*Yd~fU2&9mMm#AWLUWSx40DCJlF?pL&
zlNDJ6&1B9%ded;M!;g?q_I09Obbbbs$^!V9V#btNu#HXxB{SFQSxZmXHnKUh_F7`F
W;G%#Ol?TbJIGv>Qrr?Fbk<}lKSqN|d

literal 0
HcmV?d00001

diff --git a/examples/plugins/cbz-comics/Cargo.toml b/examples/plugins/cbz-comics/Cargo.toml
new file mode 100644
index 0000000..23319ed
--- /dev/null
+++ b/examples/plugins/cbz-comics/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "cbz-comics"
+version = "1.0.0"
+edition = "2024"
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+dlmalloc = { version = "0.2", features = ["global"] }
+miniz_oxide = { version = "0.8", default-features = false, features = [
+  "with-alloc",
+] }
+
+[profile.release]
+opt-level = "s"
+lto = true
+strip = true
diff --git a/examples/plugins/cbz-comics/plugin.toml b/examples/plugins/cbz-comics/plugin.toml
new file mode 100644
index 0000000..e2f6e74
--- /dev/null
+++ b/examples/plugins/cbz-comics/plugin.toml
@@ -0,0 +1,20 @@
+[plugin]
+name = "cbz-comics"
+version = "1.0.0"
+api_version = "1.0"
+description = "Supports CBZ (Comic Book ZIP) and CBR files with metadata extraction and thumbnail generation"
+kind = ["media_type", "metadata_extractor", "thumbnail_generator"]
+priority = 500
+
+[plugin.binary]
+wasm = "cbz_comics.wasm"
+
+[capabilities]
+network = false
+
+[capabilities.filesystem]
+# Users must add their media root directories here. Example:
+#   read  = ["/home/user/comics"]
+#   write = ["/home/user/.cache/pinakes/thumbnails"]
+read = []
+write = []
diff --git a/examples/plugins/cbz-comics/src/lib.rs b/examples/plugins/cbz-comics/src/lib.rs
new file mode 100644
index 0000000..98d8f7b
--- /dev/null
+++ b/examples/plugins/cbz-comics/src/lib.rs
@@ -0,0 +1,742 @@
+//! CBZ/CBR comics plugin for Pinakes.
+//!
+//! Registers comic book ZIP (`cbz`) and RAR (`cbr`) media types, extracts
+//! metadata from CBZ archives (including `ComicInfo.xml` when present), and
+//! generates thumbnails from the cover image.
+//!
+//! CBR is registered as a media type but metadata extraction is limited to
+//! format detection only (RAR parsing is not implemented).
+//!
+//! ZIP parsing is implemented from scratch without external ZIP crates to keep
+//! the WASM binary small.
+//!
+//! The `filesystem.read` and `filesystem.write` capabilities in `plugin.toml`
+//! must be configured for the directories containing comic files and the
+//! thumbnail output directory respectively.
+//!
+//! Build with:
+//!   RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
+
+#![no_std]
+
+extern crate alloc;
+
+use alloc::{format, string::{String, ToString}, vec, vec::Vec};
+use core::alloc::Layout;
+
+#[global_allocator]
+static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
+
+#[panic_handler]
+fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
+    core::arch::wasm32::unreachable() // executes the wasm `unreachable` instruction; the panic info is discarded
+}
+
+// Host functions provided by the runtime. Buffers are passed as (pointer, length) pairs of `i32`.
+unsafe extern "C" {
+    fn host_set_result(ptr: i32, len: i32);
+    fn host_log(level: i32, ptr: i32, len: i32);
+    fn host_read_file(path_ptr: i32, path_len: i32) -> i32;
+    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
+    fn host_write_file(path_ptr: i32, path_len: i32, data_ptr: i32, data_len: i32) -> i32;
+}
+
+fn set_response(json: &[u8]) {
+    let (ptr, len) = (json.as_ptr() as i32, json.len() as i32);
+    // SAFETY: `ptr`/`len` describe the `json` slice, which stays borrowed for the whole call.
+    unsafe { host_set_result(ptr, len) };
+}
+
+fn log_info(msg: &str) {
+    let (ptr, len) = (msg.as_ptr() as i32, msg.len() as i32);
+    // `2` is the level value this plugin passes for its informational messages.
+    unsafe { host_log(2, ptr, len) };
+}
+
+/// Copy the request bytes at `ptr..ptr+len` into an owned buffer; the caller guarantees they are readable.
+unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
+    if ptr <= 0 || len <= 0 {
+        return Vec::new(); // null/negative pointer or non-positive length: nothing to read
+    }
+    unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) }.to_vec()
+}
+
+/// Extract the raw (still-escaped) string value stored under `key` in a JSON object.
+/// Backslash escapes are honoured when locating the closing quote; non-strings yield `None`.
+fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
+    let text = core::str::from_utf8(json).ok()?;
+    let needle = format!("\"{}\"", key);
+    let after_key = &text[text.find(&needle)? + needle.len()..];
+    let value = after_key.trim_start().strip_prefix(':')?.trim_start().strip_prefix('"')?;
+    let mut escaped = false;
+    for (idx, byte) in value.bytes().enumerate() {
+        match byte {
+            b'"' if !escaped => return Some(&value[..idx]),
+            b'\\' if !escaped => escaped = true,
+            _ => escaped = false,
+        }
+    }
+    None
+}
+
+/// Escape a string for a JSON string value; other control characters become `\u00XX`.
+fn json_escape(s: &str) -> String {
+    let mut out = String::with_capacity(s.len());
+    for c in s.chars() {
+        match c {
+            '"' | '\\' => out.extend(['\\', c]),
+            '\n' => out.push_str("\\n"),
+            '\r' => out.push_str("\\r"),
+            '\t' => out.push_str("\\t"),
+            c if (c as u32) < 0x20 => out.push_str(&format!("\\u{:04x}", c as u32)),
+            _ => out.push(c),
+        }
+    }
+    out
+}
+
+// Upper bound on archive size: `load_cbz_file` rejects files of 20 MiB or more.
+const MAX_FILE_BYTES: usize = 20 * 1024 * 1024;
+
+// ZIP record signatures as little-endian u32 (on disk: "PK\x03\x04", "PK\x01\x02", "PK\x05\x06")
+const SIG_LOCAL_FILE:     u32 = 0x04034b50;
+const SIG_CENTRAL_DIR:    u32 = 0x02014b50;
+const SIG_EOCD:           u32 = 0x06054b50;
+
+// Compression methods handled by `decompress_entry`; any other method is rejected there
+const COMPRESS_STORE:   u16 = 0;
+const COMPRESS_DEFLATE: u16 = 8;
+
+/// Decode a little-endian `u16` starting at `offset` in `data`.
+/// Yields `None` when either byte lies outside the slice.
+fn read_u16_le(data: &[u8], offset: usize) -> Option<u16> {
+    let lo = *data.get(offset)?;
+    let hi = *data.get(offset + 1)?;
+    Some(u16::from_le_bytes([lo, hi]))
+}
+
+/// Decode a little-endian `u32` starting at `offset` in `data`.
+/// Yields `None` when any of the four bytes lies outside the slice.
+fn read_u32_le(data: &[u8], offset: usize) -> Option<u32> {
+    // Elements are evaluated left to right, so the first missing byte ends the read.
+    let bytes = [
+        *data.get(offset)?, *data.get(offset + 1)?, *data.get(offset + 2)?, *data.get(offset + 3)?,
+    ];
+    Some(u32::from_le_bytes(bytes))
+}
+
+/// Decode a big-endian `u16` at `offset`; `None` if either byte is out of bounds.
+fn read_u16_be(data: &[u8], offset: usize) -> Option<u16> {
+    let hi = *data.get(offset)?;
+    let lo = *data.get(offset + 1)?;
+    Some(u16::from_be_bytes([hi, lo]))
+}
+
+/// Decode a big-endian `u32` at `offset`; `None` if any byte is out of bounds.
+fn read_u32_be(data: &[u8], offset: usize) -> Option<u32> {
+    // Elements are evaluated left to right, so the first missing byte ends the read.
+    let bytes = [
+        *data.get(offset)?, *data.get(offset + 1)?, *data.get(offset + 2)?, *data.get(offset + 3)?,
+    ];
+    Some(u32::from_be_bytes(bytes))
+}
+
+/// A parsed central directory entry from a ZIP archive.
+struct ZipEntry {
+    name:            String, // entry file name, ASCII-lower-cased when parsed
+    compression:     u16, // compression method field (see the `COMPRESS_*` constants)
+    compressed_size: u32, // size in bytes of the entry's data as stored in the archive
+    local_offset:    u32, // offset of the entry's local file header within the archive
+}
+
+/// Find the End of Central Directory (EOCD) record by scanning backwards.
+///
+/// A record is at least 22 bytes long and may be followed by an archive
+/// comment of at most 65535 bytes, so only start offsets within that tail of
+/// `data` are candidates. They are tried from the end of the buffer towards
+/// the start, and the first offset whose four bytes equal `SIG_EOCD` wins.
+///
+/// Returns `None` when `data` is shorter than 22 bytes or no candidate offset
+/// carries the signature. Only the signature is checked; the comment-length
+/// field of the record is not validated here.
+fn find_eocd(data: &[u8]) -> Option<usize> {
+    /// Size of an EOCD record that has no trailing comment.
+    const EOCD_MIN_LEN: usize = 22;
+    /// Largest comment a 16-bit length field can describe.
+    const MAX_COMMENT_LEN: usize = 65535;
+    // Highest offset at which a complete record still fits.
+    let last_start = data.len().checked_sub(EOCD_MIN_LEN)?;
+    // Lowest offset worth trying; clamps to 0 for short inputs.
+    let first_start = last_start.saturating_sub(MAX_COMMENT_LEN);
+
+    (first_start..=last_start)
+        .rev()
+        .find(|&at| read_u32_le(data, at) == Some(SIG_EOCD))
+}
+
+/// Parse central directory entries from a ZIP archive; stops early at the first malformed or truncated entry.
+fn parse_central_directory(data: &[u8]) -> Vec<ZipEntry> {
+    let mut entries = Vec::new();
+
+    let eocd_offset = match find_eocd(data) {
+        Some(o) => o,
+        None => return entries,
+    };
+
+    // EOCD layout (offsets relative to EOCD start):
+    //   0: signature (4)
+    //   4: disk number (2)
+    //   6: start disk (2)
+    //   8: entries on disk (2)
+    //  10: total entries (2)
+    //  12: central dir size (4)
+    //  16: central dir offset (4)
+    //  20: comment length (2)
+    let cd_offset = match read_u32_le(data, eocd_offset + 16) {
+        Some(o) => o as usize,
+        None => return entries,
+    };
+    let total_entries = match read_u16_le(data, eocd_offset + 10) {
+        Some(n) => n as usize,
+        None => return entries,
+    };
+
+    let mut pos = cd_offset;
+    for _ in 0..total_entries {
+        if pos + 46 > data.len() {
+            break;
+        }
+        if read_u32_le(data, pos) != Some(SIG_CENTRAL_DIR) {
+            break;
+        }
+
+        // Central directory entry layout:
+        //   0:  signature (4)
+        //   4:  version made by (2)
+        //   6:  version needed (2)
+        //   8:  flags (2)
+        //  10:  compression (2)
+        //  12:  mod time (2)
+        //  14:  mod date (2)
+        //  16:  crc32 (4)
+        //  20:  compressed size (4)
+        //  24:  uncompressed size (4)
+        //  28:  filename length (2)
+        //  30:  extra field length (2)
+        //  32:  file comment length (2)
+        //  34:  disk start (2)
+        //  36:  internal attrs (2)
+        //  38:  external attrs (4)
+        //  42:  local header offset (4)
+        //  46:  filename...
+        let compression       = match read_u16_le(data, pos + 10) { Some(v) => v, None => break };
+        let compressed_size   = match read_u32_le(data, pos + 20) { Some(v) => v, None => break };
+        // uncompressed_size at pos+24 is intentionally not stored; size comes from decompressor output.
+        let fname_len         = match read_u16_le(data, pos + 28) { Some(v) => v as usize, None => break };
+        let extra_len         = match read_u16_le(data, pos + 30) { Some(v) => v as usize, None => break };
+        let comment_len       = match read_u16_le(data, pos + 32) { Some(v) => v as usize, None => break };
+        let local_offset      = match read_u32_le(data, pos + 42) { Some(v) => v, None => break };
+
+        let fname_start = pos + 46;
+        let fname_end = fname_start + fname_len;
+        if fname_end > data.len() {
+            break;
+        }
+
+        let name = core::str::from_utf8(&data[fname_start..fname_end])
+            .unwrap_or("") // names that are not valid UTF-8 become the empty string
+            .to_ascii_lowercase();
+
+        entries.push(ZipEntry {
+            name,
+            compression,
+            compressed_size,
+            local_offset,
+        });
+
+        pos = fname_end + extra_len + comment_len;
+    }
+
+    entries
+}
+
+/// Read raw bytes for a local file entry (the actual compressed/stored data).
+/// Returns a slice into `data` containing the compressed bytes, or `None` when
+/// the local header is missing, malformed, or the payload leaves the buffer.
+fn local_file_data<'a>(data: &'a [u8], entry: &ZipEntry) -> Option<&'a [u8]> {
+    let off = entry.local_offset as usize;
+    if read_u32_le(data, off) != Some(SIG_LOCAL_FILE) {
+        return None;
+    }
+
+    // Local file header layout:
+    //   0:  signature (4)
+    //   4:  version needed (2)
+    //   6:  flags (2)
+    //   8:  compression (2)
+    //  10:  mod time (2)
+    //  12:  mod date (2)
+    //  14:  crc32 (4)
+    //  18:  compressed size (4)
+    //  22:  uncompressed size (4)
+    //  26:  filename length (2)
+    //  28:  extra length (2)
+    //  30:  filename...
+    let fname_len = read_u16_le(data, off.checked_add(26)?)? as usize;
+    let extra_len = read_u16_le(data, off.checked_add(28)?)? as usize;
+
+    // Offsets come straight from the archive and `usize` is 32 bits on wasm32,
+    // so every sum is checked: a wrapped value could otherwise pass a bounds
+    // test and then panic in the slice expression.
+    let data_start = off.checked_add(30)?.checked_add(fname_len + extra_len)?;
+    let data_end = data_start.checked_add(entry.compressed_size as usize)?;
+    // `get` rejects both out-of-range and inverted ranges without panicking.
+    data.get(data_start..data_end)
+}
+
+/// Decompress a stored (STORE) or deflated (DEFLATE) entry.
+/// Returns the uncompressed bytes; inflated output is capped at `MAX_FILE_BYTES`.
+fn decompress_entry(data: &[u8], entry: &ZipEntry) -> Option<Vec<u8>> {
+    let raw = local_file_data(data, entry)?;
+    match entry.compression {
+        COMPRESS_STORE => Some(raw.to_vec()),
+        // Bound the output so a small deflate bomb cannot exhaust guest memory;
+        // exceeding the cap is reported as `None`, like any other inflate error.
+        COMPRESS_DEFLATE => miniz_oxide::inflate::decompress_to_vec_with_limit(raw, MAX_FILE_BYTES).ok(),
+        _ => None,
+    }
+}
+
+/// Returns true if a filename has an image extension.
+/// Matching is case-sensitive against `.jpg`, `.jpeg`, `.png` and `.webp`.
+fn is_image_filename(name: &str) -> bool {
+    [".jpg", ".jpeg", ".png", ".webp"]
+        .iter()
+        .any(|&ext| name.ends_with(ext))
+}
+
+/// Extract a simple XML element value using substring search.
+/// Looks for the first `<tag>` and the first `</tag>` after it and returns
+/// the text in between; tags carrying attributes are not recognised.
+fn xml_get_text<'a>(xml: &'a str, tag: &str) -> Option<&'a str> {
+    let opening = format!("<{}>", tag);
+    let closing = format!("</{}>", tag);
+    let (_, rest) = xml.split_once(opening.as_str())?;
+    let (inner, _) = rest.split_once(closing.as_str())?;
+    Some(inner)
+}
+
+/// Metadata extracted from a ComicInfo.xml file.
+struct ComicInfo {
+    title:        Option<String>, // text of the `<Title>` element
+    series:       Option<String>, // text of the `<Series>` element
+    issue_number: Option<String>, // text of the `<Number>` element
+    writer:       Option<String>, // text of the `<Writer>` element
+    page_count:   Option<String>, // text of the `<PageCount>` element, kept as a string
+    language:     Option<String>, // text of the `<LanguageISO>` element
+    genre:        Option<String>, // text of the `<Genre>` element
+    summary:      Option<String>, // text of the `<Summary>` element
+}
+
+/// Parse key fields from a ComicInfo.xml byte slice.
+/// Every field is the trimmed element text, or `None` when the element is
+/// absent or blank. Input that is not valid UTF-8 yields all-`None` fields.
+fn parse_comic_info(data: &[u8]) -> ComicInfo {
+    let text = core::str::from_utf8(data).unwrap_or("");
+    // All fields, the title included, keep the casing found in the file.
+    let field = |tag: &str| {
+        xml_get_text(text, tag)
+            .map(|s| s.trim().to_string())
+            .filter(|s| !s.is_empty())
+    };
+    ComicInfo {
+        title:        field("Title"),
+        series:       field("Series"),
+        issue_number: field("Number"),
+        writer:       field("Writer"),
+        page_count:   field("PageCount"),
+        language:     field("LanguageISO"),
+        genre:        field("Genre"),
+        summary:      field("Summary"),
+    }
+}
+
+/// Image dimension and format information.
+struct ImageInfo {
+    width:  u32, // pixels; 0 when the parser did not determine it
+    height: u32, // pixels; 0 when the parser did not determine it
+    format: &'static str, // "jpeg", "png" or "webp" as set by `parse_image_info`
+}
+
+/// Detect JPEG, PNG or WebP from magic bytes and read dimensions where implemented (WebP reports 0x0).
+fn parse_image_info(data: &[u8]) -> Option<ImageInfo> {
+    if data.len() < 4 {
+        return None;
+    }
+    // JPEG: starts with 0xFF 0xD8
+    if data[0] == 0xFF && data[1] == 0xD8 {
+        // Scan for SOF0 (0xFF 0xC0) or SOF2 (0xFF 0xC2) marker
+        let mut i = 2usize;
+        while i + 8 < data.len() {
+            if data[i] == 0xFF {
+                let marker = data[i + 1];
+                if marker == 0xC0 || marker == 0xC2 {
+                    // SOF marker layout:
+                    //  0: 0xFF
+                    //  1: marker
+                    //  2-3: segment length (big-endian)
+                    //  4: precision
+                    //  5-6: height (big-endian u16)
+                    //  7-8: width (big-endian u16)
+                    let height = read_u16_be(data, i + 5)? as u32;
+                    let width  = read_u16_be(data, i + 7)? as u32;
+                    return Some(ImageInfo { width, height, format: "jpeg" });
+                } else if marker == 0xFF {
+                    // Padding byte
+                    i += 1;
+                    continue;
+                } else if marker == 0xD8 || marker == 0xD9 {
+                    // SOI / EOI - no length field
+                    i += 2;
+                    continue;
+                } else {
+                    // Skip segment: length at i+2 (includes the 2 length bytes)
+                    if let Some(seg_len) = read_u16_be(data, i + 2) {
+                        i += 2 + seg_len as usize;
+                    } else {
+                        break;
+                    }
+                }
+            } else {
+                i += 1;
+            }
+        }
+        // Return a JPEG without dimensions if SOF not found
+        return Some(ImageInfo { width: 0, height: 0, format: "jpeg" });
+    }
+
+    // PNG: starts with 0x89 0x50 0x4E 0x47 (0x89 followed by ASCII "PNG")
+    if data.len() >= 24 && data[0] == 0x89 && data[1] == 0x50 && data[2] == 0x4E && data[3] == 0x47 {
+        // IHDR chunk: width at bytes 16-19, height at bytes 20-23 (big-endian u32)
+        let width  = read_u32_be(data, 16)?;
+        let height = read_u32_be(data, 20)?;
+        return Some(ImageInfo { width, height, format: "png" });
+    }
+
+    // WebP: "RIFF" at bytes 0-3 and "WEBP" at bytes 8-11; dimensions are not parsed
+    if data.len() >= 12
+        && &data[0..4] == b"RIFF"
+        && &data[8..12] == b"WEBP"
+    {
+        return Some(ImageInfo { width: 0, height: 0, format: "webp" });
+    }
+
+    None
+}
+
+/// Load a CBZ archive into memory through the host's file-exchange buffer.
+/// The staged bytes are copied straight into the returned `Vec`. Errors are
+/// the fixed strings `"read failed"`, `"too large"`, `"alloc failed"` and
+/// `"buffer copy failed"`.
+fn load_cbz_file(path: &str) -> Result<Vec<u8>, &'static str> {
+    let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
+    if file_size < 0 {
+        return Err("read failed");
+    }
+    let buf_size = file_size as usize;
+    if buf_size >= MAX_FILE_BYTES {
+        return Err("too large");
+    }
+    let mut data: Vec<u8> = Vec::new();
+    if buf_size == 0 {
+        return Ok(data);
+    }
+    // Reserve fallibly so running out of memory is reported instead of aborting.
+    data.try_reserve_exact(buf_size).map_err(|_| "alloc failed")?;
+    data.resize(buf_size, 0);
+
+    // SAFETY: `data` owns `buf_size` initialized bytes; that length is the write limit.
+    let copied = unsafe { host_get_buffer(data.as_mut_ptr() as i32, file_size) };
+    if copied <= 0 {
+        return Err("buffer copy failed");
+    }
+    // Clamp: never keep more bytes than the buffer that was offered to the host.
+    data.truncate((copied as usize).min(buf_size));
+    Ok(data)
+}
+
+/// Allocate `size` bytes of plugin memory and return the buffer's address.
+/// Returns `0` when `size` is not positive, and `-1` when the layout is
+/// rejected or the allocator returns null.
+#[unsafe(no_mangle)]
+pub extern "C" fn alloc(size: i32) -> i32 {
+    if size <= 0 {
+        return 0;
+    }
+    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
+        return -1;
+    };
+    // SAFETY: `size > 0` was checked above, so `layout` is not zero-sized.
+    match unsafe { alloc::alloc::alloc(layout) } {
+        block if block.is_null() => -1,
+        block => block as i32,
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn initialize() -> i32 {
+    log_info("cbz-comics initialized"); // report start-up through the host log
+    0 // presumably the host reads 0 as success; TODO confirm against the plugin API
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn shutdown() -> i32 {
+    log_info("cbz-comics shutdown"); // report shutdown through the host log
+    0 // presumably the host reads 0 as success; TODO confirm against the plugin API
+}
+
+/// Report the comic media types this plugin registers, as a JSON array with
+/// one definition for CBZ and one for CBR. The request arguments are ignored.
+#[unsafe(no_mangle)]
+pub extern "C" fn supported_media_types(_ptr: i32, _len: i32) {
+    set_response(br#"[
+{"id":"comic-cbz","name":"Comic Book ZIP","category":"document","extensions":["cbz"],"mime_types":["application/vnd.comicbook+zip"]},
+{"id":"comic-cbr","name":"Comic Book RAR","category":"document","extensions":["cbr"],"mime_types":["application/vnd.comicbook-rar"]}
+]"#);
+}
+
+/// Answer whether the requested `path` ends in `.cbz` or `.cbr`, ignoring ASCII case.
+#[unsafe(no_mangle)]
+pub extern "C" fn can_handle(ptr: i32, len: i32) {
+    let req = unsafe { read_request(ptr, len) };
+    let lower = json_get_str(&req, "path").unwrap_or("").to_ascii_lowercase();
+    let reply: &[u8] = if [".cbz", ".cbr"].iter().any(|ext| lower.ends_with(*ext)) {
+        br#"{"can_handle":true}"#
+    } else {
+        br#"{"can_handle":false}"#
+    };
+    set_response(reply);
+}
+
+/// Report the media type IDs this extractor accepts, as a JSON array of strings.
+#[unsafe(no_mangle)]
+pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
+    set_response(br#"["comic-cbz","comic-cbr"]"#);
+}
+
+/// Extract metadata from a CBZ or CBR file.
+///
+/// Reads `path` from the JSON request. CBR files get a fixed stub response
+/// because RAR is not parsed. For CBZ files the archive is loaded, its image
+/// entries are counted, and `ComicInfo.xml` (when present and stored or
+/// deflated) is parsed.
+///
+/// The response is a JSON object whose `extra` map holds string values:
+/// `format`, `page_count` and, when ComicInfo provides them, `series`,
+/// `issue_number` and `language`. Optional top-level `title`, `artist`,
+/// `genre` and `description` fields follow. Failures are reported inside
+/// `extra` as `error` or `too_large`.
+#[unsafe(no_mangle)]
+pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
+    let req = unsafe { read_request(ptr, len) };
+    let Some(path) = json_get_str(&req, "path") else {
+        set_response(br#"{"extra":{"error":"missing path"}}"#);
+        return;
+    };
+
+    // CBR: register the type but do not attempt to parse RAR.
+    if path.to_ascii_lowercase().ends_with(".cbr") {
+        set_response(br#"{"extra":{"format":"cbr","note":"cbr-unsupported"}}"#);
+        return;
+    }
+
+    // Load CBZ archive
+    let data = match load_cbz_file(path) {
+        Ok(d) => d,
+        Err("too large") => {
+            set_response(br#"{"extra":{"format":"cbz","too_large":"true"}}"#);
+            return;
+        }
+        Err(e) => {
+            // The remaining errors are fixed ASCII literals from
+            // `load_cbz_file`, so they can be embedded without escaping.
+            let resp = format!(r#"{{"extra":{{"format":"cbz","error":"{}"}}}}"#, e);
+            set_response(resp.as_bytes());
+            return;
+        }
+    };
+
+    let entries = parse_central_directory(&data);
+
+    // Count image files as page count.
+    let image_count = entries.iter().filter(|e| is_image_filename(&e.name)).count();
+
+    // Look for ComicInfo.xml at the archive root or inside any folder. The
+    // entry name is lowercased here so the match is case-insensitive however
+    // the archive spelled it.
+    let comic_info_entry = entries.iter().find(|e| {
+        let n = e.name.as_str().to_ascii_lowercase();
+        n == "comicinfo.xml" || n.ends_with("/comicinfo.xml")
+    });
+
+    // STORE and DEFLATE entries are both handed to `decompress_entry`; any
+    // other method, or a failed decompression, leaves `info` empty.
+    let info = comic_info_entry
+        .filter(|e| e.compression == COMPRESS_STORE || e.compression == COMPRESS_DEFLATE)
+        .and_then(|entry| decompress_entry(&data, entry))
+        .map(|xml_bytes| parse_comic_info(&xml_bytes));
+
+    let msg = format!(
+        "cbz-comics: {} entries, {} images, ComicInfo.xml={}",
+        entries.len(),
+        image_count,
+        info.is_some(),
+    );
+    log_info(&msg);
+
+    // Build response JSON. A page count declared in ComicInfo.xml takes
+    // precedence over the number of image entries found in the archive.
+    let page_count = info
+        .as_ref()
+        .and_then(|ci| ci.page_count.clone())
+        .unwrap_or_else(|| format!("{}", image_count));
+    let mut extra_pairs: Vec<(&str, String)> = vec![
+        ("format", String::from("cbz")),
+        ("page_count", page_count),
+    ];
+    if let Some(ref ci) = info {
+        if let Some(ref s) = ci.series       { extra_pairs.push(("series",       s.clone())) }
+        if let Some(ref n) = ci.issue_number { extra_pairs.push(("issue_number", n.clone())) }
+        if let Some(ref l) = ci.language     { extra_pairs.push(("language",     l.clone())) }
+    }
+
+    // Build extra JSON object
+    let mut extra_json = String::from("{");
+    for (i, (k, v)) in extra_pairs.iter().enumerate() {
+        if i > 0 { extra_json.push(','); }
+        extra_json.push('"');
+        extra_json.push_str(k);
+        extra_json.push_str("\":\"");
+        extra_json.push_str(&json_escape(v));
+        extra_json.push('"');
+    }
+    extra_json.push('}');
+
+    // Optional top-level fields, taken from ComicInfo.xml when it supplied
+    // them. Each is either empty or a complete `,"key":"value"` fragment
+    // ready to follow the `extra` object.
+    let title_field = info.as_ref()
+        .and_then(|ci| ci.title.as_ref())
+        .map(|t| format!(r#","title":"{}""#, json_escape(t)))
+        .unwrap_or_default();
+
+    let artist_field = info.as_ref()
+        .and_then(|ci| ci.writer.as_ref())
+        .map(|w| format!(r#","artist":"{}""#, json_escape(w)))
+        .unwrap_or_default();
+
+    let genre_field = info.as_ref()
+        .and_then(|ci| ci.genre.as_ref())
+        .map(|g| format!(r#","genre":"{}""#, json_escape(g)))
+        .unwrap_or_default();
+
+    let desc_field = info.as_ref()
+        .and_then(|ci| ci.summary.as_ref())
+        .map(|s| format!(r#","description":"{}""#, json_escape(s)))
+        .unwrap_or_default();
+
+    // Assemble the reply: the `extra` object first, then the fragments.
+    let resp = format!(
+        r#"{{"extra":{}{}{}{}{}}}"#,
+        extra_json, title_field, artist_field, genre_field, desc_field,
+    );
+    set_response(resp.as_bytes());
+}
+
+/// Generate a thumbnail from the cover image of a CBZ archive.
+///
+/// Reads `source_path` and `output_path` from the JSON request. The cover
+/// is the image entry whose name sorts first. Its decompressed bytes are
+/// written unchanged to `output_path` (nothing is resized or re-encoded),
+/// and the response reports that path together with the width, height and
+/// format `parse_image_info` detected, defaulting to `0`, `0` and `"jpeg"`.
+/// Every failure yields an empty `path` with zero dimensions; only a source
+/// that is not a `.cbz` reports the format `"unknown"` instead of `"jpeg"`.
+#[unsafe(no_mangle)]
+pub extern "C" fn generate_thumbnail(ptr: i32, len: i32) {
+    // Shared reply for every failure except a source that is not a `.cbz`.
+    const FAILED: &[u8] = br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#;
+
+    let req = unsafe { read_request(ptr, len) };
+    let Some(source_path) = json_get_str(&req, "source_path") else {
+        set_response(FAILED);
+        return;
+    };
+    let Some(output_path) = json_get_str(&req, "output_path") else {
+        set_response(FAILED);
+        return;
+    };
+
+    if !source_path.to_ascii_lowercase().ends_with(".cbz") {
+        set_response(br#"{"path":"","width":0,"height":0,"format":"unknown"}"#);
+        return;
+    }
+
+    let Ok(data) = load_cbz_file(source_path) else {
+        set_response(FAILED);
+        return;
+    };
+
+    let entries = parse_central_directory(&data);
+
+    // The cover is the alphabetically first image. `min_by` finds it in one
+    // pass and, like a stable sort, keeps the earliest entry among equal names.
+    let cover = entries
+        .iter()
+        .filter(|e| is_image_filename(&e.name))
+        .min_by(|a, b| a.name.as_str().cmp(b.name.as_str()));
+    let Some(cover) = cover else {
+        set_response(FAILED);
+        return;
+    };
+
+    let Some(image_bytes) = decompress_entry(&data, cover) else {
+        set_response(FAILED);
+        return;
+    };
+
+    // Unrecognized image data is still written out; only the reported
+    // dimensions and format fall back to defaults.
+    let info = parse_image_info(&image_bytes).unwrap_or(ImageInfo {
+        width: 0, height: 0, format: "jpeg",
+    });
+
+    // Write thumbnail bytes to output path
+    // SAFETY: both pointer/length pairs describe buffers that outlive the call.
+    let write_result = unsafe {
+        host_write_file(
+            output_path.as_ptr() as i32,
+            output_path.len() as i32,
+            image_bytes.as_ptr() as i32,
+            image_bytes.len() as i32,
+        )
+    };
+    if write_result < 0 {
+        set_response(FAILED);
+        return;
+    }
+
+    let msg = format!(
+        "cbz-comics: thumbnail {}x{} {} written to {}",
+        info.width, info.height, info.format, output_path,
+    );
+    log_info(&msg);
+
+    let resp = format!(
+        r#"{{"path":"{}","width":{},"height":{},"format":"{}"}}"#,
+        json_escape(output_path),
+        info.width,
+        info.height,
+        info.format,
+    );
+    set_response(resp.as_bytes());
+}
diff --git a/examples/plugins/subtitle-detector/Cargo.lock b/examples/plugins/subtitle-detector/Cargo.lock
new file mode 100644
index 0000000000000000000000000000000000000000..dda81f78a90e755efd0a764341c3f865622eac62
GIT binary patch
literal 1249
zcmb`GOK;mS5QOjg6@qhYEtAW4fC2@2>b<uh2R?RXBC-@nR8#-^QU(&lfOGSS6uFRQ
zz8O8Ve~x)+DG%7z)vskb`I4g_hI9L~#}Q{=Fty>_{_=A_b<IQjyKJN5RF|>fnCdl6
zb9?gR>W3CnE_v*`<}c1mp2qe^E1Tx&>D9*<-{bRhGx`(O(_N%~pK14LaK}5=$z8Kd
z>l`mLGxm8Y^ZDDcl-K3)aOiV6uAz(5=`gQL*$;l~4{;N@ba~q6D(By>iARht%X->A
z<Ah*j0}ydRLMq9El<1^J5{h$fK^ZVX*dzqldg`p^5;RvFxMh5ITYVZ%719*1^s^2+
zF79oBRA4-ELslU=Ym*faB|{~h*D@sQe1PbvUU<z#NK~bRN5u?+t2`6FZaIxH<5H14
zH6QT5*X@u)+-;k;N^g2w_RD#x&(F7gU#352cJXZQZ91r+rPc}pX<f5$4tes{F&;#4
z3cxx|jH3}c_@u~yAT(ZhsF<8fU!=CIq2w~)K4HOFruizqt$yq#u8*|8<nj)0$m7e^
zOD}xxTnS>9RR(~{kWs*s_8L%V@{Vf}qIKNIBmu}7o{-R)q$L`bn3sg=3p(!}wgOy7
zta%4__TZ5V1K>goNykKtOt8!go=9_3Gnci$CMA+ia_tS+z#s<UfD;L-Ki-X6zdN|3
Hux0fhi>!u3

literal 0
HcmV?d00001

diff --git a/examples/plugins/subtitle-detector/Cargo.toml b/examples/plugins/subtitle-detector/Cargo.toml
new file mode 100644
index 0000000..2b7d8ec
--- /dev/null
+++ b/examples/plugins/subtitle-detector/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "subtitle-detector"
+version = "1.0.0"
+edition = "2024"
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+dlmalloc = { version = "0.2", features = ["global"] }
+
+[profile.release]
+opt-level = "s"
+lto = true
+strip = true
diff --git a/examples/plugins/subtitle-detector/plugin.toml b/examples/plugins/subtitle-detector/plugin.toml
new file mode 100644
index 0000000..d836b75
--- /dev/null
+++ b/examples/plugins/subtitle-detector/plugin.toml
@@ -0,0 +1,18 @@
+[plugin]
+name = "subtitle-detector"
+version = "1.0.0"
+api_version = "1.0"
+description = "Registers SRT, VTT, and ASS subtitle formats and extracts language and entry count metadata"
+kind = ["media_type", "metadata_extractor"]
+priority = 500
+
+[plugin.binary]
+wasm = "subtitle_detector.wasm"
+
+[capabilities]
+network = false
+
+[capabilities.filesystem]
+# Users must add their media root directories here. Example:
+#   read = ["/home/user/media", "/mnt/nas/subtitles"]
+read = []
diff --git a/examples/plugins/subtitle-detector/src/lib.rs b/examples/plugins/subtitle-detector/src/lib.rs
new file mode 100644
index 0000000..bfab5e7
--- /dev/null
+++ b/examples/plugins/subtitle-detector/src/lib.rs
@@ -0,0 +1,345 @@
+//! Subtitle-detector plugin for Pinakes.
+//!
+//! Registers SRT, VTT, and ASS/SSA subtitle file formats and extracts
+//! language code and entry count metadata from them.
+//!
+//! Registered media types:
+//!   - `subtitle-srt`: extensions `["srt"]`,  mime `["text/x-subrip"]`
+//!   - `subtitle-vtt`: extensions `["vtt"]`,  mime `["text/vtt"]`
+//!   - `subtitle-ass`: extensions `["ass","ssa"]`, mime `["text/x-ass"]`
+//!
+//! Language detection uses filename conventions: `movie.en.srt` -> `en`.
+//!
+//! The `filesystem.read` capability in `plugin.toml` must be configured
+//! to include the directories containing subtitle files.
+//!
+//! Build with:
+//!   RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
+
+#![no_std]
+
+extern crate alloc;
+
+use alloc::{format, vec::Vec};
+use core::alloc::Layout;
+
+#[global_allocator]
+static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
+
+#[panic_handler]
+fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
+    core::arch::wasm32::unreachable() // wasm `unreachable` instruction: traps, never returns
+}
+
+// Host functions provided by the runtime (notes describe how this file uses them)
+unsafe extern "C" {
+    fn host_set_result(ptr: i32, len: i32); // called by `set_response` with the reply bytes
+    fn host_log(level: i32, ptr: i32, len: i32); // `log_info` passes level 2
+    fn host_read_file(path_ptr: i32, path_len: i32) -> i32; // < 0 is treated as failure, else as size
+    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32; // > 0 is treated as bytes copied
+}
+
+/// Hand `json` to the host via `host_set_result`.
+fn set_response(json: &[u8]) {
+    // SAFETY: the pointer/length pair describes the live `json` slice.
+    unsafe { host_set_result(json.as_ptr() as i32, json.len() as i32) };
+}
+
+/// Send `msg` to the host log at level 2.
+fn log_info(msg: &str) {
+    // SAFETY: the pointer/length pair describes the live `msg` string.
+    unsafe { host_log(2, msg.as_ptr() as i32, msg.len() as i32) };
+}
+
+/// Copy `len` bytes at `ptr` into a `Vec`; empty if `ptr` is null or negative, or `len <= 0`.
+unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
+    if ptr <= 0 || len <= 0 {
+        return Vec::new();
+    }
+    unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) }.to_vec()
+}
+
+/// Extract the raw (still escaped) string value for `key`; an escaped `\"` does not end it.
+fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
+    let text = core::str::from_utf8(json).ok()?;
+    let needle = format!("\"{}\"", key);
+    let rest = &text[text.find(&needle)? + needle.len()..];
+    let value = rest.trim_start().strip_prefix(':')?.trim_start().strip_prefix('"')?;
+    let mut escaped = false;
+    for (idx, ch) in value.char_indices() {
+        match ch {
+            _ if escaped => escaped = false,
+            '\\' => escaped = true,
+            '"' => return Some(&value[..idx]),
+            _ => {}
+        }
+    }
+    None
+}
+
+/// Escape a string for a JSON string value; control characters below U+0020 become `\u00XX`.
+fn json_escape(s: &str) -> alloc::string::String {
+    let mut out = alloc::string::String::with_capacity(s.len());
+    for c in s.chars() {
+        match c {
+            '"' | '\\' => out.extend(['\\', c]),
+            '\n' => out.push_str("\\n"),
+            '\r' => out.push_str("\\r"),
+            '\t' => out.push_str("\\t"),
+            '\0'..='\x1f' => out.push_str(&format!("\\u{:04x}", c as u32)),
+            _ => out.push(c),
+        }
+    }
+    out
+}
+
+// Size cap: 512 KiB. Subtitle files of this size or larger are reported as `too_large`.
+const MAX_FILE_BYTES: usize = 512 * 1024;
+
+/// Subtitle formats this plugin recognizes, as selected by `detect_format`.
+enum SubtitleFormat {
+    Srt, // SubRip, `.srt`
+    Vtt, // WebVTT, `.vtt`
+    Ass, // Advanced SubStation Alpha, `.ass` and `.ssa`
+}
+
+/// Determine the subtitle format from the file path's extension.
+/// The comparison ignores ASCII case; `.ass` and `.ssa` both map to `Ass`.
+/// Returns `None` for any other extension or when the path has no dot.
+fn detect_format(path: &str) -> Option<SubtitleFormat> {
+    let lower = path.to_ascii_lowercase();
+    // Everything after the last dot is the extension.
+    match lower.rsplit_once('.')?.1 {
+        "srt" => Some(SubtitleFormat::Srt),
+        "vtt" => Some(SubtitleFormat::Vtt),
+        "ass" | "ssa" => Some(SubtitleFormat::Ass),
+        _ => None,
+    }
+}
+
+/// Guess a 2-3 letter language code from a subtitle filename.
+///
+/// The final extension is dropped and the last dot-separated segment of the
+/// remaining stem is returned when it consists of two or three ASCII
+/// letters, e.g. `movie.en.srt` -> `en` and `film.fra.vtt` -> `fra`. Only
+/// `/` is treated as a directory separator.
+///
+/// The check is purely lexical: any short alphabetic segment in that
+/// position is returned, whether or not it is a real language code.
+///
+/// Returns `None` when the stem has no such segment.
+fn detect_language(path: &str) -> Option<&str> {
+    // `rsplit` always yields at least one piece, so the fallback is unused.
+    let filename = path.rsplit('/').next().unwrap_or(path);
+    // Drop the final extension; a name without a dot is its own stem.
+    let stem = filename.rfind('.').map_or(filename, |dot| &filename[..dot]);
+    // The candidate is whatever follows the last dot left in the stem.
+    let (_, candidate) = stem.rsplit_once('.')?;
+    let plausible = matches!(candidate.len(), 2 | 3)
+        && candidate.bytes().all(|b| b.is_ascii_alphabetic());
+    plausible.then_some(candidate)
+}
+
+/// Count `-->` occurrences in a byte slice.
+///
+/// `extract_metadata` uses the result as the entry count of SRT and VTT
+/// files. Every occurrence is counted, wherever in the data it appears.
+/// Slices shorter than three bytes contain none.
+fn count_arrow_markers(data: &[u8]) -> usize {
+    const MARKER: &[u8] = b"-->";
+    // Two occurrences can never overlap (the marker begins with `-` but ends
+    // with `>`), so counting matching windows equals a left-to-right scan
+    // that skips past each match.
+    data.windows(MARKER.len())
+        .filter(|window| window.starts_with(MARKER))
+        .count()
+}
+
+/// Count `Dialogue:` lines in an ASS/SSA file.
+///
+/// A line is counted when its very first bytes are the literal
+/// `Dialogue:`. Lines are delimited by `\n` only and leading whitespace is
+/// not skipped, so an indented `Dialogue:` does not count. The match is
+/// case-sensitive.
+fn count_ass_dialogues(data: &[u8]) -> usize {
+    const NEEDLE: &[u8] = b"Dialogue:";
+    let mut total = 0usize;
+    let mut rest = data;
+    loop {
+        // Invariant: `rest` begins at the start of a line.
+        if rest.starts_with(NEEDLE) {
+            total += 1;
+        }
+        // Continue after the next newline; without one this was the last line.
+        match rest.iter().position(|&byte| byte == b'\n') {
+            Some(newline) => rest = &rest[newline + 1..],
+            None => break,
+        }
+    }
+    total
+}
+
+/// Allocate `size` bytes of plugin memory and return the buffer's address.
+/// Returns `0` when `size` is not positive, and `-1` when the layout is
+/// rejected or the allocator returns null.
+#[unsafe(no_mangle)]
+pub extern "C" fn alloc(size: i32) -> i32 {
+    if size <= 0 {
+        return 0;
+    }
+    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
+        return -1;
+    };
+    // SAFETY: `size > 0` was checked above, so `layout` is not zero-sized.
+    match unsafe { alloc::alloc::alloc(layout) } {
+        block if block.is_null() => -1,
+        block => block as i32,
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn initialize() -> i32 {
+    log_info("subtitle-detector initialized"); // report start-up through the host log
+    0 // presumably the host reads 0 as success; TODO confirm against the plugin API
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn shutdown() -> i32 {
+    log_info("subtitle-detector shutdown"); // report shutdown through the host log
+    0 // presumably the host reads 0 as success; TODO confirm against the plugin API
+}
+
+/// Report the subtitle media types this plugin registers, as a JSON array with
+/// one definition each for SRT, VTT and ASS/SSA. The request arguments are ignored.
+#[unsafe(no_mangle)]
+pub extern "C" fn supported_media_types(_ptr: i32, _len: i32) {
+    set_response(br#"[
+{"id":"subtitle-srt","name":"SubRip Subtitle","category":"document","extensions":["srt"],"mime_types":["text/x-subrip"]},
+{"id":"subtitle-vtt","name":"WebVTT Subtitle","category":"document","extensions":["vtt"],"mime_types":["text/vtt"]},
+{"id":"subtitle-ass","name":"Advanced SubStation Alpha Subtitle","category":"document","extensions":["ass","ssa"],"mime_types":["text/x-ass"]}
+]"#);
+}
+
+/// Tell the host whether the request's `path` carries a subtitle extension
+/// that `detect_format` recognizes.
+#[unsafe(no_mangle)]
+pub extern "C" fn can_handle(ptr: i32, len: i32) {
+    let req = unsafe { read_request(ptr, len) };
+    // A missing `path` is answered exactly like an unsupported extension.
+    let reply: &[u8] = match json_get_str(&req, "path").and_then(detect_format) {
+        Some(_) => br#"{"can_handle":true}"#,
+        None => br#"{"can_handle":false}"#,
+    };
+    set_response(reply);
+}
+
+/// Report the media type IDs this extractor accepts, as a JSON array of strings.
+#[unsafe(no_mangle)]
+pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
+    set_response(br#"["subtitle-srt","subtitle-vtt","subtitle-ass"]"#);
+}
+
+/// Why a subtitle file's bytes could not be obtained from the host: the read
+/// failed, the file is `MAX_FILE_BYTES` or larger, or memory ran out.
+enum LoadError {
+    ReadFailed,
+    TooLarge,
+    AllocFailed,
+}
+
+/// Fetch the contents of `path` through the host's file-exchange buffer. A failed
+/// or empty copy yields an empty vector, so such a file reports zero entries.
+fn load_subtitle_file(path: &str) -> Result<Vec<u8>, LoadError> {
+    let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
+    if file_size < 0 {
+        return Err(LoadError::ReadFailed);
+    }
+    let buf_size = file_size as usize;
+    if buf_size >= MAX_FILE_BYTES {
+        return Err(LoadError::TooLarge);
+    }
+    let mut data: Vec<u8> = Vec::new();
+    if buf_size == 0 {
+        return Ok(data);
+    }
+    // Reserve fallibly so running out of memory is reported instead of aborting.
+    data.try_reserve_exact(buf_size).map_err(|_| LoadError::AllocFailed)?;
+    data.resize(buf_size, 0);
+    // SAFETY: `data` owns `buf_size` initialized bytes; that length is the write limit.
+    let copied = unsafe { host_get_buffer(data.as_mut_ptr() as i32, file_size) };
+    // Keep only what was copied, never more than the buffer offered to the host.
+    data.truncate(if copied > 0 { (copied as usize).min(buf_size) } else { 0 });
+    Ok(data)
+}
+
+/// Extract metadata from a subtitle file.
+///
+/// Responds with an `extra` map holding the `format`, the `entry_count`
+/// (`-->` markers for SRT/VTT, `Dialogue:` lines for ASS/SSA) and, when the
+/// filename carries one, the `language`. If the file cannot be read the
+/// count is omitted; if it is too large `too_large` is reported instead.
+#[unsafe(no_mangle)]
+pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
+    let req = unsafe { read_request(ptr, len) };
+    let Some(path) = json_get_str(&req, "path") else {
+        set_response(br#"{"extra":{"error":"missing path"}}"#);
+        return;
+    };
+    let Some(format) = detect_format(path) else {
+        set_response(br#"{"extra":{"error":"unsupported format"}}"#);
+        return;
+    };
+    let format_str = match format {
+        SubtitleFormat::Srt => "srt",
+        SubtitleFormat::Vtt => "vtt",
+        SubtitleFormat::Ass => "ass",
+    };
+
+    // Rendered once: every response below appends the same optional fragment.
+    let lang_field = detect_language(path)
+        .map(|l| format!(r#","language":"{}""#, json_escape(l)))
+        .unwrap_or_default();
+
+    // Load file contents
+    let data = match load_subtitle_file(path) {
+        Ok(data) => data,
+        Err(LoadError::ReadFailed) => {
+            // Return what we have without entry count
+            let resp = format!(
+                r#"{{"extra":{{"format":"{}"{}}}}}"#,
+                format_str, lang_field,
+            );
+            set_response(resp.as_bytes());
+            return;
+        }
+        Err(LoadError::TooLarge) => {
+            let resp = format!(
+                r#"{{"extra":{{"format":"{}","too_large":"true"{}}}}}"#,
+                format_str, lang_field,
+            );
+            set_response(resp.as_bytes());
+            return;
+        }
+        Err(LoadError::AllocFailed) => {
+            set_response(br#"{"extra":{"error":"alloc failed"}}"#);
+            return;
+        }
+    };
+
+    let entry_count = match format {
+        SubtitleFormat::Srt | SubtitleFormat::Vtt => count_arrow_markers(&data),
+        SubtitleFormat::Ass => count_ass_dialogues(&data),
+    };
+
+    let msg = format!(
+        "subtitle-detector: format={}, entries={}, path={}",
+        format_str, entry_count, path,
+    );
+    log_info(&msg);
+
+    let resp = format!(
+        r#"{{"extra":{{"format":"{}","entry_count":"{}"{}}}}}"#,
+        format_str, entry_count, lang_field,
+    );
+    set_response(resp.as_bytes());
+}
diff --git a/examples/plugins/text-enrichment/Cargo.lock b/examples/plugins/text-enrichment/Cargo.lock
new file mode 100644
index 0000000000000000000000000000000000000000..0697c2420c2dddeded216402223496ec7c273411
GIT binary patch
literal 1247
zcmb`G%Wm5+5JmU;3c*>nmN|U0K!E~Xb>Cf(1s^jq5j_HlZsNZ$WgtNeI6JRMkppS&
zIirX6?=i0}<pJBe`cs)lUvl)raBhF~IN{<8rZ$}0-+t-mu6bzxl&y4{>oOIbQoW{m
zX-7Yu{Lo^`B~N|V{KI9<^VI%qWz#%9zWDg;dwhCoCO=|5-9_s6nRcHFchIp;?wWNz
zE%8#bV4v5roWC7Qd0BrP4t*}iQ|RJ69+uNu_Jg1LL)@BNyFBl6ZRc&*#3RP%^)zmu
zaY8V%ffIosA(do7N_0{q2?YR_E5l6?HVMIPJ#|)t<Qf!kuncy$yHCSdA<glsf7bC1
z#JvNM3Jf9`vI@~zo2=kbGE~xeEkm--2Ob^O3$H<hM74G9Q89zuRh|i7HcwN`xK<>O
z%?JGNbvxt`ciZN*(wkq`{d!*O^V99zm+H^XE}8AULkAVK)LQXCT2~j&AtY}dgCK%a
zoLi@f0UDu$Pl^l|gvJZcD<<dC7uE{Cl|4>Nj>i$FawTsYK6Vq<hwCq?yrUcP^nCTw
z3!OU;1To7h0glR$Q5=%?nxoL<9cU4vb>L%?oRc$<kkFZ=B^s8PmxSsIHt!iW{nvsu
zbo|a4Jc2NsyAVUtF%csZEb{^qX)x8etkY{!BIzV(Z@3MN#~>UBB0=@HyD`-72`&~k
GSN{PV^M!B#

literal 0
HcmV?d00001

diff --git a/examples/plugins/text-enrichment/Cargo.toml b/examples/plugins/text-enrichment/Cargo.toml
new file mode 100644
index 0000000..d073c1f
--- /dev/null
+++ b/examples/plugins/text-enrichment/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "text-enrichment"
+version = "1.0.0"
+edition = "2024"
+
+[lib]
+crate-type = ["cdylib"]
+
+[dependencies]
+dlmalloc = { version = "0.2", features = ["global"] }
+
+[profile.release]
+opt-level = "s"
+lto = true
+strip = true
diff --git a/examples/plugins/text-enrichment/plugin.toml b/examples/plugins/text-enrichment/plugin.toml
new file mode 100644
index 0000000..f451a3e
--- /dev/null
+++ b/examples/plugins/text-enrichment/plugin.toml
@@ -0,0 +1,18 @@
+[plugin]
+name = "text-enrichment"
+version = "1.0.0"
+api_version = "1.0"
+description = "Enriches plain text files with word count, line count, byte count, and estimated reading time"
+kind = ["metadata_extractor"]
+priority = 500
+
+[plugin.binary]
+wasm = "text_enrichment.wasm"
+
+[capabilities]
+network = false
+
+[capabilities.filesystem]
+# Users must add their media root directories here. Example:
+#   read = ["/home/user/media", "/mnt/nas/texts"]
+read = []
diff --git a/examples/plugins/text-enrichment/src/lib.rs b/examples/plugins/text-enrichment/src/lib.rs
new file mode 100644
index 0000000..f6b248d
--- /dev/null
+++ b/examples/plugins/text-enrichment/src/lib.rs
@@ -0,0 +1,198 @@
+//! Text-enrichment plugin for Pinakes.
+//!
+//! Extracts word count, line count, byte count, and estimated reading
+//! time from plain text (`.txt`) files.
+//!
+//! The `filesystem.read` capability list in `plugin.toml` must be configured
+//! to include the directories where text files live.
+//!
+//! Build with:
+//!   RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
+
+#![no_std]
+
+extern crate alloc;
+
+use alloc::{format, vec::Vec};
+use core::alloc::Layout;
+
+#[global_allocator]
+static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
+
+#[panic_handler]
+fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
+    core::arch::wasm32::unreachable() // wasm `unreachable` instruction: traps, never returns
+}
+
+// Host functions provided by the runtime (notes describe how this file uses them)
+unsafe extern "C" {
+    fn host_set_result(ptr: i32, len: i32); // called by `set_response` with the reply bytes
+    fn host_log(level: i32, ptr: i32, len: i32); // `log_info` passes level 2
+    fn host_read_file(path_ptr: i32, path_len: i32) -> i32; // < 0 is treated as failure, else as size
+    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32; // <= 0 is treated as a failed copy
+}
+
+/// Hand `json` to the host via `host_set_result`.
+fn set_response(json: &[u8]) {
+    // SAFETY: the pointer/length pair describes the live `json` slice.
+    unsafe { host_set_result(json.as_ptr() as i32, json.len() as i32) };
+}
+
+/// Send `msg` to the host log at level 2.
+fn log_info(msg: &str) {
+    // SAFETY: the pointer/length pair describes the live `msg` string.
+    unsafe { host_log(2, msg.as_ptr() as i32, msg.len() as i32) };
+}
+
+/// Copy `len` bytes at `ptr` into a `Vec`; empty if `ptr` is null or negative, or `len <= 0`.
+unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
+    if ptr <= 0 || len <= 0 {
+        return Vec::new();
+    }
+    unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) }.to_vec()
+}
+
+/// Extract the raw (still escaped) string value for `key`; an escaped `\"` does not end it.
+fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
+    let text = core::str::from_utf8(json).ok()?;
+    let needle = format!("\"{}\"", key);
+    let rest = &text[text.find(&needle)? + needle.len()..];
+    let value = rest.trim_start().strip_prefix(':')?.trim_start().strip_prefix('"')?;
+    let mut escaped = false;
+    for (idx, ch) in value.char_indices() {
+        match ch {
+            _ if escaped => escaped = false,
+            '\\' => escaped = true,
+            '"' => return Some(&value[..idx]),
+            _ => {}
+        }
+    }
+    None
+}
+
+// Size cap: 5 MiB. Files of this size or larger are reported as `too_large`.
+const MAX_FILE_BYTES: usize = 5 * 1024 * 1024;
+
+/// Allocate `size` bytes of plugin memory and return the buffer's address.
+/// Returns `0` when `size` is not positive, and `-1` when the layout is
+/// rejected or the allocator returns null.
+#[unsafe(no_mangle)]
+pub extern "C" fn alloc(size: i32) -> i32 {
+    if size <= 0 {
+        return 0;
+    }
+    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
+        return -1;
+    };
+    // SAFETY: `size > 0` was checked above, so `layout` is not zero-sized.
+    match unsafe { alloc::alloc::alloc(layout) } {
+        block if block.is_null() => -1,
+        block => block as i32,
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn initialize() -> i32 {
+    log_info("text-enrichment initialized"); // report start-up through the host log
+    0 // presumably the host reads 0 as success; TODO confirm against the plugin API
+}
+
+#[unsafe(no_mangle)]
+pub extern "C" fn shutdown() -> i32 {
+    log_info("text-enrichment shutdown"); // report shutdown through the host log
+    0 // presumably the host reads 0 as success; TODO confirm against the plugin API
+}
+
+/// Report the media type IDs this extractor accepts, as a JSON array of strings.
+#[unsafe(no_mangle)]
+pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
+    set_response(br#"["text"]"#);
+}
+
+/// Count `(words, lines)` in `content`. A word is a maximal run of bytes
+/// other than space, tab, `\n` and `\r`; lines are the `\n` bytes plus one
+/// more when the data does not end in `\n`.
+fn text_stats(content: &[u8]) -> (usize, usize) {
+    let newlines = content.iter().filter(|&&b| b == b'\n').count();
+    let unterminated = content.last() != Some(&b'\n');
+    let lines = newlines + usize::from(unterminated);
+    let is_ws = |b: u8| matches!(b, b' ' | b'\t' | b'\n' | b'\r');
+    // A word starts at each non-whitespace byte that follows whitespace or opens the data.
+    let mut words = 0usize;
+    let mut prev_ws = true;
+    for &b in content {
+        let ws = is_ws(b);
+        if prev_ws && !ws {
+            words += 1;
+        }
+        prev_ws = ws;
+    }
+    (words, lines)
+}
+
+/// Extract text statistics from a plain text file.
+///
+/// Responds with an `extra` map of decimal strings: `word_count`,
+/// `line_count`, `byte_count` and `reading_minutes` (200 words per minute,
+/// rounded up). Failures are reported as `{"extra":{"error":...}}` and files
+/// of `MAX_FILE_BYTES` or more as `{"extra":{"too_large":"true"}}`.
+#[unsafe(no_mangle)]
+pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
+    let req = unsafe { read_request(ptr, len) };
+    let Some(path) = json_get_str(&req, "path") else {
+        set_response(br#"{"extra":{"error":"missing path"}}"#);
+        return;
+    };
+
+    // Ask the host to load the file into the exchange buffer.
+    let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
+    if file_size < 0 {
+        set_response(br#"{"extra":{"error":"read failed"}}"#);
+        return;
+    }
+    let buf_size = file_size as usize;
+    if buf_size >= MAX_FILE_BYTES {
+        set_response(br#"{"extra":{"too_large":"true"}}"#);
+        return;
+    }
+    if buf_size == 0 {
+        let resp = r#"{"extra":{"word_count":"0","line_count":"0","byte_count":"0","reading_minutes":"0"}}"#;
+        set_response(resp.as_bytes());
+        return;
+    }
+
+    // Reserve fallibly so running out of memory is reported instead of aborting.
+    let mut content: Vec<u8> = Vec::new();
+    if content.try_reserve_exact(buf_size).is_err() {
+        set_response(br#"{"extra":{"error":"alloc failed"}}"#);
+        return;
+    }
+    content.resize(buf_size, 0);
+
+    // SAFETY: `content` owns `buf_size` initialized bytes; that length is the write limit.
+    let copied = unsafe { host_get_buffer(content.as_mut_ptr() as i32, file_size) };
+    if copied <= 0 {
+        set_response(br#"{"extra":{"error":"buffer copy failed"}}"#);
+        return;
+    }
+    // Keep only what was copied, never more than the buffer offered to the host.
+    content.truncate((copied as usize).min(buf_size));
+
+    let byte_count = content.len();
+    let (word_count, line_count) = text_stats(&content);
+
+    // Estimate reading time at 200 words per minute, rounding up.
+    let reading_minutes = (word_count + 199) / 200;
+
+    let msg = format!(
+        "text-enrichment: {} words, {} lines, {} bytes",
+        word_count, line_count, byte_count
+    );
+    log_info(&msg);
+
+    let resp = format!(
+        r#"{{"extra":{{"word_count":"{}","line_count":"{}","byte_count":"{}","reading_minutes":"{}"}}}}"#,
+        word_count, line_count, byte_count, reading_minutes,
+    );
+    set_response(resp.as_bytes());
+}