examples: add WASM plugin examples

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Id4b791396ab37827caced2c8cc03ec356a6a6964
This commit is contained in:
raf 2026-05-20 21:52:21 +03:00
commit 934fcba8ca
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
16 changed files with 1720 additions and 0 deletions

Binary file not shown.

View file

@ -0,0 +1,15 @@
# Cargo manifest for the text-enrichment example plugin.
[package]
name = "text-enrichment"
version = "1.0.0"
edition = "2024"
[lib]
# Built as a C-ABI dynamic library; the plugin source documents building it for the
# wasm32-unknown-unknown target.
crate-type = ["cdylib"]
[dependencies]
# Allocator crate: the plugin is `#![no_std]` and installs `dlmalloc::GlobalDlmalloc`
# as its `#[global_allocator]`.
dlmalloc = { version = "0.2", features = ["global"] }
[profile.release]
# Size-oriented release settings: optimize for size, enable link-time optimization,
# and strip symbols from the output.
opt-level = "s"
lto = true
strip = true

View file

@ -0,0 +1,18 @@
# Plugin manifest for the text-enrichment example.
[plugin]
name = "text-enrichment"
version = "1.0.0"
api_version = "1.0"
description = "Enriches plain text files with word count, line count, character count, and estimated reading time"
kind = ["metadata_extractor"]
# NOTE(review): ordering semantics of `priority` are defined by the host and are not
# visible here - confirm whether higher or lower values run first.
priority = 500
[plugin.binary]
# File name matches the crate name with `-` replaced by `_`.
# NOTE(review): presumably resolved relative to this manifest - confirm.
wasm = "text_enrichment.wasm"
[capabilities]
network = false
[capabilities.filesystem]
# Users must add their media root directories here. Example:
# read = ["/home/user/media", "/mnt/nas/texts"]
# The plugin source states this list must include the directories where the text
# files live; it is empty by default.
read = []

View file

@ -0,0 +1,198 @@
//! Text-enrichment plugin for Pinakes.
//!
//! Extracts word count, line count, byte count, and estimated reading
//! time from plain text (`.txt`) files.
//!
//! The `filesystem.read` capability list in `plugin.toml` must be configured
//! to include the directories where text files live.
//!
//! Build with:
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
#![no_std]
extern crate alloc;
use alloc::{format, vec::Vec};
use core::alloc::Layout;
// The crate is `#![no_std]`, so heap allocations made through the `alloc` crate
// (`Vec`, `format!`, raw `alloc::alloc::alloc` calls) are served by dlmalloc.
#[global_allocator]
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
#[panic_handler]
fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
core::arch::wasm32::unreachable()
}
// Host functions provided by the runtime.
//
// All pointers and lengths cross the boundary as `i32`. The notes below describe
// how this file uses each import; the authoritative contract lives in the host.
unsafe extern "C" {
// Called with the address and byte length of the response JSON.
fn host_set_result(ptr: i32, len: i32);
// Called with a numeric level plus the address and byte length of the message.
// This file only ever passes level 2.
fn host_log(level: i32, ptr: i32, len: i32);
// Called with the address and byte length of a path string. This file treats a
// negative return as failure and a non-negative return as the file size in bytes.
fn host_read_file(path_ptr: i32, path_len: i32) -> i32;
// Called with a destination address and capacity. This file treats a return
// value <= 0 as failure and a positive return as the number of bytes copied.
fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
}
/// Hands a response payload to the host.
///
/// Passes the address and byte length of `json` to `host_set_result`, both
/// cast to `i32` to match the import's signature.
fn set_response(json: &[u8]) {
    let addr = json.as_ptr() as i32;
    let size = json.len() as i32;
    // SAFETY: `addr`/`size` describe the `json` slice, which is borrowed for
    // the whole call. NOTE(review): assumes the host only reads within that
    // range and does not keep the pointer after returning - confirm.
    unsafe { host_set_result(addr, size) }
}
/// Sends `msg` to the host logger through `host_log`.
fn log_info(msg: &str) {
    // NOTE(review): presumably the host's "info" level - confirm.
    const LEVEL: i32 = 2;

    let text = msg.as_bytes();
    // SAFETY: the pointer/length pair describes `msg`, which is borrowed for
    // the whole call. NOTE(review): assumes the host only reads within that
    // range - confirm.
    unsafe { host_log(LEVEL, text.as_ptr() as i32, text.len() as i32) }
}
/// Copies the request payload at `ptr`/`len` into an owned buffer.
///
/// Returns an empty vector when `ptr` is zero or negative, or when `len` is
/// zero or negative. A zero `ptr` is rejected because building a slice from a
/// null pointer is undefined behaviour.
///
/// # Safety
///
/// When both `ptr` and `len` are positive, `ptr` must be the address of `len`
/// readable, initialized bytes that stay valid for the duration of the call.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    if ptr <= 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: `ptr` is non-null and `len` is positive (checked above); the
    // caller guarantees the range is readable per this function's contract.
    let payload = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    payload.to_vec()
}
/// Extract a string value from a JSON object for a given key.
///
/// This is a minimal textual scanner, not a JSON parser. It looks for
/// `"key"` followed by optional whitespace and a `:`; occurrences of the
/// quoted key text that are not followed by a colon (for example when the
/// same text appears as a value) are skipped and the search continues.
///
/// The returned slice is the raw text between the value's quotes. A
/// backslash-escaped quote (`\"`) does not terminate the value, but escape
/// sequences are returned as written, not decoded.
///
/// Returns `None` when `json` is not valid UTF-8, the key is not found, the
/// value is not a string, or the string is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let text = core::str::from_utf8(json).ok()?;
    let needle = format!("\"{}\"", key);

    let mut rest = text;
    while let Some(pos) = rest.find(&needle) {
        let after_key = rest[pos + needle.len()..].trim_start();
        if let Some(after_colon) = after_key.strip_prefix(':') {
            // The key was found; anything other than a string value is a miss.
            let body = after_colon.trim_start().strip_prefix('"')?;

            // Find the first quote that is not preceded by an escaping backslash.
            let mut escaped = false;
            for (i, b) in body.bytes().enumerate() {
                if escaped {
                    escaped = false;
                } else if b == b'\\' {
                    escaped = true;
                } else if b == b'"' {
                    return Some(&body[..i]);
                }
            }
            return None;
        }
        // Not a key position: resume just past this match's opening quote
        // (one ASCII byte, so the index stays on a char boundary).
        rest = &rest[pos + 1..];
    }
    None
}
// Content size limit: 5 MiB (5 * 1024 * 1024 bytes). The size check in this file
// uses `>=`, so a file of exactly this size is also reported as too large.
const MAX_FILE_BYTES: usize = 5 * 1024 * 1024;
/// Exported allocation entry point.
///
/// Returns `0` when `size` is zero or negative, `-1` when the layout cannot
/// be built or the allocator returns null, and otherwise the address of a
/// freshly allocated block of `size` bytes (alignment 1) cast to `i32`.
#[unsafe(no_mangle)]
pub extern "C" fn alloc(size: i32) -> i32 {
    if size <= 0 {
        return 0;
    }

    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
        return -1;
    };

    // SAFETY: `size > 0` was checked above, so `layout` has a non-zero size.
    let raw = unsafe { alloc::alloc::alloc(layout) };
    if raw.is_null() { -1 } else { raw as i32 }
}
/// Exported lifecycle hook: logs an initialization message and returns `0`.
#[unsafe(no_mangle)]
pub extern "C" fn initialize() -> i32 {
    // NOTE(review): presumably 0 signals success to the host - confirm.
    const RETURN_CODE: i32 = 0;

    log_info("text-enrichment initialized");
    RETURN_CODE
}
/// Exported lifecycle hook: logs a shutdown message and returns `0`.
#[unsafe(no_mangle)]
pub extern "C" fn shutdown() -> i32 {
    // NOTE(review): presumably 0 signals success to the host - confirm.
    const RETURN_CODE: i32 = 0;

    log_info("text-enrichment shutdown");
    RETURN_CODE
}
/// Reports the media types this extractor handles.
///
/// Both arguments are ignored; the response is always the JSON array
/// `["text"]`, delivered through `set_response`.
#[unsafe(no_mangle)]
pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
    const SUPPORTED: &[u8] = br#"["text"]"#;

    set_response(SUPPORTED);
}
/// Extract text statistics from a plain text file.
#[unsafe(no_mangle)]
pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
let req = unsafe { read_request(ptr, len) };
let path = match json_get_str(&req, "path") {
Some(p) => p,
None => {
set_response(br#"{"extra":{"error":"missing path"}}"#);
return;
}
};
// Ask the host to load the file into the exchange buffer.
let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
if file_size < 0 {
set_response(br#"{"extra":{"error":"read failed"}}"#);
return;
}
if file_size as usize >= MAX_FILE_BYTES {
set_response(br#"{"extra":{"too_large":"true"}}"#);
return;
}
let buf_size = file_size as usize;
if buf_size == 0 {
let resp = r#"{"extra":{"word_count":"0","line_count":"0","byte_count":"0","reading_minutes":"0"}}"#;
set_response(resp.as_bytes());
return;
}
let layout = match Layout::from_size_align(buf_size, 1) {
Ok(l) => l,
Err(_) => {
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
return;
}
};
let buf_ptr = unsafe { alloc::alloc::alloc(layout) };
if buf_ptr.is_null() {
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
return;
}
let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) };
if copied <= 0 {
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
set_response(br#"{"extra":{"error":"buffer copy failed"}}"#);
return;
}
let content = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) };
let byte_count = content.len();
let line_count = content.iter().filter(|&&b| b == b'\n').count()
+ if content.last().map_or(true, |&b| b != b'\n') { 1 } else { 0 };
// Count words: transitions from whitespace to non-whitespace.
let mut word_count = 0usize;
let mut in_word = false;
for &b in content {
let is_ws = b == b' ' || b == b'\t' || b == b'\n' || b == b'\r';
if !is_ws && !in_word {
word_count += 1;
in_word = true;
} else if is_ws {
in_word = false;
}
}
// Estimate reading time at 200 words per minute, rounding up.
let reading_minutes = (word_count + 199) / 200;
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
let msg = format!(
"text-enrichment: {} words, {} lines, {} chars",
word_count, line_count, byte_count
);
log_info(&msg);
let resp = format!(
r#"{{"extra":{{"word_count":"{}","line_count":"{}","byte_count":"{}","reading_minutes":"{}"}}}}"#,
word_count, line_count, byte_count, reading_minutes,
);
set_response(resp.as_bytes());
}