examples: add WASM plugin examples
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Id4b791396ab37827caced2c8cc03ec356a6a6964
This commit is contained in:
parent
011e8edb28
commit
934fcba8ca
16 changed files with 1720 additions and 0 deletions
BIN
examples/plugins/text-enrichment/Cargo.lock
generated
Normal file
BIN
examples/plugins/text-enrichment/Cargo.lock
generated
Normal file
Binary file not shown.
15
examples/plugins/text-enrichment/Cargo.toml
Normal file
15
examples/plugins/text-enrichment/Cargo.toml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "text-enrichment"
|
||||
version = "1.0.0"
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
dlmalloc = { version = "0.2", features = ["global"] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "s"
|
||||
lto = true
|
||||
strip = true
|
||||
18
examples/plugins/text-enrichment/plugin.toml
Normal file
18
examples/plugins/text-enrichment/plugin.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[plugin]
|
||||
name = "text-enrichment"
|
||||
version = "1.0.0"
|
||||
api_version = "1.0"
|
||||
description = "Enriches plain text files with word count, line count, character count, and estimated reading time"
|
||||
kind = ["metadata_extractor"]
|
||||
priority = 500
|
||||
|
||||
[plugin.binary]
|
||||
wasm = "text_enrichment.wasm"
|
||||
|
||||
[capabilities]
|
||||
network = false
|
||||
|
||||
[capabilities.filesystem]
|
||||
# Users must add their media root directories here. Example:
|
||||
# read = ["/home/user/media", "/mnt/nas/texts"]
|
||||
read = []
|
||||
198
examples/plugins/text-enrichment/src/lib.rs
Normal file
198
examples/plugins/text-enrichment/src/lib.rs
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
//! Text-enrichment plugin for Pinakes.
|
||||
//!
|
||||
//! Extracts word count, line count, byte count, and estimated reading
|
||||
//! time from plain text (`.txt`) files.
|
||||
//!
|
||||
//! The `filesystem.read` capability list in `plugin.toml` must be configured
|
||||
//! to include the directories where text files live.
|
||||
//!
|
||||
//! Build with:
|
||||
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
|
||||
|
||||
#![no_std]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::{format, vec::Vec};
|
||||
use core::alloc::Layout;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
|
||||
|
||||
#[panic_handler]
|
||||
fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
|
||||
core::arch::wasm32::unreachable()
|
||||
}
|
||||
|
||||
// Functions imported from the host runtime.
//
// All pointers are passed as `i32` offsets into this module's linear memory.
// The notes below describe only how this file uses each import; the
// authoritative contract lives in the host and should be confirmed there.
unsafe extern "C" {
    /// Receives the pointer/length of the response bytes for the current call.
    fn host_set_result(data_ptr: i32, data_len: i32);

    /// Receives a log level plus the pointer/length of a UTF-8 message.
    /// This file only ever passes level `2` (from `log_info`).
    fn host_log(level: i32, msg_ptr: i32, msg_len: i32);

    /// Receives the pointer/length of a path string. The caller in this file
    /// treats a negative return as failure and a non-negative return as the
    /// file size in bytes.
    fn host_read_file(path_ptr: i32, path_len: i32) -> i32;

    /// Receives a destination pointer and capacity. The caller in this file
    /// treats a return value `<= 0` as failure and a positive return as the
    /// number of bytes written to the destination.
    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
}
|
||||
|
||||
fn set_response(json: &[u8]) {
|
||||
unsafe {
|
||||
host_set_result(json.as_ptr() as i32, json.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
fn log_info(msg: &str) {
|
||||
unsafe {
|
||||
host_log(2, msg.as_ptr() as i32, msg.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies the `len`-byte request located at linear-memory offset `ptr` into
/// an owned buffer.
///
/// Returns an empty vector when `ptr` is zero or negative, or when `len` is
/// zero or negative. Offset `0` is rejected because it is the null pointer:
/// `core::slice::from_raw_parts` forbids null even for memory that happens to
/// be addressable, and this plugin's own `alloc` export returns `0` only for
/// non-positive sizes, so `0` never names a buffer it handed out.
///
/// # Safety
///
/// For positive `ptr` and `len`, the caller must guarantee that the `len`
/// bytes starting at `ptr` are readable, initialized, and not mutated for the
/// duration of the call.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    if ptr <= 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: `ptr` is non-null and `len` is positive (checked above); the
    // caller vouches for the validity of the region per the contract above.
    let request = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    request.to_vec()
}
|
||||
|
||||
/// Extract a string value from a JSON object for a given key.
///
/// This is a deliberately small scanner, not a JSON parser. It looks for the
/// text `"key"` followed by `:` and a double-quoted string, and returns the
/// characters between the quotes as a slice borrowed from `json`.
///
/// * Occurrences of `"key"` that are not followed by `: "..."` (for example
///   the same text appearing as a *value*, or a key whose value is not a
///   string) are skipped and the search continues.
/// * A backslash-escaped quote (`\"`) does not terminate the value. Escape
///   sequences are **not** decoded; the raw text is returned as written.
/// * Nesting is not tracked, so a matching key inside a nested object can be
///   returned.
///
/// Returns `None` when `json` is not valid UTF-8, when no suitable key/value
/// pair exists, or when the string value is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let text = core::str::from_utf8(json).ok()?;
    let quoted_key = format!("\"{}\"", key);

    // `cursor` always sits just past an ASCII `"`, so it is a char boundary.
    let mut cursor = 0;
    while let Some(found) = text[cursor..].find(&quoted_key) {
        cursor += found + quoted_key.len();

        let Some(after_colon) = text[cursor..].trim_start().strip_prefix(':') else {
            // Matched the key text somewhere it is not acting as a key.
            continue;
        };
        let Some(value) = after_colon.trim_start().strip_prefix('"') else {
            // The value is not a string; keep looking for another candidate.
            continue;
        };

        // Find the closing quote, stepping over backslash escapes.
        let mut escaped = false;
        for (idx, byte) in value.bytes().enumerate() {
            match byte {
                _ if escaped => escaped = false,
                b'\\' => escaped = true,
                b'"' => return Some(&value[..idx]),
                _ => {}
            }
        }

        // Unterminated string: no closing quote exists in the rest of the
        // input, so no later candidate can succeed either.
        return None;
    }
    None
}
|
||||
|
||||
/// Size cutoff for files this plugin will analyze: 5 MiB.
///
/// `extract_metadata` reports `too_large` for any file whose size is at or
/// above this value instead of reading it.
const MAX_FILE_BYTES: usize = 5 << 20;
|
||||
|
||||
/// Exported allocation entry point: reserves `size` bytes (alignment 1) from
/// the plugin heap and returns the address as an `i32`.
///
/// Return values:
/// * `0`  — `size` was zero or negative; nothing was allocated.
/// * `-1` — the layout was rejected or the allocator returned null.
/// * otherwise — the address of the new block.
///
/// NOTE(review): presumably called by the host to obtain space for request
/// payloads; no matching deallocation export is visible in this file.
#[unsafe(no_mangle)]
pub extern "C" fn alloc(size: i32) -> i32 {
    if size <= 0 {
        return 0;
    }

    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
        return -1;
    };

    // SAFETY: `size > 0`, so `layout` has a non-zero size as the global
    // allocator requires.
    let block = unsafe { alloc::alloc::alloc(layout) };
    if block.is_null() { -1 } else { block as i32 }
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn initialize() -> i32 {
|
||||
log_info("text-enrichment initialized");
|
||||
0
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn shutdown() -> i32 {
|
||||
log_info("text-enrichment shutdown");
|
||||
0
|
||||
}
|
||||
|
||||
/// Returns the media types this extractor supports.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
|
||||
set_response(br#"["text"]"#);
|
||||
}
|
||||
|
||||
/// Extract text statistics from a plain text file.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
let path = match json_get_str(&req, "path") {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
set_response(br#"{"extra":{"error":"missing path"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Ask the host to load the file into the exchange buffer.
|
||||
let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
|
||||
if file_size < 0 {
|
||||
set_response(br#"{"extra":{"error":"read failed"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
if file_size as usize >= MAX_FILE_BYTES {
|
||||
set_response(br#"{"extra":{"too_large":"true"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let buf_size = file_size as usize;
|
||||
if buf_size == 0 {
|
||||
let resp = r#"{"extra":{"word_count":"0","line_count":"0","byte_count":"0","reading_minutes":"0"}}"#;
|
||||
set_response(resp.as_bytes());
|
||||
return;
|
||||
}
|
||||
|
||||
let layout = match Layout::from_size_align(buf_size, 1) {
|
||||
Ok(l) => l,
|
||||
Err(_) => {
|
||||
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
let buf_ptr = unsafe { alloc::alloc::alloc(layout) };
|
||||
if buf_ptr.is_null() {
|
||||
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) };
|
||||
if copied <= 0 {
|
||||
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
|
||||
set_response(br#"{"extra":{"error":"buffer copy failed"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let content = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) };
|
||||
|
||||
let byte_count = content.len();
|
||||
let line_count = content.iter().filter(|&&b| b == b'\n').count()
|
||||
+ if content.last().map_or(true, |&b| b != b'\n') { 1 } else { 0 };
|
||||
|
||||
// Count words: transitions from whitespace to non-whitespace.
|
||||
let mut word_count = 0usize;
|
||||
let mut in_word = false;
|
||||
for &b in content {
|
||||
let is_ws = b == b' ' || b == b'\t' || b == b'\n' || b == b'\r';
|
||||
if !is_ws && !in_word {
|
||||
word_count += 1;
|
||||
in_word = true;
|
||||
} else if is_ws {
|
||||
in_word = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Estimate reading time at 200 words per minute, rounding up.
|
||||
let reading_minutes = (word_count + 199) / 200;
|
||||
|
||||
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
|
||||
|
||||
let msg = format!(
|
||||
"text-enrichment: {} words, {} lines, {} chars",
|
||||
word_count, line_count, byte_count
|
||||
);
|
||||
log_info(&msg);
|
||||
|
||||
let resp = format!(
|
||||
r#"{{"extra":{{"word_count":"{}","line_count":"{}","byte_count":"{}","reading_minutes":"{}"}}}}"#,
|
||||
word_count, line_count, byte_count, reading_minutes,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue