examples: add WASM plugin examples
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Id4b791396ab37827caced2c8cc03ec356a6a6964
This commit is contained in:
parent
011e8edb28
commit
934fcba8ca
16 changed files with 1720 additions and 0 deletions
BIN
examples/plugins/auto-tagger/Cargo.lock
generated
Normal file
BIN
examples/plugins/auto-tagger/Cargo.lock
generated
Normal file
Binary file not shown.
15
examples/plugins/auto-tagger/Cargo.toml
Normal file
15
examples/plugins/auto-tagger/Cargo.toml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "auto-tagger"
|
||||
version = "1.0.0"
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
dlmalloc = { version = "0.2", features = ["global"] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "s"
|
||||
lto = true
|
||||
strip = true
|
||||
13
examples/plugins/auto-tagger/plugin.toml
Normal file
13
examples/plugins/auto-tagger/plugin.toml
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
[plugin]
|
||||
name = "auto-tagger"
|
||||
version = "1.0.0"
|
||||
api_version = "1.0"
|
||||
description = "Listens for MediaImported events and emits AutoTagSuggested events based on path pattern rules"
|
||||
kind = ["event_handler"]
|
||||
priority = 500
|
||||
|
||||
[plugin.binary]
|
||||
wasm = "auto_tagger.wasm"
|
||||
|
||||
[capabilities]
|
||||
network = false
|
||||
303
examples/plugins/auto-tagger/src/lib.rs
Normal file
303
examples/plugins/auto-tagger/src/lib.rs
Normal file
|
|
@ -0,0 +1,303 @@
|
|||
//! Auto-tagger plugin for Pinakes.
|
||||
//!
|
||||
//! Listens for `MediaImported` events and, based on configurable path pattern
|
||||
//! rules, emits `AutoTagSuggested` events. Rules map path substrings to tag
|
||||
//! names.
|
||||
//!
|
||||
//! Configuration key `rules` expects a JSON array of objects:
|
||||
//! `[{"pattern": "/music/", "tag": "music"}, ...]`
|
||||
//!
|
||||
//! If no config is present, built-in defaults are used:
|
||||
//! - `/music/` -> `music`
|
||||
//! - `/photos/` -> `photo`
|
||||
//! - `/videos/` -> `video`
|
||||
//! - `/books/` -> `book`
|
||||
//! - `/documents/` -> `document`
|
||||
//!
|
||||
//! Build with:
|
||||
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
|
||||
|
||||
#![no_std]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::{format, string::String, vec, vec::Vec};
|
||||
use core::alloc::Layout;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
|
||||
|
||||
#[panic_handler]
|
||||
fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
|
||||
core::arch::wasm32::unreachable()
|
||||
}
|
||||
|
||||
// Host functions provided by the runtime. Pointers and lengths cross the
// boundary as plain `i32` values.
unsafe extern "C" {
    /// Receives the response bytes for the current call (see `set_response`).
    fn host_set_result(ptr: i32, len: i32);

    /// Receives a log message; `log_info` passes level 2.
    fn host_log(level: i32, ptr: i32, len: i32);

    /// Emits an event with the given type name and payload bytes.
    /// NOTE(review): the meaning of the returned `i32` is not visible in this
    /// file — confirm against the host API.
    fn host_emit_event(
        type_ptr: i32,
        type_len: i32,
        payload_ptr: i32,
        payload_len: i32,
    ) -> i32;

    /// Looks up a config key. `load_rules` treats a result `<= 0` as "no
    /// value" and a positive result as the size of the value to fetch.
    fn host_get_config(key_ptr: i32, key_len: i32) -> i32;

    /// Copies the pending host buffer into `dest_ptr`. `load_rules` treats the
    /// result as the number of bytes copied, with `<= 0` meaning failure.
    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
}
|
||||
|
||||
fn set_response(json: &[u8]) {
|
||||
unsafe {
|
||||
host_set_result(json.as_ptr() as i32, json.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
fn log_info(msg: &str) {
|
||||
unsafe {
|
||||
host_log(2, msg.as_ptr() as i32, msg.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies the request bytes the host placed at `ptr..ptr + len` into an owned
/// buffer.
///
/// Returns an empty vector when `len` is not positive or when `ptr` is zero or
/// negative. Zero is rejected because building a slice from a null pointer is
/// undefined behaviour, and this plugin's `alloc` export already uses 0 as its
/// "no buffer" result.
///
/// # Safety
///
/// For a positive `ptr` and `len`, the caller must guarantee that the range
/// `ptr..ptr + len` is readable, initialised memory for the duration of the
/// call.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    if ptr <= 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: `ptr` is non-null and `len` is positive here; validity of the
    // range is the caller's contract (see `# Safety`).
    let slice = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    slice.to_vec()
}
|
||||
|
||||
/// Extract a string value from a JSON object for a given key.
///
/// This is a flat textual search, not a JSON parser: it looks for `"key"`
/// followed by `:` anywhere in `json`, at any nesting depth. An occurrence of
/// `"key"` that is not followed by a colon (for example the same text used as
/// a string *value*) is skipped and the search continues.
///
/// The returned slice is the raw text between the value's quotes: backslash
/// escapes are honoured when locating the closing quote (so `\"` does not end
/// the value) but are not decoded.
///
/// Returns `None` if `json` is not UTF-8, the key is absent, the value is not
/// a string, or the string is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let json_str = core::str::from_utf8(json).ok()?;
    let pattern = format!("\"{}\"", key);

    let mut rest = json_str;
    loop {
        let key_pos = rest.find(&pattern)?;
        let after_key = &rest[key_pos + pattern.len()..];

        if let Some(after_colon) = after_key.trim_start().strip_prefix(':') {
            let value = after_colon.trim_start().strip_prefix('"')?;

            // Walk bytes so that an escaped quote (`\"`) is not mistaken for
            // the end of the string. `"` and `\` are ASCII, so `idx` is always
            // a valid char boundary.
            let mut escaped = false;
            for (idx, byte) in value.bytes().enumerate() {
                match byte {
                    _ if escaped => escaped = false,
                    b'\\' => escaped = true,
                    b'"' => return Some(&value[..idx]),
                    _ => {}
                }
            }
            return None;
        }

        // Not a key position; keep looking after this occurrence.
        rest = after_key;
    }
}
|
||||
|
||||
/// One tagging rule: when `pattern` occurs in a media path, `tag` is suggested.
struct Rule {
    /// Substring looked for in the imported media path.
    pattern: String,
    /// Tag name emitted when the pattern matches.
    tag: String,
}
|
||||
|
||||
/// Default rules used when no `rules` config key is present.
|
||||
fn default_rules() -> Vec<Rule> {
|
||||
vec![
|
||||
Rule { pattern: String::from("/music/"), tag: String::from("music") },
|
||||
Rule { pattern: String::from("/photos/"), tag: String::from("photo") },
|
||||
Rule { pattern: String::from("/videos/"), tag: String::from("video") },
|
||||
Rule { pattern: String::from("/books/"), tag: String::from("book") },
|
||||
Rule { pattern: String::from("/documents/"), tag: String::from("document") },
|
||||
]
|
||||
}
|
||||
|
||||
/// Parse the `rules` JSON array from the config buffer.
|
||||
/// Expected format: `[{"pattern":"...","tag":"..."},...]`
|
||||
/// Returns an empty vec on any parse failure (falls back to defaults).
|
||||
fn parse_rules_json(data: &[u8]) -> Vec<Rule> {
|
||||
let text = match core::str::from_utf8(data) {
|
||||
Ok(s) => s,
|
||||
Err(_) => return Vec::new(),
|
||||
};
|
||||
|
||||
let mut rules = Vec::new();
|
||||
// Walk through occurrences of "pattern" keys inside object literals.
|
||||
let mut search = text;
|
||||
while let Some(p_pos) = search.find("\"pattern\"") {
|
||||
let after_p = &search[p_pos + 9..];
|
||||
let after_colon = match after_p.trim_start().strip_prefix(':') {
|
||||
Some(s) => s.trim_start(),
|
||||
None => {
|
||||
search = &search[p_pos + 1..];
|
||||
continue;
|
||||
}
|
||||
};
|
||||
let pattern = if after_colon.starts_with('"') {
|
||||
let inner = &after_colon[1..];
|
||||
match inner.find('"') {
|
||||
Some(end) => String::from(&inner[..end]),
|
||||
None => {
|
||||
search = &search[p_pos + 1..];
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
search = &search[p_pos + 1..];
|
||||
continue;
|
||||
};
|
||||
|
||||
// Now search for "tag" after the current pattern position.
|
||||
let remaining = &search[p_pos..];
|
||||
let tag = if let Some(t_pos) = remaining.find("\"tag\"") {
|
||||
let after_t = &remaining[t_pos + 5..];
|
||||
let after_colon_t = match after_t.trim_start().strip_prefix(':') {
|
||||
Some(s) => s.trim_start(),
|
||||
None => {
|
||||
search = &search[p_pos + 1..];
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if after_colon_t.starts_with('"') {
|
||||
let inner = &after_colon_t[1..];
|
||||
match inner.find('"') {
|
||||
Some(end) => String::from(&inner[..end]),
|
||||
None => {
|
||||
search = &search[p_pos + 1..];
|
||||
continue;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
search = &search[p_pos + 1..];
|
||||
continue;
|
||||
}
|
||||
} else {
|
||||
search = &search[p_pos + 1..];
|
||||
continue;
|
||||
};
|
||||
|
||||
rules.push(Rule { pattern, tag });
|
||||
search = &search[p_pos + 1..];
|
||||
}
|
||||
|
||||
rules
|
||||
}
|
||||
|
||||
/// Load rules from config, falling back to defaults.
|
||||
fn load_rules() -> Vec<Rule> {
|
||||
let key = b"rules";
|
||||
let size = unsafe { host_get_config(key.as_ptr() as i32, key.len() as i32) };
|
||||
if size <= 0 {
|
||||
return default_rules();
|
||||
}
|
||||
|
||||
let buf_size = size as usize;
|
||||
let layout = match Layout::from_size_align(buf_size, 1) {
|
||||
Ok(l) => l,
|
||||
Err(_) => return default_rules(),
|
||||
};
|
||||
let ptr = unsafe { alloc::alloc::alloc(layout) };
|
||||
if ptr.is_null() {
|
||||
return default_rules();
|
||||
}
|
||||
|
||||
let copied = unsafe { host_get_buffer(ptr as i32, size) };
|
||||
if copied <= 0 {
|
||||
unsafe { alloc::alloc::dealloc(ptr, layout) };
|
||||
return default_rules();
|
||||
}
|
||||
|
||||
let data = unsafe { core::slice::from_raw_parts(ptr, copied as usize) };
|
||||
let rules = parse_rules_json(data);
|
||||
unsafe { alloc::alloc::dealloc(ptr, layout) };
|
||||
|
||||
if rules.is_empty() {
|
||||
default_rules()
|
||||
} else {
|
||||
rules
|
||||
}
|
||||
}
|
||||
|
||||
/// Escape a string for safe inclusion in a JSON string value.
///
/// Quotes, backslashes and `\n`, `\r`, `\t` get their short escapes. Every
/// other control character below U+0020 is written as `\u00XX`, because JSON
/// does not allow raw control characters inside a string. All other
/// characters are copied unchanged.
fn json_escape(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if (c as u32) < 0x20 => {
                // Remaining C0 controls: two hex digits are always enough.
                let code = c as usize;
                out.push_str("\\u00");
                out.push(HEX[code >> 4] as char);
                out.push(HEX[code & 0xF] as char);
            }
            _ => out.push(c),
        }
    }
    out
}
|
||||
|
||||
/// Exported allocator entry point: reserves `size` bytes (alignment 1) and
/// returns the address as an `i32`.
///
/// Returns 0 when `size` is not positive and -1 when the layout is invalid or
/// the allocation fails.
#[unsafe(no_mangle)]
pub extern "C" fn alloc(size: i32) -> i32 {
    if size <= 0 {
        return 0;
    }
    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
        return -1;
    };
    // SAFETY: `size > 0`, so `layout` is not zero-sized.
    let block = unsafe { alloc::alloc::alloc(layout) };
    if block.is_null() { -1 } else { block as i32 }
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn initialize() -> i32 {
|
||||
log_info("auto-tagger initialized");
|
||||
0
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn shutdown() -> i32 {
|
||||
log_info("auto-tagger shutdown");
|
||||
0
|
||||
}
|
||||
|
||||
/// Returns the event types this handler is interested in.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn interested_events(_ptr: i32, _len: i32) {
|
||||
set_response(br#"["MediaImported"]"#);
|
||||
}
|
||||
|
||||
/// Handle a `MediaImported` event: check path against rules and emit tag events.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn handle_event(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
|
||||
let media_id = json_get_str(&req, "media_id").unwrap_or("");
|
||||
// The payload is nested; attempt to extract `path` from the top-level
|
||||
// request or from a nested `payload` object.
|
||||
let path = json_get_str(&req, "path").unwrap_or("");
|
||||
|
||||
let rules = load_rules();
|
||||
let mut matched_count = 0u32;
|
||||
|
||||
for rule in &rules {
|
||||
if !path.is_empty() && path.contains(rule.pattern.as_str()) {
|
||||
let event_type = b"AutoTagSuggested";
|
||||
let payload = format!(
|
||||
r#"{{"media_id":"{}","tag":"{}"}}"#,
|
||||
json_escape(media_id),
|
||||
json_escape(&rule.tag),
|
||||
);
|
||||
unsafe {
|
||||
host_emit_event(
|
||||
event_type.as_ptr() as i32,
|
||||
event_type.len() as i32,
|
||||
payload.as_ptr() as i32,
|
||||
payload.len() as i32,
|
||||
);
|
||||
}
|
||||
matched_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if matched_count > 0 {
|
||||
let msg = format!(
|
||||
"auto-tagger: matched {} rule(s) for path: {}",
|
||||
matched_count,
|
||||
path,
|
||||
);
|
||||
log_info(&msg);
|
||||
} else {
|
||||
let msg = format!("auto-tagger: no rules matched for path: {}", path);
|
||||
log_info(&msg);
|
||||
}
|
||||
|
||||
set_response(b"{}");
|
||||
}
|
||||
BIN
examples/plugins/cbz-comics/Cargo.lock
generated
Normal file
BIN
examples/plugins/cbz-comics/Cargo.lock
generated
Normal file
Binary file not shown.
18
examples/plugins/cbz-comics/Cargo.toml
Normal file
18
examples/plugins/cbz-comics/Cargo.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[package]
|
||||
name = "cbz-comics"
|
||||
version = "1.0.0"
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
dlmalloc = { version = "0.2", features = ["global"] }
|
||||
miniz_oxide = { version = "0.8", default-features = false, features = [
|
||||
"with-alloc",
|
||||
] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "s"
|
||||
lto = true
|
||||
strip = true
|
||||
20
examples/plugins/cbz-comics/plugin.toml
Normal file
20
examples/plugins/cbz-comics/plugin.toml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
[plugin]
|
||||
name = "cbz-comics"
|
||||
version = "1.0.0"
|
||||
api_version = "1.0"
|
||||
description = "Supports CBZ (Comic Book ZIP) and CBR files with metadata extraction and thumbnail generation"
|
||||
kind = ["media_type", "metadata_extractor", "thumbnail_generator"]
|
||||
priority = 500
|
||||
|
||||
[plugin.binary]
|
||||
wasm = "cbz_comics.wasm"
|
||||
|
||||
[capabilities]
|
||||
network = false
|
||||
|
||||
[capabilities.filesystem]
|
||||
# Users must add their media root directories here. Example:
|
||||
# read = ["/home/user/comics"]
|
||||
# write = ["/home/user/.cache/pinakes/thumbnails"]
|
||||
read = []
|
||||
write = []
|
||||
742
examples/plugins/cbz-comics/src/lib.rs
Normal file
742
examples/plugins/cbz-comics/src/lib.rs
Normal file
|
|
@ -0,0 +1,742 @@
|
|||
//! CBZ/CBR comics plugin for Pinakes.
|
||||
//!
|
||||
//! Registers comic book ZIP (`cbz`) and RAR (`cbr`) media types, extracts
|
||||
//! metadata from CBZ archives (including `ComicInfo.xml` when present), and
|
||||
//! generates thumbnails from the cover image.
|
||||
//!
|
||||
//! CBR is registered as a media type but metadata extraction is limited to
|
||||
//! format detection only (RAR parsing is not implemented).
|
||||
//!
|
||||
//! ZIP parsing is implemented from scratch without external ZIP crates to keep
|
||||
//! the WASM binary small.
|
||||
//!
|
||||
//! The `filesystem.read` and `filesystem.write` capabilities in `plugin.toml`
|
||||
//! must be configured for the directories containing comic files and the
|
||||
//! thumbnail output directory respectively.
|
||||
//!
|
||||
//! Build with:
|
||||
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
|
||||
|
||||
#![no_std]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::{format, string::{String, ToString}, vec, vec::Vec};
|
||||
use core::alloc::Layout;
|
||||
|
||||
#[global_allocator]
|
||||
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
|
||||
|
||||
#[panic_handler]
|
||||
fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
|
||||
core::arch::wasm32::unreachable()
|
||||
}
|
||||
|
||||
// Host functions provided by the runtime. Pointers and lengths cross the
// boundary as plain `i32` values.
unsafe extern "C" {
    /// Receives the response bytes for the current call (see `set_response`).
    fn host_set_result(ptr: i32, len: i32);

    /// Receives a log message; `log_info` passes level 2.
    fn host_log(level: i32, ptr: i32, len: i32);

    /// Reads the file at the given path. `load_cbz_file` treats a negative
    /// result as failure and any other result as the file size, with the
    /// contents then fetched through `host_get_buffer`.
    fn host_read_file(path_ptr: i32, path_len: i32) -> i32;

    /// Copies the pending host buffer into `dest_ptr`. `load_cbz_file` treats
    /// the result as the number of bytes copied, with `<= 0` meaning failure.
    fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;

    /// Writes `data` to the file at the given path.
    /// NOTE(review): the meaning of the returned `i32` is not visible in this
    /// part of the file — confirm against the host API.
    fn host_write_file(
        path_ptr: i32,
        path_len: i32,
        data_ptr: i32,
        data_len: i32,
    ) -> i32;
}
|
||||
|
||||
fn set_response(json: &[u8]) {
|
||||
unsafe {
|
||||
host_set_result(json.as_ptr() as i32, json.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
fn log_info(msg: &str) {
|
||||
unsafe {
|
||||
host_log(2, msg.as_ptr() as i32, msg.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies the request bytes the host placed at `ptr..ptr + len` into an owned
/// buffer.
///
/// Returns an empty vector when `len` is not positive or when `ptr` is zero or
/// negative. Zero is rejected because building a slice from a null pointer is
/// undefined behaviour, and this plugin's `alloc` export already uses 0 as its
/// "no buffer" result.
///
/// # Safety
///
/// For a positive `ptr` and `len`, the caller must guarantee that the range
/// `ptr..ptr + len` is readable, initialised memory for the duration of the
/// call.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    if ptr <= 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: `ptr` is non-null and `len` is positive here; validity of the
    // range is the caller's contract (see `# Safety`).
    let slice = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    slice.to_vec()
}
|
||||
|
||||
/// Extract a string value from a JSON object for a given key.
///
/// This is a flat textual search, not a JSON parser: it looks for `"key"`
/// followed by `:` anywhere in `json`, at any nesting depth. An occurrence of
/// `"key"` that is not followed by a colon (for example the same text used as
/// a string *value*) is skipped and the search continues.
///
/// The returned slice is the raw text between the value's quotes: backslash
/// escapes are honoured when locating the closing quote (so `\"` does not end
/// the value) but are not decoded.
///
/// Returns `None` if `json` is not UTF-8, the key is absent, the value is not
/// a string, or the string is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let json_str = core::str::from_utf8(json).ok()?;
    let pattern = format!("\"{}\"", key);

    let mut rest = json_str;
    loop {
        let key_pos = rest.find(&pattern)?;
        let after_key = &rest[key_pos + pattern.len()..];

        if let Some(after_colon) = after_key.trim_start().strip_prefix(':') {
            let value = after_colon.trim_start().strip_prefix('"')?;

            // Walk bytes so that an escaped quote (`\"`) is not mistaken for
            // the end of the string. `"` and `\` are ASCII, so `idx` is always
            // a valid char boundary.
            let mut escaped = false;
            for (idx, byte) in value.bytes().enumerate() {
                match byte {
                    _ if escaped => escaped = false,
                    b'\\' => escaped = true,
                    b'"' => return Some(&value[..idx]),
                    _ => {}
                }
            }
            return None;
        }

        // Not a key position; keep looking after this occurrence.
        rest = after_key;
    }
}
|
||||
|
||||
/// Escape a string for safe inclusion in a JSON string value.
///
/// Quotes, backslashes and `\n`, `\r`, `\t` get their short escapes. Every
/// other control character below U+0020 is written as `\u00XX`, because JSON
/// does not allow raw control characters inside a string. All other
/// characters are copied unchanged.
fn json_escape(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789abcdef";

    let mut out = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            c if (c as u32) < 0x20 => {
                // Remaining C0 controls: two hex digits are always enough.
                let code = c as usize;
                out.push_str("\\u00");
                out.push(HEX[code >> 4] as char);
                out.push(HEX[code & 0xF] as char);
            }
            _ => out.push(c),
        }
    }
    out
}
|
||||
|
||||
// Size limit for comic archives (20 MiB); `load_cbz_file` rejects anything of
// this size or larger.
const MAX_FILE_BYTES: usize = 20 << 20;

// ZIP record signatures: the on-disk magic bytes "PK.." read as a
// little-endian u32.
const SIG_LOCAL_FILE: u32 = u32::from_le_bytes(*b"PK\x03\x04");
const SIG_CENTRAL_DIR: u32 = u32::from_le_bytes(*b"PK\x01\x02");
const SIG_EOCD: u32 = u32::from_le_bytes(*b"PK\x05\x06");

// ZIP compression method identifiers handled by this plugin.
const COMPRESS_STORE: u16 = 0;
const COMPRESS_DEFLATE: u16 = 8;
|
||||
|
||||
/// Decodes the little-endian `u16` stored at `data[offset..offset + 2]`.
/// Yields `None` when those two bytes are not fully inside `data`.
fn read_u16_le(data: &[u8], offset: usize) -> Option<u16> {
    let end = offset.checked_add(2)?;
    let pair = data.get(offset..end)?;
    Some(u16::from_le_bytes([pair[0], pair[1]]))
}
|
||||
|
||||
/// Decodes the little-endian `u32` stored at `data[offset..offset + 4]`.
/// Yields `None` when those four bytes are not fully inside `data`.
fn read_u32_le(data: &[u8], offset: usize) -> Option<u32> {
    let end = offset.checked_add(4)?;
    let quad = data.get(offset..end)?;
    Some(u32::from_le_bytes([quad[0], quad[1], quad[2], quad[3]]))
}
|
||||
|
||||
/// Decodes the big-endian `u16` stored at `data[offset..offset + 2]`.
/// Yields `None` when those two bytes are not fully inside `data`.
fn read_u16_be(data: &[u8], offset: usize) -> Option<u16> {
    let end = offset.checked_add(2)?;
    let pair = data.get(offset..end)?;
    Some(u16::from_be_bytes([pair[0], pair[1]]))
}
|
||||
|
||||
/// Decodes the big-endian `u32` stored at `data[offset..offset + 4]`.
/// Yields `None` when those four bytes are not fully inside `data`.
fn read_u32_be(data: &[u8], offset: usize) -> Option<u32> {
    let end = offset.checked_add(4)?;
    let quad = data.get(offset..end)?;
    // Fold most-significant byte first.
    Some(quad.iter().fold(0u32, |acc, &byte| (acc << 8) | byte as u32))
}
|
||||
|
||||
/// The subset of a ZIP central directory record that this plugin keeps.
struct ZipEntry {
    /// Entry file name, lowercased by `parse_central_directory`.
    name: String,
    /// Compression method (see `COMPRESS_STORE` / `COMPRESS_DEFLATE`).
    compression: u16,
    /// Size in bytes of the entry's data as stored in the archive.
    compressed_size: u32,
    /// Offset of the entry's local file header from the start of the archive.
    local_offset: u32,
}
|
||||
|
||||
/// Find the End of Central Directory record offset by scanning backwards.
|
||||
fn find_eocd(data: &[u8]) -> Option<usize> {
|
||||
if data.len() < 22 {
|
||||
return None;
|
||||
}
|
||||
// Scan backwards for the EOCD signature. The maximum comment size is
|
||||
// 65535 bytes, so we only need to scan that far from the end.
|
||||
let scan_start = if data.len() > 22 + 65535 {
|
||||
data.len() - 22 - 65535
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let mut i = data.len() - 22;
|
||||
loop {
|
||||
if read_u32_le(data, i) == Some(SIG_EOCD) {
|
||||
return Some(i);
|
||||
}
|
||||
if i == scan_start {
|
||||
break;
|
||||
}
|
||||
i -= 1;
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Parse all central directory entries from a ZIP archive.
|
||||
fn parse_central_directory(data: &[u8]) -> Vec<ZipEntry> {
|
||||
let mut entries = Vec::new();
|
||||
|
||||
let eocd_offset = match find_eocd(data) {
|
||||
Some(o) => o,
|
||||
None => return entries,
|
||||
};
|
||||
|
||||
// EOCD layout (offsets relative to EOCD start):
|
||||
// 0: signature (4)
|
||||
// 4: disk number (2)
|
||||
// 6: start disk (2)
|
||||
// 8: entries on disk (2)
|
||||
// 10: total entries (2)
|
||||
// 12: central dir size (4)
|
||||
// 16: central dir offset (4)
|
||||
// 20: comment length (2)
|
||||
let cd_offset = match read_u32_le(data, eocd_offset + 16) {
|
||||
Some(o) => o as usize,
|
||||
None => return entries,
|
||||
};
|
||||
let total_entries = match read_u16_le(data, eocd_offset + 10) {
|
||||
Some(n) => n as usize,
|
||||
None => return entries,
|
||||
};
|
||||
|
||||
let mut pos = cd_offset;
|
||||
for _ in 0..total_entries {
|
||||
if pos + 46 > data.len() {
|
||||
break;
|
||||
}
|
||||
if read_u32_le(data, pos) != Some(SIG_CENTRAL_DIR) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Central directory entry layout:
|
||||
// 0: signature (4)
|
||||
// 4: version made by (2)
|
||||
// 6: version needed (2)
|
||||
// 8: flags (2)
|
||||
// 10: compression (2)
|
||||
// 12: mod time (2)
|
||||
// 14: mod date (2)
|
||||
// 16: crc32 (4)
|
||||
// 20: compressed size (4)
|
||||
// 24: uncompressed size (4)
|
||||
// 28: filename length (2)
|
||||
// 30: extra field length (2)
|
||||
// 32: file comment length (2)
|
||||
// 34: disk start (2)
|
||||
// 36: internal attrs (2)
|
||||
// 38: external attrs (4)
|
||||
// 42: local header offset (4)
|
||||
// 46: filename...
|
||||
let compression = match read_u16_le(data, pos + 10) { Some(v) => v, None => break };
|
||||
let compressed_size = match read_u32_le(data, pos + 20) { Some(v) => v, None => break };
|
||||
// uncompressed_size at pos+24 is intentionally not stored; size comes from decompressor output.
|
||||
let fname_len = match read_u16_le(data, pos + 28) { Some(v) => v as usize, None => break };
|
||||
let extra_len = match read_u16_le(data, pos + 30) { Some(v) => v as usize, None => break };
|
||||
let comment_len = match read_u16_le(data, pos + 32) { Some(v) => v as usize, None => break };
|
||||
let local_offset = match read_u32_le(data, pos + 42) { Some(v) => v, None => break };
|
||||
|
||||
let fname_start = pos + 46;
|
||||
let fname_end = fname_start + fname_len;
|
||||
if fname_end > data.len() {
|
||||
break;
|
||||
}
|
||||
|
||||
let name = core::str::from_utf8(&data[fname_start..fname_end])
|
||||
.unwrap_or("")
|
||||
.to_ascii_lowercase();
|
||||
|
||||
entries.push(ZipEntry {
|
||||
name,
|
||||
compression,
|
||||
compressed_size,
|
||||
local_offset,
|
||||
});
|
||||
|
||||
pos = fname_end + extra_len + comment_len;
|
||||
}
|
||||
|
||||
entries
|
||||
}
|
||||
|
||||
/// Read raw bytes for a local file entry (the actual compressed/stored data).
|
||||
/// Returns a slice into `data` containing the compressed bytes.
|
||||
fn local_file_data<'a>(data: &'a [u8], entry: &ZipEntry) -> Option<&'a [u8]> {
|
||||
let off = entry.local_offset as usize;
|
||||
if off + 30 > data.len() {
|
||||
return None;
|
||||
}
|
||||
if read_u32_le(data, off) != Some(SIG_LOCAL_FILE) {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Local file header layout:
|
||||
// 0: signature (4)
|
||||
// 4: version needed (2)
|
||||
// 6: flags (2)
|
||||
// 8: compression (2)
|
||||
// 10: mod time (2)
|
||||
// 12: mod date (2)
|
||||
// 14: crc32 (4)
|
||||
// 18: compressed size (4)
|
||||
// 22: uncompressed size (4)
|
||||
// 26: filename length (2)
|
||||
// 28: extra length (2)
|
||||
// 30: filename...
|
||||
let fname_len = read_u16_le(data, off + 26)? as usize;
|
||||
let extra_len = read_u16_le(data, off + 28)? as usize;
|
||||
let data_start = off + 30 + fname_len + extra_len;
|
||||
let data_end = data_start + entry.compressed_size as usize;
|
||||
if data_end > data.len() {
|
||||
return None;
|
||||
}
|
||||
Some(&data[data_start..data_end])
|
||||
}
|
||||
|
||||
/// Decompress a stored (STORE) or deflated (DEFLATE) entry.
|
||||
/// Returns the uncompressed bytes.
|
||||
fn decompress_entry(data: &[u8], entry: &ZipEntry) -> Option<Vec<u8>> {
|
||||
let raw = local_file_data(data, entry)?;
|
||||
match entry.compression {
|
||||
COMPRESS_STORE => Some(raw.to_vec()),
|
||||
COMPRESS_DEFLATE => {
|
||||
miniz_oxide::inflate::decompress_to_vec(raw).ok()
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Tells whether `name` ends in one of the recognised image extensions.
/// The comparison is case-sensitive and expects lowercase extensions.
fn is_image_filename(name: &str) -> bool {
    const IMAGE_SUFFIXES: [&str; 4] = [".jpg", ".jpeg", ".png", ".webp"];
    IMAGE_SUFFIXES.iter().any(|&suffix| name.ends_with(suffix))
}
|
||||
|
||||
/// Returns the text between the first `<tag>` and the next `</tag>` in `xml`.
///
/// This is a plain substring search: the opening tag must appear exactly as
/// `<tag>` (no attributes), and the text is returned untrimmed. `None` is
/// returned when either tag is missing.
fn xml_get_text<'a>(xml: &'a str, tag: &str) -> Option<&'a str> {
    let open_tag = format!("<{}>", tag);
    let close_tag = format!("</{}>", tag);

    let (_, body) = xml.split_once(open_tag.as_str())?;
    let (text, _) = body.split_once(close_tag.as_str())?;
    Some(text)
}
|
||||
|
||||
/// Fields read from a `ComicInfo.xml` document by `parse_comic_info`.
/// Each field is `None` when its element is absent or empty.
struct ComicInfo {
    /// Text of the `Title` element.
    title: Option<String>,
    /// Text of the `Series` element.
    series: Option<String>,
    /// Text of the `Number` element.
    issue_number: Option<String>,
    /// Text of the `Writer` element.
    writer: Option<String>,
    /// Text of the `PageCount` element (kept as text, not parsed).
    page_count: Option<String>,
    /// Text of the `LanguageISO` element.
    language: Option<String>,
    /// Text of the `Genre` element.
    genre: Option<String>,
    /// Text of the `Summary` element.
    summary: Option<String>,
}
|
||||
|
||||
/// Parse key fields from a ComicInfo.xml byte slice.
|
||||
fn parse_comic_info(data: &[u8]) -> ComicInfo {
|
||||
let text = core::str::from_utf8(data).unwrap_or("");
|
||||
ComicInfo {
|
||||
title: xml_get_text(text, "Title") .map(|s| s.trim().to_ascii_lowercase()).filter(|s| !s.is_empty()).map(|s| {
|
||||
// Re-capitalize first letter for title
|
||||
let mut c = s.chars();
|
||||
match c.next() {
|
||||
None => String::new(),
|
||||
Some(f) => f.to_uppercase().collect::<String>() + c.as_str(),
|
||||
}
|
||||
}),
|
||||
series: xml_get_text(text, "Series") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()),
|
||||
issue_number: xml_get_text(text, "Number") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()),
|
||||
writer: xml_get_text(text, "Writer") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()),
|
||||
page_count: xml_get_text(text, "PageCount") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()),
|
||||
language: xml_get_text(text, "LanguageISO").map(|s| s.trim().to_string()).filter(|s| !s.is_empty()),
|
||||
genre: xml_get_text(text, "Genre") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()),
|
||||
summary: xml_get_text(text, "Summary") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Pixel dimensions and detected container format of an image.
struct ImageInfo {
    /// Width in pixels; 0 when `parse_image_info` could not determine it.
    width: u32,
    /// Height in pixels; 0 when `parse_image_info` could not determine it.
    height: u32,
    /// Format name as produced by `parse_image_info`: "jpeg", "png" or "webp".
    format: &'static str,
}
|
||||
|
||||
/// Parse image dimensions and detect format from raw image bytes.
|
||||
fn parse_image_info(data: &[u8]) -> Option<ImageInfo> {
|
||||
if data.len() < 4 {
|
||||
return None;
|
||||
}
|
||||
// JPEG: starts with 0xFF 0xD8
|
||||
if data[0] == 0xFF && data[1] == 0xD8 {
|
||||
// Scan for SOF0 (0xFF 0xC0) or SOF2 (0xFF 0xC2) marker
|
||||
let mut i = 2usize;
|
||||
while i + 8 < data.len() {
|
||||
if data[i] == 0xFF {
|
||||
let marker = data[i + 1];
|
||||
if marker == 0xC0 || marker == 0xC2 {
|
||||
// SOF marker layout:
|
||||
// 0: 0xFF
|
||||
// 1: marker
|
||||
// 2-3: segment length (big-endian)
|
||||
// 4: precision
|
||||
// 5-6: height (big-endian u16)
|
||||
// 7-8: width (big-endian u16)
|
||||
let height = read_u16_be(data, i + 5)? as u32;
|
||||
let width = read_u16_be(data, i + 7)? as u32;
|
||||
return Some(ImageInfo { width, height, format: "jpeg" });
|
||||
} else if marker == 0xFF {
|
||||
// Padding byte
|
||||
i += 1;
|
||||
continue;
|
||||
} else if marker == 0xD8 || marker == 0xD9 {
|
||||
// SOI / EOI - no length field
|
||||
i += 2;
|
||||
continue;
|
||||
} else {
|
||||
// Skip segment: length at i+2 (includes the 2 length bytes)
|
||||
if let Some(seg_len) = read_u16_be(data, i + 2) {
|
||||
i += 2 + seg_len as usize;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
// Return a JPEG without dimensions if SOF not found
|
||||
return Some(ImageInfo { width: 0, height: 0, format: "jpeg" });
|
||||
}
|
||||
|
||||
// PNG: starts with 0x89 0x50 0x4E 0x47 ('PNG')
|
||||
if data.len() >= 24 && data[0] == 0x89 && data[1] == 0x50 && data[2] == 0x4E && data[3] == 0x47 {
|
||||
// IHDR chunk: width at bytes 16-19, height at bytes 20-23 (big-endian u32)
|
||||
let width = read_u32_be(data, 16)?;
|
||||
let height = read_u32_be(data, 20)?;
|
||||
return Some(ImageInfo { width, height, format: "png" });
|
||||
}
|
||||
|
||||
// WebP: RIFF....WEBP
|
||||
if data.len() >= 12
|
||||
&& &data[0..4] == b"RIFF"
|
||||
&& &data[8..12] == b"WEBP"
|
||||
{
|
||||
return Some(ImageInfo { width: 0, height: 0, format: "webp" });
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Load a CBZ archive into memory. Returns the raw bytes or an error string.
|
||||
fn load_cbz_file(path: &str) -> Result<Vec<u8>, &'static str> {
|
||||
let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
|
||||
if file_size < 0 {
|
||||
return Err("read failed");
|
||||
}
|
||||
if file_size as usize >= MAX_FILE_BYTES {
|
||||
return Err("too large");
|
||||
}
|
||||
let buf_size = file_size as usize;
|
||||
if buf_size == 0 {
|
||||
return Ok(Vec::new());
|
||||
}
|
||||
|
||||
let layout = Layout::from_size_align(buf_size, 1).map_err(|_| "alloc failed")?;
|
||||
let buf_ptr = unsafe { alloc::alloc::alloc(layout) };
|
||||
if buf_ptr.is_null() {
|
||||
return Err("alloc failed");
|
||||
}
|
||||
|
||||
let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) };
|
||||
if copied <= 0 {
|
||||
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
|
||||
return Err("buffer copy failed");
|
||||
}
|
||||
|
||||
let data = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) }.to_vec();
|
||||
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
|
||||
Ok(data)
|
||||
}
|
||||
|
||||
/// Exported allocator entry point: reserves `size` bytes (alignment 1) and
/// returns the address as an `i32`.
///
/// Returns 0 when `size` is not positive and -1 when the layout is invalid or
/// the allocation fails.
#[unsafe(no_mangle)]
pub extern "C" fn alloc(size: i32) -> i32 {
    if size <= 0 {
        return 0;
    }
    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
        return -1;
    };
    // SAFETY: `size > 0`, so `layout` is not zero-sized.
    let block = unsafe { alloc::alloc::alloc(layout) };
    if block.is_null() { -1 } else { block as i32 }
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn initialize() -> i32 {
|
||||
log_info("cbz-comics initialized");
|
||||
0
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn shutdown() -> i32 {
|
||||
log_info("cbz-comics shutdown");
|
||||
0
|
||||
}
|
||||
|
||||
/// Returns the comic media type definitions.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn supported_media_types(_ptr: i32, _len: i32) {
|
||||
let response = br#"[
|
||||
{"id":"comic-cbz","name":"Comic Book ZIP","category":"document","extensions":["cbz"],"mime_types":["application/vnd.comicbook+zip"]},
|
||||
{"id":"comic-cbr","name":"Comic Book RAR","category":"document","extensions":["cbr"],"mime_types":["application/vnd.comicbook-rar"]}
|
||||
]"#;
|
||||
set_response(response);
|
||||
}
|
||||
|
||||
/// Check whether this plugin can handle a given path.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn can_handle(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
let path = json_get_str(&req, "path").unwrap_or("").to_ascii_lowercase();
|
||||
let can = path.ends_with(".cbz") || path.ends_with(".cbr");
|
||||
if can {
|
||||
set_response(br#"{"can_handle":true}"#);
|
||||
} else {
|
||||
set_response(br#"{"can_handle":false}"#);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the media type IDs this extractor supports.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
|
||||
set_response(br#"["comic-cbz","comic-cbr"]"#);
|
||||
}
|
||||
|
||||
/// Extract metadata from a CBZ or CBR file.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
let path = match json_get_str(&req, "path") {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
set_response(br#"{"extra":{"error":"missing path"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let lower = path.to_ascii_lowercase();
|
||||
|
||||
// CBR: register the type but do not attempt to parse RAR.
|
||||
if lower.ends_with(".cbr") {
|
||||
set_response(br#"{"extra":{"format":"cbr","note":"cbr-unsupported"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
// Load CBZ archive
|
||||
let data = match load_cbz_file(path) {
|
||||
Ok(d) => d,
|
||||
Err("too large") => {
|
||||
set_response(br#"{"extra":{"format":"cbz","too_large":"true"}}"#);
|
||||
return;
|
||||
}
|
||||
Err(e) => {
|
||||
let resp = format!(r#"{{"extra":{{"format":"cbz","error":"{}"}}}}"#, e);
|
||||
set_response(resp.as_bytes());
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let entries = parse_central_directory(&data);
|
||||
|
||||
// Count image files as page count.
|
||||
let image_count = entries.iter().filter(|e| is_image_filename(&e.name)).count();
|
||||
|
||||
// Look for ComicInfo.xml (case-insensitive).
|
||||
let comic_info_entry = entries.iter().find(|e| {
|
||||
let n = e.name.as_str();
|
||||
n == "comicinfo.xml" || n.ends_with("/comicinfo.xml")
|
||||
});
|
||||
|
||||
let info = if let Some(entry) = comic_info_entry {
|
||||
// Only decompress STORE entries here for simplicity; skip DEFLATE ones.
|
||||
if entry.compression == COMPRESS_STORE || entry.compression == COMPRESS_DEFLATE {
|
||||
if let Some(xml_bytes) = decompress_entry(&data, entry) {
|
||||
Some(parse_comic_info(&xml_bytes))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let msg = format!(
|
||||
"cbz-comics: {} entries, {} images, ComicInfo.xml={}",
|
||||
entries.len(),
|
||||
image_count,
|
||||
info.is_some(),
|
||||
);
|
||||
log_info(&msg);
|
||||
|
||||
// Build response JSON
|
||||
let mut extra_pairs: Vec<(&str, String)> = vec![
|
||||
("format", String::from("cbz")),
|
||||
];
|
||||
|
||||
let page_count_str;
|
||||
if let Some(ref ci) = info {
|
||||
if let Some(ref pc) = ci.page_count {
|
||||
page_count_str = pc.clone();
|
||||
extra_pairs.push(("page_count", page_count_str.clone()));
|
||||
} else {
|
||||
page_count_str = format!("{}", image_count);
|
||||
extra_pairs.push(("page_count", page_count_str.clone()));
|
||||
}
|
||||
if let Some(ref s) = ci.series { extra_pairs.push(("series", s.clone())) }
|
||||
if let Some(ref n) = ci.issue_number { extra_pairs.push(("issue_number", n.clone())) }
|
||||
if let Some(ref l) = ci.language { extra_pairs.push(("language", l.clone())) }
|
||||
} else {
|
||||
page_count_str = format!("{}", image_count);
|
||||
extra_pairs.push(("page_count", page_count_str.clone()));
|
||||
}
|
||||
|
||||
// Build extra JSON object
|
||||
let mut extra_json = String::from("{");
|
||||
for (i, (k, v)) in extra_pairs.iter().enumerate() {
|
||||
if i > 0 { extra_json.push(','); }
|
||||
extra_json.push('"');
|
||||
extra_json.push_str(k);
|
||||
extra_json.push_str("\":\"");
|
||||
extra_json.push_str(&json_escape(v));
|
||||
extra_json.push('"');
|
||||
}
|
||||
extra_json.push('}');
|
||||
|
||||
let title_field = info.as_ref()
|
||||
.and_then(|ci| ci.title.as_ref())
|
||||
.map(|t| format!(r#","title":"{}""#, json_escape(t)))
|
||||
.unwrap_or_default();
|
||||
|
||||
let artist_field = info.as_ref()
|
||||
.and_then(|ci| ci.writer.as_ref())
|
||||
.map(|w| format!(r#","artist":"{}""#, json_escape(w)))
|
||||
.unwrap_or_default();
|
||||
|
||||
let genre_field = info.as_ref()
|
||||
.and_then(|ci| ci.genre.as_ref())
|
||||
.map(|g| format!(r#","genre":"{}""#, json_escape(g)))
|
||||
.unwrap_or_default();
|
||||
|
||||
let desc_field = info.as_ref()
|
||||
.and_then(|ci| ci.summary.as_ref())
|
||||
.map(|s| format!(r#","description":"{}""#, json_escape(s)))
|
||||
.unwrap_or_default();
|
||||
|
||||
let resp = format!(
|
||||
r#"{{"extra":{}{}{}{}{}}}"#,
|
||||
extra_json, title_field, artist_field, genre_field, desc_field,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
}
|
||||
|
||||
/// Generate a thumbnail from the cover image of a CBZ archive.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn generate_thumbnail(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
let source_path = match json_get_str(&req, "source_path") {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
let output_path = match json_get_str(&req, "output_path") {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let lower = source_path.to_ascii_lowercase();
|
||||
if !lower.ends_with(".cbz") {
|
||||
set_response(br#"{"path":"","width":0,"height":0,"format":"unknown"}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let data = match load_cbz_file(source_path) {
|
||||
Ok(d) => d,
|
||||
Err(_) => {
|
||||
set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let entries = parse_central_directory(&data);
|
||||
|
||||
// Find alphabetically first image file for the cover.
|
||||
let mut image_entries: Vec<&ZipEntry> = entries.iter().filter(|e| is_image_filename(&e.name)).collect();
|
||||
image_entries.sort_by(|a, b| a.name.as_str().cmp(b.name.as_str()));
|
||||
|
||||
let cover = match image_entries.first() {
|
||||
Some(e) => e,
|
||||
None => {
|
||||
set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let image_bytes = match decompress_entry(&data, cover) {
|
||||
Some(b) => b,
|
||||
None => {
|
||||
set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let info = parse_image_info(&image_bytes).unwrap_or(ImageInfo {
|
||||
width: 0, height: 0, format: "jpeg",
|
||||
});
|
||||
|
||||
// Write thumbnail bytes to output path
|
||||
let write_result = unsafe {
|
||||
host_write_file(
|
||||
output_path.as_ptr() as i32,
|
||||
output_path.len() as i32,
|
||||
image_bytes.as_ptr() as i32,
|
||||
image_bytes.len() as i32,
|
||||
)
|
||||
};
|
||||
if write_result < 0 {
|
||||
set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let msg = format!(
|
||||
"cbz-comics: thumbnail {}x{} {} written to {}",
|
||||
info.width, info.height, info.format, output_path,
|
||||
);
|
||||
log_info(&msg);
|
||||
|
||||
let resp = format!(
|
||||
r#"{{"path":"{}","width":{},"height":{},"format":"{}"}}"#,
|
||||
json_escape(output_path),
|
||||
info.width,
|
||||
info.height,
|
||||
info.format,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
}
|
||||
BIN
examples/plugins/subtitle-detector/Cargo.lock
generated
Normal file
BIN
examples/plugins/subtitle-detector/Cargo.lock
generated
Normal file
Binary file not shown.
15
examples/plugins/subtitle-detector/Cargo.toml
Normal file
15
examples/plugins/subtitle-detector/Cargo.toml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "subtitle-detector"
|
||||
version = "1.0.0"
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
dlmalloc = { version = "0.2", features = ["global"] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "s"
|
||||
lto = true
|
||||
strip = true
|
||||
18
examples/plugins/subtitle-detector/plugin.toml
Normal file
18
examples/plugins/subtitle-detector/plugin.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[plugin]
|
||||
name = "subtitle-detector"
|
||||
version = "1.0.0"
|
||||
api_version = "1.0"
|
||||
description = "Registers SRT, VTT, and ASS subtitle formats and extracts language and entry count metadata"
|
||||
kind = ["media_type", "metadata_extractor"]
|
||||
priority = 500
|
||||
|
||||
[plugin.binary]
|
||||
wasm = "subtitle_detector.wasm"
|
||||
|
||||
[capabilities]
|
||||
network = false
|
||||
|
||||
[capabilities.filesystem]
|
||||
# Users must add their media root directories here. Example:
|
||||
# read = ["/home/user/media", "/mnt/nas/subtitles"]
|
||||
read = []
|
||||
345
examples/plugins/subtitle-detector/src/lib.rs
Normal file
345
examples/plugins/subtitle-detector/src/lib.rs
Normal file
|
|
@ -0,0 +1,345 @@
|
|||
//! Subtitle-detector plugin for Pinakes.
|
||||
//!
|
||||
//! Registers SRT, VTT, and ASS/SSA subtitle file formats and extracts
|
||||
//! language code and entry count metadata from them.
|
||||
//!
|
||||
//! Registered media types:
|
||||
//! - `subtitle-srt`: extensions `["srt"]`, mime `["text/x-subrip"]`
|
||||
//! - `subtitle-vtt`: extensions `["vtt"]`, mime `["text/vtt"]`
|
||||
//! - `subtitle-ass`: extensions `["ass","ssa"]`, mime `["text/x-ass"]`
|
||||
//!
|
||||
//! Language detection uses filename conventions: `movie.en.srt` -> `en`.
|
||||
//!
|
||||
//! The `filesystem.read` capability in `plugin.toml` must be configured
|
||||
//! to include the directories containing subtitle files.
|
||||
//!
|
||||
//! Build with:
|
||||
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
|
||||
|
||||
#![no_std]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::{format, vec::Vec};
|
||||
use core::alloc::Layout;
|
||||
|
||||
// Global allocator for this `#![no_std]` crate: every allocation made
// through the `alloc` crate (Vec, String, format!) is served by dlmalloc.
#[global_allocator]
|
||||
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
|
||||
|
||||
/// Panic hook required because the crate is `#![no_std]`.
///
/// The panic information is discarded; execution ends by issuing the wasm
/// `unreachable` instruction, so this function never returns.
#[panic_handler]
|
||||
fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
|
||||
core::arch::wasm32::unreachable()
|
||||
}
|
||||
|
||||
// Host functions provided by the runtime.
// NOTE(review): the contracts described below are inferred from how this
// file calls each function; confirm them against the host documentation.
|
||||
unsafe extern "C" {
|
||||
// Hands `len` bytes starting at `ptr` to the host as the call's result.
fn host_set_result(ptr: i32, len: i32);
|
||||
// Emits a log message; `log_info` passes level 2.
fn host_log(level: i32, ptr: i32, len: i32);
|
||||
// Asks the host to load the file named by the path bytes. Callers treat a
// negative return as failure and any other value as the file size in bytes.
fn host_read_file(path_ptr: i32, path_len: i32) -> i32;
|
||||
// Copies the loaded file into `dest_ptr` (capacity `dest_len`). Callers
// treat a positive return as the number of bytes copied.
fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
|
||||
}
|
||||
|
||||
fn set_response(json: &[u8]) {
|
||||
unsafe {
|
||||
host_set_result(json.as_ptr() as i32, json.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
fn log_info(msg: &str) {
|
||||
unsafe {
|
||||
host_log(2, msg.as_ptr() as i32, msg.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies the request bytes at `ptr .. ptr + len` into an owned buffer.
///
/// Returns an empty vector when `ptr` is null (0) or negative, or when `len`
/// is not positive.
///
/// # Safety
///
/// For a positive `ptr` and `len`, the caller must guarantee that the whole
/// range is readable, initialised memory for the duration of the call.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    // Address 0 is Rust's null pointer and must never reach
    // `slice::from_raw_parts`, so it is rejected together with negatives.
    if ptr <= 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: `ptr` is non-null and `len` is positive; validity of the range
    // itself is the caller's obligation (see `# Safety`).
    let slice = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    slice.to_vec()
}
|
||||
|
||||
/// Extract a string value from a JSON object for a given key.
///
/// This is a minimal scanner, not a JSON parser. It looks for `"key"`
/// followed by `:` and a string, and returns the raw text between the
/// quotes, borrowed from `json`.
///
/// - An occurrence of `"key"` that is not followed by `:` (for example the
///   same text used as a value) is skipped and the search continues.
/// - The closing quote is the first quote not preceded by a backslash
///   escape, so values containing `\"` are not cut short.
/// - Escape sequences are NOT decoded; the slice is returned as written.
///
/// Returns `None` when `json` is not UTF-8, the key is absent, the value is
/// not a string, or the string is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let text = core::str::from_utf8(json).ok()?;
    let quoted_key = format!("\"{}\"", key);

    let mut search_from = 0usize;
    while let Some(found) = text[search_from..].find(&quoted_key) {
        // `quoted_key` ends in an ASCII quote, so this is a char boundary.
        let after_key = search_from + found + quoted_key.len();
        search_from = after_key;

        let Some(after_colon) = text[after_key..].trim_start().strip_prefix(':') else {
            // Matched text was not used as a key; try the next occurrence.
            continue;
        };
        // A key was found: a non-string value means there is nothing to return.
        let value = after_colon.trim_start().strip_prefix('"')?;

        let mut escaped = false;
        for (idx, byte) in value.bytes().enumerate() {
            if escaped {
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                // `"` is ASCII, so `idx` is a valid char boundary.
                return Some(&value[..idx]);
            }
        }
        return None;
    }
    None
}
|
||||
|
||||
/// Escape a string for safe inclusion in a JSON string value.
///
/// `"` and `\` are backslash-escaped, newline / carriage return / tab use
/// their short forms, and every other control character below U+0020 is
/// written as `\u00XX`, as JSON requires. All other characters are copied
/// unchanged.
fn json_escape(s: &str) -> alloc::string::String {
    const HEX: &[u8; 16] = b"0123456789abcdef";

    let mut out = alloc::string::String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            '"' => out.push_str("\\\""),
            '\\' => out.push_str("\\\\"),
            '\n' => out.push_str("\\n"),
            '\r' => out.push_str("\\r"),
            '\t' => out.push_str("\\t"),
            // Remaining C0 controls may not appear raw inside a JSON string.
            c if (c as u32) < 0x20 => {
                let code = c as usize;
                out.push_str("\\u00");
                out.push(HEX[code >> 4] as char);
                out.push(HEX[code & 0xF] as char);
            }
            _ => out.push(c),
        }
    }
    out
}
|
||||
|
||||
// 512 KB content read limit for subtitle files.
// `extract_metadata` compares the reported file size with `>=`, so a file of
// exactly this size is already answered with `too_large` and never copied.
|
||||
const MAX_FILE_BYTES: usize = 512 * 1024;
|
||||
|
||||
/// Subtitle format variants, as recognised from a file extension by
/// `detect_format`.
|
||||
enum SubtitleFormat {
|
||||
/// SubRip (`.srt`).
Srt,
|
||||
/// WebVTT (`.vtt`).
Vtt,
|
||||
/// SubStation Alpha; `detect_format` maps both `.ass` and `.ssa` here.
Ass,
|
||||
}
|
||||
|
||||
/// Determine subtitle format from file path extension.
|
||||
fn detect_format(path: &str) -> Option<SubtitleFormat> {
|
||||
let lower = path.to_ascii_lowercase();
|
||||
if lower.ends_with(".srt") {
|
||||
Some(SubtitleFormat::Srt)
|
||||
} else if lower.ends_with(".vtt") {
|
||||
Some(SubtitleFormat::Vtt)
|
||||
} else if lower.ends_with(".ass") || lower.ends_with(".ssa") {
|
||||
Some(SubtitleFormat::Ass)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Looks for a language tag in a subtitle file name.
///
/// The tag is the dot-separated segment just before the final extension,
/// e.g. `movie.en.srt` gives `en` and `film.fra.vtt` gives `fra`. It is
/// accepted only when it is two or three ASCII letters; the matched text is
/// returned as written (no case folding). Only `/` is treated as a
/// directory separator.
fn detect_language(path: &str) -> Option<&str> {
    let file_name = match path.rfind('/') {
        Some(slash) => &path[slash + 1..],
        None => path,
    };
    // Drop the final extension, if there is one.
    let stem = file_name
        .rsplit_once('.')
        .map_or(file_name, |(head, _ext)| head);
    // The candidate is whatever follows the last remaining dot.
    let (_, code) = stem.rsplit_once('.')?;

    let plausible =
        matches!(code.len(), 2 | 3) && code.bytes().all(|b| b.is_ascii_alphabetic());
    plausible.then_some(code)
}
|
||||
|
||||
/// Counts how many times the byte sequence `-->` occurs in `data`.
///
/// Two occurrences of `-->` can never overlap, so counting every matching
/// three-byte window gives the same number as a skip-ahead scan.
fn count_arrow_markers(data: &[u8]) -> usize {
    data.windows(3).filter(|win| *win == &b"-->"[..]).count()
}
|
||||
|
||||
/// Counts the lines of an ASS/SSA file that begin with `Dialogue:`.
///
/// Lines are delimited by `\n` only; the prefix must sit at the very start
/// of the line (no leading whitespace is skipped).
fn count_ass_dialogues(data: &[u8]) -> usize {
    data.split(|&byte| byte == b'\n')
        .filter(|line| line.starts_with(b"Dialogue:"))
        .count()
}
|
||||
|
||||
/// Exported allocator entry point: reserves `size` bytes of plugin memory.
///
/// Returns the address of the new block as an `i32`, `0` when `size` is not
/// positive, and `-1` when the layout is invalid or the allocation fails.
#[unsafe(no_mangle)]
pub extern "C" fn alloc(size: i32) -> i32 {
    if size <= 0 {
        return 0;
    }
    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
        return -1;
    };
    // SAFETY: `size > 0`, so `layout` has a non-zero size as the global
    // allocator requires.
    let block = unsafe { alloc::alloc::alloc(layout) };
    if block.is_null() { -1 } else { block as i32 }
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn initialize() -> i32 {
|
||||
log_info("subtitle-detector initialized");
|
||||
0
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn shutdown() -> i32 {
|
||||
log_info("subtitle-detector shutdown");
|
||||
0
|
||||
}
|
||||
|
||||
/// Returns the media type definitions provided by this plugin.
///
/// Publishes, via `set_response`, a JSON array with one object per media
/// type (`subtitle-srt`, `subtitle-vtt`, `subtitle-ass`), each carrying
/// `id`, `name`, `category`, `extensions` and `mime_types`. Both parameters
/// are ignored.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn supported_media_types(_ptr: i32, _len: i32) {
|
||||
// Static payload: the raw byte string is handed to the host unchanged.
let response = br#"[
|
||||
{"id":"subtitle-srt","name":"SubRip Subtitle","category":"document","extensions":["srt"],"mime_types":["text/x-subrip"]},
|
||||
{"id":"subtitle-vtt","name":"WebVTT Subtitle","category":"document","extensions":["vtt"],"mime_types":["text/vtt"]},
|
||||
{"id":"subtitle-ass","name":"Advanced SubStation Alpha Subtitle","category":"document","extensions":["ass","ssa"],"mime_types":["text/x-ass"]}
|
||||
]"#;
|
||||
set_response(response);
|
||||
}
|
||||
|
||||
/// Check whether this plugin can handle a given path.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn can_handle(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
let path = json_get_str(&req, "path").unwrap_or("");
|
||||
let can = detect_format(path).is_some();
|
||||
if can {
|
||||
set_response(br#"{"can_handle":true}"#);
|
||||
} else {
|
||||
set_response(br#"{"can_handle":false}"#);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the media type IDs this extractor supports.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
|
||||
set_response(br#"["subtitle-srt","subtitle-vtt","subtitle-ass"]"#);
|
||||
}
|
||||
|
||||
/// Extract metadata from a subtitle file.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
let path = match json_get_str(&req, "path") {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
set_response(br#"{"extra":{"error":"missing path"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let format = match detect_format(path) {
|
||||
Some(f) => f,
|
||||
None => {
|
||||
set_response(br#"{"extra":{"error":"unsupported format"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
let format_str = match format {
|
||||
SubtitleFormat::Srt => "srt",
|
||||
SubtitleFormat::Vtt => "vtt",
|
||||
SubtitleFormat::Ass => "ass",
|
||||
};
|
||||
|
||||
let language = detect_language(path);
|
||||
|
||||
// Load file contents
|
||||
let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
|
||||
if file_size < 0 {
|
||||
// Return what we have without entry count
|
||||
let lang_field = language
|
||||
.map(|l| format!(r#","language":"{}""#, json_escape(l)))
|
||||
.unwrap_or_default();
|
||||
let resp = format!(
|
||||
r#"{{"extra":{{"format":"{}"{}}}}}"#,
|
||||
format_str, lang_field,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
return;
|
||||
}
|
||||
|
||||
if file_size as usize >= MAX_FILE_BYTES {
|
||||
let lang_field = language
|
||||
.map(|l| format!(r#","language":"{}""#, json_escape(l)))
|
||||
.unwrap_or_default();
|
||||
let resp = format!(
|
||||
r#"{{"extra":{{"format":"{}","too_large":"true"{}}}}}"#,
|
||||
format_str, lang_field,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
return;
|
||||
}
|
||||
|
||||
let buf_size = file_size as usize;
|
||||
let entry_count = if buf_size == 0 {
|
||||
0usize
|
||||
} else {
|
||||
let layout = match Layout::from_size_align(buf_size, 1) {
|
||||
Ok(l) => l,
|
||||
Err(_) => {
|
||||
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
let buf_ptr = unsafe { alloc::alloc::alloc(layout) };
|
||||
if buf_ptr.is_null() {
|
||||
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) };
|
||||
let count = if copied > 0 {
|
||||
let data = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) };
|
||||
match format_str {
|
||||
"srt" => count_arrow_markers(data),
|
||||
"vtt" => count_arrow_markers(data),
|
||||
_ => count_ass_dialogues(data),
|
||||
}
|
||||
} else {
|
||||
0
|
||||
};
|
||||
|
||||
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
|
||||
count
|
||||
};
|
||||
|
||||
let msg = format!(
|
||||
"subtitle-detector: format={}, entries={}, path={}",
|
||||
format_str, entry_count, path,
|
||||
);
|
||||
log_info(&msg);
|
||||
|
||||
let lang_field = language
|
||||
.map(|l| format!(r#","language":"{}""#, json_escape(l)))
|
||||
.unwrap_or_default();
|
||||
|
||||
let resp = format!(
|
||||
r#"{{"extra":{{"format":"{}","entry_count":"{}"{}}}}}"#,
|
||||
format_str, entry_count, lang_field,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
}
|
||||
BIN
examples/plugins/text-enrichment/Cargo.lock
generated
Normal file
BIN
examples/plugins/text-enrichment/Cargo.lock
generated
Normal file
Binary file not shown.
15
examples/plugins/text-enrichment/Cargo.toml
Normal file
15
examples/plugins/text-enrichment/Cargo.toml
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
[package]
|
||||
name = "text-enrichment"
|
||||
version = "1.0.0"
|
||||
edition = "2024"
|
||||
|
||||
[lib]
|
||||
crate-type = ["cdylib"]
|
||||
|
||||
[dependencies]
|
||||
dlmalloc = { version = "0.2", features = ["global"] }
|
||||
|
||||
[profile.release]
|
||||
opt-level = "s"
|
||||
lto = true
|
||||
strip = true
|
||||
18
examples/plugins/text-enrichment/plugin.toml
Normal file
18
examples/plugins/text-enrichment/plugin.toml
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
[plugin]
|
||||
name = "text-enrichment"
|
||||
version = "1.0.0"
|
||||
api_version = "1.0"
|
||||
description = "Enriches plain text files with word count, line count, character count, and estimated reading time"
|
||||
kind = ["metadata_extractor"]
|
||||
priority = 500
|
||||
|
||||
[plugin.binary]
|
||||
wasm = "text_enrichment.wasm"
|
||||
|
||||
[capabilities]
|
||||
network = false
|
||||
|
||||
[capabilities.filesystem]
|
||||
# Users must add their media root directories here. Example:
|
||||
# read = ["/home/user/media", "/mnt/nas/texts"]
|
||||
read = []
|
||||
198
examples/plugins/text-enrichment/src/lib.rs
Normal file
198
examples/plugins/text-enrichment/src/lib.rs
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
//! Text-enrichment plugin for Pinakes.
|
||||
//!
|
||||
//! Extracts word count, line count, byte count, and estimated reading
|
||||
//! time from plain text (`.txt`) files.
|
||||
//!
|
||||
//! The `filesystem.read` capability list in `plugin.toml` must be configured
|
||||
//! to include the directories where text files live.
|
||||
//!
|
||||
//! Build with:
|
||||
//! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release
|
||||
|
||||
#![no_std]
|
||||
|
||||
extern crate alloc;
|
||||
|
||||
use alloc::{format, vec::Vec};
|
||||
use core::alloc::Layout;
|
||||
|
||||
// Global allocator for this `#![no_std]` crate: every allocation made
// through the `alloc` crate (Vec, format!) is served by dlmalloc.
#[global_allocator]
|
||||
static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
|
||||
|
||||
/// Panic hook required because the crate is `#![no_std]`.
///
/// The panic information is discarded; execution ends by issuing the wasm
/// `unreachable` instruction, so this function never returns.
#[panic_handler]
|
||||
fn panic_handler(_info: &core::panic::PanicInfo) -> ! {
|
||||
core::arch::wasm32::unreachable()
|
||||
}
|
||||
|
||||
// Host functions provided by the runtime.
// NOTE(review): the contracts described below are inferred from how this
// file calls each function; confirm them against the host documentation.
|
||||
unsafe extern "C" {
|
||||
// Hands `len` bytes starting at `ptr` to the host as the call's result.
fn host_set_result(ptr: i32, len: i32);
|
||||
// Emits a log message; `log_info` passes level 2.
fn host_log(level: i32, ptr: i32, len: i32);
|
||||
// Asks the host to load the file named by the path bytes. Callers treat a
// negative return as failure and any other value as the file size in bytes.
fn host_read_file(path_ptr: i32, path_len: i32) -> i32;
|
||||
// Copies the loaded file into `dest_ptr` (capacity `dest_len`). Callers
// treat a positive return as the number of bytes copied.
fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32;
|
||||
}
|
||||
|
||||
fn set_response(json: &[u8]) {
|
||||
unsafe {
|
||||
host_set_result(json.as_ptr() as i32, json.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
fn log_info(msg: &str) {
|
||||
unsafe {
|
||||
host_log(2, msg.as_ptr() as i32, msg.len() as i32);
|
||||
}
|
||||
}
|
||||
|
||||
/// Copies the request bytes at `ptr .. ptr + len` into an owned buffer.
///
/// Returns an empty vector when `ptr` is null (0) or negative, or when `len`
/// is not positive.
///
/// # Safety
///
/// For a positive `ptr` and `len`, the caller must guarantee that the whole
/// range is readable, initialised memory for the duration of the call.
unsafe fn read_request(ptr: i32, len: i32) -> Vec<u8> {
    // Address 0 is Rust's null pointer and must never reach
    // `slice::from_raw_parts`, so it is rejected together with negatives.
    if ptr <= 0 || len <= 0 {
        return Vec::new();
    }
    // SAFETY: `ptr` is non-null and `len` is positive; validity of the range
    // itself is the caller's obligation (see `# Safety`).
    let slice = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) };
    slice.to_vec()
}
|
||||
|
||||
/// Extract a string value from a JSON object for a given key.
///
/// This is a minimal scanner, not a JSON parser. It looks for `"key"`
/// followed by `:` and a string, and returns the raw text between the
/// quotes, borrowed from `json`.
///
/// - An occurrence of `"key"` that is not followed by `:` (for example the
///   same text used as a value) is skipped and the search continues.
/// - The closing quote is the first quote not preceded by a backslash
///   escape, so values containing `\"` are not cut short.
/// - Escape sequences are NOT decoded; the slice is returned as written.
///
/// Returns `None` when `json` is not UTF-8, the key is absent, the value is
/// not a string, or the string is unterminated.
fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> {
    let text = core::str::from_utf8(json).ok()?;
    let quoted_key = format!("\"{}\"", key);

    let mut search_from = 0usize;
    while let Some(found) = text[search_from..].find(&quoted_key) {
        // `quoted_key` ends in an ASCII quote, so this is a char boundary.
        let after_key = search_from + found + quoted_key.len();
        search_from = after_key;

        let Some(after_colon) = text[after_key..].trim_start().strip_prefix(':') else {
            // Matched text was not used as a key; try the next occurrence.
            continue;
        };
        // A key was found: a non-string value means there is nothing to return.
        let value = after_colon.trim_start().strip_prefix('"')?;

        let mut escaped = false;
        for (idx, byte) in value.bytes().enumerate() {
            if escaped {
                escaped = false;
            } else if byte == b'\\' {
                escaped = true;
            } else if byte == b'"' {
                // `"` is ASCII, so `idx` is a valid char boundary.
                return Some(&value[..idx]);
            }
        }
        return None;
    }
    None
}
|
||||
|
||||
// 5 MB content read limit.
// `extract_metadata` compares the reported file size with `>=`, so a file of
// exactly this size is already answered with `too_large` and never copied.
|
||||
const MAX_FILE_BYTES: usize = 5 * 1024 * 1024;
|
||||
|
||||
/// Exported allocator entry point: reserves `size` bytes of plugin memory.
///
/// Returns the address of the new block as an `i32`, `0` when `size` is not
/// positive, and `-1` when the layout is invalid or the allocation fails.
#[unsafe(no_mangle)]
pub extern "C" fn alloc(size: i32) -> i32 {
    if size <= 0 {
        return 0;
    }
    let Ok(layout) = Layout::from_size_align(size as usize, 1) else {
        return -1;
    };
    // SAFETY: `size > 0`, so `layout` has a non-zero size as the global
    // allocator requires.
    let block = unsafe { alloc::alloc::alloc(layout) };
    if block.is_null() { -1 } else { block as i32 }
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn initialize() -> i32 {
|
||||
log_info("text-enrichment initialized");
|
||||
0
|
||||
}
|
||||
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn shutdown() -> i32 {
|
||||
log_info("text-enrichment shutdown");
|
||||
0
|
||||
}
|
||||
|
||||
/// Returns the media types this extractor supports.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn supported_types(_ptr: i32, _len: i32) {
|
||||
set_response(br#"["text"]"#);
|
||||
}
|
||||
|
||||
/// Extract text statistics from a plain text file.
|
||||
#[unsafe(no_mangle)]
|
||||
pub extern "C" fn extract_metadata(ptr: i32, len: i32) {
|
||||
let req = unsafe { read_request(ptr, len) };
|
||||
let path = match json_get_str(&req, "path") {
|
||||
Some(p) => p,
|
||||
None => {
|
||||
set_response(br#"{"extra":{"error":"missing path"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
// Ask the host to load the file into the exchange buffer.
|
||||
let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) };
|
||||
if file_size < 0 {
|
||||
set_response(br#"{"extra":{"error":"read failed"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
if file_size as usize >= MAX_FILE_BYTES {
|
||||
set_response(br#"{"extra":{"too_large":"true"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let buf_size = file_size as usize;
|
||||
if buf_size == 0 {
|
||||
let resp = r#"{"extra":{"word_count":"0","line_count":"0","byte_count":"0","reading_minutes":"0"}}"#;
|
||||
set_response(resp.as_bytes());
|
||||
return;
|
||||
}
|
||||
|
||||
let layout = match Layout::from_size_align(buf_size, 1) {
|
||||
Ok(l) => l,
|
||||
Err(_) => {
|
||||
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
|
||||
return;
|
||||
}
|
||||
};
|
||||
let buf_ptr = unsafe { alloc::alloc::alloc(layout) };
|
||||
if buf_ptr.is_null() {
|
||||
set_response(br#"{"extra":{"error":"alloc failed"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) };
|
||||
if copied <= 0 {
|
||||
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
|
||||
set_response(br#"{"extra":{"error":"buffer copy failed"}}"#);
|
||||
return;
|
||||
}
|
||||
|
||||
let content = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) };
|
||||
|
||||
let byte_count = content.len();
|
||||
let line_count = content.iter().filter(|&&b| b == b'\n').count()
|
||||
+ if content.last().map_or(true, |&b| b != b'\n') { 1 } else { 0 };
|
||||
|
||||
// Count words: transitions from whitespace to non-whitespace.
|
||||
let mut word_count = 0usize;
|
||||
let mut in_word = false;
|
||||
for &b in content {
|
||||
let is_ws = b == b' ' || b == b'\t' || b == b'\n' || b == b'\r';
|
||||
if !is_ws && !in_word {
|
||||
word_count += 1;
|
||||
in_word = true;
|
||||
} else if is_ws {
|
||||
in_word = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Estimate reading time at 200 words per minute, rounding up.
|
||||
let reading_minutes = (word_count + 199) / 200;
|
||||
|
||||
unsafe { alloc::alloc::dealloc(buf_ptr, layout) };
|
||||
|
||||
let msg = format!(
|
||||
"text-enrichment: {} words, {} lines, {} chars",
|
||||
word_count, line_count, byte_count
|
||||
);
|
||||
log_info(&msg);
|
||||
|
||||
let resp = format!(
|
||||
r#"{{"extra":{{"word_count":"{}","line_count":"{}","byte_count":"{}","reading_minutes":"{}"}}}}"#,
|
||||
word_count, line_count, byte_count, reading_minutes,
|
||||
);
|
||||
set_response(resp.as_bytes());
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue