//! CBZ/CBR comics plugin for Pinakes. //! //! Registers comic book ZIP (`cbz`) and RAR (`cbr`) media types, extracts //! metadata from CBZ archives (including `ComicInfo.xml` when present), and //! generates thumbnails from the cover image. //! //! CBR is registered as a media type but metadata extraction is limited to //! format detection only (RAR parsing is not implemented). //! //! ZIP parsing is implemented from scratch without external ZIP crates to keep //! the WASM binary small. //! //! The `filesystem.read` and `filesystem.write` capabilities in `plugin.toml` //! must be configured for the directories containing comic files and the //! thumbnail output directory respectively. //! //! Build with: //! RUSTFLAGS="" cargo build --target wasm32-unknown-unknown --release #![no_std] extern crate alloc; use alloc::{format, string::{String, ToString}, vec, vec::Vec}; use core::alloc::Layout; #[global_allocator] static ALLOC: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc; #[panic_handler] fn panic_handler(_info: &core::panic::PanicInfo) -> ! { core::arch::wasm32::unreachable() } // Host functions provided by the runtime unsafe extern "C" { fn host_set_result(ptr: i32, len: i32); fn host_log(level: i32, ptr: i32, len: i32); fn host_read_file(path_ptr: i32, path_len: i32) -> i32; fn host_get_buffer(dest_ptr: i32, dest_len: i32) -> i32; fn host_write_file(path_ptr: i32, path_len: i32, data_ptr: i32, data_len: i32) -> i32; } fn set_response(json: &[u8]) { unsafe { host_set_result(json.as_ptr() as i32, json.len() as i32); } } fn log_info(msg: &str) { unsafe { host_log(2, msg.as_ptr() as i32, msg.len() as i32); } } unsafe fn read_request(ptr: i32, len: i32) -> Vec { if ptr < 0 || len <= 0 { return Vec::new(); } let slice = unsafe { core::slice::from_raw_parts(ptr as *const u8, len as usize) }; slice.to_vec() } /// Extract a string value from a JSON object for a given key. fn json_get_str<'a>(json: &'a [u8], key: &str) -> Option<&'a str> { let json_str = core::str::from_utf8(json).ok()?; let pattern = format!("\"{}\"", key); let key_pos = json_str.find(&pattern)?; let after_key = &json_str[key_pos + pattern.len()..]; let after_colon = after_key.trim_start().strip_prefix(':')?; let after_colon = after_colon.trim_start(); if after_colon.starts_with('"') { let value_start = 1; let value_end = after_colon[value_start..].find('"')?; Some(&after_colon[value_start..value_start + value_end]) } else { None } } /// Escape a string for safe inclusion in a JSON string value. fn json_escape(s: &str) -> String { let mut out = String::with_capacity(s.len()); for c in s.chars() { match c { '"' => out.push_str("\\\""), '\\' => out.push_str("\\\\"), '\n' => out.push_str("\\n"), '\r' => out.push_str("\\r"), '\t' => out.push_str("\\t"), _ => out.push(c), } } out } // 20 MB content read limit for comic archives const MAX_FILE_BYTES: usize = 20 * 1024 * 1024; // ZIP signatures (little-endian u32) const SIG_LOCAL_FILE: u32 = 0x04034b50; const SIG_CENTRAL_DIR: u32 = 0x02014b50; const SIG_EOCD: u32 = 0x06054b50; // Compression methods const COMPRESS_STORE: u16 = 0; const COMPRESS_DEFLATE: u16 = 8; /// Read a little-endian u16 from a byte slice at the given offset. /// Returns `None` if out of bounds. fn read_u16_le(data: &[u8], offset: usize) -> Option { let b0 = *data.get(offset)? as u16; let b1 = *data.get(offset + 1)? as u16; Some(b0 | (b1 << 8)) } /// Read a little-endian u32 from a byte slice at the given offset. /// Returns `None` if out of bounds. fn read_u32_le(data: &[u8], offset: usize) -> Option { let b0 = *data.get(offset)? as u32; let b1 = *data.get(offset + 1)? as u32; let b2 = *data.get(offset + 2)? as u32; let b3 = *data.get(offset + 3)? as u32; Some(b0 | (b1 << 8) | (b2 << 16) | (b3 << 24)) } /// Read a big-endian u16 from a byte slice at the given offset. fn read_u16_be(data: &[u8], offset: usize) -> Option { let b0 = *data.get(offset)? as u16; let b1 = *data.get(offset + 1)? as u16; Some((b0 << 8) | b1) } /// Read a big-endian u32 from a byte slice at the given offset. fn read_u32_be(data: &[u8], offset: usize) -> Option { let b0 = *data.get(offset)? as u32; let b1 = *data.get(offset + 1)? as u32; let b2 = *data.get(offset + 2)? as u32; let b3 = *data.get(offset + 3)? as u32; Some((b0 << 24) | (b1 << 16) | (b2 << 8) | b3) } /// A parsed central directory entry from a ZIP archive. struct ZipEntry { name: String, compression: u16, compressed_size: u32, local_offset: u32, } /// Find the End of Central Directory record offset by scanning backwards. fn find_eocd(data: &[u8]) -> Option { if data.len() < 22 { return None; } // Scan backwards for the EOCD signature. The maximum comment size is // 65535 bytes, so we only need to scan that far from the end. let scan_start = if data.len() > 22 + 65535 { data.len() - 22 - 65535 } else { 0 }; let mut i = data.len() - 22; loop { if read_u32_le(data, i) == Some(SIG_EOCD) { return Some(i); } if i == scan_start { break; } i -= 1; } None } /// Parse all central directory entries from a ZIP archive. fn parse_central_directory(data: &[u8]) -> Vec { let mut entries = Vec::new(); let eocd_offset = match find_eocd(data) { Some(o) => o, None => return entries, }; // EOCD layout (offsets relative to EOCD start): // 0: signature (4) // 4: disk number (2) // 6: start disk (2) // 8: entries on disk (2) // 10: total entries (2) // 12: central dir size (4) // 16: central dir offset (4) // 20: comment length (2) let cd_offset = match read_u32_le(data, eocd_offset + 16) { Some(o) => o as usize, None => return entries, }; let total_entries = match read_u16_le(data, eocd_offset + 10) { Some(n) => n as usize, None => return entries, }; let mut pos = cd_offset; for _ in 0..total_entries { if pos + 46 > data.len() { break; } if read_u32_le(data, pos) != Some(SIG_CENTRAL_DIR) { break; } // Central directory entry layout: // 0: signature (4) // 4: version made by (2) // 6: version needed (2) // 8: flags (2) // 10: compression (2) // 12: mod time (2) // 14: mod date (2) // 16: crc32 (4) // 20: compressed size (4) // 24: uncompressed size (4) // 28: filename length (2) // 30: extra field length (2) // 32: file comment length (2) // 34: disk start (2) // 36: internal attrs (2) // 38: external attrs (4) // 42: local header offset (4) // 46: filename... let compression = match read_u16_le(data, pos + 10) { Some(v) => v, None => break }; let compressed_size = match read_u32_le(data, pos + 20) { Some(v) => v, None => break }; // uncompressed_size at pos+24 is intentionally not stored; size comes from decompressor output. let fname_len = match read_u16_le(data, pos + 28) { Some(v) => v as usize, None => break }; let extra_len = match read_u16_le(data, pos + 30) { Some(v) => v as usize, None => break }; let comment_len = match read_u16_le(data, pos + 32) { Some(v) => v as usize, None => break }; let local_offset = match read_u32_le(data, pos + 42) { Some(v) => v, None => break }; let fname_start = pos + 46; let fname_end = fname_start + fname_len; if fname_end > data.len() { break; } let name = core::str::from_utf8(&data[fname_start..fname_end]) .unwrap_or("") .to_ascii_lowercase(); entries.push(ZipEntry { name, compression, compressed_size, local_offset, }); pos = fname_end + extra_len + comment_len; } entries } /// Read raw bytes for a local file entry (the actual compressed/stored data). /// Returns a slice into `data` containing the compressed bytes. fn local_file_data<'a>(data: &'a [u8], entry: &ZipEntry) -> Option<&'a [u8]> { let off = entry.local_offset as usize; if off + 30 > data.len() { return None; } if read_u32_le(data, off) != Some(SIG_LOCAL_FILE) { return None; } // Local file header layout: // 0: signature (4) // 4: version needed (2) // 6: flags (2) // 8: compression (2) // 10: mod time (2) // 12: mod date (2) // 14: crc32 (4) // 18: compressed size (4) // 22: uncompressed size (4) // 26: filename length (2) // 28: extra length (2) // 30: filename... let fname_len = read_u16_le(data, off + 26)? as usize; let extra_len = read_u16_le(data, off + 28)? as usize; let data_start = off + 30 + fname_len + extra_len; let data_end = data_start + entry.compressed_size as usize; if data_end > data.len() { return None; } Some(&data[data_start..data_end]) } /// Decompress a stored (STORE) or deflated (DEFLATE) entry. /// Returns the uncompressed bytes. fn decompress_entry(data: &[u8], entry: &ZipEntry) -> Option> { let raw = local_file_data(data, entry)?; match entry.compression { COMPRESS_STORE => Some(raw.to_vec()), COMPRESS_DEFLATE => { miniz_oxide::inflate::decompress_to_vec(raw).ok() } _ => None, } } /// Returns true if a filename has an image extension. fn is_image_filename(name: &str) -> bool { name.ends_with(".jpg") || name.ends_with(".jpeg") || name.ends_with(".png") || name.ends_with(".webp") } /// Extract a simple XML element value using substring search. /// Looks for `value` and returns the inner text. fn xml_get_text<'a>(xml: &'a str, tag: &str) -> Option<&'a str> { let open = format!("<{}>", tag); let close = format!("", tag); let start = xml.find(&open)?; let after_open = &xml[start + open.len()..]; let end = after_open.find(&close)?; Some(&after_open[..end]) } /// Metadata extracted from a ComicInfo.xml file. struct ComicInfo { title: Option, series: Option, issue_number: Option, writer: Option, page_count: Option, language: Option, genre: Option, summary: Option, } /// Parse key fields from a ComicInfo.xml byte slice. fn parse_comic_info(data: &[u8]) -> ComicInfo { let text = core::str::from_utf8(data).unwrap_or(""); ComicInfo { title: xml_get_text(text, "Title") .map(|s| s.trim().to_ascii_lowercase()).filter(|s| !s.is_empty()).map(|s| { // Re-capitalize first letter for title let mut c = s.chars(); match c.next() { None => String::new(), Some(f) => f.to_uppercase().collect::() + c.as_str(), } }), series: xml_get_text(text, "Series") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), issue_number: xml_get_text(text, "Number") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), writer: xml_get_text(text, "Writer") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), page_count: xml_get_text(text, "PageCount") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), language: xml_get_text(text, "LanguageISO").map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), genre: xml_get_text(text, "Genre") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), summary: xml_get_text(text, "Summary") .map(|s| s.trim().to_string()).filter(|s| !s.is_empty()), } } /// Image dimension and format information. struct ImageInfo { width: u32, height: u32, format: &'static str, } /// Parse image dimensions and detect format from raw image bytes. fn parse_image_info(data: &[u8]) -> Option { if data.len() < 4 { return None; } // JPEG: starts with 0xFF 0xD8 if data[0] == 0xFF && data[1] == 0xD8 { // Scan for SOF0 (0xFF 0xC0) or SOF2 (0xFF 0xC2) marker let mut i = 2usize; while i + 8 < data.len() { if data[i] == 0xFF { let marker = data[i + 1]; if marker == 0xC0 || marker == 0xC2 { // SOF marker layout: // 0: 0xFF // 1: marker // 2-3: segment length (big-endian) // 4: precision // 5-6: height (big-endian u16) // 7-8: width (big-endian u16) let height = read_u16_be(data, i + 5)? as u32; let width = read_u16_be(data, i + 7)? as u32; return Some(ImageInfo { width, height, format: "jpeg" }); } else if marker == 0xFF { // Padding byte i += 1; continue; } else if marker == 0xD8 || marker == 0xD9 { // SOI / EOI - no length field i += 2; continue; } else { // Skip segment: length at i+2 (includes the 2 length bytes) if let Some(seg_len) = read_u16_be(data, i + 2) { i += 2 + seg_len as usize; } else { break; } } } else { i += 1; } } // Return a JPEG without dimensions if SOF not found return Some(ImageInfo { width: 0, height: 0, format: "jpeg" }); } // PNG: starts with 0x89 0x50 0x4E 0x47 ('PNG') if data.len() >= 24 && data[0] == 0x89 && data[1] == 0x50 && data[2] == 0x4E && data[3] == 0x47 { // IHDR chunk: width at bytes 16-19, height at bytes 20-23 (big-endian u32) let width = read_u32_be(data, 16)?; let height = read_u32_be(data, 20)?; return Some(ImageInfo { width, height, format: "png" }); } // WebP: RIFF....WEBP if data.len() >= 12 && &data[0..4] == b"RIFF" && &data[8..12] == b"WEBP" { return Some(ImageInfo { width: 0, height: 0, format: "webp" }); } None } /// Load a CBZ archive into memory. Returns the raw bytes or an error string. fn load_cbz_file(path: &str) -> Result, &'static str> { let file_size = unsafe { host_read_file(path.as_ptr() as i32, path.len() as i32) }; if file_size < 0 { return Err("read failed"); } if file_size as usize >= MAX_FILE_BYTES { return Err("too large"); } let buf_size = file_size as usize; if buf_size == 0 { return Ok(Vec::new()); } let layout = Layout::from_size_align(buf_size, 1).map_err(|_| "alloc failed")?; let buf_ptr = unsafe { alloc::alloc::alloc(layout) }; if buf_ptr.is_null() { return Err("alloc failed"); } let copied = unsafe { host_get_buffer(buf_ptr as i32, file_size) }; if copied <= 0 { unsafe { alloc::alloc::dealloc(buf_ptr, layout) }; return Err("buffer copy failed"); } let data = unsafe { core::slice::from_raw_parts(buf_ptr, copied as usize) }.to_vec(); unsafe { alloc::alloc::dealloc(buf_ptr, layout) }; Ok(data) } #[unsafe(no_mangle)] pub extern "C" fn alloc(size: i32) -> i32 { if size <= 0 { return 0; } unsafe { let layout = match Layout::from_size_align(size as usize, 1) { Ok(l) => l, Err(_) => return -1, }; let ptr = alloc::alloc::alloc(layout); if ptr.is_null() { return -1; } ptr as i32 } } #[unsafe(no_mangle)] pub extern "C" fn initialize() -> i32 { log_info("cbz-comics initialized"); 0 } #[unsafe(no_mangle)] pub extern "C" fn shutdown() -> i32 { log_info("cbz-comics shutdown"); 0 } /// Returns the comic media type definitions. #[unsafe(no_mangle)] pub extern "C" fn supported_media_types(_ptr: i32, _len: i32) { let response = br#"[ {"id":"comic-cbz","name":"Comic Book ZIP","category":"document","extensions":["cbz"],"mime_types":["application/vnd.comicbook+zip"]}, {"id":"comic-cbr","name":"Comic Book RAR","category":"document","extensions":["cbr"],"mime_types":["application/vnd.comicbook-rar"]} ]"#; set_response(response); } /// Check whether this plugin can handle a given path. #[unsafe(no_mangle)] pub extern "C" fn can_handle(ptr: i32, len: i32) { let req = unsafe { read_request(ptr, len) }; let path = json_get_str(&req, "path").unwrap_or("").to_ascii_lowercase(); let can = path.ends_with(".cbz") || path.ends_with(".cbr"); if can { set_response(br#"{"can_handle":true}"#); } else { set_response(br#"{"can_handle":false}"#); } } /// Returns the media type IDs this extractor supports. #[unsafe(no_mangle)] pub extern "C" fn supported_types(_ptr: i32, _len: i32) { set_response(br#"["comic-cbz","comic-cbr"]"#); } /// Extract metadata from a CBZ or CBR file. #[unsafe(no_mangle)] pub extern "C" fn extract_metadata(ptr: i32, len: i32) { let req = unsafe { read_request(ptr, len) }; let path = match json_get_str(&req, "path") { Some(p) => p, None => { set_response(br#"{"extra":{"error":"missing path"}}"#); return; } }; let lower = path.to_ascii_lowercase(); // CBR: register the type but do not attempt to parse RAR. if lower.ends_with(".cbr") { set_response(br#"{"extra":{"format":"cbr","note":"cbr-unsupported"}}"#); return; } // Load CBZ archive let data = match load_cbz_file(path) { Ok(d) => d, Err("too large") => { set_response(br#"{"extra":{"format":"cbz","too_large":"true"}}"#); return; } Err(e) => { let resp = format!(r#"{{"extra":{{"format":"cbz","error":"{}"}}}}"#, e); set_response(resp.as_bytes()); return; } }; let entries = parse_central_directory(&data); // Count image files as page count. let image_count = entries.iter().filter(|e| is_image_filename(&e.name)).count(); // Look for ComicInfo.xml (case-insensitive). let comic_info_entry = entries.iter().find(|e| { let n = e.name.as_str(); n == "comicinfo.xml" || n.ends_with("/comicinfo.xml") }); let info = if let Some(entry) = comic_info_entry { // Only decompress STORE entries here for simplicity; skip DEFLATE ones. if entry.compression == COMPRESS_STORE || entry.compression == COMPRESS_DEFLATE { if let Some(xml_bytes) = decompress_entry(&data, entry) { Some(parse_comic_info(&xml_bytes)) } else { None } } else { None } } else { None }; let msg = format!( "cbz-comics: {} entries, {} images, ComicInfo.xml={}", entries.len(), image_count, info.is_some(), ); log_info(&msg); // Build response JSON let mut extra_pairs: Vec<(&str, String)> = vec![ ("format", String::from("cbz")), ]; let page_count_str; if let Some(ref ci) = info { if let Some(ref pc) = ci.page_count { page_count_str = pc.clone(); extra_pairs.push(("page_count", page_count_str.clone())); } else { page_count_str = format!("{}", image_count); extra_pairs.push(("page_count", page_count_str.clone())); } if let Some(ref s) = ci.series { extra_pairs.push(("series", s.clone())) } if let Some(ref n) = ci.issue_number { extra_pairs.push(("issue_number", n.clone())) } if let Some(ref l) = ci.language { extra_pairs.push(("language", l.clone())) } } else { page_count_str = format!("{}", image_count); extra_pairs.push(("page_count", page_count_str.clone())); } // Build extra JSON object let mut extra_json = String::from("{"); for (i, (k, v)) in extra_pairs.iter().enumerate() { if i > 0 { extra_json.push(','); } extra_json.push('"'); extra_json.push_str(k); extra_json.push_str("\":\""); extra_json.push_str(&json_escape(v)); extra_json.push('"'); } extra_json.push('}'); let title_field = info.as_ref() .and_then(|ci| ci.title.as_ref()) .map(|t| format!(r#","title":"{}""#, json_escape(t))) .unwrap_or_default(); let artist_field = info.as_ref() .and_then(|ci| ci.writer.as_ref()) .map(|w| format!(r#","artist":"{}""#, json_escape(w))) .unwrap_or_default(); let genre_field = info.as_ref() .and_then(|ci| ci.genre.as_ref()) .map(|g| format!(r#","genre":"{}""#, json_escape(g))) .unwrap_or_default(); let desc_field = info.as_ref() .and_then(|ci| ci.summary.as_ref()) .map(|s| format!(r#","description":"{}""#, json_escape(s))) .unwrap_or_default(); let resp = format!( r#"{{"extra":{}{}{}{}{}}}"#, extra_json, title_field, artist_field, genre_field, desc_field, ); set_response(resp.as_bytes()); } /// Generate a thumbnail from the cover image of a CBZ archive. #[unsafe(no_mangle)] pub extern "C" fn generate_thumbnail(ptr: i32, len: i32) { let req = unsafe { read_request(ptr, len) }; let source_path = match json_get_str(&req, "source_path") { Some(p) => p, None => { set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); return; } }; let output_path = match json_get_str(&req, "output_path") { Some(p) => p, None => { set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); return; } }; let lower = source_path.to_ascii_lowercase(); if !lower.ends_with(".cbz") { set_response(br#"{"path":"","width":0,"height":0,"format":"unknown"}"#); return; } let data = match load_cbz_file(source_path) { Ok(d) => d, Err(_) => { set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); return; } }; let entries = parse_central_directory(&data); // Find alphabetically first image file for the cover. let mut image_entries: Vec<&ZipEntry> = entries.iter().filter(|e| is_image_filename(&e.name)).collect(); image_entries.sort_by(|a, b| a.name.as_str().cmp(b.name.as_str())); let cover = match image_entries.first() { Some(e) => e, None => { set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); return; } }; let image_bytes = match decompress_entry(&data, cover) { Some(b) => b, None => { set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); return; } }; let info = parse_image_info(&image_bytes).unwrap_or(ImageInfo { width: 0, height: 0, format: "jpeg", }); // Write thumbnail bytes to output path let write_result = unsafe { host_write_file( output_path.as_ptr() as i32, output_path.len() as i32, image_bytes.as_ptr() as i32, image_bytes.len() as i32, ) }; if write_result < 0 { set_response(br#"{"path":"","width":0,"height":0,"format":"jpeg"}"#); return; } let msg = format!( "cbz-comics: thumbnail {}x{} {} written to {}", info.width, info.height, info.format, output_path, ); log_info(&msg); let resp = format!( r#"{{"path":"{}","width":{},"height":{},"format":"{}"}}"#, json_escape(output_path), info.width, info.height, info.format, ); set_response(resp.as_bytes()); }