diff --git a/README.md b/README.md index 72ea591..faabc1c 100644 --- a/README.md +++ b/README.md @@ -275,6 +275,30 @@ entry has expired from history. > This behavior only applies when the watch daemon is actively running. Manual > expiration or deletion of entries will not clear the clipboard. +### MIME Type Preference for Watch + +`stash watch` supports a `--mime-type` (short `-t`) option that lets you +prioritize which MIME type the daemon should request from the clipboard when +multiple representations are available. + +- `any` (default): Use the source's first offered type, preferring `image/*` and avoiding `text/html` when another type is offered. +- `text`: Prefer text representations (e.g. `text/plain`, `text/html`). +- `image`: Prefer image representations (e.g. `image/png`, `image/jpeg`) so that + image copies from browsers or file managers are stored as images rather than + HTML fragments. + +Example: prefer images when running the watch daemon + +```bash +stash watch --mime-type image +``` + +This is useful when copying images from browsers or file managers where the +clipboard may offer both HTML and image representations; selecting `image` will +ask the compositor for image data first. Most users will be fine using the +default value (`any`), but if your browser (or another application) +regularly misrepresents data, you might wish to prioritize a different type. + ### Options Some commands take additional flags to modify Stash's behavior. See each diff --git a/src/commands/import.rs b/src/commands/import.rs index a5b4e55..933cf88 100644 --- a/src/commands/import.rs +++ b/src/commands/import.rs @@ -1,12 +1,6 @@ use std::io::{self, BufRead}; -use crate::db::{ - ClipboardDb, - Entry, - SqliteClipboardDb, - StashError, - detect_mime, -}; +use crate::db::{ClipboardDb, Entry, SqliteClipboardDb, StashError}; pub trait ImportCommand { /// Import clipboard entries from TSV format.
@@ -44,7 +38,7 @@ impl ImportCommand for SqliteClipboardDb { let entry = Entry { contents: val.as_bytes().to_vec(), - mime: detect_mime(val.as_bytes()), + mime: crate::mime::detect_mime(val.as_bytes()), }; self diff --git a/src/commands/watch.rs b/src/commands/watch.rs index 54706bb..54dc803 100644 --- a/src/commands/watch.rs +++ b/src/commands/watch.rs @@ -8,7 +8,13 @@ use std::{ use smol::Timer; use wl_clipboard_rs::{ copy::{MimeType as CopyMimeType, Options, Source}, - paste::{ClipboardType, Seat, get_contents}, + paste::{ + ClipboardType, + MimeType as PasteMimeType, + Seat, + get_contents, + get_mime_types_ordered, + }, }; use crate::db::{ClipboardDb, SqliteClipboardDb}; @@ -93,6 +99,82 @@ impl ExpirationQueue { } } +/// Get clipboard contents using the source application's preferred MIME type. +/// +/// See, `MimeType::Any` lets wl-clipboard-rs pick a type in arbitrary order, +/// which causes issues when applications offer multiple types (e.g. file +/// managers offering `text/uri-list` + `text/plain`, or Firefox offering +/// `text/html` + `image/png` + `text/plain`). +/// +/// This queries the ordered types via [`get_mime_types_ordered`], which +/// preserves the Wayland protocol's offer order (source application's +/// preference) and then requests the first type with [`MimeType::Specific`]. +/// +/// The two-step approach has a theoretical race (clipboard could change between +/// the calls), but the wl-clipboard-rs API has no single-call variant that +/// respects source ordering. A race simply produces an error that the polling +/// loop handles like any other clipboard-empty/error case. +/// +/// When `preference` is `"text"`, uses `MimeType::Text` directly (single call). +/// When `preference` is `"image"`, picks the first offered `image/*` type. +/// Otherwise picks the source's first offered type. 
+fn negotiate_mime_type( + preference: &str, +) -> Result<(Box, String), wl_clipboard_rs::paste::Error> { + if preference == "text" { + let (reader, mime_str) = get_contents( + ClipboardType::Regular, + Seat::Unspecified, + PasteMimeType::Text, + )?; + return Ok((Box::new(reader) as Box, mime_str)); + } + + let offered = + get_mime_types_ordered(ClipboardType::Regular, Seat::Unspecified)?; + + let chosen = if preference == "image" { + // Pick the first offered image type, fall back to first overall + offered + .iter() + .find(|m| m.starts_with("image/")) + .or_else(|| offered.first()) + } else { + // XXX: When preference is "any", deprioritize text/html if a more + // concrete type is available. Browsers and Electron apps put + // text/html first even for "Copy Image", but the HTML is just + // a wrapper (), i.e., never what the user wants in a + // clipboard manager. Prefer image/* first, then any non-html + // type, and fall back to text/html only as a last resort. + let has_image = offered.iter().any(|m| m.starts_with("image/")); + if has_image { + offered + .iter() + .find(|m| m.starts_with("image/")) + .or_else(|| offered.first()) + } else if offered.first().is_some_and(|m| m == "text/html") { + offered + .iter() + .find(|m| *m != "text/html") + .or_else(|| offered.first()) + } else { + offered.first() + } + }; + + match chosen { + Some(mime_str) => { + let (reader, actual_mime) = get_contents( + ClipboardType::Regular, + Seat::Unspecified, + PasteMimeType::Specific(mime_str), + )?; + Ok((Box::new(reader) as Box, actual_mime)) + }, + None => Err(wl_clipboard_rs::paste::Error::NoSeats), + } +} + pub trait WatchCommand { fn watch( &self, @@ -100,6 +182,7 @@ pub trait WatchCommand { max_items: u64, excluded_apps: &[String], expire_after: Option, + mime_type_preference: &str, ); } @@ -110,9 +193,13 @@ impl WatchCommand for SqliteClipboardDb { max_items: u64, excluded_apps: &[String], expire_after: Option, + mime_type_preference: &str, ) { smol::block_on(async { - 
log::info!("Starting clipboard watch daemon"); + log::info!( + "Starting clipboard watch daemon with MIME type preference: \ + {mime_type_preference}" + ); // Build expiration queue from existing entries let mut exp_queue = ExpirationQueue::new(); @@ -160,12 +247,8 @@ impl WatchCommand for SqliteClipboardDb { hasher.finish() }; - // Initialize with current clipboard - if let Ok((mut reader, _)) = get_contents( - ClipboardType::Regular, - Seat::Unspecified, - wl_clipboard_rs::paste::MimeType::Any, - ) { + // Initialize with current clipboard using smart MIME negotiation + if let Ok((mut reader, _)) = negotiate_mime_type(mime_type_preference) { buf.clear(); if reader.read_to_end(&mut buf).is_ok() && !buf.is_empty() { last_hash = Some(hash_contents(&buf)); @@ -202,11 +285,9 @@ impl WatchCommand for SqliteClipboardDb { log::info!("Entry {id} marked as expired"); // Check if this expired entry is currently in the clipboard - if let Ok((mut reader, _)) = get_contents( - ClipboardType::Regular, - Seat::Unspecified, - wl_clipboard_rs::paste::MimeType::Any, - ) { + if let Ok((mut reader, _)) = + negotiate_mime_type(mime_type_preference) + { let mut current_buf = Vec::new(); if reader.read_to_end(&mut current_buf).is_ok() && !current_buf.is_empty() @@ -250,11 +331,7 @@ impl WatchCommand for SqliteClipboardDb { } // Normal clipboard polling - match get_contents( - ClipboardType::Regular, - Seat::Unspecified, - wl_clipboard_rs::paste::MimeType::Any, - ) { + match negotiate_mime_type(mime_type_preference) { Ok((mut reader, _mime_type)) => { buf.clear(); if let Err(e) = reader.read_to_end(&mut buf) { @@ -319,3 +396,108 @@ impl WatchCommand for SqliteClipboardDb { }); } } + +/// Unit-testable helper: given ordered offers and a preference, return the +/// chosen MIME type. This mirrors the selection logic in +/// [`negotiate_mime_type`] without requiring a Wayland connection. 
/// Test-only selection helper: given the offered MIME types in source order
/// and a preference string, return the type that would be requested.
///
/// An offered `image/*` type is always chosen first. With the `"image"`
/// preference the fallback is the first offer; with any other preference the
/// fallback is the first offer that is not `text/html`, then the first offer.
/// Returns `None` only when `offered` is empty.
#[cfg(test)]
fn pick_mime<'a>(
    offered: &'a [String],
    preference: &str,
) -> Option<&'a String> {
    let first_image = offered.iter().find(|m| m.starts_with("image/"));

    if preference == "image" {
        return first_image.or_else(|| offered.first());
    }

    first_image
        .or_else(|| offered.iter().find(|m| m.as_str() != "text/html"))
        .or_else(|| offered.first())
}
+ let offered = vec![ + "text/html".to_string(), + "image/png".to_string(), + "text/plain".to_string(), + ]; + assert_eq!(pick_mime(&offered, "any").unwrap(), "image/png"); + } + + #[test] + fn test_pick_image_over_html_electron() { + // Electron apps also put text/html before image types + let offered = vec!["text/html".to_string(), "image/jpeg".to_string()]; + assert_eq!(pick_mime(&offered, "any").unwrap(), "image/jpeg"); + } + + #[test] + fn test_pick_html_fallback_when_only_html() { + // When text/html is the only type, pick it + let offered = vec!["text/html".to_string()]; + assert_eq!(pick_mime(&offered, "any").unwrap(), "text/html"); + } + + #[test] + fn test_pick_text_over_html_when_no_image() { + // Rich text copy: html + plain, no image — prefer plain text + let offered = vec!["text/html".to_string(), "text/plain".to_string()]; + assert_eq!(pick_mime(&offered, "any").unwrap(), "text/plain"); + } + + #[test] + fn test_pick_file_manager_uri_list_first() { + // File managers typically offer uri-list first + let offered = vec!["text/uri-list".to_string(), "text/plain".to_string()]; + assert_eq!(pick_mime(&offered, "any").unwrap(), "text/uri-list"); + } +} diff --git a/src/db/mod.rs b/src/db/mod.rs index f2048cd..4b57ae5 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -10,7 +10,6 @@ use std::{ }; use base64::prelude::*; -use imagesize::ImageType; use log::{debug, error, warn}; use regex::Regex; use rusqlite::{Connection, OptionalExtension, params}; @@ -429,7 +428,7 @@ impl ClipboardDb for SqliteClipboardDb { #[allow(clippy::cast_possible_wrap)] let content_hash = hasher.finish() as i64; - let mime = detect_mime_optimized(&buf); + let mime = crate::mime::detect_mime(&buf); // Try to load regex from systemd credential file, then env var let regex = load_sensitive_regex(); @@ -884,51 +883,6 @@ pub fn extract_id(input: &str) -> Result { id_str.parse().map_err(|_| "invalid id") } -pub fn detect_mime_optimized(data: &[u8]) -> Option { - // Check if it's valid 
UTF-8 first, which most clipboard content are. - // This will be used to return early without unnecessary mimetype detection - // overhead. - if std::str::from_utf8(data).is_ok() { - return Some("text/plain".to_string()); - } - - // Only run image detection on binary data - detect_mime(data) -} - -pub fn detect_mime(data: &[u8]) -> Option { - if let Ok(img_type) = imagesize::image_type(data) { - let mime_str = match img_type { - ImageType::Png => "image/png", - ImageType::Jpeg => "image/jpeg", - ImageType::Gif => "image/gif", - ImageType::Bmp => "image/bmp", - ImageType::Tiff => "image/tiff", - ImageType::Webp => "image/webp", - ImageType::Aseprite => "image/x-aseprite", - ImageType::Dds => "image/vnd.ms-dds", - ImageType::Exr => "image/aces", - ImageType::Farbfeld => "image/farbfeld", - ImageType::Hdr => "image/vnd.radiance", - ImageType::Ico => "image/x-icon", - ImageType::Ilbm => "image/ilbm", - ImageType::Jxl => "image/jxl", - ImageType::Ktx2 => "image/ktx2", - ImageType::Pnm => "image/x-portable-anymap", - ImageType::Psd => "image/vnd.adobe.photoshop", - ImageType::Qoi => "image/qoi", - ImageType::Tga => "image/x-tga", - ImageType::Vtf => "image/x-vtf", - ImageType::Heif(imagesize::Compression::Hevc) => "image/heic", - ImageType::Heif(_) => "image/heif", - _ => "application/octet-stream", - }; - Some(mime_str.to_string()) - } else { - None - } -} - pub fn preview_entry(data: &[u8], mime: Option<&str>, width: u32) -> String { if let Some(mime) = mime { if mime.starts_with("image/") { @@ -1207,6 +1161,13 @@ mod tests { use super::*; + /// Create an in-memory test database with full schema. 
+ fn test_db() -> SqliteClipboardDb { + let conn = + Connection::open_in_memory().expect("Failed to open in-memory db"); + SqliteClipboardDb::new(conn).expect("Failed to create test database") + } + fn get_schema_version(conn: &Connection) -> rusqlite::Result { conn.pragma_query_value(None, "user_version", |row| row.get(0)) } @@ -1239,7 +1200,7 @@ mod tests { assert_eq!( get_schema_version(&db.conn).expect("Failed to get schema version"), - 3 + 5 ); assert!(table_column_exists(&db.conn, "clipboard", "content_hash")); @@ -1290,7 +1251,7 @@ mod tests { assert_eq!( get_schema_version(&db.conn) .expect("Failed to get version after migration"), - 3 + 5 ); assert!(table_column_exists(&db.conn, "clipboard", "content_hash")); @@ -1332,7 +1293,7 @@ mod tests { assert_eq!( get_schema_version(&db.conn) .expect("Failed to get version after migration"), - 3 + 5 ); assert!(table_column_exists(&db.conn, "clipboard", "content_hash")); @@ -1375,7 +1336,7 @@ mod tests { assert_eq!( get_schema_version(&db.conn) .expect("Failed to get version after migration"), - 3 + 5 ); assert!(table_column_exists(&db.conn, "clipboard", "last_accessed")); @@ -1411,7 +1372,7 @@ mod tests { get_schema_version(&db2.conn).expect("Failed to get version"); assert_eq!(version_after_first, version_after_second); - assert_eq!(version_after_first, 3); + assert_eq!(version_after_first, 5); } #[test] @@ -1540,7 +1501,7 @@ mod tests { assert_eq!( get_schema_version(&db.conn).expect("Failed to get version"), - 3 + 5 ); let count: i64 = db @@ -1549,4 +1510,260 @@ mod tests { .expect("Failed to count"); assert_eq!(count, 1, "Existing data should be preserved"); } + + #[test] + fn test_store_uri_list_content() { + let db = test_db(); + let data = b"file:///home/user/document.pdf\nfile:///home/user/image.png"; + let id = db + .store_entry(std::io::Cursor::new(data.to_vec()), 100, 1000, None) + .expect("Failed to store URI list"); + + let mime: Option = db + .conn + .query_row("SELECT mime FROM clipboard WHERE id = 
?1", [id], |row| { + row.get(0) + }) + .expect("Failed to get mime"); + assert_eq!(mime, Some("text/uri-list".to_string())); + } + + #[test] + fn test_store_binary_image() { + let db = test_db(); + // Minimal PNG header + let data: Vec = vec![ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature + 0x00, 0x00, 0x00, 0x0D, // IHDR chunk length + 0x49, 0x48, 0x44, 0x52, // "IHDR" + 0x00, 0x00, 0x00, 0x01, // width: 1 + 0x00, 0x00, 0x00, 0x01, // height: 1 + 0x08, 0x02, 0x00, 0x00, 0x00, // bit depth, color, etc. + 0x90, 0x77, 0x53, 0xDE, // CRC + ]; + let id = db + .store_entry(std::io::Cursor::new(data.clone()), 100, 1000, None) + .expect("Failed to store image"); + + let (contents, mime): (Vec, Option) = db + .conn + .query_row( + "SELECT contents, mime FROM clipboard WHERE id = ?1", + [id], + |row| Ok((row.get(0)?, row.get(1)?)), + ) + .expect("Failed to get stored entry"); + assert_eq!(contents, data); + assert_eq!(mime, Some("image/png".to_string())); + } + + #[test] + fn test_deduplication() { + let db = test_db(); + let data = b"duplicate content"; + + let id1 = db + .store_entry(std::io::Cursor::new(data.to_vec()), 100, 1000, None) + .expect("Failed to store first"); + let _id2 = db + .store_entry(std::io::Cursor::new(data.to_vec()), 100, 1000, None) + .expect("Failed to store second"); + + // First entry should have been removed by deduplication + let count: i64 = db + .conn + .query_row("SELECT COUNT(*) FROM clipboard", [], |row| row.get(0)) + .expect("Failed to count"); + assert_eq!(count, 1, "Deduplication should keep only one copy"); + + // The original id should be gone + let exists: bool = db + .conn + .query_row( + "SELECT COUNT(*) FROM clipboard WHERE id = ?1", + [id1], + |row| row.get::<_, i64>(0), + ) + .map(|c| c > 0) + .unwrap_or(false); + assert!(!exists, "Old entry should be removed"); + } + + #[test] + fn test_trim_excess_entries() { + let db = test_db(); + for i in 0..5 { + let data = format!("entry {i}"); + db.store_entry( + 
std::io::Cursor::new(data.into_bytes()), + 100, + 3, // max 3 items + None, + ) + .expect("Failed to store"); + } + + let count: i64 = db + .conn + .query_row("SELECT COUNT(*) FROM clipboard", [], |row| row.get(0)) + .expect("Failed to count"); + assert!(count <= 3, "Trim should keep at most max_items entries"); + } + + #[test] + fn test_reject_empty_input() { + let db = test_db(); + let result = + db.store_entry(std::io::Cursor::new(Vec::new()), 100, 1000, None); + assert!(matches!(result, Err(StashError::EmptyOrTooLarge))); + } + + #[test] + fn test_reject_whitespace_input() { + let db = test_db(); + let result = db.store_entry( + std::io::Cursor::new(b" \n\t ".to_vec()), + 100, + 1000, + None, + ); + assert!(matches!(result, Err(StashError::AllWhitespace))); + } + + #[test] + fn test_reject_oversized_input() { + let db = test_db(); + // 5MB + 1 byte + let data = vec![b'a'; 5 * 1_000_000 + 1]; + let result = db.store_entry(std::io::Cursor::new(data), 100, 1000, None); + assert!(matches!(result, Err(StashError::EmptyOrTooLarge))); + } + + #[test] + fn test_delete_entries_by_id() { + let db = test_db(); + let id = db + .store_entry(std::io::Cursor::new(b"to delete".to_vec()), 100, 1000, None) + .expect("Failed to store"); + + let input = format!("{id}\tpreview text\n"); + let deleted = db + .delete_entries(std::io::Cursor::new(input.into_bytes())) + .expect("Failed to delete"); + assert_eq!(deleted, 1); + + let count: i64 = db + .conn + .query_row("SELECT COUNT(*) FROM clipboard", [], |row| row.get(0)) + .expect("Failed to count"); + assert_eq!(count, 0); + } + + #[test] + fn test_delete_query_matching() { + let db = test_db(); + db.store_entry( + std::io::Cursor::new(b"secret password 123".to_vec()), + 100, + 1000, + None, + ) + .expect("Failed to store"); + db.store_entry( + std::io::Cursor::new(b"normal text".to_vec()), + 100, + 1000, + None, + ) + .expect("Failed to store"); + + let deleted = db + .delete_query("secret password") + .expect("Failed to delete 
query"); + assert_eq!(deleted, 1); + + let count: i64 = db + .conn + .query_row("SELECT COUNT(*) FROM clipboard", [], |row| row.get(0)) + .expect("Failed to count"); + assert_eq!(count, 1); + } + + #[test] + fn test_wipe_db() { + let db = test_db(); + for i in 0..3 { + let data = format!("entry {i}"); + db.store_entry(std::io::Cursor::new(data.into_bytes()), 100, 1000, None) + .expect("Failed to store"); + } + + db.wipe_db().expect("Failed to wipe"); + + let count: i64 = db + .conn + .query_row("SELECT COUNT(*) FROM clipboard", [], |row| row.get(0)) + .expect("Failed to count"); + assert_eq!(count, 0); + } + + #[test] + fn test_extract_id_valid() { + assert_eq!(extract_id("42\tsome preview"), Ok(42)); + assert_eq!(extract_id("1"), Ok(1)); + assert_eq!(extract_id("999\t"), Ok(999)); + } + + #[test] + fn test_extract_id_invalid() { + assert!(extract_id("abc\tpreview").is_err()); + assert!(extract_id("").is_err()); + assert!(extract_id("\tpreview").is_err()); + } + + #[test] + fn test_preview_entry_text() { + let data = b"Hello, world!"; + let preview = preview_entry(data, Some("text/plain"), 100); + assert_eq!(preview, "Hello, world!"); + } + + #[test] + fn test_preview_entry_image() { + let data = vec![0x89, 0x50, 0x4E, 0x47]; // PNG-ish bytes + let preview = preview_entry(&data, Some("image/png"), 100); + assert!(preview.contains("binary data")); + assert!(preview.contains("image/png")); + } + + #[test] + fn test_preview_entry_truncation() { + let data = b"This is a rather long piece of text that should be truncated"; + let preview = preview_entry(data, Some("text/plain"), 10); + assert!(preview.len() <= 15); // 10 chars + ellipsis (multi-byte) + assert!(preview.ends_with('…')); + } + + #[test] + fn test_size_str_formatting() { + assert_eq!(size_str(0), "0 B"); + assert_eq!(size_str(512), "512 B"); + assert_eq!(size_str(1024), "1 KiB"); + assert_eq!(size_str(1024 * 1024), "1 MiB"); + } + + #[test] + fn test_copy_entry_returns_data() { + let db = test_db(); + let 
data = b"copy me"; + let id = db + .store_entry(std::io::Cursor::new(data.to_vec()), 100, 1000, None) + .expect("Failed to store"); + + let (returned_id, contents, mime) = + db.copy_entry(id).expect("Failed to copy"); + assert_eq!(returned_id, id); + assert_eq!(contents, data.to_vec()); + assert_eq!(mime, Some("text/plain".to_string())); + } } diff --git a/src/main.rs b/src/main.rs index aca9838..56c2170 100644 --- a/src/main.rs +++ b/src/main.rs @@ -11,6 +11,7 @@ use inquire::Confirm; mod commands; pub(crate) mod db; +pub(crate) mod mime; mod multicall; #[cfg(feature = "use-toplevel")] mod wayland; @@ -130,6 +131,10 @@ enum Command { /// Expire new entries after duration (e.g., "3s", "500ms", "1h30m"). #[arg(long, value_parser = parse_duration)] expire_after: Option, + + /// MIME type preference for clipboard reading. + #[arg(short = 't', long, default_value = "any")] + mime_type: String, }, } @@ -433,7 +438,10 @@ fn main() -> color_eyre::eyre::Result<()> { } } }, - Some(Command::Watch { expire_after }) => { + Some(Command::Watch { + expire_after, + mime_type, + }) => { db.watch( cli.max_dedupe_search, cli.max_items, @@ -442,6 +450,7 @@ fn main() -> color_eyre::eyre::Result<()> { #[cfg(not(feature = "use-toplevel"))] &[], expire_after, + &mime_type, ); }, diff --git a/src/mime.rs b/src/mime.rs new file mode 100644 index 0000000..3761ab3 --- /dev/null +++ b/src/mime.rs @@ -0,0 +1,273 @@ +use imagesize::ImageType; + +/// Detect MIME type of clipboard data. We try binary detection first using +/// [`imagesize`] followed by a check for text/uri-list for file manager copies +/// and finally fall back to text/plain for UTF-8 or [`None`] for binary. 
+pub fn detect_mime(data: &[u8]) -> Option { + if data.is_empty() { + return None; + } + + // Try image detection first + if let Ok(img_type) = imagesize::image_type(data) { + return Some(image_type_to_mime(img_type)); + } + + // Check if it's UTF-8 text + if let Ok(text) = std::str::from_utf8(data) { + let trimmed = text.trim(); + + // Check for text/uri-list format (file paths from file managers) + if is_uri_list(trimmed) { + return Some("text/uri-list".to_string()); + } + + // Default to plain text + return Some("text/plain".to_string()); + } + + // Unknown binary data + None +} + +/// Convert [`imagesize`] [`ImageType`] to MIME type string +fn image_type_to_mime(img_type: ImageType) -> String { + let mime = match img_type { + ImageType::Png => "image/png", + ImageType::Jpeg => "image/jpeg", + ImageType::Gif => "image/gif", + ImageType::Bmp => "image/bmp", + ImageType::Tiff => "image/tiff", + ImageType::Webp => "image/webp", + ImageType::Aseprite => "image/x-aseprite", + ImageType::Dds => "image/vnd.ms-dds", + ImageType::Exr => "image/aces", + ImageType::Farbfeld => "image/farbfeld", + ImageType::Hdr => "image/vnd.radiance", + ImageType::Ico => "image/x-icon", + ImageType::Ilbm => "image/ilbm", + ImageType::Jxl => "image/jxl", + ImageType::Ktx2 => "image/ktx2", + ImageType::Pnm => "image/x-portable-anymap", + ImageType::Psd => "image/vnd.adobe.photoshop", + ImageType::Qoi => "image/qoi", + ImageType::Tga => "image/x-tga", + ImageType::Vtf => "image/x-vtf", + ImageType::Heif(imagesize::Compression::Hevc) => "image/heic", + ImageType::Heif(_) => "image/heif", + _ => "application/octet-stream", + }; + mime.to_string() +} + +/// Check if text is a URI list per RFC 2483. +/// +/// Used when copying files from file managers - they provide file paths +/// as text/uri-list format (`file://` URIs, one per line, `#` for comments). 
/// Check whether `text` looks like a `text/uri-list` payload.
///
/// Lines are trimmed; blank lines and lines starting with `#` (comments) are
/// ignored. Every remaining line must start with one of the accepted schemes
/// (`file://`, `http://`, `https://`, `ftp://`) and contain no whitespace,
/// and there must be at least one such line. Checking every line, not just
/// the first, keeps prose that merely begins with a URL from being
/// classified as a URI list.
fn is_uri_list(text: &str) -> bool {
    const SCHEMES: [&str; 4] = ["file://", "http://", "https://", "ftp://"];

    let mut saw_uri = false;
    for line in text.lines().map(str::trim) {
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        let has_scheme = SCHEMES.iter().any(|scheme| line.starts_with(scheme));
        // Whitespace inside a line means it is a sentence, not a URI.
        if !has_scheme || line.contains(char::is_whitespace) {
            return false;
        }
        saw_uri = true;
    }

    // Comment-only or empty input is not a URI list.
    saw_uri
}
assert_eq!(detect_mime(data), None); + } + + #[test] + fn test_uri_list_trailing_newline() { + let data = b"file:///foo\n"; + assert_eq!(detect_mime(data), Some("text/uri-list".to_string())); + } + + #[test] + fn test_uri_list_ftp() { + let data = b"ftp://host/path"; + assert_eq!(detect_mime(data), Some("text/uri-list".to_string())); + } + + #[test] + fn test_uri_list_mixed_schemes() { + let data = b"file:///home/user/doc.pdf\nhttps://example.com/file.zip"; + assert_eq!(detect_mime(data), Some("text/uri-list".to_string())); + } + + #[test] + fn test_plain_url_in_text() { + let data = b"visit http://example.com for info"; + assert_eq!(detect_mime(data), Some("text/plain".to_string())); + } + + #[test] + fn test_png_magic_bytes() { + // Real PNG header: 8-byte signature + minimal IHDR chunk + let data: &[u8] = &[ + 0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, // PNG signature + 0x00, 0x00, 0x00, 0x0D, // IHDR chunk length + 0x49, 0x48, 0x44, 0x52, // "IHDR" + 0x00, 0x00, 0x00, 0x01, // width: 1 + 0x00, 0x00, 0x00, 0x01, // height: 1 + 0x08, 0x02, // bit depth: 8, color type: 2 (RGB) + 0x00, 0x00, 0x00, // compression, filter, interlace + 0x90, 0x77, 0x53, 0xDE, // CRC + ]; + assert_eq!(detect_mime(data), Some("image/png".to_string())); + } + + #[test] + fn test_jpeg_magic_bytes() { + // JPEG SOI marker + APP0 (JFIF) marker + let data: &[u8] = &[ + 0xFF, 0xD8, 0xFF, 0xE0, // SOI + APP0 + 0x00, 0x10, // Length + 0x4A, 0x46, 0x49, 0x46, 0x00, // "JFIF\0" + 0x01, 0x01, 0x00, 0x00, 0x01, 0x00, 0x01, 0x00, 0x00, + ]; + assert_eq!(detect_mime(data), Some("image/jpeg".to_string())); + } + + #[test] + fn test_gif_magic_bytes() { + // GIF89a header + let data: &[u8] = &[ + 0x47, 0x49, 0x46, 0x38, 0x39, 0x61, // "GIF89a" + 0x01, 0x00, 0x01, 0x00, // 1x1 + 0x80, 0x00, 0x00, // GCT flag, bg, aspect + ]; + assert_eq!(detect_mime(data), Some("image/gif".to_string())); + } + + #[test] + fn test_webp_magic_bytes() { + // RIFF....WEBP header + let data: &[u8] = &[ + 0x52, 0x49, 
0x46, 0x46, // "RIFF" + 0x24, 0x00, 0x00, 0x00, // file size + 0x57, 0x45, 0x42, 0x50, // "WEBP" + 0x56, 0x50, 0x38, 0x20, // "VP8 " + 0x18, 0x00, 0x00, 0x00, // chunk size + 0x30, 0x01, 0x00, 0x9D, 0x01, 0x2A, // VP8 bitstream + 0x01, 0x00, 0x01, 0x00, // width/height + ]; + assert_eq!(detect_mime(data), Some("image/webp".to_string())); + } + + #[test] + fn test_whitespace_only() { + let data = b" \n\t "; + // Valid UTF-8 text, even if only whitespace. [`detect_mime`] doesn't reject + // it (store_entry rejects it separately). As text it's text/plain. + assert_eq!(detect_mime(data), Some("text/plain".to_string())); + } + + #[test] + fn test_image_type_to_mime_coverage() { + assert_eq!(image_type_to_mime(ImageType::Png), "image/png"); + assert_eq!(image_type_to_mime(ImageType::Jpeg), "image/jpeg"); + assert_eq!(image_type_to_mime(ImageType::Gif), "image/gif"); + assert_eq!(image_type_to_mime(ImageType::Bmp), "image/bmp"); + assert_eq!(image_type_to_mime(ImageType::Tiff), "image/tiff"); + assert_eq!(image_type_to_mime(ImageType::Webp), "image/webp"); + assert_eq!(image_type_to_mime(ImageType::Aseprite), "image/x-aseprite"); + assert_eq!(image_type_to_mime(ImageType::Dds), "image/vnd.ms-dds"); + assert_eq!(image_type_to_mime(ImageType::Exr), "image/aces"); + assert_eq!(image_type_to_mime(ImageType::Farbfeld), "image/farbfeld"); + assert_eq!(image_type_to_mime(ImageType::Hdr), "image/vnd.radiance"); + assert_eq!(image_type_to_mime(ImageType::Ico), "image/x-icon"); + assert_eq!(image_type_to_mime(ImageType::Ilbm), "image/ilbm"); + assert_eq!(image_type_to_mime(ImageType::Jxl), "image/jxl"); + assert_eq!(image_type_to_mime(ImageType::Ktx2), "image/ktx2"); + assert_eq!( + image_type_to_mime(ImageType::Pnm), + "image/x-portable-anymap" + ); + assert_eq!( + image_type_to_mime(ImageType::Psd), + "image/vnd.adobe.photoshop" + ); + assert_eq!(image_type_to_mime(ImageType::Qoi), "image/qoi"); + assert_eq!(image_type_to_mime(ImageType::Tga), "image/x-tga"); + 
assert_eq!(image_type_to_mime(ImageType::Vtf), "image/x-vtf"); + assert_eq!( + image_type_to_mime(ImageType::Heif(imagesize::Compression::Hevc)), + "image/heic" + ); + assert_eq!( + image_type_to_mime(ImageType::Heif(imagesize::Compression::Av1)), + "image/heif" + ); + } +}