mime: refactor mime detection to separate module; streamline

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I489054d2537a4c0de32d79f793478c206a6a6964
This commit is contained in:
raf 2026-01-23 22:38:21 +03:00
commit ff2f272055
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
5 changed files with 189 additions and 74 deletions

View file

@ -1,12 +1,6 @@
use std::io::{self, BufRead};
use crate::db::{
ClipboardDb,
Entry,
SqliteClipboardDb,
StashError,
detect_mime,
};
use crate::db::{ClipboardDb, Entry, SqliteClipboardDb, StashError};
pub trait ImportCommand {
/// Import clipboard entries from TSV format.
@ -44,7 +38,7 @@ impl ImportCommand for SqliteClipboardDb {
let entry = Entry {
contents: val.as_bytes().to_vec(),
mime: detect_mime(val.as_bytes()),
mime: crate::mime::detect_mime(val.as_bytes()),
};
self

View file

@ -100,6 +100,7 @@ pub trait WatchCommand {
max_items: u64,
excluded_apps: &[String],
expire_after: Option<Duration>,
mime_type_preference: &str,
);
}
@ -110,9 +111,13 @@ impl WatchCommand for SqliteClipboardDb {
max_items: u64,
excluded_apps: &[String],
expire_after: Option<Duration>,
mime_type_preference: &str,
) {
smol::block_on(async {
log::info!("Starting clipboard watch daemon");
log::info!(
"Starting clipboard watch daemon with MIME type preference: \
{mime_type_preference}"
);
// Build expiration queue from existing entries
let mut exp_queue = ExpirationQueue::new();
@ -160,12 +165,19 @@ impl WatchCommand for SqliteClipboardDb {
hasher.finish()
};
// Convert MIME type preference string to wl_clipboard_rs enum
let mime_type = match mime_type_preference {
"text" => wl_clipboard_rs::paste::MimeType::Text,
"image" => {
wl_clipboard_rs::paste::MimeType::TextWithPriority("image/png")
},
_ => wl_clipboard_rs::paste::MimeType::Any,
};
// Initialize with current clipboard
if let Ok((mut reader, _)) = get_contents(
ClipboardType::Regular,
Seat::Unspecified,
wl_clipboard_rs::paste::MimeType::Any,
) {
if let Ok((mut reader, _)) =
get_contents(ClipboardType::Regular, Seat::Unspecified, mime_type)
{
buf.clear();
if reader.read_to_end(&mut buf).is_ok() && !buf.is_empty() {
last_hash = Some(hash_contents(&buf));
@ -205,7 +217,7 @@ impl WatchCommand for SqliteClipboardDb {
if let Ok((mut reader, _)) = get_contents(
ClipboardType::Regular,
Seat::Unspecified,
wl_clipboard_rs::paste::MimeType::Any,
mime_type,
) {
let mut current_buf = Vec::new();
if reader.read_to_end(&mut current_buf).is_ok()
@ -250,11 +262,8 @@ impl WatchCommand for SqliteClipboardDb {
}
// Normal clipboard polling
match get_contents(
ClipboardType::Regular,
Seat::Unspecified,
wl_clipboard_rs::paste::MimeType::Any,
) {
match get_contents(ClipboardType::Regular, Seat::Unspecified, mime_type)
{
Ok((mut reader, _mime_type)) => {
buf.clear();
if let Err(e) = reader.read_to_end(&mut buf) {

View file

@ -10,7 +10,6 @@ use std::{
};
use base64::prelude::*;
use imagesize::ImageType;
use log::{debug, error, warn};
use regex::Regex;
use rusqlite::{Connection, OptionalExtension, params};
@ -429,7 +428,7 @@ impl ClipboardDb for SqliteClipboardDb {
#[allow(clippy::cast_possible_wrap)]
let content_hash = hasher.finish() as i64;
let mime = detect_mime_optimized(&buf);
let mime = crate::mime::detect_mime(&buf);
// Try to load regex from systemd credential file, then env var
let regex = load_sensitive_regex();
@ -884,51 +883,6 @@ pub fn extract_id(input: &str) -> Result<i64, &'static str> {
id_str.parse().map_err(|_| "invalid id")
}
/// Fast-path MIME detection for clipboard payloads.
///
/// Anything that decodes as UTF-8 is reported as `text/plain` straight away;
/// only payloads that fail UTF-8 validation are handed to [`detect_mime`]
/// for image sniffing.
pub fn detect_mime_optimized(data: &[u8]) -> Option<String> {
    match std::str::from_utf8(data) {
        // Valid UTF-8: skip image detection entirely.
        Ok(_) => Some(String::from("text/plain")),
        // Binary payload: fall through to image detection.
        Err(_) => detect_mime(data),
    }
}
/// Sniff an image MIME type from raw bytes using [`imagesize`].
///
/// Returns `None` when `imagesize` does not recognise the data. Image types
/// without a dedicated mapping below are reported as
/// `application/octet-stream`.
pub fn detect_mime(data: &[u8]) -> Option<String> {
    imagesize::image_type(data).ok().map(|kind| {
        let name = match kind {
            ImageType::Png => "image/png",
            ImageType::Jpeg => "image/jpeg",
            ImageType::Gif => "image/gif",
            ImageType::Bmp => "image/bmp",
            ImageType::Tiff => "image/tiff",
            ImageType::Webp => "image/webp",
            ImageType::Aseprite => "image/x-aseprite",
            ImageType::Dds => "image/vnd.ms-dds",
            ImageType::Exr => "image/aces",
            ImageType::Farbfeld => "image/farbfeld",
            ImageType::Hdr => "image/vnd.radiance",
            ImageType::Ico => "image/x-icon",
            ImageType::Ilbm => "image/ilbm",
            ImageType::Jxl => "image/jxl",
            ImageType::Ktx2 => "image/ktx2",
            ImageType::Pnm => "image/x-portable-anymap",
            ImageType::Psd => "image/vnd.adobe.photoshop",
            ImageType::Qoi => "image/qoi",
            ImageType::Tga => "image/x-tga",
            ImageType::Vtf => "image/x-vtf",
            // HEIF containers: HEVC-compressed ones get the more specific type.
            ImageType::Heif(imagesize::Compression::Hevc) => "image/heic",
            ImageType::Heif(_) => "image/heif",
            _ => "application/octet-stream",
        };
        name.to_string()
    })
}
pub fn preview_entry(data: &[u8], mime: Option<&str>, width: u32) -> String {
if let Some(mime) = mime {
if mime.starts_with("image/") {
@ -1239,7 +1193,7 @@ mod tests {
assert_eq!(
get_schema_version(&db.conn).expect("Failed to get schema version"),
3
5
);
assert!(table_column_exists(&db.conn, "clipboard", "content_hash"));
@ -1290,7 +1244,7 @@ mod tests {
assert_eq!(
get_schema_version(&db.conn)
.expect("Failed to get version after migration"),
3
5
);
assert!(table_column_exists(&db.conn, "clipboard", "content_hash"));
@ -1332,7 +1286,7 @@ mod tests {
assert_eq!(
get_schema_version(&db.conn)
.expect("Failed to get version after migration"),
3
5
);
assert!(table_column_exists(&db.conn, "clipboard", "content_hash"));
@ -1375,7 +1329,7 @@ mod tests {
assert_eq!(
get_schema_version(&db.conn)
.expect("Failed to get version after migration"),
3
5
);
assert!(table_column_exists(&db.conn, "clipboard", "last_accessed"));
@ -1411,7 +1365,7 @@ mod tests {
get_schema_version(&db2.conn).expect("Failed to get version");
assert_eq!(version_after_first, version_after_second);
assert_eq!(version_after_first, 3);
assert_eq!(version_after_first, 5);
}
#[test]
@ -1540,7 +1494,7 @@ mod tests {
assert_eq!(
get_schema_version(&db.conn).expect("Failed to get version"),
3
5
);
let count: i64 = db

View file

@ -11,6 +11,7 @@ use inquire::Confirm;
mod commands;
pub(crate) mod db;
pub(crate) mod mime;
mod multicall;
#[cfg(feature = "use-toplevel")] mod wayland;
@ -130,6 +131,10 @@ enum Command {
/// Expire new entries after duration (e.g., "3s", "500ms", "1h30m").
#[arg(long, value_parser = parse_duration)]
expire_after: Option<Duration>,
/// MIME type preference for clipboard reading.
#[arg(short = 't', long, default_value = "any")]
mime_type: String,
},
}
@ -433,7 +438,10 @@ fn main() -> color_eyre::eyre::Result<()> {
}
}
},
Some(Command::Watch { expire_after }) => {
Some(Command::Watch {
expire_after,
mime_type,
}) => {
db.watch(
cli.max_dedupe_search,
cli.max_items,
@ -442,6 +450,7 @@ fn main() -> color_eyre::eyre::Result<()> {
#[cfg(not(feature = "use-toplevel"))]
&[],
expire_after,
&mime_type,
);
},

149
src/mime.rs Normal file
View file

@ -0,0 +1,149 @@
use imagesize::ImageType;
/// Detect the MIME type of a clipboard payload.
///
/// Detection order:
/// 1. empty input yields [`None`];
/// 2. image sniffing via [`imagesize`];
/// 3. valid UTF-8 is `text/uri-list` when the trimmed text looks like a URI
///    list (file manager copies), otherwise `text/plain`;
/// 4. any other binary data yields [`None`].
pub fn detect_mime(data: &[u8]) -> Option<String> {
    if data.is_empty() {
        return None;
    }

    // NOTE(review): image sniffing runs before the UTF-8 check, so text that
    // happens to begin with an image signature would presumably be reported
    // as an image - confirm against `imagesize::image_type`.
    match imagesize::image_type(data) {
        Ok(kind) => Some(image_type_to_mime(kind)),
        Err(_) => {
            // Not an image: anything that is not valid UTF-8 is unknown binary.
            let text = std::str::from_utf8(data).ok()?;
            let mime = if is_uri_list(text.trim()) {
                "text/uri-list"
            } else {
                "text/plain"
            };
            Some(mime.to_string())
        },
    }
}
/// Map an [`imagesize`] [`ImageType`] to its MIME type string.
///
/// Image types without a dedicated entry are reported as
/// `application/octet-stream`.
fn image_type_to_mime(img_type: ImageType) -> String {
    String::from(match img_type {
        ImageType::Png => "image/png",
        ImageType::Jpeg => "image/jpeg",
        ImageType::Gif => "image/gif",
        ImageType::Bmp => "image/bmp",
        ImageType::Tiff => "image/tiff",
        ImageType::Webp => "image/webp",
        ImageType::Aseprite => "image/x-aseprite",
        ImageType::Dds => "image/vnd.ms-dds",
        ImageType::Exr => "image/aces",
        ImageType::Farbfeld => "image/farbfeld",
        ImageType::Hdr => "image/vnd.radiance",
        ImageType::Ico => "image/x-icon",
        ImageType::Ilbm => "image/ilbm",
        ImageType::Jxl => "image/jxl",
        ImageType::Ktx2 => "image/ktx2",
        ImageType::Pnm => "image/x-portable-anymap",
        ImageType::Psd => "image/vnd.adobe.photoshop",
        ImageType::Qoi => "image/qoi",
        ImageType::Tga => "image/x-tga",
        ImageType::Vtf => "image/x-vtf",
        // HEIF containers: HEVC-compressed ones get the more specific type.
        ImageType::Heif(imagesize::Compression::Hevc) => "image/heic",
        ImageType::Heif(_) => "image/heif",
        _ => "application/octet-stream",
    })
}
/// URI scheme prefixes recognised when classifying clipboard text as a URI
/// list.
const URI_SCHEMES: [&str; 4] = ["file://", "http://", "https://", "ftp://"];

/// Returns `true` when `line` starts with one of [`URI_SCHEMES`] and contains
/// no whitespace (a URI cannot contain unescaped whitespace).
fn is_uri_line(line: &str) -> bool {
    URI_SCHEMES.iter().any(|scheme| line.starts_with(scheme))
        && !line.contains(char::is_whitespace)
}

/// Check if text is a URI list per RFC 2483.
///
/// Used when copying files from file managers - they provide file paths
/// as text/uri-list format (`file://` URIs, one per line, `#` for comments).
///
/// The text qualifies only when:
/// - it begins with a known URI scheme or a `#` comment,
/// - every line that is neither blank nor a comment is a whitespace-free URI
///   with a known scheme, and
/// - at least one such URI line is present.
///
/// Validating every line (not just the first) keeps prose that merely starts
/// with a link, or a Markdown note with a `# Heading` followed by a URL, from
/// being classified as a URI list.
fn is_uri_list(text: &str) -> bool {
    // Cheap rejection before scanning lines; this also covers empty input.
    let plausible_start = text.starts_with('#')
        || URI_SCHEMES.iter().any(|scheme| text.starts_with(scheme));
    if !plausible_start {
        return false;
    }

    let mut saw_uri = false;
    // `lines()` splits on `\n` and `\r\n`; trimming drops stray padding.
    for line in text.lines().map(str::trim) {
        if line.is_empty() || line.starts_with('#') {
            continue;
        }
        if !is_uri_line(line) {
            return false;
        }
        saw_uri = true;
    }

    // A list made only of comments / blank lines carries no URI.
    saw_uri
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the owned value `detect_mime` returns for a detected type.
    fn mime(name: &str) -> Option<String> {
        Some(name.to_owned())
    }

    #[test]
    fn test_empty_data() {
        assert_eq!(detect_mime(b""), None);
    }

    #[test]
    fn test_plain_text() {
        assert_eq!(detect_mime(b"Hello, world!"), mime("text/plain"));
    }

    #[test]
    fn test_uri_list_single_file() {
        assert_eq!(
            detect_mime(b"file:///home/user/document.pdf"),
            mime("text/uri-list")
        );
    }

    #[test]
    fn test_uri_list_multiple_files() {
        assert_eq!(
            detect_mime(b"file:///home/user/file1.txt\nfile:///home/user/file2.txt"),
            mime("text/uri-list")
        );
    }

    #[test]
    fn test_uri_list_with_comments() {
        assert_eq!(
            detect_mime(b"# Comment\nfile:///home/user/file.txt"),
            mime("text/uri-list")
        );
    }

    #[test]
    fn test_uri_list_http() {
        assert_eq!(
            detect_mime(b"https://example.com/image.png"),
            mime("text/uri-list")
        );
    }

    #[test]
    fn test_not_uri_list() {
        assert_eq!(
            detect_mime(b"This is just text with file:// in the middle"),
            mime("text/plain")
        );
    }

    #[test]
    fn test_unknown_binary() {
        // Invalid UTF-8 that also matches no known image signature.
        assert_eq!(detect_mime(b"\x80\x81\x82\x83\x84\x85\x86\x87"), None);
    }
}