db: improve content hashing; cache only positive scan result

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: If8035bf1dcd598a992762b9c714253406a6a6964
This commit is contained in:
raf 2026-03-05 14:27:10 +03:00
commit 373affabee
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
4 changed files with 115 additions and 19 deletions

View file

@ -38,6 +38,7 @@ impl StoreCommand for SqliteClipboardDb {
Some(excluded_apps),
min_size,
max_size,
None, // no pre-computed hash for CLI store
)?;
log::info!("Entry stored");
}

View file

@ -363,6 +363,8 @@ impl WatchCommand for SqliteClipboardDb {
if last_hash != Some(current_hash) {
// Clone buf for the async operation since it needs 'static
let buf_clone = buf.clone();
#[allow(clippy::cast_possible_wrap)]
let content_hash = Some(current_hash as i64);
match async_db
.store_entry(
buf_clone,
@ -371,6 +373,7 @@ impl WatchCommand for SqliteClipboardDb {
Some(excluded_apps.to_vec()),
min_size,
max_size,
content_hash,
)
.await
{
@ -433,7 +436,7 @@ impl WatchCommand for SqliteClipboardDb {
}
}
/// Unit-testable helper: given ordered offers and a preference, return the
/// Given ordered offers and a preference, return the
/// chosen MIME type. This mirrors the selection logic in
/// [`negotiate_mime_type`] without requiring a Wayland connection.
#[cfg(test)]

View file

@ -5,11 +5,67 @@ use std::{
io::{BufRead, BufReader, Read, Write},
path::PathBuf,
str,
sync::OnceLock,
sync::{Mutex, OnceLock},
time::{Duration, Instant},
};
pub mod nonblocking;
/// Cache for process scanning results to avoid expensive `/proc` reads on every
/// store operation. TTL of 5 seconds balances freshness with performance.
struct ProcessCache {
    // When the cached positive result was recorded.
    last_scan: Instant,
    // The excluded app found by the last scan, if any. `None` means "no
    // cached positive result"; `get` always rescans in that case.
    excluded_app: Option<String>,
}

impl ProcessCache {
    const TTL: Duration = Duration::from_secs(5);

    /// Check cache for recently active excluded app.
    /// Only caches positive results (when an excluded app IS found).
    /// Negative results (no excluded apps) are never cached to ensure
    /// we don't miss exclusions when users switch apps.
    fn get(excluded_apps: &[String]) -> Option<String> {
        static CACHE: OnceLock<Mutex<ProcessCache>> = OnceLock::new();
        let cache = CACHE.get_or_init(|| {
            Mutex::new(ProcessCache {
                // `excluded_app: None` alone guarantees a miss on first use;
                // no need to back-date `last_scan`. (The previous
                // `Instant::now() - Self::TTL` could panic: `Instant`
                // subtraction underflows on platforms whose monotonic clock
                // starts near zero, e.g. shortly after boot.)
                last_scan: Instant::now(),
                excluded_app: None,
            })
        });

        if let Ok(mut cache) = cache.lock() {
            // Check if we have a valid cached positive result
            if cache.last_scan.elapsed() < Self::TTL
                && let Some(ref app) = cache.excluded_app
            {
                // Verify the cached app is still in the exclusion list
                if app_matches_exclusion(app, excluded_apps) {
                    return Some(app.clone());
                }
            }

            // No valid cache, scan and only cache positive results
            let result = get_recently_active_excluded_app_uncached(excluded_apps);
            if result.is_some() {
                cache.last_scan = Instant::now();
                cache.excluded_app = result.clone();
            } else {
                // Don't cache negative results. With `excluded_app = None` the
                // positive-hit branch above can never fire, so the next call
                // will rescan regardless of `last_scan`. This ensures we don't
                // miss exclusions when the user switches from a non-excluded
                // to an excluded app.
                cache.excluded_app = None;
            }
            result
        } else {
            // Lock poisoned - fall back to uncached
            get_recently_active_excluded_app_uncached(excluded_apps)
        }
    }
}
/// FNV-1a hasher for deterministic hashing across process runs.
/// Unlike DefaultHasher (SipHash with random seed), this produces stable
/// hashes.
@ -187,6 +243,18 @@ pub enum StashError {
}
pub trait ClipboardDb {
/// Store a new clipboard entry.
///
/// # Arguments
/// * `input` - Reader for the clipboard content
/// * `max_dedupe_search` - Maximum number of recent entries to check for
/// duplicates
/// * `max_items` - Maximum total entries to keep in database
/// * `excluded_apps` - List of app names to exclude
/// * `min_size` - Minimum content size (None for no minimum)
/// * `max_size` - Maximum content size
/// * `content_hash` - Optional pre-computed content hash (avoids re-hashing)
#[allow(clippy::too_many_arguments)]
fn store_entry(
&self,
input: impl Read,
@ -195,6 +263,7 @@ pub trait ClipboardDb {
excluded_apps: Option<&[String]>,
min_size: Option<usize>,
max_size: usize,
content_hash: Option<i64>,
) -> Result<i64, StashError>;
fn deduplicate_by_hash(
@ -308,8 +377,8 @@ impl SqliteClipboardDb {
})?;
}
// Add content_hash column if it doesn't exist
// Migration MUST be done to avoid breaking existing installations.
// Add content_hash column if it doesn't exist. Migration MUST be done to
// avoid breaking existing installations.
if schema_version < 2 {
let has_content_hash: bool = tx
.query_row(
@ -546,6 +615,7 @@ impl ClipboardDb for SqliteClipboardDb {
excluded_apps: Option<&[String]>,
min_size: Option<usize>,
max_size: usize,
content_hash: Option<i64>,
) -> Result<i64, StashError> {
let mut buf = Vec::new();
if input.read_to_end(&mut buf).is_err() || buf.is_empty() {
@ -568,11 +638,14 @@ impl ClipboardDb for SqliteClipboardDb {
return Err(StashError::AllWhitespace);
}
// Calculate content hash for deduplication
// Use pre-computed hash if provided, otherwise calculate it
let content_hash = content_hash.unwrap_or_else(|| {
let mut hasher = Fnv1aHasher::new();
hasher.write(&buf);
#[allow(clippy::cast_possible_wrap)]
let content_hash = hasher.finish() as i64;
let hash = hasher.finish() as i64;
hash
});
let mime = crate::mime::detect_mime(&buf);
@ -1181,7 +1254,8 @@ fn detect_excluded_app_activity(excluded_apps: &[String]) -> bool {
}
// Strategy 2: Check recently active processes (timing correlation)
if let Some(active_app) = get_recently_active_excluded_app(excluded_apps) {
// Use cached results to avoid expensive /proc scanning
if let Some(active_app) = ProcessCache::get(excluded_apps) {
debug!("Clipboard excluded: recent activity from {active_app}");
return true;
}
@ -1212,7 +1286,8 @@ fn get_focused_window_app() -> Option<String> {
}
/// Check for recently active excluded apps using CPU and I/O activity.
fn get_recently_active_excluded_app(
/// This is the uncached version - use `ProcessCache::get()` for cached access.
fn get_recently_active_excluded_app_uncached(
excluded_apps: &[String],
) -> Option<String> {
let proc_dir = std::path::Path::new("/proc");
@ -1586,7 +1661,7 @@ mod tests {
let cursor = std::io::Cursor::new(test_data.to_vec());
let id = db
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE)
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE, None)
.expect("Failed to store entry");
let content_hash: Option<i64> = db
@ -1622,7 +1697,7 @@ mod tests {
let test_data = b"Test content for copy";
let cursor = std::io::Cursor::new(test_data.to_vec());
let id_a = db
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE)
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE, None)
.expect("Failed to store entry A");
let original_last_accessed: i64 = db
@ -1725,6 +1800,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store URI list");
@ -1758,6 +1834,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store image");
@ -1786,6 +1863,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store first");
let _id2 = db
@ -1796,6 +1874,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store second");
@ -1831,6 +1910,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store");
}
@ -1852,6 +1932,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
);
assert!(matches!(result, Err(StashError::EmptyOrTooLarge)));
}
@ -1866,6 +1947,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
);
assert!(matches!(result, Err(StashError::AllWhitespace)));
}
@ -1882,6 +1964,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
);
assert!(matches!(result, Err(StashError::TooLarge(5000000))));
}
@ -1897,6 +1980,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store");
@ -1923,6 +2007,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store");
db.store_entry(
@ -1932,6 +2017,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store");
@ -1959,6 +2045,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store");
}
@ -2038,6 +2125,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store");
@ -2122,6 +2210,7 @@ mod tests {
None,
None,
DEFAULT_MAX_ENTRY_SIZE,
None,
)
.expect("Failed to store");

View file

@ -25,6 +25,7 @@ impl AsyncClipboardDb {
excluded_apps: Option<Vec<String>>,
min_size: Option<usize>,
max_size: usize,
content_hash: Option<i64>,
) -> Result<i64, StashError> {
let path = self.db_path.clone();
blocking::unblock(move || {
@ -36,6 +37,7 @@ impl AsyncClipboardDb {
excluded_apps.as_deref(),
min_size,
max_size,
content_hash,
)
})
.await
@ -170,7 +172,7 @@ mod tests {
let data = b"async test data";
let id = async_db
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
.await
.expect("Failed to store entry");
@ -199,7 +201,7 @@ mod tests {
let data = b"expiring entry";
let id = async_db
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
.await
.expect("Failed to store entry");
@ -231,7 +233,7 @@ mod tests {
let data = b"entry to expire";
let id = async_db
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
.await
.expect("Failed to store entry");
@ -278,12 +280,12 @@ mod tests {
let data = b"clone test";
let id1 = async_db
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
.await
.expect("Failed with original");
let id2 = cloned
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
.await
.expect("Failed with clone");
@ -302,7 +304,8 @@ mod tests {
let db = async_db.clone();
let data = format!("concurrent test {}", i).into_bytes();
smol::spawn(async move {
db.store_entry(data, 100, 1000, None, None, 5_000_000).await
db.store_entry(data, 100, 1000, None, None, 5_000_000, None)
.await
})
})
.collect();