mirror of
https://github.com/NotAShelf/stash.git
synced 2026-04-12 14:07:42 +00:00
db: improve content hashing; cache only positive scan result
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: If8035bf1dcd598a992762b9c714253406a6a6964
This commit is contained in:
parent
0865a1f139
commit
373affabee
4 changed files with 115 additions and 19 deletions
|
|
@ -38,6 +38,7 @@ impl StoreCommand for SqliteClipboardDb {
|
|||
Some(excluded_apps),
|
||||
min_size,
|
||||
max_size,
|
||||
None, // no pre-computed hash for CLI store
|
||||
)?;
|
||||
log::info!("Entry stored");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -363,6 +363,8 @@ impl WatchCommand for SqliteClipboardDb {
|
|||
if last_hash != Some(current_hash) {
|
||||
// Clone buf for the async operation since it needs 'static
|
||||
let buf_clone = buf.clone();
|
||||
#[allow(clippy::cast_possible_wrap)]
|
||||
let content_hash = Some(current_hash as i64);
|
||||
match async_db
|
||||
.store_entry(
|
||||
buf_clone,
|
||||
|
|
@ -371,6 +373,7 @@ impl WatchCommand for SqliteClipboardDb {
|
|||
Some(excluded_apps.to_vec()),
|
||||
min_size,
|
||||
max_size,
|
||||
content_hash,
|
||||
)
|
||||
.await
|
||||
{
|
||||
|
|
@ -433,7 +436,7 @@ impl WatchCommand for SqliteClipboardDb {
|
|||
}
|
||||
}
|
||||
|
||||
/// Unit-testable helper: given ordered offers and a preference, return the
|
||||
/// Given ordered offers and a preference, return the
|
||||
/// chosen MIME type. This mirrors the selection logic in
|
||||
/// [`negotiate_mime_type`] without requiring a Wayland connection.
|
||||
#[cfg(test)]
|
||||
|
|
|
|||
113
src/db/mod.rs
113
src/db/mod.rs
|
|
@ -5,11 +5,67 @@ use std::{
|
|||
io::{BufRead, BufReader, Read, Write},
|
||||
path::PathBuf,
|
||||
str,
|
||||
sync::OnceLock,
|
||||
sync::{Mutex, OnceLock},
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
pub mod nonblocking;
|
||||
|
||||
/// Cache for process scanning results to avoid expensive `/proc` reads on every
|
||||
/// store operation. TTL of 5 seconds balances freshness with performance.
|
||||
struct ProcessCache {
|
||||
last_scan: Instant,
|
||||
excluded_app: Option<String>,
|
||||
}
|
||||
|
||||
impl ProcessCache {
|
||||
const TTL: Duration = Duration::from_secs(5);
|
||||
|
||||
/// Check cache for recently active excluded app.
|
||||
/// Only caches positive results (when an excluded app IS found).
|
||||
/// Negative results (no excluded apps) are never cached to ensure
|
||||
/// we don't miss exclusions when users switch apps.
|
||||
fn get(excluded_apps: &[String]) -> Option<String> {
|
||||
static CACHE: OnceLock<Mutex<ProcessCache>> = OnceLock::new();
|
||||
let cache = CACHE.get_or_init(|| {
|
||||
Mutex::new(ProcessCache {
|
||||
last_scan: Instant::now() - Self::TTL, /* Expire immediately on
|
||||
* first use */
|
||||
excluded_app: None,
|
||||
})
|
||||
});
|
||||
|
||||
if let Ok(mut cache) = cache.lock() {
|
||||
// Check if we have a valid cached positive result
|
||||
if cache.last_scan.elapsed() < Self::TTL
|
||||
&& let Some(ref app) = cache.excluded_app
|
||||
{
|
||||
// Verify the cached app is still in the exclusion list
|
||||
if app_matches_exclusion(app, excluded_apps) {
|
||||
return Some(app.clone());
|
||||
}
|
||||
}
|
||||
|
||||
// No valid cache, scan and only cache positive results
|
||||
let result = get_recently_active_excluded_app_uncached(excluded_apps);
|
||||
if result.is_some() {
|
||||
cache.last_scan = Instant::now();
|
||||
cache.excluded_app = result.clone();
|
||||
} else {
|
||||
// Don't cache negative results. We expire cache immediately so next
|
||||
// call will rescan. This ensures we don't miss exclusions when user
|
||||
// switches from non-excluded to excluded app.
|
||||
cache.last_scan = Instant::now() - Self::TTL;
|
||||
cache.excluded_app = None;
|
||||
}
|
||||
result
|
||||
} else {
|
||||
// Lock poisoned - fall back to uncached
|
||||
get_recently_active_excluded_app_uncached(excluded_apps)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// FNV-1a hasher for deterministic hashing across process runs.
|
||||
/// Unlike DefaultHasher (SipHash with random seed), this produces stable
|
||||
/// hashes.
|
||||
|
|
@ -187,6 +243,18 @@ pub enum StashError {
|
|||
}
|
||||
|
||||
pub trait ClipboardDb {
|
||||
/// Store a new clipboard entry.
|
||||
///
|
||||
/// # Arguments
|
||||
/// * `input` - Reader for the clipboard content
|
||||
/// * `max_dedupe_search` - Maximum number of recent entries to check for
|
||||
/// duplicates
|
||||
/// * `max_items` - Maximum total entries to keep in database
|
||||
/// * `excluded_apps` - List of app names to exclude
|
||||
/// * `min_size` - Minimum content size (None for no minimum)
|
||||
/// * `max_size` - Maximum content size
|
||||
/// * `content_hash` - Optional pre-computed content hash (avoids re-hashing)
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn store_entry(
|
||||
&self,
|
||||
input: impl Read,
|
||||
|
|
@ -195,6 +263,7 @@ pub trait ClipboardDb {
|
|||
excluded_apps: Option<&[String]>,
|
||||
min_size: Option<usize>,
|
||||
max_size: usize,
|
||||
content_hash: Option<i64>,
|
||||
) -> Result<i64, StashError>;
|
||||
|
||||
fn deduplicate_by_hash(
|
||||
|
|
@ -308,8 +377,8 @@ impl SqliteClipboardDb {
|
|||
})?;
|
||||
}
|
||||
|
||||
// Add content_hash column if it doesn't exist
|
||||
// Migration MUST be done to avoid breaking existing installations.
|
||||
// Add content_hash column if it doesn't exist. Migration MUST be done to
|
||||
// avoid breaking existing installations.
|
||||
if schema_version < 2 {
|
||||
let has_content_hash: bool = tx
|
||||
.query_row(
|
||||
|
|
@ -546,6 +615,7 @@ impl ClipboardDb for SqliteClipboardDb {
|
|||
excluded_apps: Option<&[String]>,
|
||||
min_size: Option<usize>,
|
||||
max_size: usize,
|
||||
content_hash: Option<i64>,
|
||||
) -> Result<i64, StashError> {
|
||||
let mut buf = Vec::new();
|
||||
if input.read_to_end(&mut buf).is_err() || buf.is_empty() {
|
||||
|
|
@ -568,11 +638,14 @@ impl ClipboardDb for SqliteClipboardDb {
|
|||
return Err(StashError::AllWhitespace);
|
||||
}
|
||||
|
||||
// Calculate content hash for deduplication
|
||||
let mut hasher = Fnv1aHasher::new();
|
||||
hasher.write(&buf);
|
||||
#[allow(clippy::cast_possible_wrap)]
|
||||
let content_hash = hasher.finish() as i64;
|
||||
// Use pre-computed hash if provided, otherwise calculate it
|
||||
let content_hash = content_hash.unwrap_or_else(|| {
|
||||
let mut hasher = Fnv1aHasher::new();
|
||||
hasher.write(&buf);
|
||||
#[allow(clippy::cast_possible_wrap)]
|
||||
let hash = hasher.finish() as i64;
|
||||
hash
|
||||
});
|
||||
|
||||
let mime = crate::mime::detect_mime(&buf);
|
||||
|
||||
|
|
@ -1181,7 +1254,8 @@ fn detect_excluded_app_activity(excluded_apps: &[String]) -> bool {
|
|||
}
|
||||
|
||||
// Strategy 2: Check recently active processes (timing correlation)
|
||||
if let Some(active_app) = get_recently_active_excluded_app(excluded_apps) {
|
||||
// Use cached results to avoid expensive /proc scanning
|
||||
if let Some(active_app) = ProcessCache::get(excluded_apps) {
|
||||
debug!("Clipboard excluded: recent activity from {active_app}");
|
||||
return true;
|
||||
}
|
||||
|
|
@ -1212,7 +1286,8 @@ fn get_focused_window_app() -> Option<String> {
|
|||
}
|
||||
|
||||
/// Check for recently active excluded apps using CPU and I/O activity.
|
||||
fn get_recently_active_excluded_app(
|
||||
/// This is the uncached version - use `ProcessCache::get()` for cached access.
|
||||
fn get_recently_active_excluded_app_uncached(
|
||||
excluded_apps: &[String],
|
||||
) -> Option<String> {
|
||||
let proc_dir = std::path::Path::new("/proc");
|
||||
|
|
@ -1586,7 +1661,7 @@ mod tests {
|
|||
let cursor = std::io::Cursor::new(test_data.to_vec());
|
||||
|
||||
let id = db
|
||||
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE)
|
||||
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE, None)
|
||||
.expect("Failed to store entry");
|
||||
|
||||
let content_hash: Option<i64> = db
|
||||
|
|
@ -1622,7 +1697,7 @@ mod tests {
|
|||
let test_data = b"Test content for copy";
|
||||
let cursor = std::io::Cursor::new(test_data.to_vec());
|
||||
let id_a = db
|
||||
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE)
|
||||
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE, None)
|
||||
.expect("Failed to store entry A");
|
||||
|
||||
let original_last_accessed: i64 = db
|
||||
|
|
@ -1725,6 +1800,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store URI list");
|
||||
|
||||
|
|
@ -1758,6 +1834,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store image");
|
||||
|
||||
|
|
@ -1786,6 +1863,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store first");
|
||||
let _id2 = db
|
||||
|
|
@ -1796,6 +1874,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store second");
|
||||
|
||||
|
|
@ -1831,6 +1910,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store");
|
||||
}
|
||||
|
|
@ -1852,6 +1932,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
);
|
||||
assert!(matches!(result, Err(StashError::EmptyOrTooLarge)));
|
||||
}
|
||||
|
|
@ -1866,6 +1947,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
);
|
||||
assert!(matches!(result, Err(StashError::AllWhitespace)));
|
||||
}
|
||||
|
|
@ -1882,6 +1964,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
);
|
||||
assert!(matches!(result, Err(StashError::TooLarge(5000000))));
|
||||
}
|
||||
|
|
@ -1897,6 +1980,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store");
|
||||
|
||||
|
|
@ -1923,6 +2007,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store");
|
||||
db.store_entry(
|
||||
|
|
@ -1932,6 +2017,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store");
|
||||
|
||||
|
|
@ -1959,6 +2045,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store");
|
||||
}
|
||||
|
|
@ -2038,6 +2125,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store");
|
||||
|
||||
|
|
@ -2122,6 +2210,7 @@ mod tests {
|
|||
None,
|
||||
None,
|
||||
DEFAULT_MAX_ENTRY_SIZE,
|
||||
None,
|
||||
)
|
||||
.expect("Failed to store");
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ impl AsyncClipboardDb {
|
|||
excluded_apps: Option<Vec<String>>,
|
||||
min_size: Option<usize>,
|
||||
max_size: usize,
|
||||
content_hash: Option<i64>,
|
||||
) -> Result<i64, StashError> {
|
||||
let path = self.db_path.clone();
|
||||
blocking::unblock(move || {
|
||||
|
|
@ -36,6 +37,7 @@ impl AsyncClipboardDb {
|
|||
excluded_apps.as_deref(),
|
||||
min_size,
|
||||
max_size,
|
||||
content_hash,
|
||||
)
|
||||
})
|
||||
.await
|
||||
|
|
@ -170,7 +172,7 @@ mod tests {
|
|||
let data = b"async test data";
|
||||
|
||||
let id = async_db
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||
.await
|
||||
.expect("Failed to store entry");
|
||||
|
||||
|
|
@ -199,7 +201,7 @@ mod tests {
|
|||
let data = b"expiring entry";
|
||||
|
||||
let id = async_db
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||
.await
|
||||
.expect("Failed to store entry");
|
||||
|
||||
|
|
@ -231,7 +233,7 @@ mod tests {
|
|||
let data = b"entry to expire";
|
||||
|
||||
let id = async_db
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||
.await
|
||||
.expect("Failed to store entry");
|
||||
|
||||
|
|
@ -278,12 +280,12 @@ mod tests {
|
|||
let data = b"clone test";
|
||||
|
||||
let id1 = async_db
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||
.await
|
||||
.expect("Failed with original");
|
||||
|
||||
let id2 = cloned
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||
.await
|
||||
.expect("Failed with clone");
|
||||
|
||||
|
|
@ -302,7 +304,8 @@ mod tests {
|
|||
let db = async_db.clone();
|
||||
let data = format!("concurrent test {}", i).into_bytes();
|
||||
smol::spawn(async move {
|
||||
db.store_entry(data, 100, 1000, None, None, 5_000_000).await
|
||||
db.store_entry(data, 100, 1000, None, None, 5_000_000, None)
|
||||
.await
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue