mirror of
https://github.com/NotAShelf/stash.git
synced 2026-04-12 22:17:41 +00:00
db: improve content hashing; cache only positive scan result
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: If8035bf1dcd598a992762b9c714253406a6a6964
This commit is contained in:
parent
0865a1f139
commit
373affabee
4 changed files with 115 additions and 19 deletions
|
|
@ -38,6 +38,7 @@ impl StoreCommand for SqliteClipboardDb {
|
||||||
Some(excluded_apps),
|
Some(excluded_apps),
|
||||||
min_size,
|
min_size,
|
||||||
max_size,
|
max_size,
|
||||||
|
None, // no pre-computed hash for CLI store
|
||||||
)?;
|
)?;
|
||||||
log::info!("Entry stored");
|
log::info!("Entry stored");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -363,6 +363,8 @@ impl WatchCommand for SqliteClipboardDb {
|
||||||
if last_hash != Some(current_hash) {
|
if last_hash != Some(current_hash) {
|
||||||
// Clone buf for the async operation since it needs 'static
|
// Clone buf for the async operation since it needs 'static
|
||||||
let buf_clone = buf.clone();
|
let buf_clone = buf.clone();
|
||||||
|
#[allow(clippy::cast_possible_wrap)]
|
||||||
|
let content_hash = Some(current_hash as i64);
|
||||||
match async_db
|
match async_db
|
||||||
.store_entry(
|
.store_entry(
|
||||||
buf_clone,
|
buf_clone,
|
||||||
|
|
@ -371,6 +373,7 @@ impl WatchCommand for SqliteClipboardDb {
|
||||||
Some(excluded_apps.to_vec()),
|
Some(excluded_apps.to_vec()),
|
||||||
min_size,
|
min_size,
|
||||||
max_size,
|
max_size,
|
||||||
|
content_hash,
|
||||||
)
|
)
|
||||||
.await
|
.await
|
||||||
{
|
{
|
||||||
|
|
@ -433,7 +436,7 @@ impl WatchCommand for SqliteClipboardDb {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Unit-testable helper: given ordered offers and a preference, return the
|
/// Given ordered offers and a preference, return the
|
||||||
/// chosen MIME type. This mirrors the selection logic in
|
/// chosen MIME type. This mirrors the selection logic in
|
||||||
/// [`negotiate_mime_type`] without requiring a Wayland connection.
|
/// [`negotiate_mime_type`] without requiring a Wayland connection.
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
|
||||||
113
src/db/mod.rs
113
src/db/mod.rs
|
|
@ -5,11 +5,67 @@ use std::{
|
||||||
io::{BufRead, BufReader, Read, Write},
|
io::{BufRead, BufReader, Read, Write},
|
||||||
path::PathBuf,
|
path::PathBuf,
|
||||||
str,
|
str,
|
||||||
sync::OnceLock,
|
sync::{Mutex, OnceLock},
|
||||||
|
time::{Duration, Instant},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub mod nonblocking;
|
pub mod nonblocking;
|
||||||
|
|
||||||
|
/// Cache for process scanning results to avoid expensive `/proc` reads on every
|
||||||
|
/// store operation. TTL of 5 seconds balances freshness with performance.
|
||||||
|
struct ProcessCache {
|
||||||
|
last_scan: Instant,
|
||||||
|
excluded_app: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ProcessCache {
|
||||||
|
const TTL: Duration = Duration::from_secs(5);
|
||||||
|
|
||||||
|
/// Check cache for recently active excluded app.
|
||||||
|
/// Only caches positive results (when an excluded app IS found).
|
||||||
|
/// Negative results (no excluded apps) are never cached to ensure
|
||||||
|
/// we don't miss exclusions when users switch apps.
|
||||||
|
fn get(excluded_apps: &[String]) -> Option<String> {
|
||||||
|
static CACHE: OnceLock<Mutex<ProcessCache>> = OnceLock::new();
|
||||||
|
let cache = CACHE.get_or_init(|| {
|
||||||
|
Mutex::new(ProcessCache {
|
||||||
|
last_scan: Instant::now() - Self::TTL, /* Expire immediately on
|
||||||
|
* first use */
|
||||||
|
excluded_app: None,
|
||||||
|
})
|
||||||
|
});
|
||||||
|
|
||||||
|
if let Ok(mut cache) = cache.lock() {
|
||||||
|
// Check if we have a valid cached positive result
|
||||||
|
if cache.last_scan.elapsed() < Self::TTL
|
||||||
|
&& let Some(ref app) = cache.excluded_app
|
||||||
|
{
|
||||||
|
// Verify the cached app is still in the exclusion list
|
||||||
|
if app_matches_exclusion(app, excluded_apps) {
|
||||||
|
return Some(app.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// No valid cache, scan and only cache positive results
|
||||||
|
let result = get_recently_active_excluded_app_uncached(excluded_apps);
|
||||||
|
if result.is_some() {
|
||||||
|
cache.last_scan = Instant::now();
|
||||||
|
cache.excluded_app = result.clone();
|
||||||
|
} else {
|
||||||
|
// Don't cache negative results. We expire cache immediately so next
|
||||||
|
// call will rescan. This ensures we don't miss exclusions when user
|
||||||
|
// switches from non-excluded to excluded app.
|
||||||
|
cache.last_scan = Instant::now() - Self::TTL;
|
||||||
|
cache.excluded_app = None;
|
||||||
|
}
|
||||||
|
result
|
||||||
|
} else {
|
||||||
|
// Lock poisoned - fall back to uncached
|
||||||
|
get_recently_active_excluded_app_uncached(excluded_apps)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// FNV-1a hasher for deterministic hashing across process runs.
|
/// FNV-1a hasher for deterministic hashing across process runs.
|
||||||
/// Unlike DefaultHasher (SipHash with random seed), this produces stable
|
/// Unlike DefaultHasher (SipHash with random seed), this produces stable
|
||||||
/// hashes.
|
/// hashes.
|
||||||
|
|
@ -187,6 +243,18 @@ pub enum StashError {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait ClipboardDb {
|
pub trait ClipboardDb {
|
||||||
|
/// Store a new clipboard entry.
|
||||||
|
///
|
||||||
|
/// # Arguments
|
||||||
|
/// * `input` - Reader for the clipboard content
|
||||||
|
/// * `max_dedupe_search` - Maximum number of recent entries to check for
|
||||||
|
/// duplicates
|
||||||
|
/// * `max_items` - Maximum total entries to keep in database
|
||||||
|
/// * `excluded_apps` - List of app names to exclude
|
||||||
|
/// * `min_size` - Minimum content size (None for no minimum)
|
||||||
|
/// * `max_size` - Maximum content size
|
||||||
|
/// * `content_hash` - Optional pre-computed content hash (avoids re-hashing)
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn store_entry(
|
fn store_entry(
|
||||||
&self,
|
&self,
|
||||||
input: impl Read,
|
input: impl Read,
|
||||||
|
|
@ -195,6 +263,7 @@ pub trait ClipboardDb {
|
||||||
excluded_apps: Option<&[String]>,
|
excluded_apps: Option<&[String]>,
|
||||||
min_size: Option<usize>,
|
min_size: Option<usize>,
|
||||||
max_size: usize,
|
max_size: usize,
|
||||||
|
content_hash: Option<i64>,
|
||||||
) -> Result<i64, StashError>;
|
) -> Result<i64, StashError>;
|
||||||
|
|
||||||
fn deduplicate_by_hash(
|
fn deduplicate_by_hash(
|
||||||
|
|
@ -308,8 +377,8 @@ impl SqliteClipboardDb {
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add content_hash column if it doesn't exist
|
// Add content_hash column if it doesn't exist. Migration MUST be done to
|
||||||
// Migration MUST be done to avoid breaking existing installations.
|
// avoid breaking existing installations.
|
||||||
if schema_version < 2 {
|
if schema_version < 2 {
|
||||||
let has_content_hash: bool = tx
|
let has_content_hash: bool = tx
|
||||||
.query_row(
|
.query_row(
|
||||||
|
|
@ -546,6 +615,7 @@ impl ClipboardDb for SqliteClipboardDb {
|
||||||
excluded_apps: Option<&[String]>,
|
excluded_apps: Option<&[String]>,
|
||||||
min_size: Option<usize>,
|
min_size: Option<usize>,
|
||||||
max_size: usize,
|
max_size: usize,
|
||||||
|
content_hash: Option<i64>,
|
||||||
) -> Result<i64, StashError> {
|
) -> Result<i64, StashError> {
|
||||||
let mut buf = Vec::new();
|
let mut buf = Vec::new();
|
||||||
if input.read_to_end(&mut buf).is_err() || buf.is_empty() {
|
if input.read_to_end(&mut buf).is_err() || buf.is_empty() {
|
||||||
|
|
@ -568,11 +638,14 @@ impl ClipboardDb for SqliteClipboardDb {
|
||||||
return Err(StashError::AllWhitespace);
|
return Err(StashError::AllWhitespace);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate content hash for deduplication
|
// Use pre-computed hash if provided, otherwise calculate it
|
||||||
let mut hasher = Fnv1aHasher::new();
|
let content_hash = content_hash.unwrap_or_else(|| {
|
||||||
hasher.write(&buf);
|
let mut hasher = Fnv1aHasher::new();
|
||||||
#[allow(clippy::cast_possible_wrap)]
|
hasher.write(&buf);
|
||||||
let content_hash = hasher.finish() as i64;
|
#[allow(clippy::cast_possible_wrap)]
|
||||||
|
let hash = hasher.finish() as i64;
|
||||||
|
hash
|
||||||
|
});
|
||||||
|
|
||||||
let mime = crate::mime::detect_mime(&buf);
|
let mime = crate::mime::detect_mime(&buf);
|
||||||
|
|
||||||
|
|
@ -1181,7 +1254,8 @@ fn detect_excluded_app_activity(excluded_apps: &[String]) -> bool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Strategy 2: Check recently active processes (timing correlation)
|
// Strategy 2: Check recently active processes (timing correlation)
|
||||||
if let Some(active_app) = get_recently_active_excluded_app(excluded_apps) {
|
// Use cached results to avoid expensive /proc scanning
|
||||||
|
if let Some(active_app) = ProcessCache::get(excluded_apps) {
|
||||||
debug!("Clipboard excluded: recent activity from {active_app}");
|
debug!("Clipboard excluded: recent activity from {active_app}");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -1212,7 +1286,8 @@ fn get_focused_window_app() -> Option<String> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Check for recently active excluded apps using CPU and I/O activity.
|
/// Check for recently active excluded apps using CPU and I/O activity.
|
||||||
fn get_recently_active_excluded_app(
|
/// This is the uncached version - use `ProcessCache::get()` for cached access.
|
||||||
|
fn get_recently_active_excluded_app_uncached(
|
||||||
excluded_apps: &[String],
|
excluded_apps: &[String],
|
||||||
) -> Option<String> {
|
) -> Option<String> {
|
||||||
let proc_dir = std::path::Path::new("/proc");
|
let proc_dir = std::path::Path::new("/proc");
|
||||||
|
|
@ -1586,7 +1661,7 @@ mod tests {
|
||||||
let cursor = std::io::Cursor::new(test_data.to_vec());
|
let cursor = std::io::Cursor::new(test_data.to_vec());
|
||||||
|
|
||||||
let id = db
|
let id = db
|
||||||
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE)
|
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE, None)
|
||||||
.expect("Failed to store entry");
|
.expect("Failed to store entry");
|
||||||
|
|
||||||
let content_hash: Option<i64> = db
|
let content_hash: Option<i64> = db
|
||||||
|
|
@ -1622,7 +1697,7 @@ mod tests {
|
||||||
let test_data = b"Test content for copy";
|
let test_data = b"Test content for copy";
|
||||||
let cursor = std::io::Cursor::new(test_data.to_vec());
|
let cursor = std::io::Cursor::new(test_data.to_vec());
|
||||||
let id_a = db
|
let id_a = db
|
||||||
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE)
|
.store_entry(cursor, 100, 1000, None, None, DEFAULT_MAX_ENTRY_SIZE, None)
|
||||||
.expect("Failed to store entry A");
|
.expect("Failed to store entry A");
|
||||||
|
|
||||||
let original_last_accessed: i64 = db
|
let original_last_accessed: i64 = db
|
||||||
|
|
@ -1725,6 +1800,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store URI list");
|
.expect("Failed to store URI list");
|
||||||
|
|
||||||
|
|
@ -1758,6 +1834,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store image");
|
.expect("Failed to store image");
|
||||||
|
|
||||||
|
|
@ -1786,6 +1863,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store first");
|
.expect("Failed to store first");
|
||||||
let _id2 = db
|
let _id2 = db
|
||||||
|
|
@ -1796,6 +1874,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store second");
|
.expect("Failed to store second");
|
||||||
|
|
||||||
|
|
@ -1831,6 +1910,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store");
|
.expect("Failed to store");
|
||||||
}
|
}
|
||||||
|
|
@ -1852,6 +1932,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
);
|
);
|
||||||
assert!(matches!(result, Err(StashError::EmptyOrTooLarge)));
|
assert!(matches!(result, Err(StashError::EmptyOrTooLarge)));
|
||||||
}
|
}
|
||||||
|
|
@ -1866,6 +1947,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
);
|
);
|
||||||
assert!(matches!(result, Err(StashError::AllWhitespace)));
|
assert!(matches!(result, Err(StashError::AllWhitespace)));
|
||||||
}
|
}
|
||||||
|
|
@ -1882,6 +1964,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
);
|
);
|
||||||
assert!(matches!(result, Err(StashError::TooLarge(5000000))));
|
assert!(matches!(result, Err(StashError::TooLarge(5000000))));
|
||||||
}
|
}
|
||||||
|
|
@ -1897,6 +1980,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store");
|
.expect("Failed to store");
|
||||||
|
|
||||||
|
|
@ -1923,6 +2007,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store");
|
.expect("Failed to store");
|
||||||
db.store_entry(
|
db.store_entry(
|
||||||
|
|
@ -1932,6 +2017,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store");
|
.expect("Failed to store");
|
||||||
|
|
||||||
|
|
@ -1959,6 +2045,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store");
|
.expect("Failed to store");
|
||||||
}
|
}
|
||||||
|
|
@ -2038,6 +2125,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store");
|
.expect("Failed to store");
|
||||||
|
|
||||||
|
|
@ -2122,6 +2210,7 @@ mod tests {
|
||||||
None,
|
None,
|
||||||
None,
|
None,
|
||||||
DEFAULT_MAX_ENTRY_SIZE,
|
DEFAULT_MAX_ENTRY_SIZE,
|
||||||
|
None,
|
||||||
)
|
)
|
||||||
.expect("Failed to store");
|
.expect("Failed to store");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ impl AsyncClipboardDb {
|
||||||
excluded_apps: Option<Vec<String>>,
|
excluded_apps: Option<Vec<String>>,
|
||||||
min_size: Option<usize>,
|
min_size: Option<usize>,
|
||||||
max_size: usize,
|
max_size: usize,
|
||||||
|
content_hash: Option<i64>,
|
||||||
) -> Result<i64, StashError> {
|
) -> Result<i64, StashError> {
|
||||||
let path = self.db_path.clone();
|
let path = self.db_path.clone();
|
||||||
blocking::unblock(move || {
|
blocking::unblock(move || {
|
||||||
|
|
@ -36,6 +37,7 @@ impl AsyncClipboardDb {
|
||||||
excluded_apps.as_deref(),
|
excluded_apps.as_deref(),
|
||||||
min_size,
|
min_size,
|
||||||
max_size,
|
max_size,
|
||||||
|
content_hash,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.await
|
.await
|
||||||
|
|
@ -170,7 +172,7 @@ mod tests {
|
||||||
let data = b"async test data";
|
let data = b"async test data";
|
||||||
|
|
||||||
let id = async_db
|
let id = async_db
|
||||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||||
.await
|
.await
|
||||||
.expect("Failed to store entry");
|
.expect("Failed to store entry");
|
||||||
|
|
||||||
|
|
@ -199,7 +201,7 @@ mod tests {
|
||||||
let data = b"expiring entry";
|
let data = b"expiring entry";
|
||||||
|
|
||||||
let id = async_db
|
let id = async_db
|
||||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||||
.await
|
.await
|
||||||
.expect("Failed to store entry");
|
.expect("Failed to store entry");
|
||||||
|
|
||||||
|
|
@ -231,7 +233,7 @@ mod tests {
|
||||||
let data = b"entry to expire";
|
let data = b"entry to expire";
|
||||||
|
|
||||||
let id = async_db
|
let id = async_db
|
||||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||||
.await
|
.await
|
||||||
.expect("Failed to store entry");
|
.expect("Failed to store entry");
|
||||||
|
|
||||||
|
|
@ -278,12 +280,12 @@ mod tests {
|
||||||
let data = b"clone test";
|
let data = b"clone test";
|
||||||
|
|
||||||
let id1 = async_db
|
let id1 = async_db
|
||||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||||
.await
|
.await
|
||||||
.expect("Failed with original");
|
.expect("Failed with original");
|
||||||
|
|
||||||
let id2 = cloned
|
let id2 = cloned
|
||||||
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000)
|
.store_entry(data.to_vec(), 100, 1000, None, None, 5_000_000, None)
|
||||||
.await
|
.await
|
||||||
.expect("Failed with clone");
|
.expect("Failed with clone");
|
||||||
|
|
||||||
|
|
@ -302,7 +304,8 @@ mod tests {
|
||||||
let db = async_db.clone();
|
let db = async_db.clone();
|
||||||
let data = format!("concurrent test {}", i).into_bytes();
|
let data = format!("concurrent test {}", i).into_bytes();
|
||||||
smol::spawn(async move {
|
smol::spawn(async move {
|
||||||
db.store_entry(data, 100, 1000, None, None, 5_000_000).await
|
db.store_entry(data, 100, 1000, None, None, 5_000_000, None)
|
||||||
|
.await
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue