From 65a6fd853d5d647191c738d28ebae9a3962f55f6 Mon Sep 17 00:00:00 2001 From: NotAShelf Date: Mon, 16 Feb 2026 23:05:52 +0300 Subject: [PATCH] fc-common: add failed paths cache infrastructure Signed-off-by: NotAShelf Change-Id: I35f9bfb044160151cf73c43ed9ada3476a6a6964 --- .../migrations/016_failed_paths_cache.sql | 9 +++ crates/common/src/config.rs | 24 +++++-- crates/common/src/repo/builds.rs | 16 +++-- crates/common/src/repo/failed_paths_cache.rs | 64 +++++++++++++++++++ crates/common/src/repo/mod.rs | 1 + 5 files changed, 105 insertions(+), 9 deletions(-) create mode 100644 crates/common/migrations/016_failed_paths_cache.sql create mode 100644 crates/common/src/repo/failed_paths_cache.rs diff --git a/crates/common/migrations/016_failed_paths_cache.sql b/crates/common/migrations/016_failed_paths_cache.sql new file mode 100644 index 0000000..082332c --- /dev/null +++ b/crates/common/migrations/016_failed_paths_cache.sql @@ -0,0 +1,9 @@ +-- Failed paths cache: prevents rebuilding known-failing derivations +CREATE TABLE failed_paths_cache ( + drv_path TEXT PRIMARY KEY, + source_build_id UUID, + failure_status TEXT, + failed_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW() +); + +CREATE INDEX idx_failed_paths_cache_failed_at ON failed_paths_cache(failed_at); diff --git a/crates/common/src/config.rs b/crates/common/src/config.rs index 0303ac0..607db6e 100644 --- a/crates/common/src/config.rs +++ b/crates/common/src/config.rs @@ -79,6 +79,14 @@ pub struct QueueRunnerConfig { /// retrying. #[serde(default)] pub strict_errors: bool, + + /// Cache failed derivation paths to skip known-failing builds. + #[serde(default = "default_true")] + pub failed_paths_cache: bool, + + /// TTL in seconds for failed paths cache entries (default 24h). 
+ #[serde(default = "default_failed_paths_ttl")] + pub failed_paths_ttl: u64, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -405,6 +413,10 @@ const fn default_true() -> bool { true } +const fn default_failed_paths_ttl() -> u64 { + 86400 +} + const fn default_check_interval() -> i32 { 60 } @@ -521,11 +533,13 @@ impl Default for EvaluatorConfig { impl Default for QueueRunnerConfig { fn default() -> Self { Self { - workers: 4, - poll_interval: 5, - build_timeout: 3600, - work_dir: PathBuf::from("/tmp/fc-queue-runner"), - strict_errors: false, + workers: 4, + poll_interval: 5, + build_timeout: 3600, + work_dir: PathBuf::from("/tmp/fc-queue-runner"), + strict_errors: false, + failed_paths_cache: true, + failed_paths_ttl: 86400, } } } diff --git a/crates/common/src/repo/builds.rs b/crates/common/src/repo/builds.rs index effd46e..28fba64 100644 --- a/crates/common/src/repo/builds.rs +++ b/crates/common/src/repo/builds.rs @@ -276,13 +276,13 @@ pub async fn cancel_cascade(pool: &PgPool, id: Uuid) -> Result> { } /// Restart a build by resetting it to pending state. -/// Only works for failed, succeeded, or cancelled builds. +/// Only works for failed, succeeded, cancelled, or cached_failure builds. 
pub async fn restart(pool: &PgPool, id: Uuid) -> Result<Build> { - sqlx::query_as::<_, Build>( + let build = sqlx::query_as::<_, Build>( "UPDATE builds SET status = 'pending', started_at = NULL, completed_at = \ NULL, log_path = NULL, build_output_path = NULL, error_message = NULL, \ retry_count = retry_count + 1 WHERE id = $1 AND status IN ('failed', \ - 'succeeded', 'cancelled') RETURNING *", + 'succeeded', 'cancelled', 'cached_failure') RETURNING *", ) .bind(id) .fetch_optional(pool) @@ -291,7 +291,15 @@ pub async fn restart(pool: &PgPool, id: Uuid) -> Result<Build> { CiError::NotFound(format!( "Build {id} not found or not in a restartable state" )) - }) + })?; + + if let Err(e) = + super::failed_paths_cache::invalidate(pool, &build.drv_path).await + { + tracing::warn!(build_id = %id, "Failed to invalidate failed paths cache: {e}"); + } + + Ok(build) } /// Mark a build's outputs as signed. diff --git a/crates/common/src/repo/failed_paths_cache.rs b/crates/common/src/repo/failed_paths_cache.rs new file mode 100644 index 0000000..a3cc2ef --- /dev/null +++ b/crates/common/src/repo/failed_paths_cache.rs @@ -0,0 +1,64 @@ +use sqlx::PgPool; +use uuid::Uuid; + +use crate::{ + error::{CiError, Result}, + models::BuildStatus, +}; + +pub async fn is_cached_failure(pool: &PgPool, drv_path: &str) -> Result<bool> { + let row: Option<(bool,)> = + sqlx::query_as("SELECT true FROM failed_paths_cache WHERE drv_path = $1") + .bind(drv_path) + .fetch_optional(pool) + .await + .map_err(CiError::Database)?; + + Ok(row.is_some()) +} + +pub async fn insert( + pool: &PgPool, + drv_path: &str, + failure_status: BuildStatus, + source_build_id: Uuid, +) -> Result<()> { + let status_str = failure_status.to_string(); + sqlx::query( + "INSERT INTO failed_paths_cache (drv_path, source_build_id, \ + failure_status, failed_at) VALUES ($1, $2, $3, NOW()) ON CONFLICT \ + (drv_path) DO UPDATE SET source_build_id = $2, failure_status = $3, \ + failed_at = NOW()", + ) + .bind(drv_path) + .bind(source_build_id) +
 .bind(&status_str) + .execute(pool) + .await + .map_err(CiError::Database)?; + + Ok(()) +} + +pub async fn invalidate(pool: &PgPool, drv_path: &str) -> Result<()> { + sqlx::query("DELETE FROM failed_paths_cache WHERE drv_path = $1") + .bind(drv_path) + .execute(pool) + .await + .map_err(CiError::Database)?; + + Ok(()) +} + +pub async fn cleanup_expired(pool: &PgPool, ttl_seconds: u64) -> Result<u64> { + let result = sqlx::query( + "DELETE FROM failed_paths_cache WHERE failed_at < NOW() - \ + make_interval(secs => $1)", + ) + .bind(ttl_seconds as f64) + .execute(pool) + .await + .map_err(CiError::Database)?; + + Ok(result.rows_affected()) +} diff --git a/crates/common/src/repo/mod.rs b/crates/common/src/repo/mod.rs index cb41557..013db8e 100644 --- a/crates/common/src/repo/mod.rs +++ b/crates/common/src/repo/mod.rs @@ -6,6 +6,7 @@ pub mod build_steps; pub mod builds; pub mod channels; pub mod evaluations; +pub mod failed_paths_cache; pub mod jobset_inputs; pub mod jobsets; pub mod notification_configs;