fc-common: add failed paths cache infrastructure
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I35f9bfb044160151cf73c43ed9ada3476a6a6964
This commit is contained in:
parent
4c56b192f0
commit
65a6fd853d
5 changed files with 105 additions and 9 deletions
9
crates/common/migrations/016_failed_paths_cache.sql
Normal file
9
crates/common/migrations/016_failed_paths_cache.sql
Normal file
|
|
@ -0,0 +1,9 @@
|
||||||
|
-- Failed paths cache: prevents rebuilding known-failing derivations.
-- Holds at most one row per derivation path (drv_path is the primary key).
CREATE TABLE failed_paths_cache (
    -- Derivation path that failed; unique key of the cache.
    drv_path        TEXT PRIMARY KEY,
    -- Build that recorded the failure. Nullable; no foreign key is declared.
    source_build_id UUID,
    -- Failure status stored as text. Nullable.
    failure_status  TEXT,
    -- When the failure was recorded; defaults to the insertion time.
    failed_at       TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

-- Supports filtering entries by age (comparisons on failed_at).
CREATE INDEX idx_failed_paths_cache_failed_at ON failed_paths_cache(failed_at);
|
||||||
|
|
@ -79,6 +79,14 @@ pub struct QueueRunnerConfig {
|
||||||
/// retrying.
|
/// retrying.
|
||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub strict_errors: bool,
|
pub strict_errors: bool,
|
||||||
|
|
||||||
|
/// Cache failed derivation paths to skip known-failing builds.
|
||||||
|
#[serde(default = "default_true")]
|
||||||
|
pub failed_paths_cache: bool,
|
||||||
|
|
||||||
|
/// TTL in seconds for failed paths cache entries (default 24h).
|
||||||
|
#[serde(default = "default_failed_paths_ttl")]
|
||||||
|
pub failed_paths_ttl: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
|
@ -405,6 +413,10 @@ const fn default_true() -> bool {
|
||||||
true
|
true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Default for `QueueRunnerConfig::failed_paths_ttl`: 24 hours, in seconds.
const fn default_failed_paths_ttl() -> u64 {
    const SECONDS_PER_HOUR: u64 = 60 * 60;
    24 * SECONDS_PER_HOUR
}
|
||||||
|
|
||||||
/// Returns the default check interval, `60`.
///
/// NOTE(review): presumably referenced from a `#[serde(default = "...")]`
/// attribute and measured in seconds; neither the field nor the unit is
/// visible here, so confirm against the config struct that uses it.
const fn default_check_interval() -> i32 {
    const DEFAULT_CHECK_INTERVAL: i32 = 60;
    DEFAULT_CHECK_INTERVAL
}
|
||||||
|
|
@ -521,11 +533,13 @@ impl Default for EvaluatorConfig {
|
||||||
impl Default for QueueRunnerConfig {
|
impl Default for QueueRunnerConfig {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self {
|
Self {
|
||||||
workers: 4,
|
workers: 4,
|
||||||
poll_interval: 5,
|
poll_interval: 5,
|
||||||
build_timeout: 3600,
|
build_timeout: 3600,
|
||||||
work_dir: PathBuf::from("/tmp/fc-queue-runner"),
|
work_dir: PathBuf::from("/tmp/fc-queue-runner"),
|
||||||
strict_errors: false,
|
strict_errors: false,
|
||||||
|
failed_paths_cache: true,
|
||||||
|
failed_paths_ttl: 86400,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -276,13 +276,13 @@ pub async fn cancel_cascade(pool: &PgPool, id: Uuid) -> Result<Vec<Build>> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Restart a build by resetting it to pending state.
|
/// Restart a build by resetting it to pending state.
|
||||||
/// Only works for failed, succeeded, or cancelled builds.
|
/// Only works for failed, succeeded, cancelled, or cached_failure builds.
|
||||||
pub async fn restart(pool: &PgPool, id: Uuid) -> Result<Build> {
|
pub async fn restart(pool: &PgPool, id: Uuid) -> Result<Build> {
|
||||||
sqlx::query_as::<_, Build>(
|
let build = sqlx::query_as::<_, Build>(
|
||||||
"UPDATE builds SET status = 'pending', started_at = NULL, completed_at = \
|
"UPDATE builds SET status = 'pending', started_at = NULL, completed_at = \
|
||||||
NULL, log_path = NULL, build_output_path = NULL, error_message = NULL, \
|
NULL, log_path = NULL, build_output_path = NULL, error_message = NULL, \
|
||||||
retry_count = retry_count + 1 WHERE id = $1 AND status IN ('failed', \
|
retry_count = retry_count + 1 WHERE id = $1 AND status IN ('failed', \
|
||||||
'succeeded', 'cancelled') RETURNING *",
|
'succeeded', 'cancelled', 'cached_failure') RETURNING *",
|
||||||
)
|
)
|
||||||
.bind(id)
|
.bind(id)
|
||||||
.fetch_optional(pool)
|
.fetch_optional(pool)
|
||||||
|
|
@ -291,7 +291,15 @@ pub async fn restart(pool: &PgPool, id: Uuid) -> Result<Build> {
|
||||||
CiError::NotFound(format!(
|
CiError::NotFound(format!(
|
||||||
"Build {id} not found or not in a restartable state"
|
"Build {id} not found or not in a restartable state"
|
||||||
))
|
))
|
||||||
})
|
})?;
|
||||||
|
|
||||||
|
if let Err(e) =
|
||||||
|
super::failed_paths_cache::invalidate(pool, &build.drv_path).await
|
||||||
|
{
|
||||||
|
tracing::warn!(build_id = %id, "Failed to invalidate failed paths cache: {e}");
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(build)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Mark a build's outputs as signed.
|
/// Mark a build's outputs as signed.
|
||||||
|
|
|
||||||
64
crates/common/src/repo/failed_paths_cache.rs
Normal file
64
crates/common/src/repo/failed_paths_cache.rs
Normal file
|
|
@ -0,0 +1,64 @@
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
error::{CiError, Result},
|
||||||
|
models::BuildStatus,
|
||||||
|
};
|
||||||
|
|
||||||
|
pub async fn is_cached_failure(pool: &PgPool, drv_path: &str) -> Result<bool> {
|
||||||
|
let row: Option<(bool,)> =
|
||||||
|
sqlx::query_as("SELECT true FROM failed_paths_cache WHERE drv_path = $1")
|
||||||
|
.bind(drv_path)
|
||||||
|
.fetch_optional(pool)
|
||||||
|
.await
|
||||||
|
.map_err(CiError::Database)?;
|
||||||
|
|
||||||
|
Ok(row.is_some())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn insert(
|
||||||
|
pool: &PgPool,
|
||||||
|
drv_path: &str,
|
||||||
|
failure_status: BuildStatus,
|
||||||
|
source_build_id: Uuid,
|
||||||
|
) -> Result<()> {
|
||||||
|
let status_str = failure_status.to_string();
|
||||||
|
sqlx::query(
|
||||||
|
"INSERT INTO failed_paths_cache (drv_path, source_build_id, \
|
||||||
|
failure_status, failed_at) VALUES ($1, $2, $3, NOW()) ON CONFLICT \
|
||||||
|
(drv_path) DO UPDATE SET source_build_id = $2, failure_status = $3, \
|
||||||
|
failed_at = NOW()",
|
||||||
|
)
|
||||||
|
.bind(drv_path)
|
||||||
|
.bind(source_build_id)
|
||||||
|
.bind(&status_str)
|
||||||
|
.execute(pool)
|
||||||
|
.await
|
||||||
|
.map_err(CiError::Database)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn invalidate(pool: &PgPool, drv_path: &str) -> Result<()> {
|
||||||
|
sqlx::query("DELETE FROM failed_paths_cache WHERE drv_path = $1")
|
||||||
|
.bind(drv_path)
|
||||||
|
.execute(pool)
|
||||||
|
.await
|
||||||
|
.map_err(CiError::Database)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn cleanup_expired(pool: &PgPool, ttl_seconds: u64) -> Result<u64> {
|
||||||
|
let result = sqlx::query(
|
||||||
|
"DELETE FROM failed_paths_cache WHERE failed_at < NOW() - \
|
||||||
|
make_interval(secs => $1)",
|
||||||
|
)
|
||||||
|
.bind(ttl_seconds as f64)
|
||||||
|
.execute(pool)
|
||||||
|
.await
|
||||||
|
.map_err(CiError::Database)?;
|
||||||
|
|
||||||
|
Ok(result.rows_affected())
|
||||||
|
}
|
||||||
|
|
@ -6,6 +6,7 @@ pub mod build_steps;
|
||||||
pub mod builds;
|
pub mod builds;
|
||||||
pub mod channels;
|
pub mod channels;
|
||||||
pub mod evaluations;
|
pub mod evaluations;
|
||||||
|
pub mod failed_paths_cache;
|
||||||
pub mod jobset_inputs;
|
pub mod jobset_inputs;
|
||||||
pub mod jobsets;
|
pub mod jobsets;
|
||||||
pub mod notification_configs;
|
pub mod notification_configs;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue