fc-queue-runner: implement per-build cancellation via CancellationToken

Adds an `ActiveBuilds` registry (`DashMap<Uuid, CancellationToken>`)
to `WorkerPool` and has `dispatch()` create a per-build token, racing
`run_build` against it via Tokio's `select!`.

The `cancel_checker_loop` then polls the DB every N seconds (currently 2)
for builds cancelled while running, and triggers their tokens.

Existing `kill_on_drop(true)` on `nix build` processes handles
subprocess cleanup when the future is dropped. Thank you past me for
your insight.

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ic8af58e92972c7d5d104d9c717e9217d6a6a6964
This commit is contained in:
raf 2026-02-16 23:32:40 +03:00
commit f8586a7f3c
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
4 changed files with 82 additions and 21 deletions

1
Cargo.lock generated
View file

@ -874,6 +874,7 @@ dependencies = [
"chrono", "chrono",
"clap", "clap",
"config", "config",
"dashmap",
"fc-common", "fc-common",
"serde", "serde",
"serde_json", "serde_json",

View file

@ -11,6 +11,7 @@ anyhow.workspace = true
chrono.workspace = true chrono.workspace = true
clap.workspace = true clap.workspace = true
config.workspace = true config.workspace = true
dashmap.workspace = true
serde.workspace = true serde.workspace = true
serde_json.workspace = true serde_json.workspace = true
sqlx.workspace = true sqlx.workspace = true

View file

@ -5,8 +5,9 @@ use fc_common::{
config::{Config, GcConfig}, config::{Config, GcConfig},
database::Database, database::Database,
gc_roots, gc_roots,
repo,
}; };
use fc_queue_runner::worker::WorkerPool; use fc_queue_runner::worker::{ActiveBuilds, WorkerPool};
#[derive(Parser)] #[derive(Parser)]
#[command(name = "fc-queue-runner")] #[command(name = "fc-queue-runner")]
@ -78,6 +79,8 @@ async fn main() -> anyhow::Result<()> {
wakeup.clone(), wakeup.clone(),
); );
let active_builds = worker_pool.active_builds().clone();
tokio::select! { tokio::select! {
result = fc_queue_runner::runner_loop::run(db.pool().clone(), worker_pool, poll_interval, wakeup, strict_errors, failed_paths_cache) => { result = fc_queue_runner::runner_loop::run(db.pool().clone(), worker_pool, poll_interval, wakeup, strict_errors, failed_paths_cache) => {
if let Err(e) = result { if let Err(e) = result {
@ -86,6 +89,7 @@ async fn main() -> anyhow::Result<()> {
} }
() = gc_loop(gc_config_for_loop) => {} () = gc_loop(gc_config_for_loop) => {}
() = failed_paths_cleanup_loop(db.pool().clone(), failed_paths_ttl, failed_paths_cache) => {} () = failed_paths_cleanup_loop(db.pool().clone(), failed_paths_ttl, failed_paths_cache) => {}
() = cancel_checker_loop(db.pool().clone(), active_builds) => {}
() = shutdown_signal() => { () = shutdown_signal() => {
tracing::info!("Shutdown signal received, draining in-flight builds..."); tracing::info!("Shutdown signal received, draining in-flight builds...");
worker_pool_for_drain.drain(); worker_pool_for_drain.drain();
@ -176,6 +180,34 @@ async fn failed_paths_cleanup_loop(
} }
} }
/// Periodically checks the database for builds that were cancelled while
/// running, and fires the matching `CancellationToken` so the worker's
/// `select!` drops the in-flight build future.
///
/// Runs forever; intended to be raced inside `main`'s `tokio::select!`.
/// Tokens are removed from `active_builds` as they are triggered, so each
/// cancellation fires at most once. DB errors are logged and retried on
/// the next tick rather than aborting the loop.
async fn cancel_checker_loop(pool: sqlx::PgPool, active_builds: ActiveBuilds) {
    const POLL_INTERVAL: Duration = Duration::from_secs(2);
    // `interval` keeps a fixed cadence (no drift from the DB query's own
    // latency, unlike sleep-at-top-of-loop).
    let mut ticker = tokio::time::interval(POLL_INTERVAL);
    // The first tick of an interval completes immediately; consume it so the
    // first DB check still happens POLL_INTERVAL after startup.
    ticker.tick().await;
    loop {
        ticker.tick().await;

        let build_ids: Vec<uuid::Uuid> =
            active_builds.iter().map(|entry| *entry.key()).collect();
        // Nothing in flight: skip the DB round-trip entirely.
        if build_ids.is_empty() {
            continue;
        }

        match repo::builds::get_cancelled_among(&pool, &build_ids).await {
            Ok(cancelled_ids) => {
                for id in cancelled_ids {
                    // Remove first so a concurrent tick can't double-fire;
                    // the worker task also removes on exit, which is a no-op here.
                    if let Some((_, token)) = active_builds.remove(&id) {
                        tracing::info!(build_id = %id, "Triggering cancellation for running build");
                        token.cancel();
                    }
                }
            },
            Err(e) => {
                // Best-effort: log and retry on the next tick.
                tracing::warn!("Failed to check for cancelled builds: {e}");
            },
        }
    }
}
async fn shutdown_signal() { async fn shutdown_signal() {
let ctrl_c = async { let ctrl_c = async {
tokio::signal::ctrl_c() tokio::signal::ctrl_c()

View file

@ -1,5 +1,6 @@
use std::{path::PathBuf, sync::Arc, time::Duration}; use std::{path::PathBuf, sync::Arc, time::Duration};
use dashmap::DashMap;
use fc_common::{ use fc_common::{
alerts::AlertManager, alerts::AlertManager,
config::{ config::{
@ -24,6 +25,10 @@ use fc_common::{
}; };
use sqlx::PgPool; use sqlx::PgPool;
use tokio::sync::Semaphore; use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;
use uuid::Uuid;
pub type ActiveBuilds = Arc<DashMap<Uuid, CancellationToken>>;
pub struct WorkerPool { pub struct WorkerPool {
semaphore: Arc<Semaphore>, semaphore: Arc<Semaphore>,
@ -37,7 +42,8 @@ pub struct WorkerPool {
signing_config: Arc<SigningConfig>, signing_config: Arc<SigningConfig>,
cache_upload_config: Arc<CacheUploadConfig>, cache_upload_config: Arc<CacheUploadConfig>,
alert_manager: Arc<Option<AlertManager>>, alert_manager: Arc<Option<AlertManager>>,
drain_token: tokio_util::sync::CancellationToken, drain_token: CancellationToken,
active_builds: ActiveBuilds,
} }
impl WorkerPool { impl WorkerPool {
@ -68,7 +74,8 @@ impl WorkerPool {
signing_config: Arc::new(signing_config), signing_config: Arc::new(signing_config),
cache_upload_config: Arc::new(cache_upload_config), cache_upload_config: Arc::new(cache_upload_config),
alert_manager: Arc::new(alert_manager), alert_manager: Arc::new(alert_manager),
drain_token: tokio_util::sync::CancellationToken::new(), drain_token: CancellationToken::new(),
active_builds: Arc::new(DashMap::new()),
} }
} }
@ -95,6 +102,10 @@ impl WorkerPool {
.await; .await;
} }
/// Shared registry of in-flight builds keyed by build id, each mapped to
/// the `CancellationToken` its worker task races against. Cloning the
/// returned `Arc` (e.g. for `cancel_checker_loop`) shares the same map.
pub fn active_builds(&self) -> &ActiveBuilds {
&self.active_builds
}
#[tracing::instrument(skip(self, build), fields(build_id = %build.id, job = %build.job_name))] #[tracing::instrument(skip(self, build), fields(build_id = %build.id, job = %build.job_name))]
pub fn dispatch(&self, build: Build) { pub fn dispatch(&self, build: Build) {
if self.drain_token.is_cancelled() { if self.drain_token.is_cancelled() {
@ -112,8 +123,14 @@ impl WorkerPool {
let signing_config = self.signing_config.clone(); let signing_config = self.signing_config.clone();
let cache_upload_config = self.cache_upload_config.clone(); let cache_upload_config = self.cache_upload_config.clone();
let alert_manager = self.alert_manager.clone(); let alert_manager = self.alert_manager.clone();
let active_builds = self.active_builds.clone();
let cancel_token = CancellationToken::new();
let build_id = build.id;
active_builds.insert(build_id, cancel_token.clone());
tokio::spawn(async move { tokio::spawn(async move {
let result = async {
let _permit = match semaphore.acquire().await { let _permit = match semaphore.acquire().await {
Ok(p) => p, Ok(p) => p,
Err(_) => return, Err(_) => return,
@ -135,6 +152,16 @@ impl WorkerPool {
{ {
tracing::error!(build_id = %build.id, "Build dispatch failed: {e}"); tracing::error!(build_id = %build.id, "Build dispatch failed: {e}");
} }
};
tokio::select! {
() = result => {}
() = cancel_token.cancelled() => {
tracing::info!(build_id = %build_id, "Build cancelled, aborting");
}
}
active_builds.remove(&build_id);
}); });
} }
} }