fc-queue-runner: implement per-build cancellation via CancellationToken

Adds an `ActiveBuilds` registry (`DashMap<Uuid, CancellationToken>`)
to `WorkerPool` and updates `dispatch()` to create a per-build token,
racing `run_build` against it via Tokio's `select!`.

The `cancel_checker_loop` then polls the DB every N seconds (currently 2)
for builds cancelled while running, and triggers their tokens.

Existing `kill_on_drop(true)` on `nix build` processes handles
subprocess cleanup when the future is dropped. Thank you, past me, for
your insight.

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ic8af58e92972c7d5d104d9c717e9217d6a6a6964
This commit is contained in:
raf 2026-02-16 23:32:40 +03:00
commit f8586a7f3c
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
4 changed files with 82 additions and 21 deletions

View file

@ -1,5 +1,6 @@
use std::{path::PathBuf, sync::Arc, time::Duration};
use dashmap::DashMap;
use fc_common::{
alerts::AlertManager,
config::{
@ -24,6 +25,10 @@ use fc_common::{
};
use sqlx::PgPool;
use tokio::sync::Semaphore;
use tokio_util::sync::CancellationToken;
use uuid::Uuid;
pub type ActiveBuilds = Arc<DashMap<Uuid, CancellationToken>>;
pub struct WorkerPool {
semaphore: Arc<Semaphore>,
@ -37,7 +42,8 @@ pub struct WorkerPool {
signing_config: Arc<SigningConfig>,
cache_upload_config: Arc<CacheUploadConfig>,
alert_manager: Arc<Option<AlertManager>>,
drain_token: tokio_util::sync::CancellationToken,
drain_token: CancellationToken,
active_builds: ActiveBuilds,
}
impl WorkerPool {
@ -68,7 +74,8 @@ impl WorkerPool {
signing_config: Arc::new(signing_config),
cache_upload_config: Arc::new(cache_upload_config),
alert_manager: Arc::new(alert_manager),
drain_token: tokio_util::sync::CancellationToken::new(),
drain_token: CancellationToken::new(),
active_builds: Arc::new(DashMap::new()),
}
}
@ -95,6 +102,10 @@ impl WorkerPool {
.await;
}
/// Returns the registry of in-flight builds, mapping each build's
/// `Uuid` to the `CancellationToken` that aborts it.
/// Entries are inserted by `dispatch()` and removed when the build
/// task finishes or is cancelled.
pub fn active_builds(&self) -> &ActiveBuilds {
&self.active_builds
}
#[tracing::instrument(skip(self, build), fields(build_id = %build.id, job = %build.job_name))]
pub fn dispatch(&self, build: Build) {
if self.drain_token.is_cancelled() {
@ -112,29 +123,45 @@ impl WorkerPool {
let signing_config = self.signing_config.clone();
let cache_upload_config = self.cache_upload_config.clone();
let alert_manager = self.alert_manager.clone();
let active_builds = self.active_builds.clone();
let cancel_token = CancellationToken::new();
let build_id = build.id;
active_builds.insert(build_id, cancel_token.clone());
tokio::spawn(async move {
let _permit = match semaphore.acquire().await {
Ok(p) => p,
Err(_) => return,
let result = async {
let _permit = match semaphore.acquire().await {
Ok(p) => p,
Err(_) => return,
};
if let Err(e) = run_build(
&pool,
&build,
&work_dir,
timeout,
&log_config,
&gc_config,
&notifications_config,
&signing_config,
&cache_upload_config,
&alert_manager,
)
.await
{
tracing::error!(build_id = %build.id, "Build dispatch failed: {e}");
}
};
if let Err(e) = run_build(
&pool,
&build,
&work_dir,
timeout,
&log_config,
&gc_config,
&notifications_config,
&signing_config,
&cache_upload_config,
&alert_manager,
)
.await
{
tracing::error!(build_id = %build.id, "Build dispatch failed: {e}");
tokio::select! {
() = result => {}
() = cancel_token.cancelled() => {
tracing::info!(build_id = %build_id, "Build cancelled, aborting");
}
}
active_builds.remove(&build_id);
});
}
}