fc-common: add unsupported_timeout for queue runner
Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: I76805c31bbfc11e0a596c6b3b88c52c06a6a6964
This commit is contained in:
parent
b43a11756a
commit
f5c16aef83
3 changed files with 66 additions and 1 deletions
|
|
@ -923,3 +923,20 @@ unsupported_timeout = "2h 30m"
|
|||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod humantime_option_test {
    use super::*;

    /// A queue-runner config that omits the `unsupported_timeout` key must
    /// still parse, and the field must come back as `None`.
    #[test]
    fn test_option_humantime_missing() {
        // Deliberately contains no `unsupported_timeout` entry.
        let raw = r#"
workers = 4
poll_interval = 5
build_timeout = 3600
work_dir = "/tmp/fc"
"#;

        // Parsing itself must succeed; a failure here panics the test.
        let parsed = toml::from_str::<QueueRunnerConfig>(raw).unwrap();

        assert_eq!(parsed.unsupported_timeout, None);
    }
}
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
let failed_paths_cache = qr_config.failed_paths_cache;
|
||||
let failed_paths_ttl = qr_config.failed_paths_ttl;
|
||||
let work_dir = qr_config.work_dir;
|
||||
let unsupported_timeout = qr_config.unsupported_timeout;
|
||||
|
||||
// Ensure the work directory exists
|
||||
tokio::fs::create_dir_all(&work_dir).await?;
|
||||
|
|
@ -82,7 +83,7 @@ async fn main() -> anyhow::Result<()> {
|
|||
let active_builds = worker_pool.active_builds().clone();
|
||||
|
||||
tokio::select! {
|
||||
result = fc_queue_runner::runner_loop::run(db.pool().clone(), worker_pool, poll_interval, wakeup, strict_errors, failed_paths_cache, notifications_config.clone()) => {
|
||||
result = fc_queue_runner::runner_loop::run(db.pool().clone(), worker_pool, poll_interval, wakeup, strict_errors, failed_paths_cache, notifications_config.clone(), unsupported_timeout) => {
|
||||
if let Err(e) = result {
|
||||
tracing::error!("Runner loop failed: {e}");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ pub async fn run(
|
|||
strict_errors: bool,
|
||||
failed_paths_cache: bool,
|
||||
notifications_config: fc_common::config::NotificationsConfig,
|
||||
unsupported_timeout: Option<Duration>,
|
||||
) -> anyhow::Result<()> {
|
||||
// Reset orphaned builds from previous crashes (older than 5 minutes)
|
||||
match repo::builds::reset_orphaned(&pool, 300).await {
|
||||
|
|
@ -207,6 +208,52 @@ pub async fn run(
|
|||
},
|
||||
}
|
||||
|
||||
// Unsupported system timeout: abort builds with no available builders
|
||||
if let Some(timeout) = unsupported_timeout {
|
||||
if let Some(system) = &build.system {
|
||||
match repo::remote_builders::find_for_system(&pool, system).await {
|
||||
Ok(builders) if builders.is_empty() => {
|
||||
let timeout_at = build.created_at + timeout;
|
||||
if chrono::Utc::now() > timeout_at {
|
||||
tracing::info!(
|
||||
build_id = %build.id,
|
||||
system = %system,
|
||||
timeout = ?timeout,
|
||||
"Aborting build: no builder available for system type"
|
||||
);
|
||||
|
||||
if let Err(e) = repo::builds::start(&pool, build.id).await {
|
||||
tracing::warn!(build_id = %build.id, "Failed to start unsupported build: {e}");
|
||||
}
|
||||
|
||||
if let Err(e) = repo::builds::complete(
|
||||
&pool,
|
||||
build.id,
|
||||
BuildStatus::UnsupportedSystem,
|
||||
None,
|
||||
None,
|
||||
Some("No builder available for system type"),
|
||||
)
|
||||
.await
|
||||
{
|
||||
tracing::warn!(build_id = %build.id, "Failed to complete unsupported build: {e}");
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
},
|
||||
Ok(_) => {}, // Builders available, proceed normally
|
||||
Err(e) => {
|
||||
tracing::error!(
|
||||
build_id = %build.id,
|
||||
"Failed to check builders for unsupported system: {e}"
|
||||
);
|
||||
continue;
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// One-at-a-time scheduling: check if jobset allows concurrent builds
|
||||
// First, get the evaluation to find the jobset
|
||||
let eval =
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue