From ec5fbb453dae4cf97bb369e83bf7888e157058e4 Mon Sep 17 00:00:00 2001
From: NotAShelf
Date: Sat, 14 Feb 2026 01:37:09 +0300
Subject: [PATCH] fc-common: add `AlertConfig` and `AlertManager` for error tracking

Signed-off-by: NotAShelf
Change-Id: Iaf2f52f6e0cf33e3275528ac13cd92046a6a6964
---
Review notes (ignored by `git am`):

* This copy was re-flowed from a flattened extract that had lost its line
  breaks, its indentation and everything written inside angle brackets.
  Generic arguments in the added code were restored from how the values are
  used. Those on the unchanged `NotificationsConfig` context lines
  (`Option<String>`, `Option<EmailConfig>`) are assumptions; confirm them
  against the tree, and use `git apply --ignore-whitespace` if the
  indentation differs.
* Compared with the original commit, the new items are documented, a failed
  failure-rate query is logged instead of being dropped silently, and the
  clock is read once per alert. Hunk sizes and the diffstat were recomputed;
  the `index` blob ids are the original ones and no longer describe the
  changed post-images.

 crates/common/src/alerts.rs | 117 ++++++++++++++++++++++++++++++++++++
 crates/common/src/config.rs |  30 ++++++++-
 crates/common/src/lib.rs    |   1 +
 3 files changed, 146 insertions(+), 2 deletions(-)
 create mode 100644 crates/common/src/alerts.rs

diff --git a/crates/common/src/alerts.rs b/crates/common/src/alerts.rs
new file mode 100644
index 0000000..6d55f6c
--- /dev/null
+++ b/crates/common/src/alerts.rs
@@ -0,0 +1,117 @@
+//! Failure-rate alerting with a cooldown between consecutive alerts.
+
+use std::sync::Arc;
+
+use chrono::Utc;
+use sqlx::PgPool;
+use tokio::sync::RwLock;
+use tracing::{info, warn};
+use uuid::Uuid;
+
+use crate::{config::AlertConfig, repo::build_metrics};
+
+/// Mutable bookkeeping kept behind the lock of an [`AlertManager`].
+#[derive(Debug, Clone)]
+pub struct AlertState {
+    /// Time of the most recent alert. Defaults to the earliest representable
+    /// UTC instant, so the first alert always clears the cooldown check.
+    pub last_alert_at: chrono::DateTime<Utc>,
+}
+
+impl Default for AlertState {
+    fn default() -> Self {
+        Self {
+            last_alert_at: chrono::DateTime::<Utc>::MIN_UTC,
+        }
+    }
+}
+
+/// Compares the build failure rate with the configured threshold and
+/// rate-limits the alerts it reports.
+pub struct AlertManager {
+    config: AlertConfig,
+    state: Arc<RwLock<AlertState>>,
+}
+
+// Hand-written so that only `config` is printed; `state` is omitted.
+impl std::fmt::Debug for AlertManager {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("AlertManager")
+            .field("config", &self.config)
+            .finish()
+    }
+}
+
+impl AlertManager {
+    /// Creates a manager with default state, i.e. no alert recorded yet.
+    pub fn new(config: AlertConfig) -> Self {
+        Self {
+            config,
+            state: Arc::new(RwLock::new(AlertState::default())),
+        }
+    }
+
+    /// Returns `true` when alerting is enabled in the configuration.
+    pub fn is_enabled(&self) -> bool {
+        self.config.enabled
+    }
+
+    /// Checks the failure rate and reports it when an alert is due.
+    ///
+    /// The rate is queried via `build_metrics::calculate_failure_rate` for
+    /// the given project/jobset filters over `time_window_minutes`.
+    ///
+    /// Returns `Some(rate)` when the rate is above `error_threshold` and at
+    /// least `time_window_minutes` have passed since the previous alert;
+    /// the alert time is then updated and the alert is logged. Returns
+    /// `None` when alerting is disabled, the query fails (logged as a
+    /// warning), the rate is within the threshold, or the cooldown is still
+    /// running.
+    pub async fn check_and_alert(
+        &self,
+        pool: &PgPool,
+        project_id: Option<Uuid>,
+        jobset_id: Option<Uuid>,
+    ) -> Option<f64> {
+        if !self.is_enabled() {
+            return None;
+        }
+
+        let failure_rate = match build_metrics::calculate_failure_rate(
+            pool,
+            project_id,
+            jobset_id,
+            self.config.time_window_minutes,
+        )
+        .await
+        {
+            Ok(rate) => rate,
+            Err(err) => {
+                // Best effort: a failed query must not break the caller,
+                // but it should leave a trace instead of vanishing.
+                warn!(
+                    "Alert check skipped: failure rate query failed: {err:?}"
+                );
+                return None;
+            }
+        };
+
+        if failure_rate > self.config.error_threshold {
+            // The write lock is taken before reading the last alert time so
+            // that concurrent callers cannot both pass the cooldown check.
+            let mut state = self.state.write().await;
+            let now = Utc::now();
+            let minutes_since_last = (now - state.last_alert_at).num_minutes();
+
+            if minutes_since_last >= self.config.time_window_minutes {
+                state.last_alert_at = now;
+                info!(
+                    "Alert: failure rate {:.1}% exceeds threshold {:.1}%",
+                    failure_rate, self.config.error_threshold
+                );
+                return Some(failure_rate);
+            }
+        }
+        None
+    }
+}
diff --git a/crates/common/src/config.rs b/crates/common/src/config.rs
index 3b9e52f..ead09ea 100644
--- a/crates/common/src/config.rs
+++ b/crates/common/src/config.rs
@@ -118,6 +118,32 @@ pub struct NotificationsConfig {
     pub gitlab_url: Option<String>,
     pub gitlab_token: Option<String>,
     pub email: Option<EmailConfig>,
+    /// Optional failure-rate alerting settings.
+    pub alerts: Option<AlertConfig>,
+}
+
+/// Settings for failure-rate alerting.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct AlertConfig {
+    /// Turns alerting on; `false` by default.
+    pub enabled: bool,
+    /// Failure rate above which an alert is raised (logged with a `%`
+    /// suffix); `20.0` by default.
+    pub error_threshold: f64,
+    /// Window, in minutes, over which the failure rate is calculated; also
+    /// the minimum time between two alerts. `60` by default.
+    pub time_window_minutes: i64,
+}
+
+impl Default for AlertConfig {
+    fn default() -> Self {
+        Self {
+            enabled: false,
+            error_threshold: 20.0,
+            time_window_minutes: 60,
+        }
+    }
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, Default)]
@@ -220,8 +246,8 @@ pub struct DeclarativeProject {
 /// Declarative notification configuration.
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct DeclarativeNotification {
-    /// Notification type: `github_status`, email, `gitlab_status`, `gitea_status`,
-    /// `run_command`
+    /// Notification type: `github_status`, email, `gitlab_status`,
+    /// `gitea_status`, `run_command`
     pub notification_type: String,
     /// Type-specific configuration (JSON object)
     pub config: serde_json::Value,
diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs
index 0c6b6ea..6303886 100644
--- a/crates/common/src/lib.rs
+++ b/crates/common/src/lib.rs
@@ -1,5 +1,6 @@
 //! Common types and utilities for CI
 
+pub mod alerts;
 pub mod config;
 pub mod database;
 pub mod error;