fc-common: add AlertConfig and AlertManager for error tracking
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Iaf2f52f6e0cf33e3275528ac13cd92046a6a6964
This commit is contained in:
parent
f8f9703faa
commit
ec5fbb453d
3 changed files with 108 additions and 2 deletions
86
crates/common/src/alerts.rs
Normal file
86
crates/common/src/alerts.rs
Normal file
|
|
@ -0,0 +1,86 @@
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use chrono::Utc;
|
||||||
|
use sqlx::PgPool;
|
||||||
|
use tokio::sync::RwLock;
|
||||||
|
use tracing::info;
|
||||||
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use crate::{config::AlertConfig, repo::build_metrics};
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct AlertState {
|
||||||
|
pub last_alert_at: chrono::DateTime<Utc>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for AlertState {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
last_alert_at: chrono::DateTime::<Utc>::MIN_UTC,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct AlertManager {
|
||||||
|
config: AlertConfig,
|
||||||
|
state: Arc<RwLock<AlertState>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Debug for AlertManager {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
f.debug_struct("AlertManager")
|
||||||
|
.field("config", &self.config)
|
||||||
|
.finish()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl AlertManager {
|
||||||
|
pub fn new(config: AlertConfig) -> Self {
|
||||||
|
Self {
|
||||||
|
config,
|
||||||
|
state: Arc::new(RwLock::new(AlertState::default())),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_enabled(&self) -> bool {
|
||||||
|
self.config.enabled
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn check_and_alert(
|
||||||
|
&self,
|
||||||
|
pool: &PgPool,
|
||||||
|
project_id: Option<Uuid>,
|
||||||
|
jobset_id: Option<Uuid>,
|
||||||
|
) -> Option<f64> {
|
||||||
|
if !self.is_enabled() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
let failure_rate = match build_metrics::calculate_failure_rate(
|
||||||
|
pool,
|
||||||
|
project_id,
|
||||||
|
jobset_id,
|
||||||
|
self.config.time_window_minutes,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
{
|
||||||
|
Ok(rate) => rate,
|
||||||
|
Err(_) => return None,
|
||||||
|
};
|
||||||
|
|
||||||
|
if failure_rate > self.config.error_threshold {
|
||||||
|
let mut state = self.state.write().await;
|
||||||
|
let time_since_last = (Utc::now() - state.last_alert_at).num_minutes();
|
||||||
|
|
||||||
|
if time_since_last >= self.config.time_window_minutes {
|
||||||
|
state.last_alert_at = Utc::now();
|
||||||
|
info!(
|
||||||
|
"Alert: failure rate {:.1}% exceeds threshold {:.1}%",
|
||||||
|
failure_rate, self.config.error_threshold
|
||||||
|
);
|
||||||
|
return Some(failure_rate);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -118,6 +118,25 @@ pub struct NotificationsConfig {
|
||||||
pub gitlab_url: Option<String>,
|
pub gitlab_url: Option<String>,
|
||||||
pub gitlab_token: Option<String>,
|
pub gitlab_token: Option<String>,
|
||||||
pub email: Option<EmailConfig>,
|
pub email: Option<EmailConfig>,
|
||||||
|
pub alerts: Option<AlertConfig>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
#[serde(default)]
|
||||||
|
pub struct AlertConfig {
|
||||||
|
pub enabled: bool,
|
||||||
|
pub error_threshold: f64,
|
||||||
|
pub time_window_minutes: i64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for AlertConfig {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self {
|
||||||
|
enabled: false,
|
||||||
|
error_threshold: 20.0,
|
||||||
|
time_window_minutes: 60,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||||
|
|
@ -220,8 +239,8 @@ pub struct DeclarativeProject {
|
||||||
/// Declarative notification configuration.
|
/// Declarative notification configuration.
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
pub struct DeclarativeNotification {
|
pub struct DeclarativeNotification {
|
||||||
/// Notification type: `github_status`, email, `gitlab_status`, `gitea_status`,
|
/// Notification type: `github_status`, email, `gitlab_status`,
|
||||||
/// `run_command`
|
/// `gitea_status`, `run_command`
|
||||||
pub notification_type: String,
|
pub notification_type: String,
|
||||||
/// Type-specific configuration (JSON object)
|
/// Type-specific configuration (JSON object)
|
||||||
pub config: serde_json::Value,
|
pub config: serde_json::Value,
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
//! Common types and utilities for CI
|
//! Common types and utilities for CI
|
||||||
|
|
||||||
|
pub mod alerts;
|
||||||
pub mod config;
|
pub mod config;
|
||||||
pub mod database;
|
pub mod database;
|
||||||
pub mod error;
|
pub mod error;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue