fc-common: add AlertConfig and AlertManager for error tracking
Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: Iaf2f52f6e0cf33e3275528ac13cd92046a6a6964
This commit is contained in:
parent
f8f9703faa
commit
ec5fbb453d
3 changed files with 108 additions and 2 deletions
86
crates/common/src/alerts.rs
Normal file
86
crates/common/src/alerts.rs
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
use std::sync::Arc;
|
||||
|
||||
use chrono::Utc;
|
||||
use sqlx::PgPool;
|
||||
use tokio::sync::RwLock;
|
||||
use tracing::info;
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::{config::AlertConfig, repo::build_metrics};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct AlertState {
|
||||
pub last_alert_at: chrono::DateTime<Utc>,
|
||||
}
|
||||
|
||||
impl Default for AlertState {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
last_alert_at: chrono::DateTime::<Utc>::MIN_UTC,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct AlertManager {
|
||||
config: AlertConfig,
|
||||
state: Arc<RwLock<AlertState>>,
|
||||
}
|
||||
|
||||
impl std::fmt::Debug for AlertManager {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
f.debug_struct("AlertManager")
|
||||
.field("config", &self.config)
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl AlertManager {
|
||||
pub fn new(config: AlertConfig) -> Self {
|
||||
Self {
|
||||
config,
|
||||
state: Arc::new(RwLock::new(AlertState::default())),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_enabled(&self) -> bool {
|
||||
self.config.enabled
|
||||
}
|
||||
|
||||
pub async fn check_and_alert(
|
||||
&self,
|
||||
pool: &PgPool,
|
||||
project_id: Option<Uuid>,
|
||||
jobset_id: Option<Uuid>,
|
||||
) -> Option<f64> {
|
||||
if !self.is_enabled() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let failure_rate = match build_metrics::calculate_failure_rate(
|
||||
pool,
|
||||
project_id,
|
||||
jobset_id,
|
||||
self.config.time_window_minutes,
|
||||
)
|
||||
.await
|
||||
{
|
||||
Ok(rate) => rate,
|
||||
Err(_) => return None,
|
||||
};
|
||||
|
||||
if failure_rate > self.config.error_threshold {
|
||||
let mut state = self.state.write().await;
|
||||
let time_since_last = (Utc::now() - state.last_alert_at).num_minutes();
|
||||
|
||||
if time_since_last >= self.config.time_window_minutes {
|
||||
state.last_alert_at = Utc::now();
|
||||
info!(
|
||||
"Alert: failure rate {:.1}% exceeds threshold {:.1}%",
|
||||
failure_rate, self.config.error_threshold
|
||||
);
|
||||
return Some(failure_rate);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
|
@ -118,6 +118,25 @@ pub struct NotificationsConfig {
|
|||
pub gitlab_url: Option<String>,
|
||||
pub gitlab_token: Option<String>,
|
||||
pub email: Option<EmailConfig>,
|
||||
pub alerts: Option<AlertConfig>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
#[serde(default)]
|
||||
pub struct AlertConfig {
|
||||
pub enabled: bool,
|
||||
pub error_threshold: f64,
|
||||
pub time_window_minutes: i64,
|
||||
}
|
||||
|
||||
impl Default for AlertConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
enabled: false,
|
||||
error_threshold: 20.0,
|
||||
time_window_minutes: 60,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
|
||||
|
|
@ -220,8 +239,8 @@ pub struct DeclarativeProject {
|
|||
/// Declarative notification configuration.
|
||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||
pub struct DeclarativeNotification {
|
||||
/// Notification type: `github_status`, email, `gitlab_status`, `gitea_status`,
|
||||
/// `run_command`
|
||||
/// Notification type: `github_status`, email, `gitlab_status`,
|
||||
/// `gitea_status`, `run_command`
|
||||
pub notification_type: String,
|
||||
/// Type-specific configuration (JSON object)
|
||||
pub config: serde_json::Value,
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
//! Common types and utilities for CI
|
||||
|
||||
pub mod alerts;
|
||||
pub mod config;
|
||||
pub mod database;
|
||||
pub mod error;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue