fc-common: add disk space monitoring and error handling

Adds a `DiskSpace` error variant and an `is_disk_full()` helper
alongside a `check_disk_space()` util that uses libc's `statfs` on
unix. FC now detects disk space errors and logs recovery instructions
if applicable.

Signed-off-by: NotAShelf <raf@notashelf.dev>
Change-Id: Ia6d2c472219dc9d6eed6901dc733d7bb6a6a6964
This commit is contained in:
raf 2026-02-05 22:39:47 +03:00
commit 550603c4bb
Signed by: NotAShelf
GPG key ID: 29D95B64378DB4BF
3 changed files with 202 additions and 0 deletions

View file

@ -14,6 +14,7 @@ clap.workspace = true
config.workspace = true config.workspace = true
git2.workspace = true git2.workspace = true
hex.workspace = true hex.workspace = true
libc.workspace = true
lettre.workspace = true lettre.workspace = true
regex.workspace = true regex.workspace = true
reqwest.workspace = true reqwest.workspace = true

View file

@ -37,6 +37,9 @@ pub enum CiError {
#[error("Nix evaluation error: {0}")] #[error("Nix evaluation error: {0}")]
NixEval(String), NixEval(String),
#[error("Disk space error: {0}")]
DiskSpace(String),
#[error("Unauthorized: {0}")] #[error("Unauthorized: {0}")]
Unauthorized(String), Unauthorized(String),
@ -47,4 +50,155 @@ pub enum CiError {
Internal(String), Internal(String),
} }
impl CiError {
    /// Heuristically decide whether this error was caused by the disk
    /// running out of space.
    ///
    /// The rendered error message is lower-cased and searched for well-known
    /// out-of-space phrases. A message that mentions `sqlite` followed later
    /// by `busy` is also treated as a disk-space symptom.
    ///
    /// This is a best-effort text match: it can produce false positives
    /// (e.g. "cannot create directory" caused by permissions).
    #[must_use]
    pub fn is_disk_full(&self) -> bool {
        // Plain substrings; `str::contains` does NOT interpret regex syntax.
        const NEEDLES: [&str; 5] = [
            "no space left on device",
            "disk full",
            "database or disk is full", // SQLite's SQLITE_FULL message
            "enospc",
            "cannot create directory",
        ];

        let msg = self.to_string().to_lowercase();

        NEEDLES.iter().any(|needle| msg.contains(needle))
            // Equivalent of the intended `sqlite.*busy` pattern.
            || matches!(
                msg.split_once("sqlite"),
                Some((_, rest)) if rest.contains("busy")
            )
    }
}
pub type Result<T> = std::result::Result<T, CiError>; pub type Result<T> = std::result::Result<T, CiError>;
/// Check disk space on the filesystem containing `path`.
///
/// On unix this calls `statfs` and reports total, free and available space
/// (each divided by 1024³) together with the percentage of space in use,
/// computed as `(total - free) / total`. On other platforms only the
/// available space is queried; `total_gb` and `percent_used` are `0.0`.
///
/// # Errors
///
/// Returns [`CiError::DiskSpace`] if `path` contains an interior NUL byte,
/// and [`CiError::Io`] if the underlying OS query fails.
// The integer types of the `statfs` fields differ between unix targets
// (signed on some, narrower than 64 bits on others), so the `try_from`
// conversions below are identity conversions on some platforms.
#[allow(clippy::useless_conversion)]
pub fn check_disk_space(path: &std::path::Path) -> Result<DiskSpaceInfo> {
    fn to_gb(bytes: u64) -> f64 {
        bytes as f64 / 1024.0 / 1024.0 / 1024.0
    }

    #[cfg(unix)]
    {
        use std::{ffi::CString, os::unix::ffi::OsStrExt};

        let cpath = CString::new(path.as_os_str().as_bytes()).map_err(|_| {
            CiError::DiskSpace("Invalid path for disk check".to_string())
        })?;

        // SAFETY: `libc::statfs` is a plain C data struct, for which the
        // all-zero bit pattern is a valid value.
        let mut stat: libc::statfs = unsafe { std::mem::zeroed() };
        // SAFETY: `cpath` is a valid NUL-terminated C string that outlives the
        // call, and `stat` is a properly aligned, writable `statfs` buffer.
        if unsafe { libc::statfs(cpath.as_ptr(), &mut stat) } != 0 {
            return Err(CiError::Io(std::io::Error::last_os_error()));
        }

        // Negative values (possible where the fields are signed) are clamped
        // to zero, and the byte counts saturate instead of overflowing.
        let block_size = u64::try_from(stat.f_bsize).unwrap_or(0);
        let to_bytes = |blocks: u64| blocks.saturating_mul(block_size);

        let btotal = to_bytes(u64::try_from(stat.f_blocks).unwrap_or(0));
        let bfree = to_bytes(u64::try_from(stat.f_bfree).unwrap_or(0));
        let bavail = to_bytes(u64::try_from(stat.f_bavail).unwrap_or(0));

        let percent_used = if btotal > 0 {
            // `saturating_sub` guards against filesystems that report more
            // free blocks than total blocks.
            (btotal.saturating_sub(bfree) as f64 / btotal as f64) * 100.0
        } else {
            0.0
        };

        Ok(DiskSpaceInfo {
            total_gb: to_gb(btotal),
            free_gb: to_gb(bfree),
            available_gb: to_gb(bavail),
            percent_used,
        })
    }

    #[cfg(not(unix))]
    {
        let available = fs_available_space(path)?;
        Ok(DiskSpaceInfo {
            total_gb: 0.0,
            free_gb: to_gb(available),
            available_gb: to_gb(available),
            percent_used: 0.0,
        })
    }
}
/// Return the number of bytes available to the caller on the volume that
/// contains `path` (non-unix fallback used by `check_disk_space`).
///
/// If `path` is a file, its parent directory is queried instead, because the
/// Windows API expects a directory. Directories, including a drive root, are
/// queried directly.
///
/// # Errors
///
/// Returns [`CiError::Io`] if `path` cannot be stat'ed, if a file has no
/// parent directory, if the OS query fails, or if the platform is neither
/// unix nor Windows.
#[cfg(not(unix))]
fn fs_available_space(path: &std::path::Path) -> Result<u64> {
    // Also serves as an existence check: a missing path fails here with the
    // real I/O error instead of an opaque API failure later.
    let metadata = std::fs::metadata(path)?;

    let dir: &std::path::Path = if metadata.is_file() {
        match path.parent() {
            // A bare relative file name has an empty parent; use the CWD.
            Some(parent) if parent.as_os_str().is_empty() => {
                std::path::Path::new(".")
            }
            Some(parent) => parent,
            None => {
                return Err(CiError::Io(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    "Cannot determine parent path",
                )));
            }
        }
    } else {
        path
    };

    #[cfg(windows)]
    {
        use std::os::windows::ffi::OsStrExt;

        // NUL-terminated UTF-16 copy of the directory, with no length cap.
        // `metadata` above already rejected paths with interior NULs.
        let wide: Vec<u16> = dir
            .as_os_str()
            .encode_wide()
            .chain(std::iter::once(0))
            .collect();

        let mut free_bytes_available: u64 = 0;
        // SAFETY: `wide` is a valid NUL-terminated UTF-16 string that outlives
        // the call; the first out-pointer refers to a live, writable `u64`,
        // and the API documents the remaining two out-pointers as optional.
        let ok = unsafe {
            GetDiskFreeSpaceExW(
                wide.as_ptr(),
                &mut free_bytes_available,
                std::ptr::null_mut(),
                std::ptr::null_mut(),
            )
        };
        if ok == 0 {
            return Err(CiError::Io(std::io::Error::last_os_error()));
        }
        Ok(free_bytes_available)
    }

    #[cfg(not(windows))]
    {
        let _ = dir;
        Err(CiError::Io(std::io::Error::new(
            std::io::ErrorKind::Other,
            "Disk space check not implemented for this platform",
        )))
    }
}

// `GetDiskFreeSpaceExW` reports byte counts through 64-bit out-parameters.
// The non-`Ex` variant reports 32-bit sector/cluster counts instead.
#[cfg(windows)]
#[link(name = "kernel32")]
extern "system" {
    fn GetDiskFreeSpaceExW(
        lp_directory_name: *const u16,
        lp_free_bytes_available_to_caller: *mut u64,
        lp_total_number_of_bytes: *mut u64,
        lp_total_number_of_free_bytes: *mut u64,
    ) -> i32;
}
/// Disk space information for one filesystem, as produced by
/// `check_disk_space`.
///
/// All sizes are byte counts divided by 1024³ and are labelled "GB" in log
/// output.
#[derive(Debug, Clone)]
pub struct DiskSpaceInfo {
    /// Total size of the filesystem (`0.0` when the platform query does not
    /// report it).
    pub total_gb: f64,
    /// Free space on the filesystem.
    pub free_gb: f64,
    /// Space available to the calling user; this is the value the
    /// low/critical thresholds are checked against.
    pub available_gb: f64,
    /// Percentage of the filesystem in use, in the range `0.0..=100.0`
    /// (`0.0` when the total size is unknown).
    pub percent_used: f64,
}

impl DiskSpaceInfo {
    /// Below this much available space the disk is considered critical.
    const CRITICAL_THRESHOLD_GB: f64 = 1.0;
    /// Below this much available space the disk is considered low.
    const LOW_THRESHOLD_GB: f64 = 5.0;

    /// Check if disk space is critically low (less than 1GB available).
    #[must_use]
    pub fn is_critical(&self) -> bool {
        self.available_gb < Self::CRITICAL_THRESHOLD_GB
    }

    /// Check if disk space is low (less than 5GB available).
    ///
    /// This is also `true` whenever [`Self::is_critical`] is `true`.
    #[must_use]
    pub fn is_low(&self) -> bool {
        self.available_gb < Self::LOW_THRESHOLD_GB
    }

    /// Get a human-readable one-line summary.
    ///
    /// The percentage is explicitly labelled "used" so it is not mistaken
    /// for the share of free space it is printed next to.
    #[must_use]
    pub fn summary(&self) -> String {
        format!(
            "Total: {:.1}GB, Free: {:.1}GB ({:.1}% used), Available: {:.1}GB",
            self.total_gb, self.free_gb, self.percent_used, self.available_gb
        )
    }
}

View file

@ -2,6 +2,7 @@ use std::{collections::HashMap, time::Duration};
use fc_common::{ use fc_common::{
config::EvaluatorConfig, config::EvaluatorConfig,
error::check_disk_space,
models::{CreateBuild, CreateEvaluation, EvaluationStatus, JobsetInput}, models::{CreateBuild, CreateEvaluation, EvaluationStatus, JobsetInput},
repo, repo,
}; };
@ -44,6 +45,22 @@ async fn run_cycle(
jobset_name = %jobset.name, jobset_name = %jobset.name,
"Failed to evaluate jobset: {e}" "Failed to evaluate jobset: {e}"
); );
let msg = e.to_string().to_lowercase();
if msg.contains("no space left on device")
|| msg.contains("disk full")
|| msg.contains("enospc")
|| msg.contains("cannot create")
|| msg.contains("sqlite")
{
tracing::error!(
"DISK SPACE ISSUE DETECTED: Evaluation failed due to disk space \
problems. Please free up space on the server:\n- Run \
`nix-collect-garbage -d` to clean the Nix store\n- Clear \
/tmp/fc-evaluator directory\n- Check build logs directory if \
configured"
);
}
} }
} }
}) })
@ -64,6 +81,36 @@ async fn evaluate_jobset(
let project_name = jobset.project_name.clone(); let project_name = jobset.project_name.clone();
let branch = jobset.branch.clone(); let branch = jobset.branch.clone();
tracing::info!(
jobset = %jobset.name,
project = %project_name,
"Starting evaluation cycle"
);
if let Err(e) = check_disk_space(&work_dir) {
tracing::warn!(
jobset = %jobset.name,
"Disk space check failed: {}. Proceeding anyway...",
e
);
}
if let Ok(info) = check_disk_space(&work_dir) {
if info.is_critical() {
tracing::error!(
jobset = %jobset.name,
"CRITICAL: Less than 1GB disk space available. {}",
info.summary()
);
} else if info.is_low() {
tracing::warn!(
jobset = %jobset.name,
"LOW: Less than 5GB disk space available. {}",
info.summary()
);
}
}
// Clone/fetch in a blocking task (git2 is sync) with timeout // Clone/fetch in a blocking task (git2 is sync) with timeout
let (repo_path, commit_hash) = tokio::time::timeout( let (repo_path, commit_hash) = tokio::time::timeout(
git_timeout, git_timeout,