Signed-off-by: NotAShelf <raf@notashelf.dev> Change-Id: I794f9152bb02e3dd91c9738369b94fc66a6a6964
698 lines
18 KiB
Rust
698 lines
18 KiB
Rust
#![allow(improper_ctypes_definitions)]
|
|
|
|
use std::{
|
|
fs,
|
|
io::Read,
|
|
path::{
|
|
Path,
|
|
PathBuf,
|
|
},
|
|
sync::{
|
|
atomic::{
|
|
AtomicBool,
|
|
Ordering,
|
|
},
|
|
Arc,
|
|
},
|
|
time::{
|
|
Duration,
|
|
Instant,
|
|
SystemTime,
|
|
UNIX_EPOCH,
|
|
},
|
|
};
|
|
|
|
use clap::Parser;
|
|
use libloading::Library;
|
|
use pscand_core::{
|
|
logging::{
|
|
LogLevel,
|
|
RingBufferLogger,
|
|
},
|
|
scanner::Scanner,
|
|
Config as CoreConfig,
|
|
};
|
|
use sha2::{
|
|
Digest,
|
|
Sha256,
|
|
};
|
|
use tokio::{
|
|
sync::RwLock,
|
|
time::interval,
|
|
};
|
|
|
|
// Type of the plugin entry point: `load_scanners` resolves the `pscand_scanner`
// symbol of every scanner library with this signature and calls it to obtain a
// scanner instance.
type ScannerCreator = pscand_core::ScannerCreatorFfi;
|
|
|
|
fn expand_path(path: &Path) -> Result<PathBuf, Box<dyn std::error::Error>> {
|
|
let path_str = path.to_str().ok_or("Invalid path encoding")?;
|
|
if path_str.starts_with("~/") {
|
|
if let Some(home) = dirs::home_dir() {
|
|
return Ok(home.join(&path_str[2..]));
|
|
}
|
|
}
|
|
Ok(path.to_path_buf())
|
|
}
|
|
|
|
#[derive(Parser, Debug)]
|
|
#[command(
|
|
name = "pscand",
|
|
version = "0.1.0",
|
|
about = "Pluggable System Condition Monitoring Daemon"
|
|
)]
|
|
enum Args {
|
|
Run(RunArgs),
|
|
List,
|
|
}
|
|
|
|
#[derive(Parser, Debug)]
|
|
struct RunArgs {
|
|
#[arg(short, long, default_value = "~/.config/pscand/pscand.toml")]
|
|
config: PathBuf,
|
|
|
|
#[arg(short, long)]
|
|
debug: bool,
|
|
}
|
|
|
|
fn verify_library(path: &Path) -> Result<(), String> {
|
|
let mut file = fs::File::open(path).map_err(|e| {
|
|
format!(
|
|
"failed to open library {} for verification: {}",
|
|
path.display(),
|
|
e
|
|
)
|
|
})?;
|
|
let mut buffer = [0u8; 4096];
|
|
let bytes_read = file.read(&mut buffer).map_err(|e| {
|
|
format!(
|
|
"failed to read library {} for hash calculation: {}",
|
|
path.display(),
|
|
e
|
|
)
|
|
})?;
|
|
|
|
if bytes_read < 4 {
|
|
return Err(format!(
|
|
"library {} is too small to be valid",
|
|
path.display()
|
|
));
|
|
}
|
|
|
|
let mut hasher = Sha256::new();
|
|
hasher.update(&buffer[..bytes_read]);
|
|
let _hash = hasher.finalize();
|
|
|
|
Ok(())
|
|
}
|
|
|
|
struct LoadedScanner {
|
|
name: String,
|
|
scanner: Arc<RwLock<Box<dyn Scanner>>>,
|
|
interval: Duration,
|
|
#[allow(dead_code)]
|
|
library: Library,
|
|
}
|
|
|
|
#[derive(Clone)]
|
|
struct DaemonState {
|
|
running: Arc<AtomicBool>,
|
|
shutdown_requested: Arc<AtomicBool>,
|
|
start_time: Arc<RwLock<SystemTime>>,
|
|
last_collection: Arc<RwLock<SystemTime>>,
|
|
collection_count: Arc<RwLock<u64>>,
|
|
error_count: Arc<RwLock<u64>>,
|
|
heartbeat_path: PathBuf,
|
|
}
|
|
|
|
impl DaemonState {
|
|
fn new(heartbeat_path: PathBuf) -> Self {
|
|
Self {
|
|
running: Arc::new(AtomicBool::new(true)),
|
|
shutdown_requested: Arc::new(AtomicBool::new(false)),
|
|
start_time: Arc::new(RwLock::new(SystemTime::now())),
|
|
last_collection: Arc::new(RwLock::new(SystemTime::UNIX_EPOCH)),
|
|
collection_count: Arc::new(RwLock::new(0)),
|
|
error_count: Arc::new(RwLock::new(0)),
|
|
heartbeat_path,
|
|
}
|
|
}
|
|
|
|
async fn record_collection(&self) {
|
|
*self.last_collection.write().await = SystemTime::now();
|
|
*self.collection_count.write().await += 1;
|
|
}
|
|
|
|
async fn record_error(&self) {
|
|
*self.error_count.write().await += 1;
|
|
}
|
|
|
|
fn get_stats_sync(&self) -> (u64, u64, u64) {
|
|
let collections = self.collection_count.try_read().map(|r| *r).unwrap_or(0);
|
|
let errors = self.error_count.try_read().map(|r| *r).unwrap_or(0);
|
|
let uptime = self
|
|
.start_time
|
|
.try_read()
|
|
.map(|t| {
|
|
SystemTime::now()
|
|
.duration_since(*t)
|
|
.map(|d| d.as_secs())
|
|
.unwrap_or(0)
|
|
})
|
|
.unwrap_or(0);
|
|
(collections, errors, uptime)
|
|
}
|
|
|
|
async fn update_heartbeat(&self) -> std::io::Result<()> {
|
|
let now = SystemTime::now()
|
|
.duration_since(UNIX_EPOCH)
|
|
.map(|d| d.as_secs())
|
|
.unwrap_or(0);
|
|
let (collections, errors, uptime) = self.get_stats_sync();
|
|
|
|
let heartbeat = format!(
|
|
"{},{},{},{},{}\n",
|
|
now,
|
|
collections,
|
|
errors,
|
|
uptime,
|
|
if self.shutdown_requested.load(Ordering::SeqCst) {
|
|
"shutdown"
|
|
} else {
|
|
"running"
|
|
}
|
|
);
|
|
|
|
std::fs::write(&self.heartbeat_path, heartbeat)?;
|
|
Ok(())
|
|
}
|
|
|
|
async fn write_status(
|
|
&self,
|
|
logger: &RingBufferLogger,
|
|
) -> std::io::Result<()> {
|
|
let uptime = SystemTime::now()
|
|
.duration_since(
|
|
self.start_time.try_read().map(|t| *t).unwrap_or(UNIX_EPOCH),
|
|
)
|
|
.map(|d| d.as_secs())
|
|
.unwrap_or(0);
|
|
let collections = self.collection_count.try_read().map(|c| *c).unwrap_or(0);
|
|
let errors = self.error_count.try_read().map(|e| *e).unwrap_or(0);
|
|
let last = self
|
|
.last_collection
|
|
.try_read()
|
|
.map(|l| *l)
|
|
.unwrap_or(UNIX_EPOCH);
|
|
let last_secs = last
|
|
.duration_since(UNIX_EPOCH)
|
|
.map(|d| d.as_secs())
|
|
.unwrap_or(0);
|
|
|
|
let status = serde_json::json!({
|
|
"uptime_seconds": uptime,
|
|
"total_collections": collections,
|
|
"total_errors": errors,
|
|
"last_collection": last_secs,
|
|
"shutdown_requested": self.shutdown_requested.load(Ordering::SeqCst),
|
|
});
|
|
|
|
logger.log(
|
|
LogLevel::Info,
|
|
"daemon",
|
|
"status",
|
|
serde_json::to_string(&status).unwrap_or_default(),
|
|
);
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
let args = Args::parse();
|
|
|
|
match args {
|
|
Args::Run(run_args) => {
|
|
run_daemon(run_args).await?;
|
|
},
|
|
Args::List => {
|
|
list_scanners().await?;
|
|
},
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
async fn run_daemon(args: RunArgs) -> Result<(), Box<dyn std::error::Error>> {
|
|
if args.debug {
|
|
env_logger::Builder::from_env(
|
|
env_logger::Env::default().default_filter_or("debug"),
|
|
)
|
|
.init();
|
|
} else {
|
|
env_logger::Builder::from_env(
|
|
env_logger::Env::default().default_filter_or("info"),
|
|
)
|
|
.init();
|
|
}
|
|
|
|
log::info!("Starting pscand daemon");
|
|
|
|
// Expand ~ in config path
|
|
let config_path = expand_path(&args.config)
|
|
.map_err(|e| format!("Failed to expand config path: {}", e))?;
|
|
|
|
let config = if config_path.exists() {
|
|
CoreConfig::load(&config_path)?
|
|
} else {
|
|
log::warn!("Config file not found at {:?}", config_path);
|
|
log::info!(
|
|
"Creating default config. Run with --config to specify a different path."
|
|
);
|
|
|
|
// Create default config directory if it doesn't exist
|
|
if let Some(parent) = config_path.parent() {
|
|
if let Err(e) = std::fs::create_dir_all(parent) {
|
|
log::error!("Failed to create config directory {:?}: {}", parent, e);
|
|
}
|
|
}
|
|
|
|
CoreConfig::default()
|
|
};
|
|
|
|
std::fs::create_dir_all(&config.log_dir).map_err(|e| {
|
|
format!("Failed to create log directory {:?}: {}", config.log_dir, e)
|
|
})?;
|
|
|
|
let log_file = config.log_dir.join("pscand.log");
|
|
let logger = Arc::new(RingBufferLogger::new(
|
|
config.ring_buffer_size,
|
|
Some(log_file),
|
|
config.journal_enabled,
|
|
config.file_enabled,
|
|
));
|
|
|
|
let heartbeat_path = config.log_dir.join("heartbeat");
|
|
let daemon_state = DaemonState::new(heartbeat_path);
|
|
|
|
daemon_state.update_heartbeat().await?;
|
|
|
|
logger.log(
|
|
LogLevel::Info,
|
|
"daemon",
|
|
"startup",
|
|
serde_json::json!({
|
|
"version": "0.1.0",
|
|
"ring_buffer_size": config.ring_buffer_size,
|
|
"journal_enabled": config.journal_enabled,
|
|
"file_enabled": config.file_enabled,
|
|
})
|
|
.to_string(),
|
|
);
|
|
|
|
std::panic::set_hook(Box::new({
|
|
let logger = Arc::clone(&logger);
|
|
let daemon_state = daemon_state.clone();
|
|
let log_dir = config.log_dir.clone();
|
|
move |panic_info| {
|
|
daemon_state.running.store(false, Ordering::SeqCst);
|
|
|
|
let (collections, errors, uptime) = daemon_state.get_stats_sync();
|
|
let entries = logger.get_recent(60);
|
|
let crash_log = log_dir.join("crash.log");
|
|
|
|
if let Ok(mut file) = std::fs::File::create(&crash_log) {
|
|
use std::io::Write;
|
|
let _ = writeln!(file, "=== Crash at {} ===", chrono::Utc::now());
|
|
let _ = writeln!(file, "Panic: {}", panic_info);
|
|
let _ = writeln!(file, "Uptime: {} seconds", uptime);
|
|
let _ = writeln!(file, "Total collections: {}", collections);
|
|
let _ = writeln!(file, "Total errors: {}", errors);
|
|
let _ = writeln!(file, "\n=== Last {} log entries ===", entries.len());
|
|
for entry in entries {
|
|
let _ = writeln!(file, "{}", entry.to_json());
|
|
}
|
|
}
|
|
|
|
if let Ok(mut file) = std::fs::OpenOptions::new()
|
|
.create(true)
|
|
.append(true)
|
|
.open(&daemon_state.heartbeat_path)
|
|
{
|
|
use std::io::Write;
|
|
let _ = writeln!(
|
|
file,
|
|
"PANIC,{},{},{},{}",
|
|
uptime,
|
|
collections,
|
|
errors,
|
|
chrono::Utc::now()
|
|
);
|
|
}
|
|
}
|
|
}));
|
|
|
|
let scanners = load_scanners(&config, &logger).await?;
|
|
|
|
if scanners.is_empty() {
|
|
log::error!("No scanners loaded!");
|
|
log::error!("Please ensure:");
|
|
log::error!(
|
|
" 1. Scanner plugins are installed in one of the configured directories"
|
|
);
|
|
log::error!(
|
|
" 2. Scanner directories are correctly set in config file or \
|
|
PSCAND_SCANNER_DIRS env var"
|
|
);
|
|
log::error!(" 3. Scanners are not disabled in the configuration");
|
|
logger.log(
|
|
LogLevel::Error,
|
|
"daemon",
|
|
"no_scanners",
|
|
"No scanner plugins loaded".to_string(),
|
|
);
|
|
return Err("No scanners loaded. See error messages above.".into());
|
|
} else {
|
|
log::info!("Loaded {} scanners", scanners.len());
|
|
logger.log(
|
|
LogLevel::Info,
|
|
"daemon",
|
|
"scanners_loaded",
|
|
serde_json::json!({
|
|
"count": scanners.len(),
|
|
"names": scanners.iter().map(|s| s.name.clone()).collect::<Vec<_>>()
|
|
})
|
|
.to_string(),
|
|
);
|
|
}
|
|
|
|
let scanner_handles = Arc::new(RwLock::new(Vec::new()));
|
|
let scanner_handles_shutdown = scanner_handles.clone();
|
|
let daemon_state_clone = daemon_state.clone();
|
|
let logger_clone = Arc::clone(&logger);
|
|
|
|
let scanner_task = tokio::spawn(async move {
|
|
let mut handles = Vec::new();
|
|
|
|
for loaded in scanners {
|
|
let logger = Arc::clone(&logger_clone);
|
|
let name = loaded.name.clone();
|
|
let scanner = loaded.scanner.clone();
|
|
let scanner_interval = loaded.interval;
|
|
let state = daemon_state_clone.clone();
|
|
|
|
let handle = tokio::spawn(async move {
|
|
let mut ticker = interval(scanner_interval);
|
|
let _collection_start = Instant::now();
|
|
|
|
loop {
|
|
ticker.tick().await;
|
|
|
|
if state.shutdown_requested.load(Ordering::SeqCst) {
|
|
let scanner_guard = scanner.read().await;
|
|
match scanner_guard.collect() {
|
|
Ok(metrics) => {
|
|
logger.log(
|
|
LogLevel::Info,
|
|
&name,
|
|
"final_collection",
|
|
serde_json::json!({
|
|
"metrics": metrics,
|
|
"reason": "shutdown"
|
|
})
|
|
.to_string(),
|
|
);
|
|
},
|
|
Err(e) => {
|
|
logger.log(
|
|
LogLevel::Error,
|
|
&name,
|
|
"final_collection_error",
|
|
e.to_string(),
|
|
);
|
|
},
|
|
}
|
|
state.record_collection().await;
|
|
if let Err(e) = state.update_heartbeat().await {
|
|
log::warn!("Failed to update heartbeat during shutdown: {}", e);
|
|
}
|
|
break;
|
|
}
|
|
|
|
let scan_start = Instant::now();
|
|
let collect_result = {
|
|
let guard = scanner.read().await;
|
|
guard.collect()
|
|
};
|
|
|
|
match collect_result {
|
|
Ok(metrics) => {
|
|
let elapsed = scan_start.elapsed().as_millis();
|
|
logger.log(
|
|
LogLevel::Info,
|
|
&name,
|
|
"metrics",
|
|
serde_json::json!({
|
|
"metrics": metrics,
|
|
"collection_time_ms": elapsed
|
|
})
|
|
.to_string(),
|
|
);
|
|
state.record_collection().await;
|
|
},
|
|
Err(e) => {
|
|
logger.log(
|
|
LogLevel::Error,
|
|
&name,
|
|
"collection_error",
|
|
e.to_string(),
|
|
);
|
|
state.record_error().await;
|
|
},
|
|
}
|
|
if let Err(e) = state.update_heartbeat().await {
|
|
log::warn!("Failed to update heartbeat: {}", e);
|
|
}
|
|
}
|
|
|
|
if let Err(e) = scanner.write().await.cleanup() {
|
|
logger.log(LogLevel::Warn, &name, "cleanup_error", e.to_string());
|
|
}
|
|
});
|
|
|
|
handles.push(handle);
|
|
}
|
|
|
|
*scanner_handles_shutdown.write().await = handles;
|
|
});
|
|
|
|
let daemon_state_hb = daemon_state.clone();
|
|
let heartbeat_task = tokio::spawn(async move {
|
|
let mut ticker = interval(Duration::from_secs(5));
|
|
loop {
|
|
ticker.tick().await;
|
|
if let Err(e) = daemon_state_hb.update_heartbeat().await {
|
|
log::warn!("Failed to update heartbeat: {}", e);
|
|
}
|
|
}
|
|
});
|
|
|
|
let sigint = tokio::signal::ctrl_c();
|
|
let mut sigterm =
|
|
tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())?;
|
|
|
|
tokio::select! {
|
|
_ = sigint => {
|
|
log::info!("Received Ctrl+C, initiating graceful shutdown");
|
|
}
|
|
_ = sigterm.recv() => {
|
|
log::info!("Received SIGTERM, initiating graceful shutdown");
|
|
}
|
|
}
|
|
|
|
daemon_state
|
|
.shutdown_requested
|
|
.store(true, Ordering::SeqCst);
|
|
daemon_state.running.store(false, Ordering::SeqCst);
|
|
|
|
let (collections, errors, _) = daemon_state.get_stats_sync();
|
|
logger.log(
|
|
LogLevel::Info,
|
|
"daemon",
|
|
"shutdown_initiated",
|
|
format!(
|
|
"{{\"collections\": {}, \"errors\": {}}}",
|
|
collections, errors
|
|
),
|
|
);
|
|
|
|
heartbeat_task.abort();
|
|
let _ = heartbeat_task.await;
|
|
|
|
daemon_state
|
|
.shutdown_requested
|
|
.store(true, Ordering::SeqCst);
|
|
scanner_task.await.ok();
|
|
|
|
daemon_state.write_status(&logger).await.ok();
|
|
daemon_state.update_heartbeat().await.ok();
|
|
|
|
logger.log(
|
|
LogLevel::Info,
|
|
"daemon",
|
|
"shutdown_complete",
|
|
"{}".to_string(),
|
|
);
|
|
|
|
let ring_buffer_path = config.log_dir.join("pscand.buffer");
|
|
logger.flush_to_file(&ring_buffer_path)?;
|
|
|
|
log::info!("pscand shut down cleanly");
|
|
Ok(())
|
|
}
|
|
|
|
async fn load_scanners(
|
|
config: &CoreConfig,
|
|
logger: &RingBufferLogger,
|
|
) -> Result<Vec<LoadedScanner>, Box<dyn std::error::Error>> {
|
|
let mut loaded = Vec::new();
|
|
|
|
let mut missing_dirs = Vec::new();
|
|
|
|
for dir in &config.scanner_dirs {
|
|
if !dir.exists() {
|
|
missing_dirs.push(dir.clone());
|
|
log::warn!("Scanner directory does not exist: {:?}", dir);
|
|
continue;
|
|
}
|
|
|
|
log::info!("Loading scanners from {:?}", dir);
|
|
|
|
for entry in std::fs::read_dir(dir)? {
|
|
let entry = entry?;
|
|
let path = entry.path();
|
|
|
|
if path.extension().and_then(|s| s.to_str()) != Some("so") {
|
|
continue;
|
|
}
|
|
|
|
// Verify library before loading
|
|
if let Err(e) = verify_library(&path) {
|
|
log::error!("Scanner {:?} failed verification: {}", path, e);
|
|
logger.log(
|
|
LogLevel::Error,
|
|
"loader",
|
|
"verification_failed",
|
|
format!("{}: {}", path.display(), e),
|
|
);
|
|
continue;
|
|
}
|
|
|
|
unsafe {
|
|
match Library::new(&path) {
|
|
Ok(lib) => {
|
|
let creator: libloading::Symbol<ScannerCreator> =
|
|
match lib.get(b"pscand_scanner") {
|
|
Ok(s) => s,
|
|
Err(e) => {
|
|
log::warn!("Scanner {:?} missing symbol: {}", path, e);
|
|
logger.log(
|
|
LogLevel::Warn,
|
|
"loader",
|
|
"missing_symbol",
|
|
format!("{}: {}", path.display(), e),
|
|
);
|
|
continue;
|
|
},
|
|
};
|
|
|
|
let scanner = match pscand_core::get_scanner(creator()) {
|
|
Some(s) => s,
|
|
None => {
|
|
log::error!(
|
|
"Failed to get scanner from library {}",
|
|
path.display()
|
|
);
|
|
continue;
|
|
},
|
|
};
|
|
let name = scanner.name().to_string();
|
|
|
|
let scanner_enabled = config.is_scanner_enabled(&name);
|
|
|
|
if !scanner_enabled {
|
|
log::info!("Scanner {} disabled in config", name);
|
|
continue;
|
|
}
|
|
|
|
let mut scanner = scanner;
|
|
|
|
if let Some(scanner_config) = config.scanner_config(&name) {
|
|
let toml_map: toml::map::Map<String, toml::Value> =
|
|
scanner_config.extra.clone().into_iter().collect();
|
|
let toml_val = toml::Value::Table(toml_map);
|
|
if let Err(e) = scanner.init(&toml_val) {
|
|
log::error!("Failed to init scanner {}: {}", name, e);
|
|
logger.log(
|
|
LogLevel::Error,
|
|
"loader",
|
|
"init_failed",
|
|
format!("{}: {}", name, e),
|
|
);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Determine interval: config override > scanner default
|
|
let interval = config
|
|
.scanner_config(&name)
|
|
.and_then(|c| c.interval_secs)
|
|
.map(Duration::from_secs)
|
|
.unwrap_or_else(|| scanner.interval());
|
|
|
|
loaded.push(LoadedScanner {
|
|
name,
|
|
scanner: Arc::new(RwLock::new(scanner)),
|
|
interval,
|
|
library: lib,
|
|
});
|
|
},
|
|
Err(e) => {
|
|
log::warn!("Failed to load scanner {:?}: {}", path, e);
|
|
logger.log(
|
|
LogLevel::Warn,
|
|
"loader",
|
|
"load_failed",
|
|
format!("{}: {}", path.display(), e),
|
|
);
|
|
},
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if !missing_dirs.is_empty() {
|
|
log::warn!("The following scanner directories do not exist:");
|
|
for dir in &missing_dirs {
|
|
log::warn!(" - {:?}", dir);
|
|
}
|
|
log::info!("Create these directories or update scanner_dirs in config");
|
|
}
|
|
|
|
Ok(loaded)
|
|
}
|
|
|
|
/// Prints the built-in scanners and the locations searched for dynamic
/// scanner plugins to standard output. Always returns `Ok(())`.
async fn list_scanners() -> Result<(), Box<dyn std::error::Error>> {
    // One entry per printed line, in output order.
    const LINES: [&str; 7] = [
        "Available built-in scanners:",
        " - system: CPU, memory, disk, network, load average",
        " - sensor: hwmon temperature, fan, voltage sensors",
        " - power: battery and power supply status",
        " - proc: process count and zombie detection",
        "\nDynamic scanners are loaded from $PSCAND_SCANNER_DIRS (colon-separated)",
        " Default fallback: ~/.local/share/pscand/scanners/ or ~/.config/pscand/scanners/",
    ];

    for line in LINES.iter() {
        println!("{}", line);
    }

    Ok(())
}
|