Eris/src/config.rs
NotAShelf 9297ba4e0c
eris: allow various log formats
Supports plain, pretty, json and pretty-json
2025-05-02 11:22:52 +03:00

551 lines
18 KiB
Rust

use clap::Parser;
use ipnetwork::IpNetwork;
use regex::Regex;
use serde::{Deserialize, Serialize};
use std::env;
use std::fs;
use std::net::IpAddr;
use std::path::{Path, PathBuf};
// Command-line arguments, parsed with clap's derive API.
//
// Options that declare a `default_value` fall back to it when the flag is not
// passed; `base_dir` and `config_file` are `Option`s and stay `None` when omitted.
// `Config::from_args` turns a parsed `Args` into a `Config`.
#[derive(Parser, Debug, Clone)]
#[clap(
author,
version,
about = "Markov chain based HTTP tarpit/honeypot that delays and tracks potential attackers"
)]
pub struct Args {
// --- Network endpoints (all given as "ip:port" strings) ---
// Address the tarpit itself listens on for incoming HTTP requests.
#[clap(
long,
default_value = "0.0.0.0:8888",
help = "Address and port to listen for incoming HTTP requests (format: ip:port)"
)]
pub listen_addr: String,
// Address of the Prometheus metrics / status endpoint.
#[clap(
long,
default_value = "0.0.0.0:9100",
help = "Address and port to expose Prometheus metrics and status endpoint (format: ip:port)"
)]
pub metrics_addr: String,
// Boolean switch: present means the metrics server is disabled.
#[clap(long, help = "Disable Prometheus metrics server completely")]
pub disable_metrics: bool,
// Backend that legitimate (non-trapped) requests are proxied to.
#[clap(
long,
default_value = "127.0.0.1:80",
help = "Backend server address to proxy legitimate requests to (format: ip:port)"
)]
pub backend_addr: String,
// --- Tarpit timing and blocking ---
// Lower bound, in milliseconds, of the delay between chunks. Nothing in this
// struct enforces `min_delay <= max_delay`.
#[clap(
long,
default_value = "1000",
help = "Minimum delay in milliseconds between chunks sent to attacker"
)]
pub min_delay: u64,
// Upper bound, in milliseconds, of the delay between chunks.
#[clap(
long,
default_value = "15000",
help = "Maximum delay in milliseconds between chunks sent to attacker"
)]
pub max_delay: u64,
// Maximum time, in seconds (not milliseconds), a connection is kept in the tarpit.
#[clap(
long,
default_value = "600",
help = "Maximum time in seconds to keep an attacker in the tarpit before disconnecting"
)]
pub max_tarpit_time: u64,
// Number of trap hits after which an IP is blocked permanently.
#[clap(
long,
default_value = "3",
help = "Number of hits to honeypot patterns before permanently blocking an IP"
)]
pub block_threshold: u32,
// --- Filesystem locations ---
// When set, `Config::from_args` derives the conf/data/cache directories from
// this path instead of calling `get_xdg_dirs`.
#[clap(
long,
help = "Base directory for all application data (overrides XDG directory structure)"
)]
pub base_dir: Option<PathBuf>,
// Optional configuration file; `Config::load_from_file` reads TOML for a
// `.toml` extension and JSON for anything else.
#[clap(
long,
help = "Path to configuration file (JSON or TOML, overrides command line options)"
)]
pub config_file: Option<PathBuf>,
// --- Logging ---
// Log verbosity as a free-form string; `Config` has no matching field for it.
#[clap(
long,
default_value = "info",
help = "Log level: trace, debug, info, warn, error"
)]
pub log_level: String,
// Log format as a free-form string; nothing in this struct validates it
// against the listed values. `LogFormat` is the typed form stored in `Config`.
#[clap(
long,
default_value = "pretty",
help = "Log format: plain, pretty, json, pretty-json"
)]
pub log_format: String,
}
// Output format for log messages, stored in `Config::log_format`.
//
// The four variants line up with the `--log-format` values listed in the `Args`
// help text (plain, pretty, json, pretty-json).
// NOTE(review): no serde rename attribute is applied, so configuration files
// presumably spell the variants by their Rust names ("Plain", "PrettyJson", ...)
// rather than the CLI spellings; confirm before documenting this for users.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
pub enum LogFormat {
// Selected by the CLI value "plain".
Plain,
// Selected by the CLI value "pretty". Marked `#[default]`, so this is what
// `LogFormat::default()` returns; it matches the CLI default of "pretty".
#[default]
Pretty,
// Selected by the CLI value "json".
Json,
// Selected by the CLI value "pretty-json".
PrettyJson,
}
// Trap pattern structure. It can be either a plain string or a regex; the
// latter exists to catch the more advanced patterns necessitated by more
// sophisticated crawlers.
//
// The enum is `#[serde(untagged)]`, so no variant name appears in the
// serialized form: serde picks the variant from the shape of the value (a bare
// string for `Plain`, a map with `pattern` and `regex` keys for `Regex`).
#[derive(Clone, Debug, Deserialize, Serialize)]
#[serde(untagged)]
pub enum TrapPattern {
// Substring that is searched for in the request path.
Plain(String),
// `pattern` holds the expression source; `regex` is expected to be `true`
// for `pattern` to be treated as a regular expression (`as_regex` always
// sets it to `true`).
Regex { pattern: String, regex: bool },
}
impl TrapPattern {
    // Builds a plain (substring) pattern from `value`.
    pub fn as_plain(value: &str) -> Self {
        Self::Plain(value.to_string())
    }

    // Builds a regular-expression pattern from `value`. The expression is not
    // compiled or validated here; that happens in `matches`.
    pub fn as_regex(value: &str) -> Self {
        Self::Regex {
            pattern: value.to_string(),
            regex: true,
        }
    }

    // Returns `true` when `path` is caught by this pattern.
    //
    // * `Plain` patterns match when `path` contains the pattern as a substring.
    // * `Regex` patterns with `regex: true` match when the compiled expression
    //   matches anywhere in `path`; an expression that fails to compile never
    //   matches.
    // * `Regex` patterns with `regex: false` are treated like `Plain`. Such
    //   values can come from a configuration file (`{ pattern = "...", regex =
    //   false }`); previously they silently matched nothing at all.
    //
    // NOTE: the expression is compiled on every call. Caching the compiled
    // `Regex` would require a change to the (serialized) enum layout, so it is
    // deliberately left out of this method.
    pub fn matches(&self, path: &str) -> bool {
        match self {
            Self::Plain(pattern)
            | Self::Regex {
                pattern,
                regex: false,
            } => path.contains(pattern.as_str()),
            Self::Regex {
                pattern,
                regex: true,
            } => match Regex::new(pattern) {
                Ok(re) => re.is_match(path),
                // Invalid expressions are ignored rather than reported.
                Err(_) => false,
            },
        }
    }
}
// Configuration structure
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct Config {
pub listen_addr: String,
pub metrics_addr: String,
pub disable_metrics: bool,
pub backend_addr: String,
pub min_delay: u64,
pub max_delay: u64,
pub max_tarpit_time: u64,
pub block_threshold: u32,
pub trap_patterns: Vec<TrapPattern>,
pub whitelist_networks: Vec<String>,
pub markov_corpora_dir: String,
pub lua_scripts_dir: String,
pub data_dir: String,
pub config_dir: String,
pub cache_dir: String,
pub log_format: LogFormat,
}
impl Default for Config {
fn default() -> Self {
Self {
listen_addr: "0.0.0.0:8888".to_string(),
metrics_addr: "0.0.0.0:9100".to_string(),
disable_metrics: false,
backend_addr: "127.0.0.1:80".to_string(),
min_delay: 1000,
max_delay: 15000,
max_tarpit_time: 600,
block_threshold: 3,
trap_patterns: vec![
// Basic attack patterns as plain strings
TrapPattern::as_plain("/vendor/phpunit"),
TrapPattern::as_plain("eval-stdin.php"),
TrapPattern::as_plain("/wp-admin"),
TrapPattern::as_plain("/wp-login.php"),
TrapPattern::as_plain("/xmlrpc.php"),
TrapPattern::as_plain("/phpMyAdmin"),
TrapPattern::as_plain("/solr/"),
TrapPattern::as_plain("/.env"),
TrapPattern::as_plain("/config"),
TrapPattern::as_plain("/actuator/"),
// More aggressive patterns for various PHP exploits.
// XXX: I dedicate this entire section to that one single crawler
// that has been scanning my entire network, hitting 403s left and right
// but not giving up, and coming back the next day at the same time to
// scan the same paths over and over. Kudos to you, random crawler.
TrapPattern::as_regex(r"/.*phpunit.*eval-stdin\.php"),
TrapPattern::as_regex(r"/index\.php\?s=/index/\\think\\app/invokefunction"),
TrapPattern::as_regex(r".*%ADd\+auto_prepend_file%3dphp://input.*"),
TrapPattern::as_regex(r".*%ADd\+allow_url_include%3d1.*"),
TrapPattern::as_regex(r".*/wp-content/plugins/.*\.php"),
TrapPattern::as_regex(r".*/wp-content/themes/.*\.php"),
TrapPattern::as_regex(r".*eval\(.*\).*"),
TrapPattern::as_regex(r".*/adminer\.php.*"),
TrapPattern::as_regex(r".*/admin\.php.*"),
TrapPattern::as_regex(r".*/administrator/.*"),
TrapPattern::as_regex(r".*/wp-json/.*"),
TrapPattern::as_regex(r".*/api/.*\.php.*"),
TrapPattern::as_regex(r".*/cgi-bin/.*"),
TrapPattern::as_regex(r".*/owa/.*"),
TrapPattern::as_regex(r".*/ecp/.*"),
TrapPattern::as_regex(r".*/webshell\.php.*"),
TrapPattern::as_regex(r".*/shell\.php.*"),
TrapPattern::as_regex(r".*/cmd\.php.*"),
TrapPattern::as_regex(r".*/struts.*"),
],
whitelist_networks: vec![
"192.168.0.0/16".to_string(),
"10.0.0.0/8".to_string(),
"172.16.0.0/12".to_string(),
"127.0.0.0/8".to_string(),
],
markov_corpora_dir: "./corpora".to_string(),
lua_scripts_dir: "./scripts".to_string(),
data_dir: "./data".to_string(),
config_dir: "./conf".to_string(),
cache_dir: "./cache".to_string(),
log_format: LogFormat::Pretty,
}
}
}
// Gets standard XDG directory paths for config, data and cache.
// XXX: This could be "simplified" by using the Dirs crate, but I can't
// really justify pulling a library for something I can handle in less
// than 30 lines. Unless cross-platform becomes necessary, the below
// implementation is good enough. For alternative platforms, we can simply
// enhance the current implementation as needed.
//
// Returns `(config_dir, data_dir, cache_dir)`, each ending in an `eris`
// component. For every directory the matching `XDG_*_HOME` variable is used
// when it holds an absolute path; otherwise the conventional location below
// `$HOME` is used (`.config`, `.local/share`, `.cache`). If `HOME` is unset or
// empty, the current directory (`.`) stands in for it.
pub fn get_xdg_dirs() -> (PathBuf, PathBuf, PathBuf) {
    // Resolves a single XDG base directory: `var` if usable, else
    // `$HOME/<home_fallback...>`.
    fn xdg_base(var: &str, home_fallback: &[&str]) -> PathBuf {
        env::var_os(var)
            .map(PathBuf::from)
            // The XDG spec says an empty value counts as unset and relative
            // paths must be ignored; an empty path is not absolute, so this
            // single check covers both cases.
            .filter(|dir| dir.is_absolute())
            .unwrap_or_else(|| {
                let home = env::var_os("HOME")
                    .filter(|value| !value.is_empty())
                    .map_or_else(|| PathBuf::from("."), PathBuf::from);
                home_fallback
                    .iter()
                    .fold(home, |dir, component| dir.join(component))
            })
    }

    let config_dir = xdg_base("XDG_CONFIG_HOME", &[".config"]).join("eris");
    let data_dir = xdg_base("XDG_DATA_HOME", &[".local", "share"]).join("eris");
    let cache_dir = xdg_base("XDG_CACHE_HOME", &[".cache"]).join("eris");
    (config_dir, data_dir, cache_dir)
}
impl Config {
// Create configuration from command-line args. We'll be falling back to this
// when the configuration is invalid, so it must be validated more strictly.
pub fn from_args(args: &Args) -> Self {
let (config_dir, data_dir, cache_dir) = if let Some(base_dir) = &args.base_dir {
let base_str = base_dir.to_string_lossy().to_string();
(
format!("{base_str}/conf"),
format!("{base_str}/data"),
format!("{base_str}/cache"),
)
} else {
let (c, d, cache) = get_xdg_dirs();
(
c.to_string_lossy().to_string(),
d.to_string_lossy().to_string(),
cache.to_string_lossy().to_string(),
)
};
Self {
listen_addr: args.listen_addr.clone(),
metrics_addr: args.metrics_addr.clone(),
disable_metrics: args.disable_metrics,
backend_addr: args.backend_addr.clone(),
min_delay: args.min_delay,
max_delay: args.max_delay,
max_tarpit_time: args.max_tarpit_time,
block_threshold: args.block_threshold,
markov_corpora_dir: format!("{data_dir}/corpora"),
lua_scripts_dir: format!("{data_dir}/scripts"),
data_dir,
config_dir,
cache_dir,
..Default::default()
}
}
// Load configuration from a file (JSON or TOML)
pub fn load_from_file(path: &Path) -> std::io::Result<Self> {
let content = fs::read_to_string(path)?;
let extension = path
.extension()
.map(|ext| ext.to_string_lossy().to_lowercase())
.unwrap_or_default();
let config = match extension.as_str() {
"toml" => toml::from_str(&content).map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("Failed to parse TOML: {e}"),
)
})?,
_ => {
// Default to JSON for any other extension
serde_json::from_str(&content).map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("Failed to parse JSON: {e}"),
)
})?
}
};
Ok(config)
}
// Save configuration to a file (JSON or TOML)
pub fn save_to_file(&self, path: &Path) -> std::io::Result<()> {
if let Some(parent) = path.parent() {
fs::create_dir_all(parent)?;
}
let extension = path
.extension()
.map(|ext| ext.to_string_lossy().to_lowercase())
.unwrap_or_default();
let content = match extension.as_str() {
"toml" => toml::to_string_pretty(self).map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("Failed to serialize to TOML: {e}"),
)
})?,
_ => {
// Default to JSON for any other extension
serde_json::to_string_pretty(self).map_err(|e| {
std::io::Error::new(
std::io::ErrorKind::InvalidData,
format!("Failed to serialize to JSON: {e}"),
)
})?
}
};
fs::write(path, content)?;
Ok(())
}
// Create required directories if they don't exist
pub fn ensure_dirs_exist(&self) -> std::io::Result<()> {
let dirs = [
&self.markov_corpora_dir,
&self.lua_scripts_dir,
&self.data_dir,
&self.config_dir,
&self.cache_dir,
];
for dir in dirs {
fs::create_dir_all(dir)?;
log::debug!("Created directory: {dir}");
}
Ok(())
}
}
// Decide if a request should be tarpitted based on path and IP.
//
// A client whose address falls inside any whitelisted network is never
// tarpitted; whitelist entries that do not parse as a network are skipped.
// Everyone else is tarpitted as soon as one trap pattern matches `path`.
pub fn should_tarpit(path: &str, ip: &IpAddr, config: &Config) -> bool {
    // The whitelist is consulted first so trusted clients skip pattern
    // matching entirely.
    let is_whitelisted = config
        .whitelist_networks
        .iter()
        .filter_map(|entry| entry.parse::<IpNetwork>().ok())
        .any(|network| network.contains(*ip));
    if is_whitelisted {
        return false;
    }

    // Each pattern decides for itself whether it is a plain string or a regex.
    config
        .trap_patterns
        .iter()
        .any(|trap| trap.matches(path))
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::{IpAddr, Ipv4Addr};

    // `from_args` must copy the scalar options and derive all directories
    // from `--base-dir`.
    #[test]
    fn test_config_from_args() {
        let args = Args {
            listen_addr: "127.0.0.1:8080".to_string(),
            metrics_addr: "127.0.0.1:9000".to_string(),
            disable_metrics: true,
            backend_addr: "127.0.0.1:8081".to_string(),
            min_delay: 500,
            max_delay: 10000,
            max_tarpit_time: 300,
            block_threshold: 5,
            base_dir: Some(PathBuf::from("/tmp/eris")),
            config_file: None,
            log_level: "debug".to_string(),
            log_format: "pretty".to_string(),
        };
        let config = Config::from_args(&args);
        assert_eq!(config.listen_addr, "127.0.0.1:8080");
        assert_eq!(config.metrics_addr, "127.0.0.1:9000");
        assert!(config.disable_metrics);
        assert_eq!(config.backend_addr, "127.0.0.1:8081");
        assert_eq!(config.min_delay, 500);
        assert_eq!(config.max_delay, 10000);
        assert_eq!(config.max_tarpit_time, 300);
        assert_eq!(config.block_threshold, 5);
        assert_eq!(config.markov_corpora_dir, "/tmp/eris/data/corpora");
        assert_eq!(config.lua_scripts_dir, "/tmp/eris/data/scripts");
        assert_eq!(config.data_dir, "/tmp/eris/data");
        assert_eq!(config.config_dir, "/tmp/eris/conf");
        assert_eq!(config.cache_dir, "/tmp/eris/cache");
    }

    // Plain patterns match by substring, regex patterns by expression, and an
    // expression that does not compile never matches.
    #[test]
    fn test_trap_pattern_matching() {
        // Test plain string pattern
        let plain = TrapPattern::as_plain("phpunit");
        assert!(plain.matches("path/to/phpunit/test"));
        assert!(!plain.matches("path/to/something/else"));
        // Test regex pattern
        let regex = TrapPattern::as_regex(r".*eval-stdin\.php.*");
        assert!(regex.matches("/vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php"));
        assert!(regex.matches("/tests/eval-stdin.php?param"));
        assert!(!regex.matches("/normal/path"));
        // Test invalid regex pattern (should return false)
        let invalid = TrapPattern::Regex {
            pattern: "(invalid[regex".to_string(),
            regex: true,
        };
        assert!(!invalid.matches("anything"));
    }

    // `should_tarpit` is synchronous, so a plain `#[test]` is enough; no async
    // runtime is needed to drive it.
    #[test]
    fn test_should_tarpit() {
        let config = Config::default();
        let outsider = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4));
        // Test trap patterns
        assert!(should_tarpit("/vendor/phpunit/whatever", &outsider, &config));
        assert!(should_tarpit("/wp-admin/login.php", &outsider, &config));
        assert!(should_tarpit("/.env", &outsider, &config));
        // Test whitelist networks
        assert!(!should_tarpit(
            "/wp-admin/login.php",
            &IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
            &config
        ));
        assert!(!should_tarpit(
            "/vendor/phpunit/whatever",
            &IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1)),
            &config
        ));
        // Test legitimate paths
        assert!(!should_tarpit("/index.html", &outsider, &config));
        assert!(!should_tarpit("/images/logo.png", &outsider, &config));
        // Test regex patterns
        assert!(should_tarpit(
            "/index.php?s=/index/\\think\\app/invokefunction&function=call_user_func_array&vars[0]=md5&vars[1][]=Hello",
            &outsider,
            &config
        ));
        assert!(should_tarpit(
            "/hello.world?%ADd+allow_url_include%3d1+%ADd+auto_prepend_file%3dphp://input",
            &outsider,
            &config
        ));
    }

    // A default configuration must survive a save/load round trip in both
    // supported file formats.
    #[test]
    fn test_config_file_formats() {
        // File names carry the process id so that concurrent test runs on the
        // same machine do not overwrite each other's temporary files.
        let temp_dir = std::env::temp_dir();
        let pid = std::process::id();
        let json_path = temp_dir.join(format!("eris_test_config_{pid}.json"));
        let toml_path = temp_dir.join(format!("eris_test_config_{pid}.toml"));
        let config = Config::default();
        // Test JSON serialization and deserialization
        config.save_to_file(&json_path).unwrap();
        let loaded_json = Config::load_from_file(&json_path).unwrap();
        assert_eq!(loaded_json.listen_addr, config.listen_addr);
        assert_eq!(loaded_json.min_delay, config.min_delay);
        assert_eq!(loaded_json.log_format, config.log_format);
        // Test TOML serialization and deserialization
        config.save_to_file(&toml_path).unwrap();
        let loaded_toml = Config::load_from_file(&toml_path).unwrap();
        assert_eq!(loaded_toml.listen_addr, config.listen_addr);
        assert_eq!(loaded_toml.min_delay, config.min_delay);
        assert_eq!(loaded_toml.log_format, config.log_format);
        // Clean up
        let _ = std::fs::remove_file(json_path);
        let _ = std::fs::remove_file(toml_path);
    }
}