use clap::Parser; use ipnetwork::IpNetwork; use regex::Regex; use serde::{Deserialize, Serialize}; use std::env; use std::fs; use std::net::IpAddr; use std::path::{Path, PathBuf}; // Command-line arguments using clap #[derive(Parser, Debug, Clone)] #[clap( author, version, about = "Markov chain based HTTP tarpit/honeypot that delays and tracks potential attackers" )] pub struct Args { #[clap( long, default_value = "0.0.0.0:8888", help = "Address and port to listen for incoming HTTP requests (format: ip:port)" )] pub listen_addr: String, #[clap( long, default_value = "0.0.0.0:9100", help = "Address and port to expose Prometheus metrics and status endpoint (format: ip:port)" )] pub metrics_addr: String, #[clap(long, help = "Disable Prometheus metrics server completely")] pub disable_metrics: bool, #[clap( long, default_value = "127.0.0.1:80", help = "Backend server address to proxy legitimate requests to (format: ip:port)" )] pub backend_addr: String, #[clap( long, default_value = "1000", help = "Minimum delay in milliseconds between chunks sent to attacker" )] pub min_delay: u64, #[clap( long, default_value = "15000", help = "Maximum delay in milliseconds between chunks sent to attacker" )] pub max_delay: u64, #[clap( long, default_value = "600", help = "Maximum time in seconds to keep an attacker in the tarpit before disconnecting" )] pub max_tarpit_time: u64, #[clap( long, default_value = "3", help = "Number of hits to honeypot patterns before permanently blocking an IP" )] pub block_threshold: u32, #[clap( long, help = "Base directory for all application data (overrides XDG directory structure)" )] pub base_dir: Option, #[clap( long, help = "Path to configuration file (JSON or TOML, overrides command line options)" )] pub config_file: Option, #[clap( long, default_value = "info", help = "Log level: trace, debug, info, warn, error" )] pub log_level: String, #[clap( long, default_value = "pretty", help = "Log format: plain, pretty, json, pretty-json" )] pub log_format: String, } #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)] pub enum LogFormat { Plain, #[default] Pretty, Json, PrettyJson, } // Trap pattern structure. It can be either a plain string // regex to catch more advanced patterns necessitated by // more sophisticated crawlers. #[derive(Clone, Debug, Deserialize, Serialize)] #[serde(untagged)] pub enum TrapPattern { Plain(String), Regex { pattern: String, regex: bool }, } impl TrapPattern { pub fn as_plain(value: &str) -> Self { Self::Plain(value.to_string()) } pub fn as_regex(value: &str) -> Self { Self::Regex { pattern: value.to_string(), regex: true, } } pub fn matches(&self, path: &str) -> bool { match self { Self::Plain(pattern) => path.contains(pattern), Self::Regex { pattern, regex: true, } => { if let Ok(re) = Regex::new(pattern) { re.is_match(path) } else { false } } _ => false, } } } // Configuration structure #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Config { pub listen_addr: String, pub metrics_addr: String, pub disable_metrics: bool, pub backend_addr: String, pub min_delay: u64, pub max_delay: u64, pub max_tarpit_time: u64, pub block_threshold: u32, pub trap_patterns: Vec, pub whitelist_networks: Vec, pub markov_corpora_dir: String, pub lua_scripts_dir: String, pub data_dir: String, pub config_dir: String, pub cache_dir: String, pub log_format: LogFormat, } impl Default for Config { fn default() -> Self { Self { listen_addr: "0.0.0.0:8888".to_string(), metrics_addr: "0.0.0.0:9100".to_string(), disable_metrics: false, backend_addr: "127.0.0.1:80".to_string(), min_delay: 1000, max_delay: 15000, max_tarpit_time: 600, block_threshold: 3, trap_patterns: vec![ // Basic attack patterns as plain strings TrapPattern::as_plain("/vendor/phpunit"), TrapPattern::as_plain("eval-stdin.php"), TrapPattern::as_plain("/wp-admin"), TrapPattern::as_plain("/wp-login.php"), TrapPattern::as_plain("/xmlrpc.php"), TrapPattern::as_plain("/phpMyAdmin"), TrapPattern::as_plain("/solr/"), TrapPattern::as_plain("/.env"), TrapPattern::as_plain("/config"), TrapPattern::as_plain("/actuator/"), // More aggressive patterns for various PHP exploits. // XXX: I dedicate this entire section to that one single crawler // that has been scanning my entire network, hitting 403s left and right // but not giving up, and coming back the next day at the same time to // scan the same paths over and over. Kudos to you, random crawler. TrapPattern::as_regex(r"/.*phpunit.*eval-stdin\.php"), TrapPattern::as_regex(r"/index\.php\?s=/index/\\think\\app/invokefunction"), TrapPattern::as_regex(r".*%ADd\+auto_prepend_file%3dphp://input.*"), TrapPattern::as_regex(r".*%ADd\+allow_url_include%3d1.*"), TrapPattern::as_regex(r".*/wp-content/plugins/.*\.php"), TrapPattern::as_regex(r".*/wp-content/themes/.*\.php"), TrapPattern::as_regex(r".*eval\(.*\).*"), TrapPattern::as_regex(r".*/adminer\.php.*"), TrapPattern::as_regex(r".*/admin\.php.*"), TrapPattern::as_regex(r".*/administrator/.*"), TrapPattern::as_regex(r".*/wp-json/.*"), TrapPattern::as_regex(r".*/api/.*\.php.*"), TrapPattern::as_regex(r".*/cgi-bin/.*"), TrapPattern::as_regex(r".*/owa/.*"), TrapPattern::as_regex(r".*/ecp/.*"), TrapPattern::as_regex(r".*/webshell\.php.*"), TrapPattern::as_regex(r".*/shell\.php.*"), TrapPattern::as_regex(r".*/cmd\.php.*"), TrapPattern::as_regex(r".*/struts.*"), ], whitelist_networks: vec![ "192.168.0.0/16".to_string(), "10.0.0.0/8".to_string(), "172.16.0.0/12".to_string(), "127.0.0.0/8".to_string(), ], markov_corpora_dir: "./corpora".to_string(), lua_scripts_dir: "./scripts".to_string(), data_dir: "./data".to_string(), config_dir: "./conf".to_string(), cache_dir: "./cache".to_string(), log_format: LogFormat::Pretty, } } } // Gets standard XDG directory paths for config, data and cache. // XXX: This could be "simplified" by using the Dirs crate, but I can't // really justify pulling a library for something I can handle in less // than 30 lines. Unless cross-platform becomes necessary, the below // implementation is good enough. For alternative platforms, we can simply // enhance the current implementation as needed. pub fn get_xdg_dirs() -> (PathBuf, PathBuf, PathBuf) { let config_home = env::var_os("XDG_CONFIG_HOME") .map(PathBuf::from) .unwrap_or_else(|| { let home = env::var_os("HOME").map_or_else(|| PathBuf::from("."), PathBuf::from); home.join(".config") }); let data_home = env::var_os("XDG_DATA_HOME") .map(PathBuf::from) .unwrap_or_else(|| { let home = env::var_os("HOME").map_or_else(|| PathBuf::from("."), PathBuf::from); home.join(".local").join("share") }); let cache_home = env::var_os("XDG_CACHE_HOME") .map(PathBuf::from) .unwrap_or_else(|| { let home = env::var_os("HOME").map_or_else(|| PathBuf::from("."), PathBuf::from); home.join(".cache") }); let config_dir = config_home.join("eris"); let data_dir = data_home.join("eris"); let cache_dir = cache_home.join("eris"); (config_dir, data_dir, cache_dir) } impl Config { // Create configuration from command-line args. We'll be falling back to this // when the configuration is invalid, so it must be validated more strictly. pub fn from_args(args: &Args) -> Self { let (config_dir, data_dir, cache_dir) = if let Some(base_dir) = &args.base_dir { let base_str = base_dir.to_string_lossy().to_string(); ( format!("{base_str}/conf"), format!("{base_str}/data"), format!("{base_str}/cache"), ) } else { let (c, d, cache) = get_xdg_dirs(); ( c.to_string_lossy().to_string(), d.to_string_lossy().to_string(), cache.to_string_lossy().to_string(), ) }; Self { listen_addr: args.listen_addr.clone(), metrics_addr: args.metrics_addr.clone(), disable_metrics: args.disable_metrics, backend_addr: args.backend_addr.clone(), min_delay: args.min_delay, max_delay: args.max_delay, max_tarpit_time: args.max_tarpit_time, block_threshold: args.block_threshold, markov_corpora_dir: format!("{data_dir}/corpora"), lua_scripts_dir: format!("{data_dir}/scripts"), data_dir, config_dir, cache_dir, ..Default::default() } } // Load configuration from a file (JSON or TOML) pub fn load_from_file(path: &Path) -> std::io::Result { let content = fs::read_to_string(path)?; let extension = path .extension() .map(|ext| ext.to_string_lossy().to_lowercase()) .unwrap_or_default(); let config = match extension.as_str() { "toml" => toml::from_str(&content).map_err(|e| { std::io::Error::new( std::io::ErrorKind::InvalidData, format!("Failed to parse TOML: {e}"), ) })?, _ => { // Default to JSON for any other extension serde_json::from_str(&content).map_err(|e| { std::io::Error::new( std::io::ErrorKind::InvalidData, format!("Failed to parse JSON: {e}"), ) })? } }; Ok(config) } // Save configuration to a file (JSON or TOML) pub fn save_to_file(&self, path: &Path) -> std::io::Result<()> { if let Some(parent) = path.parent() { fs::create_dir_all(parent)?; } let extension = path .extension() .map(|ext| ext.to_string_lossy().to_lowercase()) .unwrap_or_default(); let content = match extension.as_str() { "toml" => toml::to_string_pretty(self).map_err(|e| { std::io::Error::new( std::io::ErrorKind::InvalidData, format!("Failed to serialize to TOML: {e}"), ) })?, _ => { // Default to JSON for any other extension serde_json::to_string_pretty(self).map_err(|e| { std::io::Error::new( std::io::ErrorKind::InvalidData, format!("Failed to serialize to JSON: {e}"), ) })? } }; fs::write(path, content)?; Ok(()) } // Create required directories if they don't exist pub fn ensure_dirs_exist(&self) -> std::io::Result<()> { let dirs = [ &self.markov_corpora_dir, &self.lua_scripts_dir, &self.data_dir, &self.config_dir, &self.cache_dir, ]; for dir in dirs { fs::create_dir_all(dir)?; log::debug!("Created directory: {dir}"); } Ok(()) } } // Decide if a request should be tarpitted based on path and IP pub fn should_tarpit(path: &str, ip: &IpAddr, config: &Config) -> bool { // Check whitelist IPs first to avoid unnecessary pattern matching for network_str in &config.whitelist_networks { if let Ok(network) = network_str.parse::() { if network.contains(*ip) { return false; } } } // Use pattern matching based on the trap pattern type. It can be // a plain string or regex. for pattern in &config.trap_patterns { if pattern.matches(path) { return true; } } false } #[cfg(test)] mod tests { use super::*; use std::net::{IpAddr, Ipv4Addr}; #[test] fn test_config_from_args() { let args = Args { listen_addr: "127.0.0.1:8080".to_string(), metrics_addr: "127.0.0.1:9000".to_string(), disable_metrics: true, backend_addr: "127.0.0.1:8081".to_string(), min_delay: 500, max_delay: 10000, max_tarpit_time: 300, block_threshold: 5, base_dir: Some(PathBuf::from("/tmp/eris")), config_file: None, log_level: "debug".to_string(), log_format: "pretty".to_string(), }; let config = Config::from_args(&args); assert_eq!(config.listen_addr, "127.0.0.1:8080"); assert_eq!(config.metrics_addr, "127.0.0.1:9000"); assert!(config.disable_metrics); assert_eq!(config.backend_addr, "127.0.0.1:8081"); assert_eq!(config.min_delay, 500); assert_eq!(config.max_delay, 10000); assert_eq!(config.max_tarpit_time, 300); assert_eq!(config.block_threshold, 5); assert_eq!(config.markov_corpora_dir, "/tmp/eris/data/corpora"); assert_eq!(config.lua_scripts_dir, "/tmp/eris/data/scripts"); assert_eq!(config.data_dir, "/tmp/eris/data"); assert_eq!(config.config_dir, "/tmp/eris/conf"); assert_eq!(config.cache_dir, "/tmp/eris/cache"); } #[test] fn test_trap_pattern_matching() { // Test plain string pattern let plain = TrapPattern::as_plain("phpunit"); assert!(plain.matches("path/to/phpunit/test")); assert!(!plain.matches("path/to/something/else")); // Test regex pattern let regex = TrapPattern::as_regex(r".*eval-stdin\.php.*"); assert!(regex.matches("/vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php")); assert!(regex.matches("/tests/eval-stdin.php?param")); assert!(!regex.matches("/normal/path")); // Test invalid regex pattern (should return false) let invalid = TrapPattern::Regex { pattern: "(invalid[regex".to_string(), regex: true, }; assert!(!invalid.matches("anything")); } #[tokio::test] async fn test_should_tarpit() { let config = Config::default(); // Test trap patterns assert!(should_tarpit( "/vendor/phpunit/whatever", &IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)), &config )); assert!(should_tarpit( "/wp-admin/login.php", &IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)), &config )); assert!(should_tarpit( "/.env", &IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)), &config )); // Test whitelist networks assert!(!should_tarpit( "/wp-admin/login.php", &IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)), &config )); assert!(!should_tarpit( "/vendor/phpunit/whatever", &IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1)), &config )); // Test legitimate paths assert!(!should_tarpit( "/index.html", &IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)), &config )); assert!(!should_tarpit( "/images/logo.png", &IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)), &config )); // Test regex patterns assert!(should_tarpit( "/index.php?s=/index/\\think\\app/invokefunction&function=call_user_func_array&vars[0]=md5&vars[1][]=Hello", &IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)), &config )); assert!(should_tarpit( "/hello.world?%ADd+allow_url_include%3d1+%ADd+auto_prepend_file%3dphp://input", &IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)), &config )); } #[test] fn test_config_file_formats() { // Create temporary JSON config file let temp_dir = std::env::temp_dir(); let json_path = temp_dir.join("temp_config.json"); let toml_path = temp_dir.join("temp_config.toml"); let config = Config::default(); // Test JSON serialization and deserialization config.save_to_file(&json_path).unwrap(); let loaded_json = Config::load_from_file(&json_path).unwrap(); assert_eq!(loaded_json.listen_addr, config.listen_addr); assert_eq!(loaded_json.min_delay, config.min_delay); // Test TOML serialization and deserialization config.save_to_file(&toml_path).unwrap(); let loaded_toml = Config::load_from_file(&toml_path).unwrap(); assert_eq!(loaded_toml.listen_addr, config.listen_addr); assert_eq!(loaded_toml.min_delay, config.min_delay); // Clean up let _ = std::fs::remove_file(json_path); let _ = std::fs::remove_file(toml_path); } }