551 lines
18 KiB
Rust
551 lines
18 KiB
Rust
use clap::Parser;
|
|
use ipnetwork::IpNetwork;
|
|
use regex::Regex;
|
|
use serde::{Deserialize, Serialize};
|
|
use std::env;
|
|
use std::fs;
|
|
use std::net::IpAddr;
|
|
use std::path::{Path, PathBuf};
|
|
|
|
// Command-line arguments using clap
|
|
#[derive(Parser, Debug, Clone)]
|
|
#[clap(
|
|
author,
|
|
version,
|
|
about = "Markov chain based HTTP tarpit/honeypot that delays and tracks potential attackers"
|
|
)]
|
|
pub struct Args {
|
|
#[clap(
|
|
long,
|
|
default_value = "0.0.0.0:8888",
|
|
help = "Address and port to listen for incoming HTTP requests (format: ip:port)"
|
|
)]
|
|
pub listen_addr: String,
|
|
|
|
#[clap(
|
|
long,
|
|
default_value = "0.0.0.0:9100",
|
|
help = "Address and port to expose Prometheus metrics and status endpoint (format: ip:port)"
|
|
)]
|
|
pub metrics_addr: String,
|
|
|
|
#[clap(long, help = "Disable Prometheus metrics server completely")]
|
|
pub disable_metrics: bool,
|
|
|
|
#[clap(
|
|
long,
|
|
default_value = "127.0.0.1:80",
|
|
help = "Backend server address to proxy legitimate requests to (format: ip:port)"
|
|
)]
|
|
pub backend_addr: String,
|
|
|
|
#[clap(
|
|
long,
|
|
default_value = "1000",
|
|
help = "Minimum delay in milliseconds between chunks sent to attacker"
|
|
)]
|
|
pub min_delay: u64,
|
|
|
|
#[clap(
|
|
long,
|
|
default_value = "15000",
|
|
help = "Maximum delay in milliseconds between chunks sent to attacker"
|
|
)]
|
|
pub max_delay: u64,
|
|
|
|
#[clap(
|
|
long,
|
|
default_value = "600",
|
|
help = "Maximum time in seconds to keep an attacker in the tarpit before disconnecting"
|
|
)]
|
|
pub max_tarpit_time: u64,
|
|
|
|
#[clap(
|
|
long,
|
|
default_value = "3",
|
|
help = "Number of hits to honeypot patterns before permanently blocking an IP"
|
|
)]
|
|
pub block_threshold: u32,
|
|
|
|
#[clap(
|
|
long,
|
|
help = "Base directory for all application data (overrides XDG directory structure)"
|
|
)]
|
|
pub base_dir: Option<PathBuf>,
|
|
|
|
#[clap(
|
|
long,
|
|
help = "Path to configuration file (JSON or TOML, overrides command line options)"
|
|
)]
|
|
pub config_file: Option<PathBuf>,
|
|
|
|
#[clap(
|
|
long,
|
|
default_value = "info",
|
|
help = "Log level: trace, debug, info, warn, error"
|
|
)]
|
|
pub log_level: String,
|
|
|
|
#[clap(
|
|
long,
|
|
default_value = "pretty",
|
|
help = "Log format: plain, pretty, json, pretty-json"
|
|
)]
|
|
pub log_format: String,
|
|
}
|
|
|
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Default)]
|
|
pub enum LogFormat {
|
|
Plain,
|
|
#[default]
|
|
Pretty,
|
|
Json,
|
|
PrettyJson,
|
|
}
|
|
|
|
// Trap pattern structure. It can be either a plain string
|
|
// regex to catch more advanced patterns necessitated by
|
|
// more sophisticated crawlers.
|
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
|
#[serde(untagged)]
|
|
pub enum TrapPattern {
|
|
Plain(String),
|
|
Regex { pattern: String, regex: bool },
|
|
}
|
|
|
|
impl TrapPattern {
|
|
pub fn as_plain(value: &str) -> Self {
|
|
Self::Plain(value.to_string())
|
|
}
|
|
|
|
pub fn as_regex(value: &str) -> Self {
|
|
Self::Regex {
|
|
pattern: value.to_string(),
|
|
regex: true,
|
|
}
|
|
}
|
|
|
|
pub fn matches(&self, path: &str) -> bool {
|
|
match self {
|
|
Self::Plain(pattern) => path.contains(pattern),
|
|
Self::Regex {
|
|
pattern,
|
|
regex: true,
|
|
} => {
|
|
if let Ok(re) = Regex::new(pattern) {
|
|
re.is_match(path)
|
|
} else {
|
|
false
|
|
}
|
|
}
|
|
_ => false,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Configuration structure
|
|
#[derive(Clone, Debug, Deserialize, Serialize)]
|
|
pub struct Config {
|
|
pub listen_addr: String,
|
|
pub metrics_addr: String,
|
|
pub disable_metrics: bool,
|
|
pub backend_addr: String,
|
|
pub min_delay: u64,
|
|
pub max_delay: u64,
|
|
pub max_tarpit_time: u64,
|
|
pub block_threshold: u32,
|
|
pub trap_patterns: Vec<TrapPattern>,
|
|
pub whitelist_networks: Vec<String>,
|
|
pub markov_corpora_dir: String,
|
|
pub lua_scripts_dir: String,
|
|
pub data_dir: String,
|
|
pub config_dir: String,
|
|
pub cache_dir: String,
|
|
pub log_format: LogFormat,
|
|
}
|
|
|
|
impl Default for Config {
|
|
fn default() -> Self {
|
|
Self {
|
|
listen_addr: "0.0.0.0:8888".to_string(),
|
|
metrics_addr: "0.0.0.0:9100".to_string(),
|
|
disable_metrics: false,
|
|
backend_addr: "127.0.0.1:80".to_string(),
|
|
min_delay: 1000,
|
|
max_delay: 15000,
|
|
max_tarpit_time: 600,
|
|
block_threshold: 3,
|
|
trap_patterns: vec![
|
|
// Basic attack patterns as plain strings
|
|
TrapPattern::as_plain("/vendor/phpunit"),
|
|
TrapPattern::as_plain("eval-stdin.php"),
|
|
TrapPattern::as_plain("/wp-admin"),
|
|
TrapPattern::as_plain("/wp-login.php"),
|
|
TrapPattern::as_plain("/xmlrpc.php"),
|
|
TrapPattern::as_plain("/phpMyAdmin"),
|
|
TrapPattern::as_plain("/solr/"),
|
|
TrapPattern::as_plain("/.env"),
|
|
TrapPattern::as_plain("/config"),
|
|
TrapPattern::as_plain("/actuator/"),
|
|
// More aggressive patterns for various PHP exploits.
|
|
// XXX: I dedicate this entire section to that one single crawler
|
|
// that has been scanning my entire network, hitting 403s left and right
|
|
// but not giving up, and coming back the next day at the same time to
|
|
// scan the same paths over and over. Kudos to you, random crawler.
|
|
TrapPattern::as_regex(r"/.*phpunit.*eval-stdin\.php"),
|
|
TrapPattern::as_regex(r"/index\.php\?s=/index/\\think\\app/invokefunction"),
|
|
TrapPattern::as_regex(r".*%ADd\+auto_prepend_file%3dphp://input.*"),
|
|
TrapPattern::as_regex(r".*%ADd\+allow_url_include%3d1.*"),
|
|
TrapPattern::as_regex(r".*/wp-content/plugins/.*\.php"),
|
|
TrapPattern::as_regex(r".*/wp-content/themes/.*\.php"),
|
|
TrapPattern::as_regex(r".*eval\(.*\).*"),
|
|
TrapPattern::as_regex(r".*/adminer\.php.*"),
|
|
TrapPattern::as_regex(r".*/admin\.php.*"),
|
|
TrapPattern::as_regex(r".*/administrator/.*"),
|
|
TrapPattern::as_regex(r".*/wp-json/.*"),
|
|
TrapPattern::as_regex(r".*/api/.*\.php.*"),
|
|
TrapPattern::as_regex(r".*/cgi-bin/.*"),
|
|
TrapPattern::as_regex(r".*/owa/.*"),
|
|
TrapPattern::as_regex(r".*/ecp/.*"),
|
|
TrapPattern::as_regex(r".*/webshell\.php.*"),
|
|
TrapPattern::as_regex(r".*/shell\.php.*"),
|
|
TrapPattern::as_regex(r".*/cmd\.php.*"),
|
|
TrapPattern::as_regex(r".*/struts.*"),
|
|
],
|
|
whitelist_networks: vec![
|
|
"192.168.0.0/16".to_string(),
|
|
"10.0.0.0/8".to_string(),
|
|
"172.16.0.0/12".to_string(),
|
|
"127.0.0.0/8".to_string(),
|
|
],
|
|
markov_corpora_dir: "./corpora".to_string(),
|
|
lua_scripts_dir: "./scripts".to_string(),
|
|
data_dir: "./data".to_string(),
|
|
config_dir: "./conf".to_string(),
|
|
cache_dir: "./cache".to_string(),
|
|
log_format: LogFormat::Pretty,
|
|
}
|
|
}
|
|
}
|
|
|
|
// Gets the standard XDG directory paths for config, data and cache, each
// with the application sub-directory `eris` appended. Returned in the order
// `(config_dir, data_dir, cache_dir)`.
//
// Resolution for each directory:
//   1. the matching `XDG_*_HOME` variable, when set and non-empty;
//   2. otherwise the conventional location under `$HOME`
//      (`.config`, `.local/share`, `.cache`);
//   3. with `.` standing in for `$HOME` when that is not set either.
//
// A variable that is set but empty is treated as unset, as the XDG Base
// Directory Specification requires; using it verbatim would silently yield a
// path relative to the current working directory.
//
// XXX: This could be "simplified" by using the Dirs crate, but pulling in a
// library is hard to justify for something that fits in less than 30 lines.
// Unless cross-platform support becomes necessary, this implementation is
// good enough; for other platforms it can simply be extended as needed.
pub fn get_xdg_dirs() -> (PathBuf, PathBuf, PathBuf) {
    // Resolves one base directory from `var`, falling back to
    // `$HOME/<home_fallback...>`. `$HOME` is only read when the fallback is
    // actually needed.
    fn base_dir(var: &str, home_fallback: &[&str]) -> PathBuf {
        env::var_os(var)
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
            .unwrap_or_else(|| {
                let home = env::var_os("HOME").map_or_else(|| PathBuf::from("."), PathBuf::from);
                home_fallback
                    .iter()
                    .fold(home, |dir, component| dir.join(component))
            })
    }

    // Application-specific sub-directory placed inside every base directory.
    const APP_DIR: &str = "eris";

    (
        base_dir("XDG_CONFIG_HOME", &[".config"]).join(APP_DIR),
        base_dir("XDG_DATA_HOME", &[".local", "share"]).join(APP_DIR),
        base_dir("XDG_CACHE_HOME", &[".cache"]).join(APP_DIR),
    )
}
|
|
|
|
impl Config {
|
|
// Create configuration from command-line args. We'll be falling back to this
|
|
// when the configuration is invalid, so it must be validated more strictly.
|
|
pub fn from_args(args: &Args) -> Self {
|
|
let (config_dir, data_dir, cache_dir) = if let Some(base_dir) = &args.base_dir {
|
|
let base_str = base_dir.to_string_lossy().to_string();
|
|
(
|
|
format!("{base_str}/conf"),
|
|
format!("{base_str}/data"),
|
|
format!("{base_str}/cache"),
|
|
)
|
|
} else {
|
|
let (c, d, cache) = get_xdg_dirs();
|
|
(
|
|
c.to_string_lossy().to_string(),
|
|
d.to_string_lossy().to_string(),
|
|
cache.to_string_lossy().to_string(),
|
|
)
|
|
};
|
|
|
|
Self {
|
|
listen_addr: args.listen_addr.clone(),
|
|
metrics_addr: args.metrics_addr.clone(),
|
|
disable_metrics: args.disable_metrics,
|
|
backend_addr: args.backend_addr.clone(),
|
|
min_delay: args.min_delay,
|
|
max_delay: args.max_delay,
|
|
max_tarpit_time: args.max_tarpit_time,
|
|
block_threshold: args.block_threshold,
|
|
markov_corpora_dir: format!("{data_dir}/corpora"),
|
|
lua_scripts_dir: format!("{data_dir}/scripts"),
|
|
data_dir,
|
|
config_dir,
|
|
cache_dir,
|
|
..Default::default()
|
|
}
|
|
}
|
|
|
|
// Load configuration from a file (JSON or TOML)
|
|
pub fn load_from_file(path: &Path) -> std::io::Result<Self> {
|
|
let content = fs::read_to_string(path)?;
|
|
|
|
let extension = path
|
|
.extension()
|
|
.map(|ext| ext.to_string_lossy().to_lowercase())
|
|
.unwrap_or_default();
|
|
|
|
let config = match extension.as_str() {
|
|
"toml" => toml::from_str(&content).map_err(|e| {
|
|
std::io::Error::new(
|
|
std::io::ErrorKind::InvalidData,
|
|
format!("Failed to parse TOML: {e}"),
|
|
)
|
|
})?,
|
|
_ => {
|
|
// Default to JSON for any other extension
|
|
serde_json::from_str(&content).map_err(|e| {
|
|
std::io::Error::new(
|
|
std::io::ErrorKind::InvalidData,
|
|
format!("Failed to parse JSON: {e}"),
|
|
)
|
|
})?
|
|
}
|
|
};
|
|
|
|
Ok(config)
|
|
}
|
|
|
|
// Save configuration to a file (JSON or TOML)
|
|
pub fn save_to_file(&self, path: &Path) -> std::io::Result<()> {
|
|
if let Some(parent) = path.parent() {
|
|
fs::create_dir_all(parent)?;
|
|
}
|
|
|
|
let extension = path
|
|
.extension()
|
|
.map(|ext| ext.to_string_lossy().to_lowercase())
|
|
.unwrap_or_default();
|
|
|
|
let content = match extension.as_str() {
|
|
"toml" => toml::to_string_pretty(self).map_err(|e| {
|
|
std::io::Error::new(
|
|
std::io::ErrorKind::InvalidData,
|
|
format!("Failed to serialize to TOML: {e}"),
|
|
)
|
|
})?,
|
|
_ => {
|
|
// Default to JSON for any other extension
|
|
serde_json::to_string_pretty(self).map_err(|e| {
|
|
std::io::Error::new(
|
|
std::io::ErrorKind::InvalidData,
|
|
format!("Failed to serialize to JSON: {e}"),
|
|
)
|
|
})?
|
|
}
|
|
};
|
|
|
|
fs::write(path, content)?;
|
|
Ok(())
|
|
}
|
|
|
|
// Create required directories if they don't exist
|
|
pub fn ensure_dirs_exist(&self) -> std::io::Result<()> {
|
|
let dirs = [
|
|
&self.markov_corpora_dir,
|
|
&self.lua_scripts_dir,
|
|
&self.data_dir,
|
|
&self.config_dir,
|
|
&self.cache_dir,
|
|
];
|
|
|
|
for dir in dirs {
|
|
fs::create_dir_all(dir)?;
|
|
log::debug!("Created directory: {dir}");
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
}
|
|
|
|
// Decide if a request should be tarpitted based on path and IP
|
|
pub fn should_tarpit(path: &str, ip: &IpAddr, config: &Config) -> bool {
|
|
// Check whitelist IPs first to avoid unnecessary pattern matching
|
|
for network_str in &config.whitelist_networks {
|
|
if let Ok(network) = network_str.parse::<IpNetwork>() {
|
|
if network.contains(*ip) {
|
|
return false;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Use pattern matching based on the trap pattern type. It can be
|
|
// a plain string or regex.
|
|
for pattern in &config.trap_patterns {
|
|
if pattern.matches(path) {
|
|
return true;
|
|
}
|
|
}
|
|
|
|
false
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::net::{IpAddr, Ipv4Addr};

    // `Config::from_args` must copy every scalar option and derive the whole
    // directory layout from `--base-dir`.
    #[test]
    fn test_config_from_args() {
        let args = Args {
            listen_addr: "127.0.0.1:8080".to_string(),
            metrics_addr: "127.0.0.1:9000".to_string(),
            disable_metrics: true,
            backend_addr: "127.0.0.1:8081".to_string(),
            min_delay: 500,
            max_delay: 10000,
            max_tarpit_time: 300,
            block_threshold: 5,
            base_dir: Some(PathBuf::from("/tmp/eris")),
            config_file: None,
            log_level: "debug".to_string(),
            log_format: "pretty".to_string(),
        };

        let config = Config::from_args(&args);
        assert_eq!(config.listen_addr, "127.0.0.1:8080");
        assert_eq!(config.metrics_addr, "127.0.0.1:9000");
        assert!(config.disable_metrics);
        assert_eq!(config.backend_addr, "127.0.0.1:8081");
        assert_eq!(config.min_delay, 500);
        assert_eq!(config.max_delay, 10000);
        assert_eq!(config.max_tarpit_time, 300);
        assert_eq!(config.block_threshold, 5);
        assert_eq!(config.markov_corpora_dir, "/tmp/eris/data/corpora");
        assert_eq!(config.lua_scripts_dir, "/tmp/eris/data/scripts");
        assert_eq!(config.data_dir, "/tmp/eris/data");
        assert_eq!(config.config_dir, "/tmp/eris/conf");
        assert_eq!(config.cache_dir, "/tmp/eris/cache");
    }

    // Plain patterns are substring checks, regex patterns are regex searches
    // and an invalid regex never matches.
    #[test]
    fn test_trap_pattern_matching() {
        // Test plain string pattern
        let plain = TrapPattern::as_plain("phpunit");
        assert!(plain.matches("path/to/phpunit/test"));
        assert!(!plain.matches("path/to/something/else"));

        // Test regex pattern
        let regex = TrapPattern::as_regex(r".*eval-stdin\.php.*");
        assert!(regex.matches("/vendor/phpunit/phpunit/src/Util/PHP/eval-stdin.php"));
        assert!(regex.matches("/tests/eval-stdin.php?param"));
        assert!(!regex.matches("/normal/path"));

        // Test invalid regex pattern (should return false)
        let invalid = TrapPattern::Regex {
            pattern: "(invalid[regex".to_string(),
            regex: true,
        };
        assert!(!invalid.matches("anything"));
    }

    // Exercises `should_tarpit` against the default configuration. The
    // function under test is synchronous, so a plain `#[test]` is enough and
    // no async runtime is needed.
    #[test]
    fn test_should_tarpit() {
        let config = Config::default();
        // An address outside every whitelisted network.
        let outsider = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4));

        // Test trap patterns
        assert!(should_tarpit("/vendor/phpunit/whatever", &outsider, &config));
        assert!(should_tarpit("/wp-admin/login.php", &outsider, &config));
        assert!(should_tarpit("/.env", &outsider, &config));

        // Test whitelist networks
        assert!(!should_tarpit(
            "/wp-admin/login.php",
            &IpAddr::V4(Ipv4Addr::new(127, 0, 0, 1)),
            &config
        ));
        assert!(!should_tarpit(
            "/vendor/phpunit/whatever",
            &IpAddr::V4(Ipv4Addr::new(192, 168, 1, 1)),
            &config
        ));

        // Test legitimate paths
        assert!(!should_tarpit("/index.html", &outsider, &config));
        assert!(!should_tarpit("/images/logo.png", &outsider, &config));

        // Test regex patterns
        assert!(should_tarpit(
            "/index.php?s=/index/\\think\\app/invokefunction&function=call_user_func_array&vars[0]=md5&vars[1][]=Hello",
            &outsider,
            &config
        ));

        assert!(should_tarpit(
            "/hello.world?%ADd+allow_url_include%3d1+%ADd+auto_prepend_file%3dphp://input",
            &outsider,
            &config
        ));
    }

    // Round-trips the default configuration through both supported file
    // formats.
    #[test]
    fn test_config_file_formats() {
        // The process id is part of the file names so that concurrent test
        // runs sharing the same temp directory do not overwrite each other's
        // files.
        let temp_dir = std::env::temp_dir();
        let pid = std::process::id();
        let json_path = temp_dir.join(format!("eris_temp_config_{pid}.json"));
        let toml_path = temp_dir.join(format!("eris_temp_config_{pid}.toml"));

        let config = Config::default();

        // Test JSON serialization and deserialization
        config.save_to_file(&json_path).unwrap();
        let loaded_json = Config::load_from_file(&json_path).unwrap();
        assert_eq!(loaded_json.listen_addr, config.listen_addr);
        assert_eq!(loaded_json.min_delay, config.min_delay);

        // Test TOML serialization and deserialization
        config.save_to_file(&toml_path).unwrap();
        let loaded_toml = Config::load_from_file(&toml_path).unwrap();
        assert_eq!(loaded_toml.listen_addr, config.listen_addr);
        assert_eq!(loaded_toml.min_delay, config.min_delay);

        // Clean up (best effort)
        let _ = std::fs::remove_file(json_path);
        let _ = std::fs::remove_file(toml_path);
    }
}
|