use rand::prelude::*; use std::collections::HashMap; use std::fs; use std::path::Path; use std::sync::Mutex; const DEFAULT_ORDER: usize = 2; #[derive(Debug)] struct Chain { order: usize, states: HashMap, Vec>, start_states: Vec>, } impl Chain { fn new(order: usize) -> Self { Self { order, states: HashMap::new(), start_states: Vec::new(), } } fn add(&mut self, line: &str) { // Tokenize and process the line let words: Vec = line .split_whitespace() .map(std::string::ToString::to_string) .collect(); if words.len() <= self.order { return; } // Add start state let start = words[0..self.order].to_vec(); self.start_states.push(start); // Add transitions for i in 0..words.len() - self.order { let state = words[i..i + self.order].to_vec(); let next = words[i + self.order].clone(); self.states.entry(state).or_default().push(next); } } fn generate(&self, max_words: usize) -> String { if self.start_states.is_empty() { return String::new(); } let mut rng = rand::rng(); let mut result = Vec::new(); // Choose a random start state let mut current = self .start_states .choose(&mut rng) .cloned() .unwrap_or_else(|| vec![String::new(); self.order]); result.extend(current.clone()); // Generate sequence for _ in self.order..max_words { if let Some(next_options) = self.states.get(¤t) { if next_options.is_empty() { break; } let next = next_options.choose(&mut rng).unwrap().clone(); result.push(next.clone()); // Slide the window current = current[1..].to_vec(); current.push(next); } else { break; } } result.join(" ") } } pub struct MarkovGenerator { chains: Mutex>, } impl MarkovGenerator { pub fn new(corpus_dir: &str) -> Self { let mut chains = HashMap::new(); // Initialize with default types let types = ["php_exploit", "wordpress", "api", "generic"]; for t in &types { chains.insert((*t).to_string(), Chain::new(DEFAULT_ORDER)); } // Load corpus files if they exist let path = Path::new(corpus_dir); if path.exists() && path.is_dir() { if let Ok(entries) = fs::read_dir(path) { entries.for_each(|entry| { if let Ok(entry) = entry { let file_path = entry.path(); if let Some(file_name) = file_path.file_stem() { if let Some(file_name_str) = file_name.to_str() { if types.contains(&file_name_str) { if let Ok(content) = fs::read_to_string(&file_path) { let mut chain = Chain::new(DEFAULT_ORDER); for line in content.lines() { chain.add(line); } chains.insert(file_name_str.to_string(), chain); } } } } } }); } } // If corpus files didn't exist, initialize with some default content if chains["php_exploit"].start_states.is_empty() { let mut chain = Chain::new(DEFAULT_ORDER); chain.add("PHP Fatal error: Uncaught Error: Call to undefined function"); chain.add("PHP Warning: file_get_contents() expects parameter 1 to be string"); chain.add("PHP Notice: Undefined variable: data in /var/www/html/index.php on line 26"); chain.add("Warning: Invalid argument supplied for foreach() in /var/www/html/vendor/autoload.php"); chains.insert("php_exploit".to_string(), chain); } if chains["wordpress"].start_states.is_empty() { let mut chain = Chain::new(DEFAULT_ORDER); chain.add("WordPress database error: [Table 'wp_users' doesn't exist]"); chain.add("Warning: Cannot modify header information - headers already sent by"); chain.add("Fatal error: Allowed memory size of 41943040 bytes exhausted"); chains.insert("wordpress".to_string(), chain); } // Seed with common "interesting" terms that bots look for let seed_words = [ "username", "password", "token", "secret", "key", "admin", "root", "shell", "config", "api_key", "database", "ssh", "private", "credential", "system", "vulnerability", "exploit", "access", "error", "warning", "mysql", "postgresql", ]; // Add seed words to each chain Self::add_seed_words_to_chains(&mut chains, &seed_words); Self { chains: Mutex::new(chains), } } // Helper function to add seed words to chains fn add_seed_words_to_chains(chains: &mut HashMap, seed_words: &[&str]) { // Process each chain separately for (_, chain) in chains.iter_mut() { // Create new state if no states exist if chain.states.is_empty() && !chain.start_states.is_empty() { let state = chain.start_states[0].clone(); let mut next_words = Vec::new(); for word in seed_words { next_words.push((*word).to_string()); } chain.states.insert(state, next_words); } else { // Find a state to add words to if let Some((_state, next_words)) = chain.states.iter_mut().next() { for word in seed_words { next_words.push((*word).to_string()); } } } } } pub fn generate(&self, chain_type: &str, max_words: usize) -> String { let chains = self.chains.lock().unwrap(); match chains.get(chain_type) { Some(chain) => chain.generate(max_words), None => { // Fall back to generic chains .get("generic") .map(|c| c.generate(max_words)) .unwrap_or_default() } } } }