216 lines
6.9 KiB
Rust
216 lines
6.9 KiB
Rust
use rand::prelude::*;
|
|
use std::collections::HashMap;
|
|
use std::fs;
|
|
use std::path::Path;
|
|
use std::sync::Mutex;
|
|
|
|
/// Order given to every chain created in this file, i.e. the number of
/// consecutive words that make up one chain state.
const DEFAULT_ORDER: usize = 2;
|
|
|
|
/// Word-level Markov chain of a fixed order.
///
/// A *state* is a run of `order` consecutive words; every state maps to the
/// words that were observed immediately after it in the training lines.
#[derive(Debug)]
struct Chain {
    /// Number of words that make up one state.
    order: usize,
    /// For each observed state, the words seen directly after it. Duplicates
    /// are kept, so a follower seen more often is stored more often.
    states: HashMap<Vec<String>, Vec<String>>,
    /// The first `order` words of every training line that was long enough
    /// to learn from; generation starts from one of these.
    start_states: Vec<Vec<String>>,
}
|
|
|
|
impl Chain {
|
|
fn new(order: usize) -> Self {
|
|
Self {
|
|
order,
|
|
states: HashMap::new(),
|
|
start_states: Vec::new(),
|
|
}
|
|
}
|
|
|
|
fn add(&mut self, line: &str) {
|
|
// Tokenize and process the line
|
|
let words: Vec<String> = line
|
|
.split_whitespace()
|
|
.map(std::string::ToString::to_string)
|
|
.collect();
|
|
|
|
if words.len() <= self.order {
|
|
return;
|
|
}
|
|
|
|
// Add start state
|
|
let start = words[0..self.order].to_vec();
|
|
self.start_states.push(start);
|
|
|
|
// Add transitions
|
|
for i in 0..words.len() - self.order {
|
|
let state = words[i..i + self.order].to_vec();
|
|
let next = words[i + self.order].clone();
|
|
|
|
self.states.entry(state).or_default().push(next);
|
|
}
|
|
}
|
|
|
|
fn generate(&self, max_words: usize) -> String {
|
|
if self.start_states.is_empty() {
|
|
return String::new();
|
|
}
|
|
|
|
let mut rng = rand::rng();
|
|
let mut result = Vec::new();
|
|
|
|
// Choose a random start state
|
|
let mut current = self
|
|
.start_states
|
|
.choose(&mut rng)
|
|
.cloned()
|
|
.unwrap_or_else(|| vec![String::new(); self.order]);
|
|
|
|
result.extend(current.clone());
|
|
|
|
// Generate sequence
|
|
for _ in self.order..max_words {
|
|
if let Some(next_options) = self.states.get(¤t) {
|
|
if next_options.is_empty() {
|
|
break;
|
|
}
|
|
|
|
let next = next_options.choose(&mut rng).unwrap().clone();
|
|
result.push(next.clone());
|
|
|
|
// Slide the window
|
|
current = current[1..].to_vec();
|
|
current.push(next);
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
result.join(" ")
|
|
}
|
|
}
|
|
|
|
pub struct MarkovGenerator {
|
|
chains: Mutex<HashMap<String, Chain>>,
|
|
}
|
|
|
|
impl MarkovGenerator {
|
|
pub fn new(corpus_dir: &str) -> Self {
|
|
let mut chains = HashMap::new();
|
|
|
|
// Initialize with default types
|
|
let types = ["php_exploit", "wordpress", "api", "generic"];
|
|
for t in &types {
|
|
chains.insert((*t).to_string(), Chain::new(DEFAULT_ORDER));
|
|
}
|
|
|
|
// Load corpus files if they exist
|
|
let path = Path::new(corpus_dir);
|
|
if path.exists() && path.is_dir() {
|
|
if let Ok(entries) = fs::read_dir(path) {
|
|
entries.for_each(|entry| {
|
|
if let Ok(entry) = entry {
|
|
let file_path = entry.path();
|
|
if let Some(file_name) = file_path.file_stem() {
|
|
if let Some(file_name_str) = file_name.to_str() {
|
|
if types.contains(&file_name_str) {
|
|
if let Ok(content) = fs::read_to_string(&file_path) {
|
|
let mut chain = Chain::new(DEFAULT_ORDER);
|
|
for line in content.lines() {
|
|
chain.add(line);
|
|
}
|
|
chains.insert(file_name_str.to_string(), chain);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
});
|
|
}
|
|
}
|
|
|
|
// If corpus files didn't exist, initialize with some default content
|
|
if chains["php_exploit"].start_states.is_empty() {
|
|
let mut chain = Chain::new(DEFAULT_ORDER);
|
|
chain.add("PHP Fatal error: Uncaught Error: Call to undefined function");
|
|
chain.add("PHP Warning: file_get_contents() expects parameter 1 to be string");
|
|
chain.add("PHP Notice: Undefined variable: data in /var/www/html/index.php on line 26");
|
|
chain.add("Warning: Invalid argument supplied for foreach() in /var/www/html/vendor/autoload.php");
|
|
chains.insert("php_exploit".to_string(), chain);
|
|
}
|
|
|
|
if chains["wordpress"].start_states.is_empty() {
|
|
let mut chain = Chain::new(DEFAULT_ORDER);
|
|
chain.add("WordPress database error: [Table 'wp_users' doesn't exist]");
|
|
chain.add("Warning: Cannot modify header information - headers already sent by");
|
|
chain.add("Fatal error: Allowed memory size of 41943040 bytes exhausted");
|
|
chains.insert("wordpress".to_string(), chain);
|
|
}
|
|
|
|
// Seed with common "interesting" terms that bots look for
|
|
let seed_words = [
|
|
"username",
|
|
"password",
|
|
"token",
|
|
"secret",
|
|
"key",
|
|
"admin",
|
|
"root",
|
|
"shell",
|
|
"config",
|
|
"api_key",
|
|
"database",
|
|
"ssh",
|
|
"private",
|
|
"credential",
|
|
"system",
|
|
"vulnerability",
|
|
"exploit",
|
|
"access",
|
|
"error",
|
|
"warning",
|
|
"mysql",
|
|
"postgresql",
|
|
];
|
|
|
|
// Add seed words to each chain
|
|
Self::add_seed_words_to_chains(&mut chains, &seed_words);
|
|
|
|
Self {
|
|
chains: Mutex::new(chains),
|
|
}
|
|
}
|
|
|
|
// Helper function to add seed words to chains
|
|
fn add_seed_words_to_chains(chains: &mut HashMap<String, Chain>, seed_words: &[&str]) {
|
|
// Process each chain separately
|
|
for (_, chain) in chains.iter_mut() {
|
|
// Create new state if no states exist
|
|
if chain.states.is_empty() && !chain.start_states.is_empty() {
|
|
let state = chain.start_states[0].clone();
|
|
let mut next_words = Vec::new();
|
|
for word in seed_words {
|
|
next_words.push((*word).to_string());
|
|
}
|
|
chain.states.insert(state, next_words);
|
|
} else {
|
|
// Find a state to add words to
|
|
if let Some((_state, next_words)) = chain.states.iter_mut().next() {
|
|
for word in seed_words {
|
|
next_words.push((*word).to_string());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn generate(&self, chain_type: &str, max_words: usize) -> String {
|
|
let chains = self.chains.lock().unwrap();
|
|
|
|
match chains.get(chain_type) {
|
|
Some(chain) => chain.generate(max_words),
|
|
None => {
|
|
// Fall back to generic
|
|
chains
|
|
.get("generic")
|
|
.map(|c| c.generate(max_words))
|
|
.unwrap_or_default()
|
|
}
|
|
}
|
|
}
|
|
}
|