Eris/src/markov.rs
2025-05-02 07:22:22 +03:00

216 lines
6.9 KiB
Rust

use rand::prelude::*;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::sync::Mutex;
/// Order passed to `Chain::new` for every chain this module builds: the number
/// of consecutive words that together form one state of a chain.
const DEFAULT_ORDER: usize = 2;
/// Word-level Markov chain: records, for each run of `order` consecutive
/// words, the words that were observed immediately after it.
#[derive(Debug)]
struct Chain {
// Number of words that make up one state (one key of `states`).
order: usize,
// State -> every word seen right after that state. Duplicates are kept, so a
// successor that was observed more often is proportionally more likely to be
// drawn when one entry is picked at random.
states: HashMap<Vec<String>, Vec<String>>,
// The first `order` words of each line that was added; text generation
// begins from one of these.
start_states: Vec<Vec<String>>,
}
impl Chain {
    /// Creates an empty chain whose states are `order` consecutive words long.
    fn new(order: usize) -> Self {
        Self {
            order,
            states: HashMap::new(),
            start_states: Vec::new(),
        }
    }

    /// Learns the word transitions contained in one line of text.
    ///
    /// The line is split on whitespace. A line with `order` words or fewer
    /// carries no transition and is ignored. Otherwise its first `order`
    /// words are recorded as a possible start state, and every run of `order`
    /// words is mapped to the word that follows it.
    fn add(&mut self, line: &str) {
        let words: Vec<String> = line.split_whitespace().map(str::to_owned).collect();
        if words.len() <= self.order {
            return;
        }

        self.start_states.push(words[..self.order].to_vec());

        // Each window is one state followed by its successor word.
        for window in words.windows(self.order + 1) {
            let (state, successor) = window.split_at(self.order);
            self.states
                .entry(state.to_vec())
                .or_default()
                .push(successor[0].clone());
        }
    }

    /// Generates random text of at most `max_words` words, joined by single
    /// spaces.
    ///
    /// Generation begins at a randomly chosen start state and repeatedly
    /// appends a random recorded successor of the current state. It stops
    /// early when the current state has no recorded successor. Returns an
    /// empty string when nothing has been learned yet.
    fn generate(&self, max_words: usize) -> String {
        let mut rng = rand::rng();

        let mut current = match self.start_states.choose(&mut rng) {
            Some(start) => start.clone(),
            None => return String::new(),
        };

        // A start state is `order` words long; never emit more than the
        // caller asked for, even when `max_words` is smaller than `order`.
        let mut result = current.clone();
        result.truncate(max_words);

        while result.len() < max_words {
            let next = match self
                .states
                .get(&current)
                .and_then(|options| options.choose(&mut rng))
            {
                Some(word) => word.clone(),
                None => break,
            };

            // Slide the window: append first, then drop the oldest word, so
            // the state keeps its length for any order (including zero).
            current.push(next.clone());
            current.remove(0);
            result.push(next);
        }

        result.join(" ")
    }
}
/// A set of named Markov chains from which random text can be generated.
pub struct MarkovGenerator {
// Chain type name (for example "php_exploit" or "generic") -> its chain,
// guarded by a mutex.
chains: Mutex<HashMap<String, Chain>>,
}
impl MarkovGenerator {
    /// Builds a generator with one chain per known type: `php_exploit`,
    /// `wordpress`, `api` and `generic`.
    ///
    /// If `corpus_dir` is a directory, every file in it whose stem equals one
    /// of the known type names and which can be read as UTF-8 text is used to
    /// train that type's chain, one line at a time. Unreadable entries are
    /// skipped silently. When the `php_exploit` or `wordpress` chain is still
    /// empty afterwards, it is trained on a few built-in sample lines. `api`
    /// and `generic` have no built-in samples and stay empty unless a corpus
    /// file provides content. Finally a list of seed words is added to each
    /// non-empty chain.
    pub fn new(corpus_dir: &str) -> Self {
        let types = ["php_exploit", "wordpress", "api", "generic"];

        // Start with an empty chain for every known type.
        let mut chains: HashMap<String, Chain> = types
            .iter()
            .map(|t| ((*t).to_string(), Chain::new(DEFAULT_ORDER)))
            .collect();

        // Load corpus files if they exist.
        let path = Path::new(corpus_dir);
        if path.is_dir() {
            if let Ok(entries) = fs::read_dir(path) {
                for file_path in entries.flatten().map(|entry| entry.path()) {
                    let name = match file_path.file_stem().and_then(|stem| stem.to_str()) {
                        Some(name) if types.contains(&name) => name,
                        _ => continue,
                    };
                    if let Ok(content) = fs::read_to_string(&file_path) {
                        chains.insert(name.to_string(), Self::chain_from_lines(content.lines()));
                    }
                }
            }
        }

        // If corpus files didn't exist (or taught nothing), fall back to some
        // default content.
        if chains["php_exploit"].start_states.is_empty() {
            chains.insert(
                "php_exploit".to_string(),
                Self::chain_from_lines([
                    "PHP Fatal error: Uncaught Error: Call to undefined function",
                    "PHP Warning: file_get_contents() expects parameter 1 to be string",
                    "PHP Notice: Undefined variable: data in /var/www/html/index.php on line 26",
                    "Warning: Invalid argument supplied for foreach() in /var/www/html/vendor/autoload.php",
                ]),
            );
        }
        if chains["wordpress"].start_states.is_empty() {
            chains.insert(
                "wordpress".to_string(),
                Self::chain_from_lines([
                    "WordPress database error: [Table 'wp_users' doesn't exist]",
                    "Warning: Cannot modify header information - headers already sent by",
                    "Fatal error: Allowed memory size of 41943040 bytes exhausted",
                ]),
            );
        }

        // Seed with common "interesting" terms that bots look for.
        let seed_words = [
            "username",
            "password",
            "token",
            "secret",
            "key",
            "admin",
            "root",
            "shell",
            "config",
            "api_key",
            "database",
            "ssh",
            "private",
            "credential",
            "system",
            "vulnerability",
            "exploit",
            "access",
            "error",
            "warning",
            "mysql",
            "postgresql",
        ];
        Self::add_seed_words_to_chains(&mut chains, &seed_words);

        Self {
            chains: Mutex::new(chains),
        }
    }

    /// Builds a chain of the default order trained on each of `lines`.
    fn chain_from_lines<'a>(lines: impl IntoIterator<Item = &'a str>) -> Chain {
        let mut chain = Chain::new(DEFAULT_ORDER);
        for line in lines {
            chain.add(line);
        }
        chain
    }

    /// Appends every seed word to the successor list of one state per chain.
    ///
    /// For a chain that already has states, the target is whichever state the
    /// map's iteration yields first, so which state receives the words is
    /// arbitrary. A chain with start states but no states gets a new state
    /// keyed by its first start state. A completely empty chain is left
    /// untouched.
    fn add_seed_words_to_chains(chains: &mut HashMap<String, Chain>, seed_words: &[&str]) {
        for chain in chains.values_mut() {
            let seeds = seed_words.iter().map(|word| (*word).to_string());

            if chain.states.is_empty() {
                if let Some(state) = chain.start_states.first().cloned() {
                    chain.states.insert(state, seeds.collect());
                }
            } else if let Some(next_words) = chain.states.values_mut().next() {
                next_words.extend(seeds);
            }
        }
    }

    /// Generates up to `max_words` words of text from the chain named
    /// `chain_type`, falling back to the `generic` chain for unknown names.
    ///
    /// Returns an empty string when the selected chain has learned nothing.
    pub fn generate(&self, chain_type: &str, max_words: usize) -> String {
        // The map is only read after construction, so data behind a poisoned
        // lock is still consistent; recover the guard instead of panicking.
        let chains = self
            .chains
            .lock()
            .unwrap_or_else(std::sync::PoisonError::into_inner);

        chains
            .get(chain_type)
            .or_else(|| chains.get("generic"))
            .map(|chain| chain.generate(max_words))
            .unwrap_or_default()
    }
}