Skip to content
Snippets Groups Projects
Commit 91cfb4ef authored by Patrick Lam's avatar Patrick Lam
Browse files

looking at the past

parent e00e9e47
No related branches found
No related tags found
No related merge requests found
......@@ -74,44 +74,81 @@ fn test_token_splitter() {
assert_eq!(split_line, vec!["check", "pass;", "user", "unknown"]);
}
/// Tokenizes one log line and adds its 2-grams and 3-grams to the running counts.
///
/// `line` is split with `token_splitter` using `regexp` and `regexps`. Every resulting
/// token is appended to `all_token_list`. Each pair of adjacent tokens is counted in `dbl`
/// under the key `"a^b"`, and each run of three adjacent tokens is counted in `trpl` under
/// the key `"a^b^c"`.
///
/// `prev1` and `prev2` are the last and second-to-last tokens returned by the previous
/// call, if any. They are placed in front of this line's tokens so that n-grams that
/// straddle the line boundary are counted as well.
///
/// Returns `(last, second_to_last)` tokens of this line; `dictionary_builder` feeds them
/// back in as `prev1`/`prev2` for the next line. A line that produces no tokens returns
/// `(None, None)` without touching any collection, which also discards the carried-over
/// tokens. A line with a single token returns `None` for the second element.
fn process_dictionary_builder_line(line: String, regexp:&Regex, regexps:&Vec<Regex>, dbl: &mut HashMap<String, i32>, trpl: &mut HashMap<String, i32>, all_token_list: &mut Vec<String>, prev1: Option<String>, prev2: Option<String>) -> (Option<String>, Option<String>){
    let mut tokens = token_splitter(line, regexp, regexps);
    if tokens.is_empty() {
        return (None, None);
    }
    all_token_list.extend(tokens.iter().cloned());

    // `tokens` is non-empty from here on, so a last token always exists; a second-to-last
    // token exists only when the line has at least two tokens.
    let last1 = tokens.last().cloned();
    let last2 = tokens.len().checked_sub(2).map(|i| tokens[i].clone());

    // Doubles: prepend the previous line's last token so the pair crossing the line
    // boundary is counted along with the pairs inside this line.
    let mut with_prev1 = match prev1 {
        None => tokens,
        Some(p) => {
            let mut joined = vec![p];
            joined.append(&mut tokens);
            joined
        }
    };
    for pair in with_prev1.windows(2) {
        *dbl.entry(format!("{}^{}", pair[0], pair[1])).or_insert(0) += 1;
    }

    // Triples: additionally prepend the previous line's second-to-last token, giving
    // `[prev2, prev1, tokens...]` when both are present.
    let with_prev2 = match prev2 {
        None => with_prev1,
        Some(p) => {
            let mut joined = vec![p];
            joined.append(&mut with_prev1);
            joined
        }
    };
    for triple in with_prev2.windows(3) {
        *trpl.entry(format!("{}^{}^{}", triple[0], triple[1], triple[2])).or_insert(0) += 1;
    }

    (last1, last2)
}
/// Builds the 2-gram and 3-gram dictionaries for the log file `raw_fn`.
///
/// `format` is turned into a line-matching regex with `regex_generator`; `regexps` is
/// passed through to the tokenizer. Each readable line is handed to
/// `process_dictionary_builder_line`, which updates the counts and returns the line's
/// trailing tokens so that n-grams spanning consecutive lines are counted too.
///
/// Returns `(doubles, triples, all_tokens)`: the `"a^b"` pair counts, the `"a^b^c"`
/// triple counts, and every token in the order it was produced.
///
/// This is best effort: if the file cannot be opened the result is three empty
/// collections, and individual lines that fail to read are skipped.
fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (HashMap<String, i32>, HashMap<String, i32>, Vec<String>) {
    let mut dbl = HashMap::new();
    let mut trpl = HashMap::new();
    let mut all_token_list = vec![];
    let regex = regex_generator(format);

    // Last and second-to-last token of the most recently processed line, threaded
    // through each call so n-grams can cross line boundaries.
    let mut prev1 = None;
    let mut prev2 = None;

    if let Ok(lines) = read_lines(raw_fn) {
        for line in lines {
            if let Ok(ip) = line {
                // All tokenizing and counting happens in the helper; doing any of it here
                // as well would count every token and n-gram twice.
                (prev1, prev2) = process_dictionary_builder_line(ip, &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2);
            }
        }
    }
    (dbl, trpl, all_token_list)
}
// Processes a single line with no carried-over tokens and checks the returned trailing
// tokens, the collected token list, and the pair/triple counts.
#[test]
fn test_dictionary_builder_process_line() {
    let line = "Jun 14 15:16:02 combo sshd(pam_unix)[19937]: check pass; user unknown".to_string();
    let re = regex_generator("<Month> <Date> <Time> <Level> <Component>(\\[<PID>\\])?: <Content>".to_string());
    let linux_res = vec![Regex::new(r"(\d+\.){3}\d+").unwrap(), Regex::new(r"\d{2}:\d{2}:\d{2}").unwrap()];
    let mut dbl = HashMap::new();
    let mut trpl = HashMap::new();
    let mut all_token_list = vec![];

    let (last1, last2) = process_dictionary_builder_line(line, &re, &linux_res, &mut dbl, &mut trpl, &mut all_token_list, None, None);

    // The function returns the last and second-to-last tokens, not the token list.
    assert_eq!((last1, last2), (Some("unknown".to_string()), Some("user".to_string())));
    assert_eq!(all_token_list, vec!["check", "pass;", "user", "unknown"]);

    let mut dbl_oracle = HashMap::new();
    dbl_oracle.insert("check^pass;".to_string(), 1);
    dbl_oracle.insert("pass;^user".to_string(), 1);
    dbl_oracle.insert("user^unknown".to_string(), 1);
    assert_eq!(dbl, dbl_oracle);

    let mut trpl_oracle = HashMap::new();
    trpl_oracle.insert("check^pass;^user".to_string(), 1);
    trpl_oracle.insert("pass;^user^unknown".to_string(), 1);
    assert_eq!(trpl, trpl_oracle);
}
pub fn parse_raw(raw_fn: String) {
let linux_format : String = "<Month> <Date> <Time> <Level> <Component>(\\[<PID>\\])?: <Content>".to_string(); // Linux format
let linux_re : Vec<Regex> = vec![Regex::new(r"(\d+\.){3}\d+").unwrap(), Regex::new(r"\d{2}:\d{2}:\d{2}").unwrap()];
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment