Skip to content
Snippets Groups Projects
Commit d3780e03 authored by Patrick Lam's avatar Patrick Lam
Browse files

looking at the future

parent 91cfb4ef
No related branches found
No related tags found
No related merge requests found
......@@ -74,12 +74,27 @@ fn test_token_splitter() {
assert_eq!(split_line, vec!["check", "pass;", "user", "unknown"]);
}
fn process_dictionary_builder_line(line: String, regexp:&Regex, regexps:&Vec<Regex>, dbl: &mut HashMap<String, i32>, trpl: &mut HashMap<String, i32>, all_token_list: &mut Vec<String>, prev1: Option<String>, prev2: Option<String>) -> (Option<String>, Option<String>){
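// processes line, assuming that prev1 and prev2 were the last tokens on the previous line and that lookahead_line (if any) is the line that follows; returns the last 2 tokens on this line (last token first), or (None, None) if this line has no tokens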
fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, regexp:&Regex, regexps:&Vec<Regex>, dbl: &mut HashMap<String, i32>, trpl: &mut HashMap<String, i32>, all_token_list: &mut Vec<String>, prev1: Option<String>, prev2: Option<String>) -> (Option<String>, Option<String>){
let (next1, next2) = match lookahead_line {
None => (None, None),
Some(ll) => {
let next_tokens = token_splitter(ll, &regexp, &regexps);
match next_tokens.len() {
0 => (None, None),
1 => (Some(next_tokens[0].clone()), None),
_ => (Some(next_tokens[0].clone()), Some(next_tokens[1].clone()))
}
}
};
let mut tokens = token_splitter(line, &regexp, &regexps);
if tokens.is_empty() {
return (None, None);
}
tokens.iter().for_each(|t| all_token_list.push(t.clone()));
// keep this for later when we'll return it
let last1 = match tokens.len() {
0 => None,
n => Some(tokens[n-1].clone())
......@@ -90,11 +105,14 @@ fn process_dictionary_builder_line(line: String, regexp:&Regex, regexps:&Vec<Reg
n => Some(tokens[n-2].clone())
};
// cross line boundaries, as mentioned in the paper: prepend the previous line's last token and append the next line's first token before counting doubles.
let mut tokens2 = match prev1 {
let mut tokens2_ = match prev1 {
None => tokens,
Some(x) => { let mut t = vec![x]; t.append(&mut tokens); t }
};
let mut tokens2 = match next1 {
None => tokens2_,
Some(x) => { tokens2_.push(x); tokens2_ }
};
for doubles in tokens2.windows(2) {
let double_tmp = format!("{}^{}", doubles[0], doubles[1]);
......@@ -105,10 +123,14 @@ fn process_dictionary_builder_line(line: String, regexp:&Regex, regexps:&Vec<Reg
}
}
let tokens3 = match prev2 {
let mut tokens3_ = match prev2 {
None => tokens2,
Some(x) => { let mut t = vec![x]; t.append(&mut tokens2); t }
};
let tokens3 = match next2 {
None => tokens3_,
Some(x) => { tokens3_.push(x); tokens3_ }
};
for triples in tokens3.windows(3) {
let triple_tmp = format!("{}^{}^{}", triples[0], triples[1], triples[2]);
if let Some(count) = trpl.get(&triple_tmp) {
......@@ -121,17 +143,27 @@ fn process_dictionary_builder_line(line: String, regexp:&Regex, regexps:&Vec<Reg
}
fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (HashMap<String, i32>, HashMap<String, i32>, Vec<String>) {
let mut dbl = HashMap::new(); // dbl.insert("dictionary^DHT".to_string(), -1);
let mut trpl = HashMap::new(); // trpl.insert("dictionary^DHT^triple".to_string(), -1);
let mut dbl = HashMap::new();
let mut trpl = HashMap::new();
let mut all_token_list = vec![];
let regex = regex_generator(format);
let mut prev1 = None; let mut prev2 = None;
if let Ok(lines) = read_lines(raw_fn) {
for line in lines {
if let Ok(ip) = line {
(prev1, prev2) = process_dictionary_builder_line(ip, &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2);
let mut lp = lines.peekable();
loop {
match lp.next() {
None => break,
Some(Ok(ip)) =>
match lp.peek() {
None =>
(prev1, prev2) = process_dictionary_builder_line(ip, None, &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2),
Some(Ok(next_line)) =>
(prev1, prev2) = process_dictionary_builder_line(ip, Some(next_line.clone()), &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2),
Some(&Err(_)) => panic!("should not happen")
}
Some(Err(_)) => panic!("should not happen")
}
}
}
......@@ -139,14 +171,56 @@ fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (H
}
#[test]
fn test_dictionary_builder_process_line() {
fn test_dictionary_builder_process_line_lookahead_is_none() {
let line = "Jun 14 15:16:02 combo sshd(pam_unix)[19937]: check pass; user unknown".to_string();
let re = regex_generator("<Month> <Date> <Time> <Level> <Component>(\\[<PID>\\])?: <Content>".to_string());
let linux_res = vec![Regex::new(r"(\d+\.){3}\d+").unwrap(), Regex::new(r"\d{2}:\d{2}:\d{2}").unwrap()];
let mut dbl = HashMap::new();
let mut trpl = HashMap::new();
let split_line = process_dictionary_builder_line(line, &re, &linux_res, &dbl, &trpl, None, None);
assert_eq!(split_line, vec!["check", "pass;", "user", "unknown"]);
let mut all_token_list = vec![];
let (last1, last2) = process_dictionary_builder_line(line, None, &re, &linux_res, &mut dbl, &mut trpl, &mut all_token_list, None, None);
assert_eq!((last1, last2), (Some("unknown".to_string()), Some("user".to_string())));
let mut dbl_oracle = HashMap::new();
dbl_oracle.insert("user^unknown".to_string(), 1);
dbl_oracle.insert("pass;^user".to_string(), 1);
dbl_oracle.insert("check^pass;".to_string(), 1);
assert_eq!(dbl, dbl_oracle);
let mut trpl_oracle = HashMap::new();
trpl_oracle.insert("pass;^user^unknown".to_string(), 1);
trpl_oracle.insert("check^pass;^user".to_string(), 1);
assert_eq!(trpl, trpl_oracle);
}
#[test]
fn test_dictionary_builder_process_line_lookahead_is_some() {
    // Scenario: the current line is processed with carried-over tokens
    // (prev1 = "foo", prev2 = "bar") and a lookahead line, so the expected
    // doubles and triples reach across both ends of the current line.
    let current = "Jun 14 15:16:02 combo sshd(pam_unix)[19937]: check pass; user unknown".to_string();
    let upcoming = "Jun 14 15:16:02 combo sshd(pam_unix)[19937]: baz bad".to_string();
    let format_re = regex_generator("<Month> <Date> <Time> <Level> <Component>(\\[<PID>\\])?: <Content>".to_string());
    let patterns = vec![Regex::new(r"(\d+\.){3}\d+").unwrap(), Regex::new(r"\d{2}:\d{2}:\d{2}").unwrap()];
    let mut dbl = HashMap::new();
    let mut trpl = HashMap::new();
    let mut all_token_list = vec![];

    let returned = process_dictionary_builder_line(
        current,
        Some(upcoming),
        &format_re,
        &patterns,
        &mut dbl,
        &mut trpl,
        &mut all_token_list,
        Some("foo".to_string()),
        Some("bar".to_string()),
    );
    // The function hands back the last two tokens of the current line, last one first.
    assert_eq!(returned, (Some("unknown".to_string()), Some("user".to_string())));

    // Every adjacent pair is expected exactly once, including the pairs that
    // straddle the previous-line and next-line boundaries.
    let expected_doubles: HashMap<String, i32> = [
        "unknown^baz",
        "foo^check",
        "user^unknown",
        "pass;^user",
        "check^pass;",
    ]
    .iter()
    .map(|key| (key.to_string(), 1))
    .collect();
    assert_eq!(dbl, expected_doubles);

    // Likewise for every run of three consecutive tokens.
    let expected_triples: HashMap<String, i32> = [
        "pass;^user^unknown",
        "check^pass;^user",
        "unknown^baz^bad",
        "foo^check^pass;",
        "bar^foo^check",
        "user^unknown^baz",
    ]
    .iter()
    .map(|key| (key.to_string(), 1))
    .collect();
    assert_eq!(trpl, expected_triples);
}
pub fn parse_raw(raw_fn: String) {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment