Skip to content
Snippets Groups Projects
Commit 038bd800 authored by Patrick Lam
Browse files

actually don't need to wrap backwards

parent d3780e03
No related branches found
No related tags found
No related merge requests found
Jun 14 15:16:02 combo sshd(pam_unix)[19937]: Found block rdd_42_20 locally
Jun 14 15:16:02 combo sshd(pam_unix)[19937]: Found block rdd_42_22 locally
Jun 14 15:16:02 combo sshd(pam_unix)[19937]: Found block rdd_42_23 locally
Jun 14 15:16:02 combo sshd(pam_unix)[19937]: Found block rdd_42_24 locally
......@@ -75,7 +75,7 @@ fn test_token_splitter() {
}
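// processes line: adds its tokens to all_token_list and counts its doubles in dbl and its triples in trpl, appending the first 2 tokens of lookahead_line (when there is one) so that doubles and triples spanning the line boundary are counted too; it no longer takes prev1/prev2 and returns nothing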
fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, regexp:&Regex, regexps:&Vec<Regex>, dbl: &mut HashMap<String, i32>, trpl: &mut HashMap<String, i32>, all_token_list: &mut Vec<String>, prev1: Option<String>, prev2: Option<String>) -> (Option<String>, Option<String>){
fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, regexp:&Regex, regexps:&Vec<Regex>, dbl: &mut HashMap<String, i32>, trpl: &mut HashMap<String, i32>, all_token_list: &mut Vec<String>) {
let (next1, next2) = match lookahead_line {
None => (None, None),
Some(ll) => {
......@@ -90,28 +90,13 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>,
let mut tokens = token_splitter(line, &regexp, &regexps);
if tokens.is_empty() {
return (None, None);
return;
}
tokens.iter().for_each(|t| all_token_list.push(t.clone()));
tokens.iter().for_each(|t| if !all_token_list.contains(t) { all_token_list.push(t.clone()) } );
// keep this for later when we'll return it
let last1 = match tokens.len() {
0 => None,
n => Some(tokens[n-1].clone())
};
let last2 = match tokens.len() {
0 => None,
1 => None,
n => Some(tokens[n-2].clone())
};
let mut tokens2_ = match prev1 {
None => tokens,
Some(x) => { let mut t = vec![x]; t.append(&mut tokens); t }
};
let mut tokens2 = match next1 {
None => tokens2_,
Some(x) => { tokens2_.push(x); tokens2_ }
None => tokens,
Some(x) => { tokens.push(x); tokens }
};
for doubles in tokens2.windows(2) {
......@@ -123,13 +108,9 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>,
}
}
let mut tokens3_ = match prev2 {
None => tokens2,
Some(x) => { let mut t = vec![x]; t.append(&mut tokens2); t }
};
let tokens3 = match next2 {
None => tokens3_,
Some(x) => { tokens3_.push(x); tokens3_ }
None => tokens2,
Some(x) => { tokens2.push(x); tokens2 }
};
for triples in tokens3.windows(3) {
let triple_tmp = format!("{}^{}^{}", triples[0], triples[1], triples[2]);
......@@ -139,7 +120,6 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>,
trpl.insert(triple_tmp, 1);
}
}
return (last1, last2);
}
fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (HashMap<String, i32>, HashMap<String, i32>, Vec<String>) {
......@@ -148,8 +128,6 @@ fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (H
let mut all_token_list = vec![];
let regex = regex_generator(format);
let mut prev1 = None; let mut prev2 = None;
if let Ok(lines) = read_lines(raw_fn) {
let mut lp = lines.peekable();
loop {
......@@ -158,9 +136,9 @@ fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (H
Some(Ok(ip)) =>
match lp.peek() {
None =>
(prev1, prev2) = process_dictionary_builder_line(ip, None, &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2),
process_dictionary_builder_line(ip, None, &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list),
Some(Ok(next_line)) =>
(prev1, prev2) = process_dictionary_builder_line(ip, Some(next_line.clone()), &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2),
process_dictionary_builder_line(ip, Some(next_line.clone()), &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list),
Some(&Err(_)) => panic!("should not happen")
}
Some(Err(_)) => panic!("should not happen")
......@@ -178,8 +156,7 @@ fn test_dictionary_builder_process_line_lookahead_is_none() {
let mut dbl = HashMap::new();
let mut trpl = HashMap::new();
let mut all_token_list = vec![];
let (last1, last2) = process_dictionary_builder_line(line, None, &re, &linux_res, &mut dbl, &mut trpl, &mut all_token_list, None, None);
assert_eq!((last1, last2), (Some("unknown".to_string()), Some("user".to_string())));
process_dictionary_builder_line(line, None, &re, &linux_res, &mut dbl, &mut trpl, &mut all_token_list);
let mut dbl_oracle = HashMap::new();
dbl_oracle.insert("user^unknown".to_string(), 1);
......@@ -202,12 +179,10 @@ fn test_dictionary_builder_process_line_lookahead_is_some() {
let mut dbl = HashMap::new();
let mut trpl = HashMap::new();
let mut all_token_list = vec![];
let (last1, last2) = process_dictionary_builder_line(line, Some(next_line), &re, &linux_res, &mut dbl, &mut trpl, &mut all_token_list, Some("foo".to_string()), Some("bar".to_string()));
assert_eq!((last1, last2), (Some("unknown".to_string()), Some("user".to_string())));
process_dictionary_builder_line(line, Some(next_line), &re, &linux_res, &mut dbl, &mut trpl, &mut all_token_list);
let mut dbl_oracle = HashMap::new();
dbl_oracle.insert("unknown^baz".to_string(), 1);
dbl_oracle.insert("foo^check".to_string(), 1);
dbl_oracle.insert("user^unknown".to_string(), 1);
dbl_oracle.insert("pass;^user".to_string(), 1);
dbl_oracle.insert("check^pass;".to_string(), 1);
......@@ -217,15 +192,47 @@ fn test_dictionary_builder_process_line_lookahead_is_some() {
trpl_oracle.insert("pass;^user^unknown".to_string(), 1);
trpl_oracle.insert("check^pass;^user".to_string(), 1);
trpl_oracle.insert("unknown^baz^bad".to_string(), 1);
trpl_oracle.insert("foo^check^pass;".to_string(), 1);
trpl_oracle.insert("bar^foo^check".to_string(), 1);
trpl_oracle.insert("user^unknown^baz".to_string(), 1);
assert_eq!(trpl, trpl_oracle);
}
pub fn parse_raw(raw_fn: String) {
/// Builds the dictionaries for the log file at `raw_fn` using the Linux log format.
///
/// Hands `raw_fn`, the Linux format string, and two regexes (one matching four
/// dot-separated digit groups, one matching `dd:dd:dd`) to `dictionary_builder`,
/// prints the sizes of the three results to stdout, and returns them unchanged as
/// `(double dictionary, triple dictionary, all-token list)`.
///
/// Panics if either hard-coded regex fails to compile (`unwrap`).
pub fn parse_raw(raw_fn: String) -> (HashMap<String, i32>, HashMap<String, i32>, Vec<String>) {
    // Linux format
    let format: String = "<Month> <Date> <Time> <Level> <Component>(\\[<PID>\\])?: <Content>".to_string();
    let patterns: Vec<Regex> = vec![
        Regex::new(r"(\d+\.){3}\d+").unwrap(),
        Regex::new(r"\d{2}:\d{2}:\d{2}").unwrap(),
    ];

    let (doubles, triples, tokens) = dictionary_builder(raw_fn, format, patterns);
    println!("double dictionary list len {}, triple {}, all tokens {}", doubles.len(), triples.len(), tokens.len());

    (doubles, triples, tokens)
}
// End-to-end check of `parse_raw` against the fixture `data/from_paper.log`.
// The oracles below are consistent with a file of four consecutive lines whose
// content is "Found block rdd_42_NN locally" (NN = 20, 22, 23, 24) -- presumably
// what the fixture contains; verify against the data file.
#[test]
fn test_parse_raw() {
    let (double_dict, triple_dict, all_token_list) = parse_raw("data/from_paper.log".to_string());

    // Each distinct token once, in order of first appearance.
    let expected_tokens: Vec<String> = [
        "Found", "block", "rdd_42_20", "locally", "rdd_42_22", "rdd_42_23", "rdd_42_24",
    ]
    .iter()
    .map(|&token| token.to_string())
    .collect();
    assert_eq!(all_token_list, expected_tokens);

    // "locally^Found" is counted 3 times: once per boundary between adjacent lines.
    let expected_doubles: HashMap<String, i32> = [
        ("Found^block", 4),
        ("block^rdd_42_20", 1),
        ("block^rdd_42_22", 1),
        ("block^rdd_42_23", 1),
        ("block^rdd_42_24", 1),
        ("rdd_42_20^locally", 1),
        ("rdd_42_22^locally", 1),
        ("rdd_42_23^locally", 1),
        ("rdd_42_24^locally", 1),
        ("locally^Found", 3),
    ]
    .iter()
    .map(|&(key, count)| (key.to_string(), count))
    .collect();
    assert_eq!(double_dict, expected_doubles);

    // No "rdd_42_24^locally^Found" entry: the oracle has no triple continuing past
    // the last block id.
    let expected_triples: HashMap<String, i32> = [
        ("block^rdd_42_20^locally", 1),
        ("block^rdd_42_22^locally", 1),
        ("block^rdd_42_23^locally", 1),
        ("block^rdd_42_24^locally", 1),
        ("rdd_42_20^locally^Found", 1),
        ("rdd_42_22^locally^Found", 1),
        ("rdd_42_23^locally^Found", 1),
        ("Found^block^rdd_42_20", 1),
        ("Found^block^rdd_42_22", 1),
        ("Found^block^rdd_42_23", 1),
        ("Found^block^rdd_42_24", 1),
        ("locally^Found^block", 3),
    ]
    .iter()
    .map(|&(key, count)| (key.to_string(), count))
    .collect();
    assert_eq!(triple_dict, expected_triples);
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment