Skip to content
Snippets Groups Projects
Commit c749a8c4 authored by Patrick Lam's avatar Patrick Lam
Browse files

restore wrapping backwards; it is unclear why it was previously removed

parent abf74caa
No related branches found
No related tags found
No related merge requests found
......@@ -23,7 +23,7 @@ struct Args {
fn test_derive_2grams_from_trigram() {
let twograms_oracle = vec![("one^two"),
("two^three")];
let twograms = derive_2grams("one^two^three");
let twograms = derive_2grams_from_trigram("one^two^three");
assert_eq!(twograms_oracle, twograms);
}
......
......@@ -102,8 +102,8 @@ fn test_token_splitter() {
assert_eq!(split_line, vec!["check", "pass;", "user", "unknown"]);
}
// processes line, assuming that prev1 and prev2 were the last tokens on the previous line, and returns the first 2 tokens on this line
fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, regexp:&Regex, regexps:&Vec<Regex>, dbl: &mut HashMap<String, i32>, trpl: &mut HashMap<String, i32>, all_token_list: &mut Vec<String>) {
// processes line, adding to the end of line the first two tokens from lookahead_line, and returns the first 2 tokens on this line
fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, regexp:&Regex, regexps:&Vec<Regex>, dbl: &mut HashMap<String, i32>, trpl: &mut HashMap<String, i32>, all_token_list: &mut Vec<String>, prev1: Option<String>, prev2: Option<String>) -> (Option<String>, Option<String>) {
let (next1, next2) = match lookahead_line {
None => (None, None),
Some(ll) => {
......@@ -118,13 +118,28 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>,
let mut tokens = token_splitter(line, &regexp, &regexps);
if tokens.is_empty() {
return;
return (None, None);
}
tokens.iter().for_each(|t| if !all_token_list.contains(t) { all_token_list.push(t.clone()) } );
let mut tokens2 = match next1 {
// keep this for later when we'll return it
let last1 = match tokens.len() {
0 => None,
n => Some(tokens[n-1].clone())
};
let last2 = match tokens.len() {
0 => None,
1 => None,
n => Some(tokens[n-2].clone())
};
let mut tokens2_ = match prev1 {
None => tokens,
Some(x) => { tokens.push(x); tokens }
Some(x) => { let mut t = vec![x]; t.append(&mut tokens); t}
};
let mut tokens2 = match next1 {
None => tokens2_,
Some(x) => { tokens2_.push(x); tokens2_ }
};
for doubles in tokens2.windows(2) {
......@@ -136,9 +151,13 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>,
}
}
let tokens3 = match next2 {
let mut tokens3_ = match prev2 {
None => tokens2,
Some(x) => { tokens2.push(x); tokens2 }
Some(x) => { let mut t = vec![x]; t.append(&mut tokens2); t}
};
let tokens3 = match next2 {
None => tokens3_,
Some(x) => { tokens3_.push(x); tokens3_ }
};
for triples in tokens3.windows(3) {
let triple_tmp = format!("{}^{}^{}", triples[0], triples[1], triples[2]);
......@@ -148,6 +167,7 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>,
trpl.insert(triple_tmp, 1);
}
}
return (last1, last2);
}
fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (HashMap<String, i32>, HashMap<String, i32>, Vec<String>) {
......@@ -156,6 +176,8 @@ fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (H
let mut all_token_list = vec![];
let regex = regex_generator(format);
let mut prev1 = None; let mut prev2 = None;
if let Ok(lines) = read_lines(raw_fn) {
let mut lp = lines.peekable();
loop {
......@@ -164,9 +186,9 @@ fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (H
Some(Ok(ip)) =>
match lp.peek() {
None =>
process_dictionary_builder_line(ip, None, &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list),
(prev1, prev2) = process_dictionary_builder_line(ip, None, &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2),
Some(Ok(next_line)) =>
process_dictionary_builder_line(ip, Some(next_line.clone()), &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list),
(prev1, prev2) = process_dictionary_builder_line(ip, Some(next_line.clone()), &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2),
Some(Err(_)) => {} // meh, some weirdly-encoded line, throw it out
}
Some(Err(_)) => {} // meh, some weirdly-encoded line, throw it out
......@@ -183,7 +205,8 @@ fn test_dictionary_builder_process_line_lookahead_is_none() {
let mut dbl = HashMap::new();
let mut trpl = HashMap::new();
let mut all_token_list = vec![];
process_dictionary_builder_line(line, None, &re, &linux_censored_regexps(), &mut dbl, &mut trpl, &mut all_token_list);
let (last1, last2) = process_dictionary_builder_line(line, None, &re, &linux_censored_regexps(), &mut dbl, &mut trpl, &mut all_token_list, None, None);
assert_eq!((last1, last2), (Some("unknown".to_string()), Some("user".to_string())));
let mut dbl_oracle = HashMap::new();
dbl_oracle.insert("user^unknown".to_string(), 1);
......@@ -205,10 +228,12 @@ fn test_dictionary_builder_process_line_lookahead_is_some() {
let mut dbl = HashMap::new();
let mut trpl = HashMap::new();
let mut all_token_list = vec![];
process_dictionary_builder_line(line, Some(next_line), &re, &linux_censored_regexps(), &mut dbl, &mut trpl, &mut all_token_list);
let (last1, last2) = process_dictionary_builder_line(line, Some(next_line), &re, &linux_censored_regexps(), &mut dbl, &mut trpl, &mut all_token_list, Some("foo".to_string()), Some("bar".to_string()));
assert_eq!((last1, last2), (Some("unknown".to_string()), Some("user".to_string())));
let mut dbl_oracle = HashMap::new();
dbl_oracle.insert("unknown^baz".to_string(), 1);
dbl_oracle.insert("foo^check".to_string(), 1);
dbl_oracle.insert("user^unknown".to_string(), 1);
dbl_oracle.insert("pass;^user".to_string(), 1);
dbl_oracle.insert("check^pass;".to_string(), 1);
......@@ -218,6 +243,8 @@ fn test_dictionary_builder_process_line_lookahead_is_some() {
trpl_oracle.insert("pass;^user^unknown".to_string(), 1);
trpl_oracle.insert("check^pass;^user".to_string(), 1);
trpl_oracle.insert("unknown^baz^bad".to_string(), 1);
trpl_oracle.insert("foo^check^pass;".to_string(), 1);
trpl_oracle.insert("bar^foo^check".to_string(), 1);
trpl_oracle.insert("user^unknown^baz".to_string(), 1);
assert_eq!(trpl, trpl_oracle);
}
......@@ -249,21 +276,21 @@ fn test_parse_raw_linux() {
double_dict_oracle.insert("rdd_42_22^locally".to_string(), 1);
double_dict_oracle.insert("rdd_42_23^locally".to_string(), 1);
double_dict_oracle.insert("rdd_42_24^locally".to_string(), 1);
double_dict_oracle.insert("locally^Found".to_string(), 3);
double_dict_oracle.insert("locally^Found".to_string(), 6);
assert_eq!(double_dict, double_dict_oracle);
let mut triple_dict_oracle = HashMap::new();
triple_dict_oracle.insert("block^rdd_42_20^locally".to_string(), 1);
triple_dict_oracle.insert("block^rdd_42_22^locally".to_string(), 1);
triple_dict_oracle.insert("block^rdd_42_23^locally".to_string(), 1);
triple_dict_oracle.insert("block^rdd_42_24^locally".to_string(), 1);
triple_dict_oracle.insert("rdd_42_20^locally^Found".to_string(), 1);
triple_dict_oracle.insert("rdd_42_22^locally^Found".to_string(), 1);
triple_dict_oracle.insert("rdd_42_23^locally^Found".to_string(), 1);
triple_dict_oracle.insert("rdd_42_20^locally^Found".to_string(), 2);
triple_dict_oracle.insert("rdd_42_22^locally^Found".to_string(), 2);
triple_dict_oracle.insert("rdd_42_23^locally^Found".to_string(), 2);
triple_dict_oracle.insert("Found^block^rdd_42_20".to_string(), 1);
triple_dict_oracle.insert("Found^block^rdd_42_22".to_string(), 1);
triple_dict_oracle.insert("Found^block^rdd_42_23".to_string(), 1);
triple_dict_oracle.insert("Found^block^rdd_42_24".to_string(), 1);
triple_dict_oracle.insert("locally^Found^block".to_string(), 3);
triple_dict_oracle.insert("locally^Found^block".to_string(), 6);
assert_eq!(triple_dict, triple_dict_oracle);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment