Skip to content
Snippets Groups Projects
Commit c749a8c4 authored by Patrick Lam's avatar Patrick Lam
Browse files

restore wrapping backwards, dunno why I removed it

parent abf74caa
No related branches found
No related tags found
No related merge requests found
...@@ -23,7 +23,7 @@ struct Args { ...@@ -23,7 +23,7 @@ struct Args {
fn test_derive_2grams_from_trigram() { fn test_derive_2grams_from_trigram() {
let twograms_oracle = vec![("one^two"), let twograms_oracle = vec![("one^two"),
("two^three")]; ("two^three")];
let twograms = derive_2grams("one^two^three"); let twograms = derive_2grams_from_trigram("one^two^three");
assert_eq!(twograms_oracle, twograms); assert_eq!(twograms_oracle, twograms);
} }
......
...@@ -102,8 +102,8 @@ fn test_token_splitter() { ...@@ -102,8 +102,8 @@ fn test_token_splitter() {
assert_eq!(split_line, vec!["check", "pass;", "user", "unknown"]); assert_eq!(split_line, vec!["check", "pass;", "user", "unknown"]);
} }
// processes line, assuming that prev1 and prev2 were the last tokens on the previous line, and returns the first 2 tokens on this line // processes line, adding to the end of line the first two tokens from lookahead_line, and returns the first 2 tokens on this line
fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, regexp:&Regex, regexps:&Vec<Regex>, dbl: &mut HashMap<String, i32>, trpl: &mut HashMap<String, i32>, all_token_list: &mut Vec<String>) { fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, regexp:&Regex, regexps:&Vec<Regex>, dbl: &mut HashMap<String, i32>, trpl: &mut HashMap<String, i32>, all_token_list: &mut Vec<String>, prev1: Option<String>, prev2: Option<String>) -> (Option<String>, Option<String>) {
let (next1, next2) = match lookahead_line { let (next1, next2) = match lookahead_line {
None => (None, None), None => (None, None),
Some(ll) => { Some(ll) => {
...@@ -118,13 +118,28 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, ...@@ -118,13 +118,28 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>,
let mut tokens = token_splitter(line, &regexp, &regexps); let mut tokens = token_splitter(line, &regexp, &regexps);
if tokens.is_empty() { if tokens.is_empty() {
return; return (None, None);
} }
tokens.iter().for_each(|t| if !all_token_list.contains(t) { all_token_list.push(t.clone()) } ); tokens.iter().for_each(|t| if !all_token_list.contains(t) { all_token_list.push(t.clone()) } );
let mut tokens2 = match next1 { // keep this for later when we'll return it
let last1 = match tokens.len() {
0 => None,
n => Some(tokens[n-1].clone())
};
let last2 = match tokens.len() {
0 => None,
1 => None,
n => Some(tokens[n-2].clone())
};
let mut tokens2_ = match prev1 {
None => tokens, None => tokens,
Some(x) => { tokens.push(x); tokens } Some(x) => { let mut t = vec![x]; t.append(&mut tokens); t}
};
let mut tokens2 = match next1 {
None => tokens2_,
Some(x) => { tokens2_.push(x); tokens2_ }
}; };
for doubles in tokens2.windows(2) { for doubles in tokens2.windows(2) {
...@@ -136,9 +151,13 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, ...@@ -136,9 +151,13 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>,
} }
} }
let tokens3 = match next2 { let mut tokens3_ = match prev2 {
None => tokens2, None => tokens2,
Some(x) => { tokens2.push(x); tokens2 } Some(x) => { let mut t = vec![x]; t.append(&mut tokens2); t}
};
let tokens3 = match next2 {
None => tokens3_,
Some(x) => { tokens3_.push(x); tokens3_ }
}; };
for triples in tokens3.windows(3) { for triples in tokens3.windows(3) {
let triple_tmp = format!("{}^{}^{}", triples[0], triples[1], triples[2]); let triple_tmp = format!("{}^{}^{}", triples[0], triples[1], triples[2]);
...@@ -148,6 +167,7 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>, ...@@ -148,6 +167,7 @@ fn process_dictionary_builder_line(line: String, lookahead_line: Option<String>,
trpl.insert(triple_tmp, 1); trpl.insert(triple_tmp, 1);
} }
} }
return (last1, last2);
} }
fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (HashMap<String, i32>, HashMap<String, i32>, Vec<String>) { fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (HashMap<String, i32>, HashMap<String, i32>, Vec<String>) {
...@@ -156,6 +176,8 @@ fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (H ...@@ -156,6 +176,8 @@ fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (H
let mut all_token_list = vec![]; let mut all_token_list = vec![];
let regex = regex_generator(format); let regex = regex_generator(format);
let mut prev1 = None; let mut prev2 = None;
if let Ok(lines) = read_lines(raw_fn) { if let Ok(lines) = read_lines(raw_fn) {
let mut lp = lines.peekable(); let mut lp = lines.peekable();
loop { loop {
...@@ -164,9 +186,9 @@ fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (H ...@@ -164,9 +186,9 @@ fn dictionary_builder(raw_fn: String, format: String, regexps: Vec<Regex>) -> (H
Some(Ok(ip)) => Some(Ok(ip)) =>
match lp.peek() { match lp.peek() {
None => None =>
process_dictionary_builder_line(ip, None, &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list), (prev1, prev2) = process_dictionary_builder_line(ip, None, &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2),
Some(Ok(next_line)) => Some(Ok(next_line)) =>
process_dictionary_builder_line(ip, Some(next_line.clone()), &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list), (prev1, prev2) = process_dictionary_builder_line(ip, Some(next_line.clone()), &regex, &regexps, &mut dbl, &mut trpl, &mut all_token_list, prev1, prev2),
Some(Err(_)) => {} // meh, some weirdly-encoded line, throw it out Some(Err(_)) => {} // meh, some weirdly-encoded line, throw it out
} }
Some(Err(_)) => {} // meh, some weirdly-encoded line, throw it out Some(Err(_)) => {} // meh, some weirdly-encoded line, throw it out
...@@ -183,7 +205,8 @@ fn test_dictionary_builder_process_line_lookahead_is_none() { ...@@ -183,7 +205,8 @@ fn test_dictionary_builder_process_line_lookahead_is_none() {
let mut dbl = HashMap::new(); let mut dbl = HashMap::new();
let mut trpl = HashMap::new(); let mut trpl = HashMap::new();
let mut all_token_list = vec![]; let mut all_token_list = vec![];
process_dictionary_builder_line(line, None, &re, &linux_censored_regexps(), &mut dbl, &mut trpl, &mut all_token_list); let (last1, last2) = process_dictionary_builder_line(line, None, &re, &linux_censored_regexps(), &mut dbl, &mut trpl, &mut all_token_list, None, None);
assert_eq!((last1, last2), (Some("unknown".to_string()), Some("user".to_string())));
let mut dbl_oracle = HashMap::new(); let mut dbl_oracle = HashMap::new();
dbl_oracle.insert("user^unknown".to_string(), 1); dbl_oracle.insert("user^unknown".to_string(), 1);
...@@ -205,10 +228,12 @@ fn test_dictionary_builder_process_line_lookahead_is_some() { ...@@ -205,10 +228,12 @@ fn test_dictionary_builder_process_line_lookahead_is_some() {
let mut dbl = HashMap::new(); let mut dbl = HashMap::new();
let mut trpl = HashMap::new(); let mut trpl = HashMap::new();
let mut all_token_list = vec![]; let mut all_token_list = vec![];
process_dictionary_builder_line(line, Some(next_line), &re, &linux_censored_regexps(), &mut dbl, &mut trpl, &mut all_token_list); let (last1, last2) = process_dictionary_builder_line(line, Some(next_line), &re, &linux_censored_regexps(), &mut dbl, &mut trpl, &mut all_token_list, Some("foo".to_string()), Some("bar".to_string()));
assert_eq!((last1, last2), (Some("unknown".to_string()), Some("user".to_string())));
let mut dbl_oracle = HashMap::new(); let mut dbl_oracle = HashMap::new();
dbl_oracle.insert("unknown^baz".to_string(), 1); dbl_oracle.insert("unknown^baz".to_string(), 1);
dbl_oracle.insert("foo^check".to_string(), 1);
dbl_oracle.insert("user^unknown".to_string(), 1); dbl_oracle.insert("user^unknown".to_string(), 1);
dbl_oracle.insert("pass;^user".to_string(), 1); dbl_oracle.insert("pass;^user".to_string(), 1);
dbl_oracle.insert("check^pass;".to_string(), 1); dbl_oracle.insert("check^pass;".to_string(), 1);
...@@ -218,6 +243,8 @@ fn test_dictionary_builder_process_line_lookahead_is_some() { ...@@ -218,6 +243,8 @@ fn test_dictionary_builder_process_line_lookahead_is_some() {
trpl_oracle.insert("pass;^user^unknown".to_string(), 1); trpl_oracle.insert("pass;^user^unknown".to_string(), 1);
trpl_oracle.insert("check^pass;^user".to_string(), 1); trpl_oracle.insert("check^pass;^user".to_string(), 1);
trpl_oracle.insert("unknown^baz^bad".to_string(), 1); trpl_oracle.insert("unknown^baz^bad".to_string(), 1);
trpl_oracle.insert("foo^check^pass;".to_string(), 1);
trpl_oracle.insert("bar^foo^check".to_string(), 1);
trpl_oracle.insert("user^unknown^baz".to_string(), 1); trpl_oracle.insert("user^unknown^baz".to_string(), 1);
assert_eq!(trpl, trpl_oracle); assert_eq!(trpl, trpl_oracle);
} }
...@@ -249,21 +276,21 @@ fn test_parse_raw_linux() { ...@@ -249,21 +276,21 @@ fn test_parse_raw_linux() {
double_dict_oracle.insert("rdd_42_22^locally".to_string(), 1); double_dict_oracle.insert("rdd_42_22^locally".to_string(), 1);
double_dict_oracle.insert("rdd_42_23^locally".to_string(), 1); double_dict_oracle.insert("rdd_42_23^locally".to_string(), 1);
double_dict_oracle.insert("rdd_42_24^locally".to_string(), 1); double_dict_oracle.insert("rdd_42_24^locally".to_string(), 1);
double_dict_oracle.insert("locally^Found".to_string(), 3); double_dict_oracle.insert("locally^Found".to_string(), 6);
assert_eq!(double_dict, double_dict_oracle); assert_eq!(double_dict, double_dict_oracle);
let mut triple_dict_oracle = HashMap::new(); let mut triple_dict_oracle = HashMap::new();
triple_dict_oracle.insert("block^rdd_42_20^locally".to_string(), 1); triple_dict_oracle.insert("block^rdd_42_20^locally".to_string(), 1);
triple_dict_oracle.insert("block^rdd_42_22^locally".to_string(), 1); triple_dict_oracle.insert("block^rdd_42_22^locally".to_string(), 1);
triple_dict_oracle.insert("block^rdd_42_23^locally".to_string(), 1); triple_dict_oracle.insert("block^rdd_42_23^locally".to_string(), 1);
triple_dict_oracle.insert("block^rdd_42_24^locally".to_string(), 1); triple_dict_oracle.insert("block^rdd_42_24^locally".to_string(), 1);
triple_dict_oracle.insert("rdd_42_20^locally^Found".to_string(), 1); triple_dict_oracle.insert("rdd_42_20^locally^Found".to_string(), 2);
triple_dict_oracle.insert("rdd_42_22^locally^Found".to_string(), 1); triple_dict_oracle.insert("rdd_42_22^locally^Found".to_string(), 2);
triple_dict_oracle.insert("rdd_42_23^locally^Found".to_string(), 1); triple_dict_oracle.insert("rdd_42_23^locally^Found".to_string(), 2);
triple_dict_oracle.insert("Found^block^rdd_42_20".to_string(), 1); triple_dict_oracle.insert("Found^block^rdd_42_20".to_string(), 1);
triple_dict_oracle.insert("Found^block^rdd_42_22".to_string(), 1); triple_dict_oracle.insert("Found^block^rdd_42_22".to_string(), 1);
triple_dict_oracle.insert("Found^block^rdd_42_23".to_string(), 1); triple_dict_oracle.insert("Found^block^rdd_42_23".to_string(), 1);
triple_dict_oracle.insert("Found^block^rdd_42_24".to_string(), 1); triple_dict_oracle.insert("Found^block^rdd_42_24".to_string(), 1);
triple_dict_oracle.insert("locally^Found^block".to_string(), 3); triple_dict_oracle.insert("locally^Found^block".to_string(), 6);
assert_eq!(triple_dict, triple_dict_oracle); assert_eq!(triple_dict, triple_dict_oracle);
} }
......
Loading…
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment