Skip to content
Snippets Groups Projects
Commit e2b2dda5 authored by Patrick Lam's avatar Patrick Lam
Browse files

add print-dict functionality and a new regexp for dates in linux logs

parent 038bd800
No related branches found
No related tags found
No related merge requests found
......@@ -13,5 +13,7 @@ struct Args {
fn main() {
let args = Args::parse();
packages::parser::parse_raw(args.raw);
let (double_dict, triple_dict, all_token_list) = packages::parser::parse_raw(args.raw);
packages::parser::print_dict(triple_dict);
}
......@@ -3,6 +3,7 @@ use std::io::{self, BufRead};
use std::path::Path;
use regex::Regex;
use std::collections::HashMap;
use std::collections::BTreeSet;
// https://doc.rust-lang.org/rust-by-example/std_misc/file/read_lines.html
// The output is wrapped in a Result to allow matching on errors
......@@ -49,10 +50,12 @@ fn apply_domain_specific_re(log_line: String, domain_specific_re:&Vec<Regex>) ->
#[test]
fn test_apply_domain_specific_re() {
let line = "q2.34.4.5 Jun 14 15:16:02 combo sshd(pam_unix)[19937]: check pass; user unknown".to_string();
let linux_res = vec![Regex::new(r"(\d+\.){3}\d+").unwrap(), Regex::new(r"\d{2}:\d{2}:\d{2}").unwrap()];
let line = "q2.34.4.5 Jun 14 15:16:02 combo sshd(pam_unix)[19937]: check pass; Fri Jun 17 20:55:07 2005 user unknown".to_string();
let linux_res = vec![Regex::new(r"(\d+\.){3}\d+").unwrap(),
Regex::new(r"\w{3} \w{3} \d{2} \d{2}:\d{2}:\d{2} \d{4}").unwrap(),
Regex::new(r"\d{2}:\d{2}:\d{2}").unwrap()];
let censored_line = apply_domain_specific_re(line, &linux_res);
assert_eq!(censored_line, " q<*> Jun 14 <*> combo sshd(pam_unix)[19937]: check pass; user unknown");
assert_eq!(censored_line, " q<*> Jun 14 <*> combo sshd(pam_unix)[19937]: check pass; <*> user unknown");
}
fn token_splitter(log_line: String, re:&Regex, domain_specific_re:&Vec<Regex>) -> Vec<String> {
......@@ -198,7 +201,10 @@ fn test_dictionary_builder_process_line_lookahead_is_some() {
pub fn parse_raw(raw_fn: String) -> (HashMap<String, i32>, HashMap<String, i32>, Vec<String>) {
let linux_format : String = "<Month> <Date> <Time> <Level> <Component>(\\[<PID>\\])?: <Content>".to_string(); // Linux format
let linux_re : Vec<Regex> = vec![Regex::new(r"(\d+\.){3}\d+").unwrap(), Regex::new(r"\d{2}:\d{2}:\d{2}").unwrap()];
let linux_re : Vec<Regex> = vec![Regex::new(r"\w{3} \w{3} (\d{2}| \d{1}) \d{2}:\d{2}:\d{2} \d{4}").unwrap(),
Regex::new(r"(\d+\.){3}\d+").unwrap(),
Regex::new(r"\d{2}:\d{2}:\d{2}").unwrap()
];
let (double_dict, triple_dict, all_token_list) = dictionary_builder(raw_fn, linux_format, linux_re);
println!("double dictionary list len {}, triple {}, all tokens {}", double_dict.len(), triple_dict.len(), all_token_list.len());
return (double_dict, triple_dict, all_token_list);
......@@ -236,3 +242,24 @@ fn test_parse_raw() {
triple_dict_oracle.insert("locally^Found^block".to_string(), 3);
assert_eq!(triple_dict, triple_dict_oracle);
}
/// Prints a count dictionary to stdout, grouped by count.
///
/// `d` maps a key to an `i32` count. The map is inverted into
/// count -> keys and written as:
///
/// * a `printing dict` header line,
/// * one `count: [keys]` line per distinct count, in ascending count order,
/// * a closing `---` line.
///
/// Keys that share a count are sorted, so the output is identical from run
/// to run instead of following `HashMap` iteration order.
///
/// `d` is taken by value and consumed; its keys are moved into the grouped
/// lists rather than cloned.
pub fn print_dict(d: HashMap<String, i32>) {
    let mut keys_by_count: HashMap<i32, Vec<String>> = HashMap::new();
    // The ordered set holds owned counts, so it drives ascending output
    // without borrowing from `d`.
    let mut counts = BTreeSet::new();
    for (key, count) in d {
        // Entry API: a single lookup whether or not the bucket exists yet.
        keys_by_count.entry(count).or_default().push(key);
        counts.insert(count);
    }
    println!("printing dict");
    for count in counts {
        // Every count in `counts` was inserted together with a bucket, so the
        // lookup always succeeds; `if let` just avoids a panic path.
        if let Some(mut keys) = keys_by_count.remove(&count) {
            keys.sort_unstable();
            println!("{}: {:?}", count, keys);
        }
    }
    println!("---");
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment