Skip to content
Snippets Groups Projects
Commit 2199b841 authored by Patrick Lam
Browse files

parameter for cutoff

parent d8a47409
No related branches found
No related tags found
No related merge requests found
......@@ -7,7 +7,7 @@ You can run cargo test to run the test cases.
Here's how you can invoke the program itself.
```
$ cargo run --release -- --raw-spark data/from_paper.log --to-parse "17/06/09 20:11:11 INFO storage.BlockManager: Found block rdd_42_20 locally" --before "split: hdfs://hostname/2kSOSP.log:29168+7292" --after "Found block"
$ cargo run --release -- --raw-linux data/Linux_2k.log --to-parse "Jun 23 23:30:05 combo sshd(pam_unix)[26190]: authentication failure; logname= uid=0 euid=0 tty=NODEVssh ruser= rhost=218.22.3.51 user=root" --before "rhost=<*> user=root" --after "session opened"
$ cargo run --release -- --raw-spark data/from_paper.log --to-parse "17/06/09 20:11:11 INFO storage.BlockManager: Found block rdd_42_20 locally" --before "split: hdfs://hostname/2kSOSP.log:29168+7292" --after "Found block" --cutoff 3
$ cargo run --release -- --raw-linux data/Linux_2k.log --to-parse "Jun 23 23:30:05 combo sshd(pam_unix)[26190]: authentication failure; logname= uid=0 euid=0 tty=NODEVssh ruser= rhost=218.22.3.51 user=root" --before "rhost=<*> user=root" --after "session opened" --cutoff 100
$ cargo run --release -- --raw-openstack data/openstack_normal2.log --to-parse "nova-compute.log.2017-05-17_12:02:35 2017-05-17 12:02:30.397 2931 INFO nova.virt.libvirt.imagecache [req-addc1839-2ed5-4778-b57e-5854eb7b8b09 - - - - -] image 0673dd71-34c5-4fbb-86c4-40623fbe45b4 at (/var/lib/nova/instances/_base/a489c868f0c37da93b76227c91bb03908ac0e742): in use: on this node 1 local, 0 on other nodes sharing this instance storage"
```
......@@ -28,6 +28,9 @@ struct Args {
#[arg(long)]
after: Option<String>,
#[arg(long)]
cutoff: Option<i32>,
}
#[test]
......@@ -70,6 +73,10 @@ fn main() {
log_format = Some(Spark);
input_fn = Some(raw_spark);
}
let cutoff = match args.cutoff {
None => 3,
Some(c) => c,
};
let (double_dict, triple_dict, all_token_list) = match log_format {
Some(Linux) => packages::parser::parse_raw_linux(input_fn.unwrap()),
......@@ -126,16 +133,15 @@ fn main() {
extended_sample_string_tokens.append(&mut afters);
println!("{:?}", extended_sample_string_tokens);
// collect 3-grams from extended_sample_string that occur less often than CUTOFF in the corpus
const CUTOFF : i32 = 3;
// collect 3-grams from extended_sample_string that occur less often than cutoff in the corpus
let mut uncommon_3grams = vec![];
for triple in extended_sample_string_tokens.windows(3) {
let three_gram = format!("{}^{}^{}", triple[0], triple[1], triple[2]);
if triple_dict.get(&three_gram).unwrap() < &CUTOFF {
if triple_dict.get(&three_gram).unwrap() < &cutoff {
// println!("3-gram {}, count {}", three_gram, &triple_dict.get(&three_gram).unwrap());
uncommon_3grams.push(three_gram);
}
// println!("3-gram {}, count {}", three_gram, &triple_dict.get(&three_gram).unwrap());
}
let mut deduped_2grams_from_uncommon_3grams : HashSet<String> = HashSet::new();
......@@ -147,7 +153,8 @@ fn main() {
let mut uncommon_2grams : Vec<String> = vec![];
for two_g in deduped_2grams_from_uncommon_3grams {
let two_g_count = double_dict.get(&two_g).unwrap();
if two_g_count < &CUTOFF {
println!("2-gram {}, count {}", two_g, two_g_count);
if two_g_count < &cutoff {
uncommon_2grams.push(two_g);
// println!("2-gram {}, count {}", two_g, two_g_count);
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment