import sys
import time
import csv

import path_constant as pc
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, pyll


class hyper_parameter(object):
    """Search for a k-way split of an ordered layer list using hyperopt.

    A candidate ("gene") is a collection of ``k - 1`` cut indices into the
    layer list.  Each candidate is scored by greedily handing out resource
    units to whichever partition is currently slowest and taking the largest
    per-partition latency as the loss.

    ``data_src`` maps ``layer name -> {resource units -> target value}`` and
    is filled from the data-source CSV.
    """

    def __init__(self,
                 # data path
                 path_to_datasrc="googlenet_mem_bound.csv",
                 path_to_topology="googlenet.csv",
                 target_col="DRAM_cycle",

                 # problem definition
                 number_of_partition=4,
                 max_iteration=100,
                 algo=tpe.suggest,

                 # constraint
                 max_res_available=960,
                 initial_res=0,
                 res_step=1,
                 penalty_offest=10000000000
                 ):
        """Load the topology and data files and build the search space.

        Args:
            path_to_datasrc: CSV file name, appended to ``pc.DATA_SOURCE_PATH``.
            path_to_topology: CSV file name, appended to ``pc.TOPOLOGIES_PATH``.
            target_col: Header text used to pick the data column to optimise.
            number_of_partition: Number of partitions ``k``.
            max_iteration: Evaluation budget handed to ``fmin``.
            algo: hyperopt suggestion algorithm.
            max_res_available: Total resource units to distribute.
            initial_res: Accepted for interface compatibility; not used.
            res_step: Resource units granted per allocation step.
            penalty_offest: Loss charged per duplicated cut index.
        """
        self.topology_file = path_to_topology
        self.k = number_of_partition
        self.max_iter = max_iteration
        self.best_candidate = [0] * number_of_partition
        self.search_algo = algo
        self.max_res_unit = max_res_available
        self.res_step = res_step
        self.target_col = target_col

        # data_src must exist before the topology is parsed: the topology
        # parser registers one (initially empty) entry per layer.
        self.data_src = {}
        self.layers = self.parse_topology_file()
        self.parse_data_set_file(path_to_datasrc)

        self.penalty_offest = penalty_offest
        self.ending_iter = 0
        self.search_space = self.generate_space()
        self.start = time.time()

    def parse_topology_file(self):
        """Return the layer names (first CSV column) in file order.

        The first line is treated as a header and skipped.  Every layer is
        also registered in ``self.data_src`` with an empty table.
        """
        layers = []
        with open(pc.TOPOLOGIES_PATH + self.topology_file, 'r') as f:
            next(f, None)  # header; tolerate an empty file
            for line in f:
                line = line.strip()
                if not line:
                    # A blank line would otherwise register a bogus '' layer.
                    continue
                layers.append(line.split(',')[0])

        for layer in layers:
            self.data_src[layer] = {}
        return layers

    def generate_space(self):
        """Build the hyperopt space: ``k - 1`` integer cut indices.

        Each index is drawn with ``hp.randint`` using ``len(layers) - 1`` as
        the upper argument.
        """
        upper = len(self.layers) - 1
        return {i: hp.randint(str(i), upper) for i in range(self.k - 1)}

    def parse_data_set_file(self, path_to_data_csv):
        """Fill ``self.data_src`` from the data-source CSV.

        The header row selects the target column: the first column whose name
        contains ``self.target_col`` (column 2 when none matches).  In each
        data row, column 0 is the integer resource amount and column 1 the
        layer name.

        Raises:
            KeyError: a row names a layer that is not in the topology file.
        """
        target_idx = 2
        with open(pc.DATA_SOURCE_PATH + path_to_data_csv, 'r') as f:
            header = next(f, None)
            if header is None:
                return
            for idx, col in enumerate(header.strip().split(',')):
                if self.target_col in col:
                    target_idx = idx
                    break

            for line in f:
                line = line.strip()
                if not line:
                    continue  # skip blank lines instead of raising IndexError
                elems = line.split(',')
                self.data_src[elems[1]][int(elems[0])] = \
                    int(float(elems[target_idx]))

    def decode_gene(self, gene):
        """Turn cut indices into a list of contiguous layer partitions.

        A cut index ``c`` closes the current partition after layer ``c``.
        Once every cut has been consumed, the remaining layers form the last
        partition.
        """
        cuts = sorted(gene)
        partitions = []
        current = []
        next_cut = 0
        for idx, layer in enumerate(self.layers):
            if next_cut == len(cuts):
                partitions.append(self.layers[idx:])
                break
            current.append(layer)
            if idx == cuts[next_cut]:
                partitions.append(current)
                current = []
                next_cut += 1
        return partitions

    def filter_hybird(self, gene):
        """Return True when the gene contains no duplicated cut index."""
        return len(gene) == len(set(gene))

    def penalty_hybird(self, gene):
        """Return the penalty loss: ``penalty_offest`` per duplicated index."""
        return self.penalty_offest * (len(gene) - len(set(gene)))

    def find_max_latency(self, layer_partition, res_partitions):
        """Sum the per-layer values of every partition and find the slowest.

        Args:
            layer_partition: List of partitions, each a list of layer names.
            res_partitions: Resource amount per partition (same indexing).

        Returns:
            ``(latencies, max_latency_idx)``; on ties the lowest index wins.
        """
        latencies = [0] * len(layer_partition)
        max_latency_idx = 0
        for idx, part in enumerate(layer_partition):
            res = res_partitions[idx]
            latencies[idx] = sum(self.data_src[layer][res] for layer in part)
            if latencies[idx] > latencies[max_latency_idx]:
                max_latency_idx = idx
        return latencies, max_latency_idx

    def evaluate_hybird(self, gene):
        """Greedily distribute the resource budget and score the gene.

        Every partition starts with ``res_step`` units; each remaining step
        is granted to the partition that is currently slowest.

        Returns:
            ``(max_latency, latencies, res, layer, gene)`` where ``latencies``
            are measured for the returned ``res`` allocation.
        """
        layer = self.decode_gene(gene)
        res = [self.res_step] * self.k

        # Steps left after the initial grant.  The budget is converted to
        # steps only after subtracting what was already handed out, so the
        # count is also right when res_step != 1.
        spare_steps = max(
            0, int((self.max_res_unit - self.k * self.res_step)
                   // self.res_step))

        # Evaluate before the loop so the result is defined even when there
        # are no spare steps, and re-evaluate after every grant so the
        # returned latencies always match the returned allocation.
        latencies, max_idx = self.find_max_latency(layer, res)
        for _ in range(spare_steps):
            res[max_idx] += self.res_step
            latencies, max_idx = self.find_max_latency(layer, res)

        return latencies[max_idx], latencies, res, layer, gene

    def obj_wrapper(self, sample):
        """hyperopt objective: score one sampled set of cut indices.

        Returns a result dict with ``loss``, ``status``, ``eval_time`` and
        ``valid``; valid samples additionally carry ``layer_assignment``.
        """
        gene = sample.values()
        valid = self.filter_hybird(gene)
        result = {}
        if valid:
            loss, _, _, result['layer_assignment'], _ = \
                self.evaluate_hybird(gene)
        else:
            loss = self.penalty_hybird(gene)

        result['loss'] = loss
        result['status'] = STATUS_OK
        result['eval_time'] = time.time()
        result['valid'] = valid
        return result

    def run(self):
        """Run the hyperopt search and keep the trials and best candidate."""
        self.trials = Trials()
        self.best_candidate = fmin(self.obj_wrapper,
                                   self.search_space,
                                   self.search_algo,
                                   self.max_iter,
                                   trials=self.trials)
        return True

    def report(self):
        """Append one CSV row per trial, then print the best result.

        Must be called after ``run()``.  Rows go to ``hyperopt.csv`` under
        ``pc.RESULT_CSV_PATH`` and track the best loss seen so far.
        """
        layer = []
        res = []

        # Reference point: every layer in a single partition that owns the
        # whole resource budget.
        full_latency, full_max_idx = self.find_max_latency(
            [self.layers], [self.max_res_unit])

        feasable = 0
        # NOTE(review): losses at or above this sentinel are never recorded
        # as the running best; confirm it exceeds any realistic latency.
        max_latency = 100000000

        # The context manager closes the file; no explicit close is needed.
        with open(pc.RESULT_CSV_PATH + 'hyperopt.csv', 'a') as csvFile:
            writer = csv.writer(csvFile, delimiter=',', lineterminator="\n")
            for trial in self.trials.trials:
                outcome = trial.get("result")
                self.i = trial.get("tid")
                self.end = outcome.get("eval_time")

                loss = int(outcome.get("loss"))
                if loss < max_latency:
                    max_latency = loss
                    layer = outcome.get("layer_assignment")

                # Latches to 1 at the first valid trial.
                if feasable == 0:
                    feasable = int(outcome.get("valid"))

                writer.writerow([
                    self.target_col, self.i, self.k, self.topology_file,
                    feasable, (1/max_latency), max_latency*self.k,
                    1/full_latency[full_max_idx],
                    full_latency[full_max_idx],
                    (1/max_latency)/(1/full_latency[full_max_idx]),
                    (max_latency*self.k)/full_latency[full_max_idx],
                    layer, res, self.end-self.start,
                    0, 0, 100, 1, 1, self.max_res_unit, "hyper"])

        if not self.filter_hybird(self.best_candidate.values()):
            print("RESULT NOT VALID")
            print("Layer:", self.best_candidate.values())
            print("Sum: ", sum(self.best_candidate.values()))
            return

        max_latency, latencies, res, layer, _ = \
            self.evaluate_hybird(self.best_candidate.values())

        print("================================= RESULT =================================")
        print("Layer assignment:")
        print(layer)
        print("Res mapping:")
        print(res)
        print("Latency for each partition: ")
        print(latencies)
        print("Final Latency:", max_latency*self.k, "|| Throught put:", 1/max_latency)
        print("==========================================================================")
        print("Map to full array (", self.max_res_unit, ")")
        print("Final Latency:", full_latency[full_max_idx], "|| Throught put:", 1/full_latency[full_max_idx])
        print("==========================================================================")
        print("Throughtput Ratio:", (1/max_latency)/(1/full_latency[full_max_idx]))
        print("Latency increase:", (max_latency*self.k)/full_latency[full_max_idx])


def main(argv=None):
    """Command-line entry point.

    Expected arguments: topology name, number of partitions, resource budget,
    target column, iteration budget.
    """
    argv = sys.argv if argv is None else argv

    print("Hybird")
    # python3 hyper_parameter_ga.py googlenet 20 960 DRAM_cycle 1000
    if len(argv) < 6:
        sys.exit("usage: %s <topology> <partitions> <max_res_unit> "
                 "<target_col> <max_iteration>" % argv[0])

    topology = argv[1]
    k = int(argv[2])
    max_res_unit = int(argv[3])
    target_col = argv[4]
    max_iteration = int(argv[5])

    hyper_hybird = hyper_parameter(
        path_to_datasrc=str(topology)+"_mem_bound.csv",
        path_to_topology=str(topology)+".csv",
        target_col=str(target_col),

        number_of_partition=k,
        max_iteration=max_iteration,

        max_res_available=max_res_unit,
        initial_res=0,
        res_step=1,
        penalty_offest=10000000000
    )

    hyper_hybird.run()
    hyper_hybird.report()


if __name__ == "__main__":
    main()