import cma
from multiprocessing import Pool
from os import cpu_count, makedirs
from copy import deepcopy
import csv
import time

import path_constant as pc
import packing_penalty as pp


class cma_approach(object):
    def __init__(self,
                 # data paths
                 path_to_datasrc="alexnet_data.csv",
                 path_to_topology="alexnet.csv",
                 target_col="Cycles",
                 # problem definition
                 number_of_partition=4,
                 max_iteration=100,
                 sigma=0.5,
                 population_size=10,
                 # constraints
                 max_res_unit=960,
                 initial_res=0,
                 res_step=1,
                 penalty_offset=10000000000,
                 seeding_type="optimised",
                 hybrid=True,
                 print_to_csv=True,
                 max_pack_size=129):
        self.target_col = target_col
        self.start = time.time()
        self.k = number_of_partition
        self.max_iter = max_iteration
        self.sigma = sigma
        self.max_res_unit = max_res_unit
        self.res_step = res_step
        self.population_size = population_size
        self.penalty_offset = penalty_offset
        self.ending_iter = 0
        self.is_hybrid = hybrid

        self.data_src = {}
        self.topology_file = path_to_topology
        self.layers = self.parse_topology_file()
        self.parse_data_set_file(path_to_datasrc)

        self.best_layer = number_of_partition * [0]
        self.best_res = number_of_partition * [0]
        self.total_valid_solution = 0
        self.trial = 1
        self.seeding_type = seeding_type
        self.max_res_available = max_res_unit
        self.print_to_csv = print_to_csv
        self.max_pack_size = max_pack_size

    def parse_topology_file(self):
        layers = []
        with open(pc.TOPOLOGIES_PATH + self.topology_file, 'r') as f:
            next(f)  # skip the header row
            for line in f:
                elems = line.strip().split(',')
                layers.append(elems[0])
        for layer in layers:
            self.data_src[layer] = {}
        return layers

    def parse_data_set_file(self, path_to_data_csv):
        first = True
        target_idx = 2
        with open(pc.DATA_SOURCE_PATH + path_to_data_csv, 'r') as f:
            for line in f:
                elems = line.strip().split(',')
                if first:
                    # Find the column whose header contains target_col.
                    for idx, col in enumerate(elems):
                        if self.target_col in col:
                            target_idx = idx
                            break
                    first = False
                else:
                    # data_src[layer name][resource size] = metric value
                    self.data_src[elems[1]][int(elems[0])] = \
                        int(float(elems[target_idx]))

    def regroup_layers(self, sample):
        # Expand partition sizes into explicit lists of layer names.
        detail_sample = []
        idx = 0
        for size in sample:
            part = []
            if size == 1:
                part.append(self.layers[idx])
                idx += 1
            else:
                for i in range(0, size):
                    part.append(self.layers[i + idx])
                idx += size
            detail_sample.append(part)
        return detail_sample

    def decode(self, val, max_val):
        return int(val * max_val)

    def encode(self, val, max_val):
        return float(val / max_val)

    def filter_layer(self, layer):
        for idx in range(self.k):
            if layer[idx] <= 0:
                return False
        if sum(layer) != len(self.layers):
            return False
        return True

    def filter_res(self, res):
        for idx in range(self.k):
            if res[idx] <= 0:
                return False
        if sum(res) != self.max_res_unit:
            return False
        return True

    def penalty_layer(self, layer):
        penalty_score = self.penalty_offset
        if sum(layer) != len(self.layers):
            penalty_score += self.penalty_offset
        else:
            layer = [abs(val) for val in layer]
        for idx in range(self.k):
            if layer[idx] <= 0:
                penalty_score *= 1.05
        percent_diff = abs(sum(layer) - len(self.layers)) / len(self.layers)
        penalty_score += percent_diff * self.penalty_offset
        return penalty_score

    def find_max_latency(self, layer_partition, res_partitions):
        latencies = [0] * len(layer_partition)
        max_latency_idx = 0
        for idx, part in enumerate(layer_partition):
            res = res_partitions[idx]
            for layer in part:
                latencies[idx] += self.data_src[layer][res]
            if latencies[idx] > latencies[max_latency_idx]:
                max_latency_idx = idx
        return latencies, max_latency_idx
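    # Illustrative sketch of the genome encoding (hypothetical values, not
    # from any real run): with an 8-layer topology and k = 3, CMA-ES
    # optimizes k - 1 genes in [0, 1]; decode() scales each gene by the
    # layer count, the last partition absorbs the remainder, and
    # regroup_layers() maps the counts onto consecutive layer names:
    #
    #   sample = [0.25, 0.375]                       # raw CMA-ES genome
    #   layer = [decode(0.25, 8), decode(0.375, 8)]  # -> [2, 3]
    #   layer.append(8 - sum(layer))                 # -> [2, 3, 3]
    #   regroup_layers([2, 3, 3])
    #   # -> [[L0, L1], [L2, L3, L4], [L5, L6, L7]] for layer names L0..L7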
    def eva_hybrid_sq(self, layer):
        """Decide partition resource sizes and evaluate the solution.

        Should always return a packable solution.
        """
        # res holds one square edge length per partition; res_step is the
        # minimum amount by which an edge grows.
        res = [self.res_step] * self.k
        latencies = []
        # max_res_unit = 1920*9*1 from sq_approach_faster
        variable_max_res_unit = self.max_res_unit

        # Binary-search for the largest packable variable_max_res_unit.
        search_upper_bound = self.max_res_unit
        search_lower_bound = sum([r * r for r in res])
        last_packable_res = []
        last_packable_max_res_unit = 0
        while search_upper_bound > search_lower_bound:
            variable_max_res_unit = \
                int((search_upper_bound + search_lower_bound) / 2)
            # Greedily grow the bottleneck partition until the area budget
            # is exhausted. Note that res carries over from the previous
            # probe rather than being reset.
            limit_reached = False
            while not limit_reached:
                latencies, max_idx = self.find_max_latency(layer, res)
                res[max_idx] += self.res_step
                # If this addition puts the solution over the limit, revert
                # the last partition addition. TODO write some code to see
                # if we can assign the remaining units.
                if sum([r ** 2 for r in res]) > variable_max_res_unit:
                    res[max_idx] -= self.res_step
                    limit_reached = True
            if pp.isPackable(res, self.max_pack_size):
                last_packable_res = deepcopy(res)
                last_packable_max_res_unit = variable_max_res_unit
                # The desired max_res_unit value is at least the current
                # value; the +1 keeps the bounds shrinking so the search
                # terminates.
                search_lower_bound = variable_max_res_unit + 1
            else:
                # The desired max_res_unit value is less than the current
                # value.
                search_upper_bound = variable_max_res_unit

        # Calculate latencies of the final solution. Assumes at least one
        # probe produced a packable layout (see docstring).
        latencies, max_idx = self.find_max_latency(layer, last_packable_res)
        # Penalize based on how much variable_max_res_unit had to decrease
        # before the squares packed.
        max_res_unit_decrease = self.max_res_unit - last_packable_max_res_unit
        packing_penalty = pp.calculatePackingPenalty(max_res_unit_decrease)
        return latencies[max_idx] + packing_penalty, latencies, \
            last_packable_res, layer

    def evaluation_top_level(self, in_val):
        pid, sampling = in_val
        layer = [self.decode(val, len(self.layers)) for val in sampling]
        layer.append(len(self.layers) - sum(layer))

        if not self.filter_layer(layer):
            penalty = self.penalty_layer(layer)
            if self.is_hybrid:
                return pid, penalty
            else:
                return pid, penalty * 4

        # regroup_layers assigns layers to the partitions: it returns a
        # list of partition lists which contain layer names.
        layer = self.regroup_layers(layer)
        return pid, self.eva_hybrid_sq(layer)[0]
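    # Sketch of one eva_hybrid_sq probe (hypothetical numbers; which
    # partition grows depends on the latencies in data_src): with
    # res_step = 3, k = 2, and a probe budget variable_max_res_unit = 50,
    # the greedy loop repeatedly grows the slowest partition's square:
    #
    #   res = [3, 3]   # sum of squares = 18 <= 50
    #   res = [6, 3]   # 45 <= 50, say partition 0 was the bottleneck
    #   res = [6, 6]   # 72 > 50 -> revert to [6, 3], limit reached
    #
    # If pp.isPackable([6, 3], max_pack_size) holds, the binary search
    # raises its lower bound and probes a larger budget; otherwise it
    # lowers the upper bound.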
    def run(self):
        self.trial += 1  # count this run; trial is logged to the csv
        if self.seeding_type == "allzeros":
            self.seed = [0] * (self.k - 1)
            self.seed_od = self.seed
        elif self.seeding_type == "optimised":
            # Spread the layers as evenly as possible across k partitions.
            self.seed = []
            for i in range(self.k - 1):
                self.seed.append(int(len(self.layers) / self.k))
            self.seed.append(len(self.layers) - sum(self.seed))
            self.seed_od = self.seed
            self.seed = [self.encode(val, len(self.layers))
                         for val in self.seed[:-1]]
        else:
            raise ValueError('Invalid seeding strategy')

        self.es = cma.CMAEvolutionStrategy(
            self.seed, self.sigma, {'popsize': self.population_size})
        best_overall = self.penalty_offset
        self.i = 0
        temp_out = []
        while not self.es.stop() and self.i < self.max_iter:
            samples = self.es.ask()
            id_list = [(idx, sample) for idx, sample in enumerate(samples)]
            scores = [0] * self.es.popsize
            invalid_sampling = 0
            res_combinations = [0] * self.es.popsize

            # pool = Pool(processes=cpu_count() - 4)
            # for result in pool.imap_unordered(self.evaluation_top_level,
            #                                   id_list):
            #     scores[result[0]] = result[1]
            #     if result[1] >= self.penalty_offset:
            #         invalid_sampling += 1
            #     elif not self.is_hybrid:
            #         res_combinations[result[0]] = result[2]
            # pool.close()
            # pool.join()

            for tup in id_list:
                _, scores[tup[0]] = self.evaluation_top_level(tup)
                if scores[tup[0]] >= self.penalty_offset:
                    invalid_sampling += 1

            if not self.is_hybrid:
                best_in_iteration = min(scores)
                if best_in_iteration < best_overall \
                        and best_in_iteration < self.penalty_offset:
                    best_overall = best_in_iteration
                    self.best_res = \
                        res_combinations[scores.index(min(scores))]

            # print(str(self.i) + ":", "Sigma:", round(self.es.sigma, 4),
            #       "|| Valid sampling percentage:",
            #       (self.population_size - invalid_sampling)
            #       / self.population_size * 100)
            self.valid_sampling_percentage = \
                (self.population_size - invalid_sampling) \
                / self.population_size * 100
            self.total_valid_solution += \
                self.population_size - invalid_sampling
            self.samples = samples
            self.scores = scores
            self.es.tell(samples, scores)

            self.end = time.time()
            self.best_layer = [self.decode(val, len(self.layers))
                               for val in self.es.result[0]]
            self.best_layer.append(len(self.layers) - sum(self.best_layer))
            temp_out.append(self.report(False)[1])
            self.i += 1

        self.ending_iter = self.i
        return temp_out

    def report(self, output_png):
        max_latency = 0
        layer = []
        res = []
        latencies = []
        if not self.filter_layer(self.best_layer):
            # The best sample found so far is not a valid partitioning;
            # log a zeroed row so the trial is still recorded.
            result = [self.target_col, self.i, self.k, self.topology_file,
                      0, 0, 0, 0, 0, 0, 0, layer, res,
                      self.end - self.start, self.es.sigma, self.seed_od,
                      self.valid_sampling_percentage, self.trial,
                      self.population_size, self.max_res_unit,
                      self.seeding_type]
            if self.print_to_csv:
                with open(pc.RESULT_CSV_PATH + 'cma_logmore_sq.csv',
                          'a') as csvFile:
                    writer = csv.writer(csvFile, delimiter=',',
                                        lineterminator="\n")
                    writer.writerow(result)
            return False, result

        layer = self.regroup_layers(self.best_layer)
        max_latency, latencies, res, _ = self.eva_hybrid_sq(layer)
        # Generate data for mapping onto the full array (129 * 129).
        full_latency, full_max_idx = self.find_max_latency(
            [self.layers], [129] * len(self.layers))
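        # Worked example of the logged metrics (hypothetical cycle counts):
        # if the slowest partition takes max_latency = 1000 cycles with
        # k = 4, the logged end-to-end latency is 4 * 1000 = 4000 cycles
        # and throughput is 1/1000. Against a full-array latency of 2500
        # cycles, the throughput ratio is (1/1000) / (1/2500) = 2.5 and
        # the latency increase is 4000 / 2500 = 1.6.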
        # PLEASE UNCOMMENT THIS PART IF YOU ARE NOT USING THE BASH SCRIPT
        # WE HAVE PROVIDED
        # print("================================= RESULT =================================")
        # print("Solution: (out of", self.total_valid_solution, "solutions)")
        # print(layer)
        # print("Res mapping:")
        # print(res)
        # print("Latency for each partition:")
        # print(latencies)
        # print("Final latency:", max_latency * self.k,
        #       "|| Throughput:", 1 / max_latency)
        # print("==========================================================================")
        # print("Map to full array (", self.max_res_unit, ")")
        # print("Final latency:", full_latency[full_max_idx],
        #       "|| Throughput:", 1 / full_latency[full_max_idx])
        # print("==========================================================================")
        # print("Throughput ratio:",
        #       (1 / max_latency) / (1 / full_latency[full_max_idx]))
        # print("Latency increase:",
        #       (max_latency * self.k) / full_latency[full_max_idx])

        result = [self.target_col, self.i, self.k, self.topology_file, 1,
                  (1 / max_latency), max_latency * self.k,
                  1 / full_latency[full_max_idx],
                  full_latency[full_max_idx],
                  (1 / max_latency) / (1 / full_latency[full_max_idx]),
                  (max_latency * self.k) / full_latency[full_max_idx],
                  layer, res, self.end - self.start, self.es.sigma,
                  self.seed_od, self.valid_sampling_percentage, self.trial,
                  self.population_size, self.max_res_unit,
                  self.seeding_type]
        if self.print_to_csv:
            with open(pc.RESULT_CSV_PATH + 'cma_logmore_sq.csv',
                      'a') as csvFile:
                writer = csv.writer(csvFile, delimiter=',',
                                    lineterminator="\n")
                writer.writerow(result)

        if self.valid_sampling_percentage > 0:
            directory_path = pc.RESULT_SCREENSHOT_PATH + \
                str(self.topology_file.replace(".csv", "")) + "/" + \
                "pack_size_" + str(self.max_pack_size) + "/" + \
                "penalty_constant_" + str(pp.PENALTY_CONSTANT) + "/"
            makedirs(directory_path, exist_ok=True)
            pngFileName = "k=" + str(self.k) \
                + "_max=" + str(self.max_res_unit) \
                + "_latency=" + str(max_latency) + ".png"
            if pp.isPackable(res, self.max_pack_size) and output_png:
                # How much of the bin the packed squares leave unused
                # (informational only).
                bin_area = self.max_pack_size ** 2
                packed_area = sum(rect ** 2 for rect in res)
                percentage_wasted = \
                    100 * (bin_area - packed_area) / bin_area
                pp.printPNG(res, self.max_pack_size,
                            directory_path + pngFileName)

        return True, result


if __name__ == "__main__":
    import sys

    topology = sys.argv[1]
    k = int(sys.argv[2])
    population_size = int(sys.argv[3])
    max_res_unit = int(sys.argv[4])
    seeding_type = sys.argv[5]
    target_col = sys.argv[6]

    es_hybrid = cma_approach(
        path_to_datasrc=str(topology) + "_square_mem_bound.csv",
        path_to_topology=str(topology) + ".csv",
        target_col=str(target_col),
        number_of_partition=k,
        max_iteration=10000,
        sigma=0.5,
        population_size=population_size,
        max_res_unit=max_res_unit,
        initial_res=0,
        res_step=3,
        penalty_offset=100000000000,
        seeding_type=seeding_type,
        hybrid=True,
        print_to_csv=True)

    trials = 1
    es_hybrid.run()
    # report() returns (valid, result); retry until a valid solution is
    # reported or the trial budget runs out.
    while not es_hybrid.report(True)[0] and trials < 20:
        es_hybrid.run()
        trials += 1
        # k += 1
    # print("convergence takes", trials, "trials")
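# Example invocation (the script and data file names here are hypothetical;
# the provided bash script normally supplies these arguments in this order):
#
#   python cma_approach.py alexnet 4 10 960 optimised Cycles
#
# This reads alexnet.csv from pc.TOPOLOGIES_PATH and
# alexnet_square_mem_bound.csv from pc.DATA_SOURCE_PATH, then appends one
# result row per report() call to cma_logmore_sq.csv under
# pc.RESULT_CSV_PATH.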