Commit 668c69a6 authored by Justin Borromeo
Browse files

WIP

parent 5204925b
...@@ -10,24 +10,22 @@ import csv ...@@ -10,24 +10,22 @@ import csv
class cma_approach(object): class cma_approach(object):
def __init__(self, def __init__(self,
# data path path_to_datasrc = "alexnet_data.csv",
path_to_datasrc = "alexnet_data.csv", path_to_topology = "alexnet.csv",
path_to_topology = "alexnet.csv", target_col = "Cycles",
target_col = "Cycles", number_of_partition = 4,
max_iteration = 100,
# problem definition sigma = 0.5,
number_of_partition = 4, max_iteration = 100, population_size = 10,
sigma = 0.5, population_size = 10, max_res_unit = 17280,
initial_res = 0,
# constraint res_step = 1,
max_res_unit = 960, initial_res = 0, penalty_offset = 10000000000,
res_step = 1, seeding_type="optimised",
penalty_offest = 10000000000, hybrid = True,
seeding_type="optimised", print_to_csv = True,
hybrid = True, max_pack_size = 129,
print_to_csv = True, packing_strategy = "no_penalty"):
max_pack_size = 129
):
self.target_col = target_col self.target_col = target_col
self.start = time.time() self.start = time.time()
self.k = number_of_partition self.k = number_of_partition
...@@ -36,26 +34,22 @@ class cma_approach(object): ...@@ -36,26 +34,22 @@ class cma_approach(object):
self.max_res_unit = max_res_unit self.max_res_unit = max_res_unit
self.res_step = res_step self.res_step = res_step
self.population_size = population_size self.population_size = population_size
self.penalty_offest = penalty_offest self.penalty_offset = penalty_offset
self.ending_iter = 0 self.ending_iter = 0
self.is_hybrid = hybrid self.is_hybrid = hybrid
self.data_src = {} self.data_src = {}
self.topology_file = path_to_topology self.topology_file = path_to_topology
self.layers = self.parse_topology_file() self.layers = self.parse_topology_file()
self.parse_data_set_file(path_to_datasrc) self.parse_data_set_file(path_to_datasrc)
self.best_layer = number_of_partition * [0] self.best_layer = number_of_partition * [0]
self.best_res = number_of_partition * [0] self.best_res = number_of_partition * [0]
self.total_valid_solution = 0 self.total_valid_solution = 0
self.trial = 1 self.trial = 1
self.seeding_type = seeding_type self.seeding_type = seeding_type
self.max_res_available = max_res_unit self.max_res_available = max_res_unit
self.print_to_csv = print_to_csv self.print_to_csv = print_to_csv
self.max_pack_size = max_pack_size self.max_pack_size = max_pack_size
self.packing_strategy = packing_strategy
def parse_topology_file(self): def parse_topology_file(self):
layers = [] layers = []
...@@ -75,7 +69,6 @@ class cma_approach(object): ...@@ -75,7 +69,6 @@ class cma_approach(object):
with open(pc.DATA_SOURCE_PATH+path_to_data_csv, 'r') as f: with open(pc.DATA_SOURCE_PATH+path_to_data_csv, 'r') as f:
for line in f: for line in f:
elems = line.strip().split(',') elems = line.strip().split(',')
# print(elems)
if first: if first:
for idx, col in enumerate(elems): for idx, col in enumerate(elems):
if self.target_col in col: if self.target_col in col:
...@@ -86,7 +79,7 @@ class cma_approach(object): ...@@ -86,7 +79,7 @@ class cma_approach(object):
self.data_src[elems[1]][int(elems[0])] = int(float(elems[target_idx])) self.data_src[elems[1]][int(elems[0])] = int(float(elems[target_idx]))
def regroup_layers(self, sample): def regroup_layers(self, sample):
# #print("DEBUG", sample) # print("DEBUG", sample)
detail_sample = [] detail_sample = []
idx = 0 idx = 0
for size in sample: for size in sample:
...@@ -132,10 +125,10 @@ class cma_approach(object): ...@@ -132,10 +125,10 @@ class cma_approach(object):
return True return True
def penalty_layer(self, layer): def penalty_layer(self, layer):
penalty_score = self.penalty_offest penalty_score = self.penalty_offset
if sum(layer) != len(self.layers): if sum(layer) != len(self.layers):
penalty_score += self.penalty_offest penalty_score += self.penalty_offset
else: else:
layer = [abs(val) for val in layer] layer = [abs(val) for val in layer]
...@@ -144,15 +137,13 @@ class cma_approach(object): ...@@ -144,15 +137,13 @@ class cma_approach(object):
penalty_score *= 1.05 penalty_score *= 1.05
percent_diff = (abs(sum(layer) - len(self.layers)) / len(self.layers)) percent_diff = (abs(sum(layer) - len(self.layers)) / len(self.layers))
penalty_score += percent_diff * self.penalty_offest penalty_score += percent_diff * self.penalty_offset
return penalty_score return penalty_score
def find_max_latency(self, layer_partition, res_partitions): def find_max_latency(self, layer_partition, res_partitions):
latencies = [0] * len(layer_partition) latencies = [0] * len(layer_partition)
max_latency_idx = 0 max_latency_idx = 0
# print(layer_partition)
# print(res_partitions)
for idx, part in enumerate(layer_partition): for idx, part in enumerate(layer_partition):
res = res_partitions[idx] res = res_partitions[idx]
for layer in part: for layer in part:
...@@ -167,12 +158,85 @@ class cma_approach(object): ...@@ -167,12 +158,85 @@ class cma_approach(object):
packable solution. packable solution.
""" """
def eva_hybrid_sq(self, layer): def eva_hybrid_sq(self, layer):
if self.packing_strategy == "unconstrained":
return self.unconstrained_partition_sizing(layer)
elif self.packing_strategy == "no_penalty":
return self.max_packable_res_units(layer)
elif self.packing_strategy == "overflow_penalty":
return self.max_packable_res_units_penalty(layer)
else:
raise Exception("Unknown Strategy")
def unconstrained_partition_sizing(self, layer):
# res is a list where each element corresponds to a partition. res_step
# is the minimum amount by which partition edge length increases.
res = [self.res_step] * self.k
latencies = []
limit_reached = False
while not limit_reached:
latencies, max_idx = self.find_max_latency(layer, res)
res[max_idx] += self.res_step
if sum([r**2 for r in res]) > self.max_res_unit:
res[max_idx] -= self.res_step
limit_reached = True
# Calculate latencies of final solution.
latencies, max_idx = self.find_max_latency(layer, res)
return latencies[max_idx], latencies, res, layer
def max_packable_res_units(self, layer):
# res is a list where each element corresponds to a partition. res_step # res is a list where each element corresponds to a partition. res_step
# is the minimum amount by which partition edge length increases. # is the minimum amount by which partition edge length increases.
res = [self.res_step] * self.k res = [self.res_step] * self.k
latencies = [] latencies = []
# max_res_unit = 1920*9*1 from sq_approach_faster variable_max_res_unit = self.max_res_unit
# Do a binary search to find the largest packable variable_max_res_unit.
search_upper_bound = self.max_res_unit
search_lower_bound = sum([r**2 for r in res])
last_packable_res = []
last_packable_max_res_unit = 0
while search_upper_bound > search_lower_bound:
variable_max_res_unit = \
int((search_upper_bound + search_lower_bound)/2)
limit_reached = False
while not limit_reached:
latencies, max_idx = self.find_max_latency(layer, res)
res[max_idx] += self.res_step
# If this addition puts the solution over the limit, we need to
# revert the last partition addition. TODO write some code to
# see if we can assign the remaining units.
# TODO investigate if we can decrease the better-performing
# partitions to make room for the worst partition?
if sum([r**2 for r in res]) > self.max_res_unit:
res[max_idx] -= self.res_step
limit_reached = True
if pp.isPackable(res, self.max_pack_size):
last_packable_res = deepcopy(res)
last_packable_max_res_unit = variable_max_res_unit
# The desired max_res_unit value is greater than its current
# value.
search_lower_bound = variable_max_res_unit
else:
# The desired max_res_unit value is less than its current
# value.
search_upper_bound = variable_max_res_unit
# Calculate latencies of final solution.
latencies, max_idx = self.find_max_latency(layer, last_packable_res)
return latencies[max_idx], latencies, res, layer
def max_packable_res_units_penalty(self, layer):
# res is a list where each element corresponds to a partition. res_step
# is the minimum amount by which partition edge length increases.
res = [self.res_step] * self.k
latencies = []
variable_max_res_unit = self.max_res_unit variable_max_res_unit = self.max_res_unit
# Do a binary search to find the largest packable variable_max_res_unit. # Do a binary search to find the largest packable variable_max_res_unit.
...@@ -191,6 +255,9 @@ class cma_approach(object): ...@@ -191,6 +255,9 @@ class cma_approach(object):
# If this addition puts the solution over the limit, we need to # If this addition puts the solution over the limit, we need to
# revert the last partition addition. TODO write some code to # revert the last partition addition. TODO write some code to
# see if we can assign the remaining units. # see if we can assign the remaining units.
# TODO investigate if we can decrease the better-performing
# partitions to make room for the worst partition
if sum([r**2 for r in res]) > variable_max_res_unit: if sum([r**2 for r in res]) > variable_max_res_unit:
res[max_idx] -= self.res_step res[max_idx] -= self.res_step
limit_reached = True limit_reached = True
...@@ -212,7 +279,7 @@ class cma_approach(object): ...@@ -212,7 +279,7 @@ class cma_approach(object):
# variable_max_res_unit. # variable_max_res_unit.
max_res_unit_decrease = self.max_res_unit - variable_max_res_unit max_res_unit_decrease = self.max_res_unit - variable_max_res_unit
packing_penalty = pp.calculatePackingPenalty(max_res_unit_decrease) packing_penalty = pp.calculatePackingPenalty(max_res_unit_decrease)
return latencies[max_idx] + packing_penalty, latencies, last_packable_res, layer return latencies[max_idx], latencies, res, layer
def evaluation_top_level(self, in_val): def evaluation_top_level(self, in_val):
pid, sampling = in_val pid, sampling = in_val
...@@ -251,7 +318,7 @@ class cma_approach(object): ...@@ -251,7 +318,7 @@ class cma_approach(object):
self.es = cma.CMAEvolutionStrategy(self.seed, self.sigma, \ self.es = cma.CMAEvolutionStrategy(self.seed, self.sigma, \
{'popsize' : self.population_size}) {'popsize' : self.population_size})
best_overall = self.penalty_offest best_overall = self.penalty_offset
self.i = 0 self.i = 0
temp_out = [] temp_out = []
while not self.es.stop() and self.i < self.max_iter: while not self.es.stop() and self.i < self.max_iter:
...@@ -260,30 +327,30 @@ class cma_approach(object): ...@@ -260,30 +327,30 @@ class cma_approach(object):
scores = [0] * self.es.popsize scores = [0] * self.es.popsize
invalid_sampling = 0 invalid_sampling = 0
res_combintaions = [0] * self.es.popsize res_combinations = [0] * self.es.popsize
# pool = Pool(processes = cpu_count() - 4) # pool = Pool(processes = cpu_count() - 4)
# for result in pool.imap_unordered(self.evaluation_top_level, id_list): # for result in pool.imap_unordered(self.evaluation_top_level, id_list):
# scores[result[0]] = result[1] # scores[result[0]] = result[1]
# if result[1] >= self.penalty_offest: # if result[1] >= self.penalty_offset:
# invalid_sampling += 1 # invalid_sampling += 1
# else: # else:
# if not self.is_hybrid: # if not self.is_hybrid:
# res_combintaions[result[0]] = result[2] # res_combinations[result[0]] = result[2]
# pool.close() # pool.close()
# pool.join() # pool.join()
for tup in id_list: for tup in id_list:
_, scores[tup[0]] = self.evaluation_top_level(tup) _, scores[tup[0]] = self.evaluation_top_level(tup)
if scores[tup[0]] >= self.penalty_offest: if scores[tup[0]] >= self.penalty_offset:
invalid_sampling += 1 invalid_sampling += 1
if not self.is_hybrid: if not self.is_hybrid:
best_in_iteration = min(scores) best_in_iteration = min(scores)
if best_in_iteration < best_overall and best_in_iteration < self.penalty_offest: if best_in_iteration < best_overall and best_in_iteration < self.penalty_offset:
best_overall = best_in_iteration best_overall = best_in_iteration
self.best_res = res_combintaions[scores.index(min(scores))] self.best_res = res_combinations[scores.index(min(scores))]
##print(str(self.i) + ":", \ ##print(str(self.i) + ":", \
# "Sigma:",round(self.es.sigma, 4), \ # "Sigma:",round(self.es.sigma, 4), \
...@@ -306,29 +373,45 @@ class cma_approach(object): ...@@ -306,29 +373,45 @@ class cma_approach(object):
return temp_out return temp_out
def report(self, output_png): def report(self, output_png):
##print(self.i, self.es.sigma)
max_latency = 0 max_latency = 0
layer = [] layer = []
res = [] res = []
latencies = [] latencies = []
if not self.filter_layer(self.best_layer): if not self.filter_layer(self.best_layer):
##print("RESULT NOT VALID") # Result not valid.
##print("Layer:", self.best_layer, "sum: ", sum(self.best_layer)) result = [
#print(self.penalty_layer(self.best_layer)) self.target_col,
self.i,
self.k,
self.topology_file,
0,
0,
0,
0,
0,
0,
0,
layer,
res,
self.end - self.start,
self.es.sigma,
self.seed_od,
self.valid_sampling_percentage,
self.trial,
self.population_size,
self.max_res_unit,
self.seeding_type,
self.packing_strategy]
if self.print_to_csv: if self.print_to_csv:
with open(pc.RESULT_CSV_PATH+'cma_logmore_sq.csv', 'a') as csvFile: with open(pc.RESULT_CSV_PATH + 'cma_logmore_packed_sq.csv', 'a') as csv_file:
writer = csv.writer(csvFile, delimiter=',', lineterminator="\n") writer = csv.writer(csv_file, delimiter=',', lineterminator="\n")
writer.writerow([self.target_col,self.i,self.k, self.topology_file, 0, 0, 0, 0, 0, 0, 0, layer, res, self.end-self.start, self.es.sigma, self.seed_od,self.valid_sampling_percentage, self.trial, self.population_size, self.max_res_unit, self.seeding_type]) writer.writerow(result)
csvFile.close
result = [self.target_col,self.i,self.k, self.topology_file, 0, 0, 0, 0, 0, 0, 0, layer, res, self.end-self.start, self.es.sigma, self.seed_od,self.valid_sampling_percentage, self.trial, self.population_size, self.max_res_unit, self.seeding_type]
return False, result return False, result
# Result valid.
layer = self.regroup_layers(self.best_layer) layer = self.regroup_layers(self.best_layer)
max_latency, latencies, res, _ = self.eva_hybrid_sq(layer) max_latency, latencies, res, _ = self.eva_hybrid_sq(layer)
# generate data for mapping the full array (129 * 129) # Generate data for mapping the full array (129 * 129).
full_latency, full_max_idx = self.find_max_latency([self.layers], [129]*len(self.layers)) full_latency, full_max_idx = self.find_max_latency([self.layers], [129]*len(self.layers))
# PLEASE UNCOMMENT THIS PART IF YOU ARE NOT USING THE BASH SCRIPT WE HAVE PROVIDED # PLEASE UNCOMMENT THIS PART IF YOU ARE NOT USING THE BASH SCRIPT WE HAVE PROVIDED
...@@ -346,43 +429,53 @@ class cma_approach(object): ...@@ -346,43 +429,53 @@ class cma_approach(object):
# print("==========================================================================") # print("==========================================================================")
# print("Throughput Ratio:", (1/max_latency)/(1/full_latency[full_max_idx])) # print("Throughput Ratio:", (1/max_latency)/(1/full_latency[full_max_idx]))
# print("Latency increase:", (max_latency*self.k)/full_latency[full_max_idx]) # print("Latency increase:", (max_latency*self.k)/full_latency[full_max_idx])
pngFileName="No PNG Generated"
directory_path = pc.RESULT_SCREENSHOT_PATH + \
str(self.topology_file.replace(".csv", "")) + "/" + \
self.packing_strategy + "/" + \
"pack_size_" + str(self.max_pack_size) + "/"
makedirs(directory_path, exist_ok = True)
pngFileName = "k=" + str(self.k) + "_max=" + str(self.max_res_unit) \
+ "_latency=" + str(max_latency) + ".png"
if not self.packing_strategy == 'unconstrained' and output_png:
if pp.isPackable(res, self.max_pack_size):
# bin_area = self.max_pack_size ** 2
# packed_area = sum([r**2 for r in res])
pp.printPNG(res, self.max_pack_size, directory_path + pngFileName)
else:
print('asdf')
result = [self.target_col,
self.i,
self.k,
self.topology_file,
1,
(1/max_latency),
max_latency*self.k,
1/full_latency[full_max_idx],
full_latency[full_max_idx],
(1/max_latency)/(1/full_latency[full_max_idx]),
(max_latency*self.k)/full_latency[full_max_idx],
layer,
res,
self.end-self.start,
self.es.sigma,
self.seed_od,
self.valid_sampling_percentage,
self.trial,
self.population_size,
self.max_res_unit,
self.seeding_type,
self.packing_strategy,
pngFileName]
if self.print_to_csv: if self.print_to_csv:
with open(pc.RESULT_CSV_PATH+'cma_logmore_sq.csv', 'a') as csvFile: with open(pc.RESULT_CSV_PATH+'cma_logmore_packed_sq.csv', 'a') as csv_file:
writer = csv.writer(csvFile, delimiter=',', lineterminator="\n") writer = csv.writer(csv_file, delimiter=',', lineterminator="\n")
writer.writerow([self.target_col,self.i,self.k, self.topology_file, 1, (1/max_latency), max_latency*self.k, 1/full_latency[full_max_idx], full_latency[full_max_idx], (1/max_latency)/(1/full_latency[full_max_idx]), (max_latency*self.k)/full_latency[full_max_idx], layer, res, self.end-self.start, self.es.sigma, self.seed_od,self.valid_sampling_percentage, self.trial, self.population_size, self.max_res_unit, self.seeding_type]) writer.writerow(result)
csvFile.close
if self.valid_sampling_percentage > 0:
directory_path = pc.RESULT_SCREENSHOT_PATH + \
str(self.topology_file.replace(".csv", "")) + "/" + \
"pack_size_" + str(self.max_pack_size) + "/" + \
"penalty_constant_" + str(pp.PENALTY_CONSTANT) + "/"
makedirs(directory_path, exist_ok = True)
pngFileName = "k=" + str(self.k) + "_max=" + str(self.max_res_unit) \
+ "_latency=" + str(max_latency) + ".png"
if pp.isPackable(res, self.max_pack_size) and output_png:
bin_area = self.max_pack_size ** 2
packed_area = 0
for rect in res:
square_area = rect ** 2
packed_area += square_area
percentage_wasted = 100 * (bin_area - packed_area) / bin_area
consumed_area = 0
pp.printPNG(res, self.max_pack_size, directory_path + pngFileName)
with open(pc.RESULT_CSV_PATH+'cma_bsearch_sq.csv', 'a') as csvFile:
writer = csv.writer(csvFile, delimiter=',', lineterminator="\n")
writer.writerow([self.k, # number of partitions
self.topology_file, # topology
1/max_latency, # throughput
max_latency, # maximum latency
self.max_res_unit, # number of DSP48 units
pngFileName # output file
])
result = [self.target_col,self.i,self.k, self.topology_file, 1,(1/max_latency), max_latency*self.k, 1/full_latency[full_max_idx], full_latency[full_max_idx], (1/max_latency)/(1/full_latency[full_max_idx]), (max_latency*self.k)/full_latency[full_max_idx], layer, res, self.end-self.start, self.es.sigma, self.seed_od,self.valid_sampling_percentage, self.trial, self.population_size, self.max_res_unit, self.seeding_type]
return True, result return True, result
if __name__ == "__main__": if __name__ == "__main__":
...@@ -406,7 +499,7 @@ if __name__ == "__main__": ...@@ -406,7 +499,7 @@ if __name__ == "__main__":
max_res_unit = max_res_unit, initial_res = 0, max_res_unit = max_res_unit, initial_res = 0,
res_step = 3, res_step = 3,
penalty_offest = 100000000000, penalty_offset = 100000000000,
seeding_type = seeding_type, seeding_type = seeding_type,
hybrid = True, hybrid = True,
print_to_csv = True print_to_csv = True
......
...@@ -15,7 +15,7 @@ def isPackable(partitions, full_array): ...@@ -15,7 +15,7 @@ def isPackable(partitions, full_array):
return len(packer[0]) == len(partitions) return len(packer[0]) == len(partitions)
# 150 # 150
PENALTY_CONSTANT = 0 PENALTY_CONSTANT = 100
def calculatePackingPenalty(max_res_unit_decrement): def calculatePackingPenalty(max_res_unit_decrement):
return max_res_unit_decrement * PENALTY_CONSTANT return max_res_unit_decrement * PENALTY_CONSTANT
......
from multiprocessing import Pool from multiprocessing import Pool
from os import cpu_count from os import cpu_count
import itertools
import path_constant as pc import path_constant as pc
import cma_approach_square_size as ap import cma_approach_square_size as ap
...@@ -15,46 +16,41 @@ nets = [ ...@@ -15,46 +16,41 @@ nets = [
('resnet_50_v1', 53, 27) ('resnet_50_v1', 53, 27)
] ]
max_res_sizes = [ max_res_sizes = [1920*9*1]
1920*9*1
# 1920*9*2, 1920*9*3
]
pop_size = [ pop_size = [100]
100
# 200, 500
]
targets = [ targets = ['DRAM_cycle', 'Cycles']
'DRAM_cycle', 'Cycles'