Commit 9799fe30 authored by harry1080ti

added necessary file

parent 6a4bcd49
......@@ -175,6 +175,7 @@ class cma_approach(object):
return latencies[max_idx], latencies, res, layer
# not really in use
def evaluate_full_relaxed(self, layer):
seed = []
......
import cma
import csv  # used by report() for result logging
from multiprocessing import Pool
from os import cpu_count
import time
import path_constant as pc
class cma_approach(object):
def __init__(self,
# data path
path_to_datasrc = "alexnet_data.csv",
path_to_topology = "alexnet.csv",
target_col = "Cycles",
# problem definition
number_of_partition = 4, max_iteration = 100,
sigma = 0.5, population_size = 10,
# constraint
max_res_unit = 960, initial_res = 0,
res_step = 1,
penalty_offest = 10000000000,
seeding_type="optimised",
hybird = True
):
self.target_col = target_col
self.start = time.time()
self.k = number_of_partition
self.max_iter = max_iteration
self.sigma = sigma
self.max_res_unit = max_res_unit
self.res_step = res_step
self.population_size = population_size
self.penalty_offest = penalty_offest
self.ending_iter = 0
self.is_hybird = hybird
self.data_src = {}
self.topology_file = path_to_topology
self.layers = self.parse_topology_file()
self.parse_data_set_file(path_to_datasrc)
self.best_layer = number_of_partition * [0]
self.best_res = number_of_partition * [0]
self.total_valid_solution = 0
self.trial = 1
self.seeding_type = seeding_type
self.max_res_available = 1920*9
def parse_topology_file(self):
layers = []
with open(pc.TOPOLOGIES_PATH+self.topology_file, 'r') as f:
next(f)
for line in f:
elems = line.strip().split(',')
layers.append(elems[0])
for layer in layers:
self.data_src[layer] = {}
return layers
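    # parse_topology_file above assumes the topology CSV has a header row and
    # that column 0 of every following row is a layer name; it also primes
    # self.data_src with an empty dict per layer.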
def parse_data_set_file(self, path_to_data_csv):
first = True
target_idx = 2
with open(pc.DATA_SOURCE_PATH+path_to_data_csv, 'r') as f:
for line in f:
elems = line.strip().split(',')
# print(elems)
if first:
for idx, col in enumerate(elems):
if self.target_col in col:
target_idx = idx
break
first = False
else:
self.data_src[elems[1]][int(elems[0])] = int(float(elems[target_idx]))
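    # parse_data_set_file above fills self.data_src[layer][size] with the target
    # metric (e.g. Cycles), keyed by what appears to be the square-array
    # dimension, so self.data_src["conv1"][64] (hypothetical layer/size) would
    # be that layer's cycle count on a 64x64 array.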
def regroup_layers(self, sample):
# #print("DEBUG", sample)
detail_sample = []
idx = 0
for size in sample:
part = []
if size == 1:
part.append(self.layers[idx])
idx += 1
else:
for i in range(0, size):
part.append(self.layers[i + idx])
idx += size
detail_sample.append(part)
return detail_sample
def decode(self, val, max_val):
return int(val * max_val)
def encode(self, val, max_val):
return float(val / max_val)
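    # A small worked example of the encode/decode pair above: with 24 layers,
    # encode(6, 24) == 0.25 and decode(0.25, 24) == 6, so CMA-ES can search a
    # continuous [0, 1] space while fitness is evaluated on integer partition
    # sizes. int() truncates, so out-of-range samples are rejected by the
    # filters below rather than repaired here.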
def filter_layer(self, layer):
for idx in range(self.k):
if layer[idx] <= 0:
return False
if sum(layer) != len(self.layers):
return False
return True
def filter_res(self, res):
# #print(layer, res)
for idx in range(self.k):
if res[idx] <= 0:
return False
if sum(res) != self.max_res_unit:
return False
return True
def penalty_layer(self, layer):
penalty_score = self.penalty_offest
if sum(layer) != len(self.layers):
penalty_score += self.penalty_offest
else:
layer = [abs(val) for val in layer]
for idx in range(self.k):
if layer[idx] <= 0:
penalty_score *= 1.05
percent_diff = (abs(sum(layer) - len(self.layers)) / len(self.layers))
penalty_score += percent_diff * self.penalty_offest
return penalty_score
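    # Worked example for penalty_layer (k = 4, 24 layers, hypothetical
    # candidate [10, 8, 8, -2]): the signed sum is 24, so the else branch runs;
    # after abs() the sizes are [10, 8, 8, 2] with no zero entries, and
    # percent_diff = |28 - 24| / 24, giving roughly 1.17 * penalty_offest.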
# def penalty_res(self, res):
# penalty_score = self.penalty_offest
# if sum(res) != self.max_res_unit:
# penalty_score += self.penalty_offest
# else:
# res = [abs(val) for val in res]
# for idx in range(self.k):
# if res[idx] <= 0:
# penalty_score *= 1.05
# percent_diff = abs(sum(res) - self.max_res_unit) / self.max_res_unit
# penalty_score += percent_diff * self.penalty_offest
# return penalty_score
def find_max_latency(self, layer_partition, res_partitions):
latencies = [0] * len(layer_partition)
max_latency_idx = 0
# print(layer_partition)
# print(res_partitions)
for idx, part in enumerate(layer_partition):
res = res_partitions[idx]
for layer in part:
latencies[idx] += self.data_src[layer][res]
if latencies[idx] > latencies[max_latency_idx]:
max_latency_idx = idx
return latencies, max_latency_idx
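    # find_max_latency above treats the k partitions as pipeline stages: each
    # stage's latency is the sum of its layers' latencies at that stage's
    # resource size, and the returned index marks the bottleneck stage.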
    def eva_hybird_sq(self, layer):
        # greedily grow the bottleneck partition until the area budget is spent
        res = [self.res_step] * self.k
        latencies, max_idx = self.find_max_latency(layer, res)
        while sum([r * r for r in res]) < self.max_res_unit:
            res[max_idx] += self.res_step
            latencies, max_idx = self.find_max_latency(layer, res)
        # for i in range(0, int(self.max_res_unit/self.res_step - self.k*self.res_step)):
        #     latencies, max_idx = self.find_max_latency(layer, res)
        #     res[max_idx] += self.res_step
        return latencies[max_idx], latencies, res, layer
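    # Note on the r * r term above: res values appear to be square-array
    # dimensions, so an allocation of r consumes r*r units of the max_res_unit
    # budget (e.g. res_step = 3 starts each partition as a 3x3 array costing
    # 9 units).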
    # not really in use
# def evaluate_full_relaxed(self, layer):
# seed = []
# for i in range(self.k - 1):
# seed.append(int(self.max_res_unit/self.k))
# seed.append(self.max_res_unit - sum(seed))
# # #print(seed)
# seed = [self.encode(val, self.max_res_unit) for val in seed[:-1]]
# es_res = cma.CMAEvolutionStrategy(seed, \
# self.sigma, {'popsize' : self.population_size})
# i = 0
# while not es_res.stop() and i < self.max_iter:
# samples = es_res.ask()
# scores = [0] * es_res.popsize
# res = [0] * es_res.popsize
# for idx, sample in enumerate(samples):
# res_assign = [self.decode(val, self.max_res_unit) for val in sample]
# res_assign.append(self.max_res_unit - sum(res_assign))
# res[idx] = res_assign
# for idx, r in enumerate(res):
# if self.filter_res(r):
# latencies, max_idx = self.find_max_latency(layer, r)
# scores[idx] = latencies[max_idx]
# else:
# scores[idx] = self.penalty_res(r)
# # for idx in range(self.population_size):
# # #print(samples[idx], scores[idx])
# es_res.tell(samples, scores)
# i += 1
# res = [self.decode(val, self.max_res_unit) for val in es_res.result[0]]
# res.append(self.max_res_unit - sum(res))
# if self.filter_res(r):
# latencies, max_idx = self.find_max_latency(layer, res)
# else:
# max_latency = self.penalty_res(r)
# latencies = [max_latency]*self.k
# max_idx = 0
# return latencies[max_idx], latencies, res, layer
def evaluation_top_level(self, in_val):
pid, sampling = in_val
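        # Each CMA-ES sample carries k-1 genes in roughly [0, 1]; they are
        # decoded to integer partition sizes below and the k-th size is derived
        # so the sizes sum to the total layer count. Samples that decode to an
        # invalid partitioning are scored with a penalty instead of being
        # repaired.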
layer = [self.decode(val, len(self.layers)) for val in sampling]
layer.append(len(self.layers) - sum(layer))
penalty = 0
if not self.filter_layer(layer):
penalty = self.penalty_layer(layer)
if self.is_hybird:
return pid, penalty
else:
return pid, penalty*4
layer = self.regroup_layers(layer)
# if self.is_hybird:
# return pid, self.eva_hybird_sq(layer)[0]
# else:
# score, _, res, _ = self.evaluate_full_relaxed(layer)
# return pid, score, res
return pid, self.eva_hybird_sq(layer)[0]
def run(self):
        self.trial += 1
if (self.seeding_type=="allzeros"):
self.seed = [0]*(self.k-1)
self.seed_od = self.seed
elif (self.seeding_type=="optimised"):
self.seed = []
for i in range(self.k - 1):
self.seed.append(int(len(self.layers)/self.k))
self.seed.append(len(self.layers) - sum(self.seed))
self.seed_od = self.seed
self.seed = [self.encode(val, len(self.layers)) for val in self.seed[:-1]]
else:
raise ValueError('Invalid Seeding Strategy')
self.es = cma.CMAEvolutionStrategy(self.seed, self.sigma, \
{'popsize' : self.population_size})
best_overall = self.penalty_offest
self.i = 0
while not self.es.stop() and self.i < self.max_iter:
samples = self.es.ask()
id_list = [(idx, sample) for idx, sample in enumerate(samples)]
scores = [0] * self.es.popsize
invalid_sampling = 0
            res_combinations = [0] * self.es.popsize
            pool = Pool(processes=1)  # cpu_count() - 4 may suit machines with more cores
            for result in pool.imap_unordered(self.evaluation_top_level, id_list):
                scores[result[0]] = result[1]
                if result[1] >= self.penalty_offest:
                    invalid_sampling += 1
                else:
                    if not self.is_hybird:
                        res_combinations[result[0]] = result[2]
            pool.close()
            pool.join()
            if not self.is_hybird:
                best_in_iteration = min(scores)
                if best_in_iteration < best_overall and best_in_iteration < self.penalty_offest:
                    best_overall = best_in_iteration
                    self.best_res = res_combinations[scores.index(min(scores))]
##print(str(self.i) + ":", \
# "Sigma:",round(self.es.sigma, 4), \
# "|| Valid sampling percentage:", \
# (self.population_size - invalid_sampling) /self.population_size *100)
##print("invalid sampling", invalid_sampling)
            self.valid_sampling_percentage = (self.population_size - invalid_sampling) / self.population_size * 100
self.total_valid_solution += self.population_size - invalid_sampling
self.samples = samples
self.scores = scores
self.es.tell(samples, scores)
self.end = time.time()
self.best_layer = [self.decode(val, len(self.layers)) for val in self.es.result[0]]
self.best_layer.append(len(self.layers) - sum(self.best_layer))
self.report()
self.i += 1
self.ending_iter = self.i
def report(self):
##print(self.i, self.es.sigma)
max_latency = 0
layer = []
res = []
latencies = []
if self.is_hybird:
if not self.filter_layer(self.best_layer):
##print("RESULT NOT VALID")
##print("Layer:", self.best_layer, "sum: ", sum(self.best_layer))
#print(self.penalty_layer(self.best_layer))
                with open(pc.RESULT_CSV_PATH+'cma_logmore_sq.csv', 'a') as csvFile:
                    writer = csv.writer(csvFile, delimiter=',', lineterminator="\n")
                    writer.writerow([self.target_col, self.i, self.k, self.topology_file,
                                     0, 0, 0, 0, 0, 0, 0, layer, res,
                                     self.end-self.start, self.es.sigma, self.seed_od,
                                     self.valid_sampling_percentage, self.trial,
                                     self.population_size, self.max_res_unit,
                                     self.seeding_type])
return False
layer = self.regroup_layers(self.best_layer)
max_latency, latencies, res, layers = self.eva_hybird_sq(layer)
else:
            if not (self.filter_res(self.best_res) and self.filter_layer(self.best_layer)):
#print("RESULT NOT VALID")
#print("Layer:", self.best_layer, "sum: ", sum(self.best_layer))
#print("Res:", self.best_res, "sum: ", sum(self.best_res))
return False
layer = self.regroup_layers(self.best_layer)
res = self.best_res
latencies, max_idx = self.find_max_latency(layer, self.best_res)
max_latency = latencies[max_idx]
# generate data for mapping the full array
full_latency, full_max_idx = self.find_max_latency([self.layers], [129]*len(self.layers))
        # PLEASE UNCOMMENT THIS PART IF YOU ARE NOT USING THE BASH SCRIPT WE HAVE PROVIDED
print("================================= RESULT =================================")
print("Solution: (out of", self.total_valid_solution, "solutions)")
print(layer)
print("Res mapping:")
print(res)
print("Latency for each partition: ")
print(latencies)
print("Final Latency:", max_latency*self.k, "|| Throught put:", 1/max_latency)
print("==========================================================================")
print("Map to full array (", self.max_res_unit, ")")
print("Final Latency:", full_latency[full_max_idx], "|| Throught put:", 1/full_latency[full_max_idx])
print("==========================================================================")
print("Throughtput Ratio:", (1/max_latency)/(1/full_latency[full_max_idx]))
print("Latency increase:", (max_latency*self.k)/full_latency[full_max_idx])
        with open(pc.RESULT_CSV_PATH+'cma_logmore_sq.csv', 'a') as csvFile:
            writer = csv.writer(csvFile, delimiter=',', lineterminator="\n")
            writer.writerow([self.target_col, self.i, self.k, self.topology_file,
                             1, (1/max_latency), max_latency*self.k,
                             1/full_latency[full_max_idx], full_latency[full_max_idx],
                             (1/max_latency)/(1/full_latency[full_max_idx]),
                             (max_latency*self.k)/full_latency[full_max_idx],
                             layer, res, self.end-self.start, self.es.sigma,
                             self.seed_od, self.valid_sampling_percentage, self.trial,
                             self.population_size, self.max_res_unit, self.seeding_type])
return True
if __name__ == "__main__":
    import sys
topology = sys.argv[1]
k = int(sys.argv[2])
population_size = int(sys.argv[3])
max_res_unit = int(sys.argv[4])
seeding_type = sys.argv[5]
target_col = sys.argv[6]
es_hybird = cma_approach(
path_to_datasrc = str(topology)+"_square_mem_bound.csv",
path_to_topology = str(topology)+".csv",
target_col = str(target_col),
number_of_partition = k, max_iteration = 10000,
sigma = 0.5, population_size = population_size,
max_res_unit = max_res_unit, initial_res = 0,
res_step = 3,
penalty_offest = 100000000000,
seeding_type = seeding_type,
hybird = True
)
trials = 1
#print("======== HYBRID ======== ( k:", k, "trials:", trials, ")")
es_hybird.run()
    while not es_hybird.report() and trials < 20:
        #print("======== HYBRID ======== ( k:", k, "trials:", trials, ")")
        es_hybird.run()
        trials += 1
        k += 1  # note: this local k is not fed back into es_hybird; retries reuse the original settings
    #print("convergence takes", trials, "trials")
......@@ -3,41 +3,56 @@ import pandas as pd
import os.path
import sys

def organize(cycle_csv_path, detail_csv_path):
    basic_cycle = pd.read_csv(cycle_csv_path)
    detail = pd.read_csv(detail_csv_path)
    # remove unnamed columns
    basic_cycle = basic_cycle.loc[:, ~basic_cycle.columns.str.contains('^Unnamed')]
    # basic_cycle = basic_cycle.loc[:, ~basic_cycle.columns.str.contains('% Utilization')]
    detail = detail.loc[:, ~detail.columns.str.contains('^Unnamed')]
    detail.columns = [col.strip('\t') for col in detail.columns]
    basic_cycle.columns = [col.strip('\t') for col in basic_cycle.columns]
    detail['DRAM_cycle'] = detail['DRAM_OFMAP_stop'] - detail['DRAM_IFMAP_start']
    detail['SRAM_cycle'] = detail['SRAM_write_stop'] - detail['SRAM_read_start']
    df = pd.merge(basic_cycle, detail, on='Layer')
    df['ArraySize'] = [(i+1)] * len(detail['Layer'])  # relies on the loop index i being global in __main__
    return df

if __name__ == "__main__":
    path = sys.argv[1]
    runname = sys.argv[2]
    out_path = sys.argv[3]
    maxsize = int(sys.argv[4])
    square = sys.argv[5].lower() == "true"  # bool(sys.argv[5]) would be True for any non-empty string
    frames = []
    dimension_found = []
    if square:
        for i in range(maxsize):
            dimension = str(i+1) + "x" + str(i+1)
            cycle_csv_path = path + runname + "_ws_" + dimension + "/" + dimension + "_" + runname + "_cycles.csv"
            detail_csv_path = path + runname + "_ws_" + dimension + "/" + dimension + "_" + runname + "_detail.csv"
            if os.path.exists(cycle_csv_path) and os.path.exists(detail_csv_path):
                frames.append(organize(cycle_csv_path, detail_csv_path))
                print(dimension)
    else:
        for i in range(maxsize):
            cycle_csv_path = path + runname + "_ws_9x" + str(i+1) + "/" + runname + "_cycles.csv"
            detail_csv_path = path + runname + "_ws_9x" + str(i+1) + "/" + runname + "_detail.csv"
            if not os.path.exists(cycle_csv_path):
                cycle_csv_path = path + runname + "_ws_9x" + str(i+1) + "/9x" + str(i+1) + "_" + runname + "_cycles.csv"
                detail_csv_path = path + runname + "_ws_9x" + str(i+1) + "/9x" + str(i+1) + "_" + runname + "_detail.csv"
            if os.path.exists(cycle_csv_path) and os.path.exists(detail_csv_path):
                frames.append(organize(cycle_csv_path, detail_csv_path))
    result = pd.concat(frames)
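    # A hypothetical invocation, mirroring how generate_final_csv.sh calls this
    # script (argv order: path, runname, out_path, maxsize, square):
    #   python3 csv_reorganizer.py outputs/ yolo_tiny final_csv/ 128 false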
......
......@@ -2,7 +2,7 @@
# ./generate_final_csv.sh yolo_tiny optimization_algo/yolo_tiny_mem_bound.csv 960
if [ "$1" != "" ]; then
-python3 csv_reorganizer.py outputs/ $1 $2 $3
+python3 csv_reorganizer.py outputs/ $1 $2 $3 false
else
echo "Positional parameter 1 is empty"
fi
\ No newline at end of file
#!/bin/bash
# ./generate_final_csv.sh yolo_tiny optimization_algo/yolo_tiny_mem_bound.csv 960
if [ "$1" != "" ]; then
python3 csv_reorganizer.py outputs/ $1 $2 $3 true
else
echo "Positional parameter 1 is empty"
fi
\ No newline at end of file
#!/bin/bash
-# ./generate_data_set.sh configs/US_sim.cfg topologies/960_DNN/Alexnet.csv
+# ./generate_square_data_set.sh configs/US_sim.cfg topologies/960_DNN/Alexnet.csv
if [ "$1" != "" ]; then
-python3 scale.py -arch_config=$1 -network=$2 -ultraScale=False -enable_mp=True -custom_mem_size=True
+python3 scale.py -arch_config=$1 -network=$2 -ultraScale=False -enable_mp=True -custom_mem_size=True -res_unit_limit=$3
else
echo "Positional parameter 1 is empty"
fi
\ No newline at end of file
......@@ -16,6 +16,7 @@ flags.DEFINE_boolean("ultraScale",False,"whether to allow using ultrascale or no
flags.DEFINE_boolean("rmtrace",True,"whether to remove trace or not")
flags.DEFINE_boolean("enable_mp",False,"whether to enable multi-processing or not")
flags.DEFINE_boolean("custom_mem_size",False,"whether to enable customize memory size or not")
flags.DEFINE_integer("res_unit_limit",0,"res unit limit used by square array (0 for non-square)")
class scale:
def __init__(self, sweep = False, save = False):
......@@ -93,6 +94,9 @@ class scale:
#self.topology_file = topology_file.split('"')[1] #Config reads the quotes as wells
self.topology_file= FLAGS.network
self.square_res_limit = FLAGS.res_unit_limit
def run_scale(self):
if FLAGS.ultraScale:
us.core(FLAGS.arch_config, FLAGS.network, FLAGS.rmtrace)
......@@ -250,7 +254,7 @@ class scale:
# enough_DSP = 9024 > (ar_dim[0] * ar_dim[1])
# enough_RAM = 4032 > ar_dim[1] * ((ar_dim[0] * ar_dim[1]) % 3)
# return enough_DSP and enough_RAM
-return (ar_dim[0] * ar_dim[1]) < (9 * 1920)
+return (ar_dim[0] * ar_dim[1]) < self.square_res_limit and ar_dim[0] == ar_dim[1]
def run_mp_sweep(self):
# python3 scale.py -arch_config=configs/US_sim.cfg -network=topologies/alexnet.csv -ultraScale=False -enable_mp=True
......@@ -260,16 +264,20 @@ class scale:
all_arr_dim_list = []
# 9024 / 3 = 3008
for k in range(6, 3009):
for i in range(1, int(3008 / (k * 3))):