diff --git a/EzPC/EzPC/test_suite/random_forest.ezpc b/EzPC/EzPC/test_suite/random_forest.ezpc new file mode 100644 index 0000000000000000000000000000000000000000..6d2b70331b672d3d0dd4aaa9195b23af419725ff --- /dev/null +++ b/EzPC/EzPC/test_suite/random_forest.ezpc @@ -0,0 +1,128 @@ +(* Random Forest implementation for inference on California Housing Dataset -- Regression *) + +(* These values come from a python program that I used to train +the Random Forest. Accuracy on testing set (20% of full dataset) = 83.04 *) +uint32 noOfFeatures = 13; +(* This depth is the depth in python code + 1 *) +uint32 maxDepth = 11; +int32 maxDepthInt = 11; +uint32 noOfTrees = 10; +uint32 noOfNodes = 3035; (* (2^maxDepth) *) + +(* To get secret shares of an element from array when the element's index is secret shared *) +def void accessElementOneD(uint64_bl[noOfFeatures] arr, uint64_bl idx, uint64_bl[1] result) +{ + result[0] = 0uL; + uint64_pl ctr = 0uL; + for i=[0:noOfFeatures] + { + result[0] = result[0] +_bl (idx == ctr?arr[i]:0uL); + ctr = ctr + 1uL; + }; +} + +def void accessElementTwoD(uint64_bl[noOfTrees][noOfNodes] arr, int64_pl treeId, uint64_al idx, uint64_bl[1] result, uint32_pl start, uint32_pl end) +{ + result[0] = 0uL; + uint64_pl ctr = 0uL; + uint64_bl idxcpy = idx; + for i=[start:end] + { + result[0] = result[0] +_bl (idxcpy == ctr?arr[treeId][i]:0uL); + ctr = ctr + 1uL; + }; +} + +def uint32_pl pow(uint32_pl base, int32_pl power) +{ + uint32_pl result = 1; + for i=[0:power] + { + result = result * base; + }; + return result; +} + +def void infer(uint64_bl[noOfTrees][noOfNodes] modelFeatureChoice, + uint64_bl[noOfTrees][noOfNodes] modelThresholdValue, + uint64_bl[noOfFeatures] query, + uint64_bl[noOfTrees] result) +{ + uint64_al currentIndex = 0uL; + uint64_al leftOrRight = 0uL; + uint64_bl[1] currentSharedFeatureChoice; + uint64_bl[1] currentSharedThreshold; + uint64_bl[1] currentSharedQueryValue; + currentSharedFeatureChoice[0] = 0uL; + currentSharedThreshold[0] = 0uL; + currentSharedQueryValue[0] = 0uL; + uint32_pl startIdx = 0; + uint32_pl endIdx = 0; + + for i=[0:noOfTrees] + { + currentIndex = 0uL; + + (* Root level *) + currentSharedFeatureChoice[0] = modelFeatureChoice[i][0]; + currentSharedThreshold[0] = modelThresholdValue[i][0]; + accessElementOneD(query, currentSharedFeatureChoice[0], currentSharedQueryValue); + leftOrRight = (currentSharedThreshold[0] > currentSharedQueryValue[0])?1L:2L; + + for j=[1:maxDepthInt-1] + { + currentIndex = (currentIndex *_al 2uL) +_al leftOrRight; + startIdx = pow(2u, j)-1u; + endIdx = pow(2u, j+1)-1u; + accessElementTwoD(modelThresholdValue, i, currentIndex, currentSharedThreshold, startIdx, endIdx); + accessElementTwoD(modelFeatureChoice, i, currentIndex, currentSharedFeatureChoice, startIdx, endIdx); + accessElementOneD(query, currentSharedFeatureChoice[0], currentSharedQueryValue); + leftOrRight = (currentSharedThreshold[0] > currentSharedQueryValue[0])?1L:2L; + }; + + (* Leaf level *) + currentIndex = (currentIndex *_al 2uL) +_al leftOrRight; + startIdx = pow(2u, maxDepthInt-1)-1u; + endIdx = pow(2u, maxDepthInt)-1u; + accessElementTwoD(modelThresholdValue, i, currentIndex, currentSharedThreshold, startIdx, endIdx); + result[i] = currentSharedThreshold[0]; + }; +} + +def void main() +{ + (* Taking inputs as boolean shares because comparison are performed *) + uint64_bl[noOfFeatures] inferenceQuery; + uint64_bl[noOfTrees][noOfNodes] modelFeatureChoice; + (* Threshold value for leaves = inference output from that tree *) + uint64_bl[noOfTrees][noOfNodes] modelThresholdValue; + uint64_bl[noOfTrees] inferenceResult; + uint64_bl finalResult = 0uL; + + (* One party inputs the inference query *) + for i=[0:noOfFeatures] + { + inferenceQuery[i] = 20uL; + }; + + (* Other party inputs the trained model in the form of the feature choice + at each node as well as the threshold value of the feature at that node *) + for i=[0:noOfTrees] + { + for j=[0:noOfNodes] + { + modelFeatureChoice[i][j] = (((noOfNodes))%noOfFeatures)+0uL; + modelThresholdValue[i][j] = (noOfNodes)+351uL; + }; + }; + + infer(modelFeatureChoice, modelThresholdValue, inferenceQuery, inferenceResult); + + (* Take mean of all the values in inference result *) + for i=[0:noOfTrees] + { + finalResult = finalResult +_al inferenceResult[i]; + }; + (* finalResult = finalResult/(noOfTrees+0uL); *) + output(ALL, finalResult) +} diff --git a/EzPC/EzPC/test_suite/random_forest_polish.ezpc b/EzPC/EzPC/test_suite/random_forest_polish.ezpc new file mode 100644 index 0000000000000000000000000000000000000000..6fae1be3adc906c926f33e4847cb220a6312ec1a --- /dev/null +++ b/EzPC/EzPC/test_suite/random_forest_polish.ezpc @@ -0,0 +1,151 @@ +(* Random Forest implementation for inference on Polish companies backruptcy Dataset (on UCI repo) -- Classification *) + +(* Accuracy numbers from python code -- imputation used is Mean because dataset has empty cells at places: +This dataset is divided into 5 parts: 1year, 2 year, 3 year, 4 year and 5 year + +Depth = 12 +Trees = 10 + +1 year: 95.1 +2 year: 91.6 +3 year: 89.7 +4 year: 88.7 +5 year: 92.4 + +Depth = 11 +Trees = 5 + +1 year: 92.1 +2 year: 87.3 +3 year: 86.7 +4 year: 84.9 +5 year: 89.0 + +*) + + + +uint32 noOfFeatures = 64; +(* This depth is the depth in python code + 1 *) +uint32 maxDepth = 11; +int32 maxDepthInt = 11; +uint32 noOfTrees = 5; +uint32 noOfNodes = 2048; (* (2^maxDepth) *) + +(* To get secret shares of an element from array when the element's index is secret shared *) +def void accessElementOneD(uint64_bl[noOfFeatures] arr, uint64_bl idx, uint64_bl[1] result) +{ + result[0] = 0uL; + uint64_pl ctr = 0uL; + for i=[0:noOfFeatures] + { + result[0] = result[0] +_bl (idx == ctr?arr[i]:0uL); + ctr = ctr + 1uL; + }; +} + +def void accessElementTwoD(uint64_bl[noOfTrees][noOfNodes] arr, int64_pl treeId, uint64_al idx, uint64_bl[1] result, uint32_pl start, uint32_pl end) +{ + result[0] = 0uL; + uint64_pl ctr = 0uL; + uint64_bl idxcpy = idx; + for i=[start:end] + { + result[0] = result[0] +_bl (idxcpy == ctr?arr[treeId][i]:0uL); + ctr = ctr + 1uL; + }; +} + +def uint32_pl pow(uint32_pl base, int32_pl power) +{ + uint32_pl result = 1; + for i=[0:power] + { + result = result * base; + }; + return result; +} + +def void infer(uint64_bl[noOfTrees][noOfNodes] modelFeatureChoice, + uint64_bl[noOfTrees][noOfNodes] modelThresholdValue, + uint64_bl[noOfFeatures] query, + uint64_bl[noOfTrees] result) +{ + uint64_al currentIndex = 0uL; + uint64_al leftOrRight = 0uL; + uint64_bl[1] currentSharedFeatureChoice; + uint64_bl[1] currentSharedThreshold; + uint64_bl[1] currentSharedQueryValue; + currentSharedFeatureChoice[0] = 0uL; + currentSharedThreshold[0] = 0uL; + currentSharedQueryValue[0] = 0uL; + uint32_pl startIdx = 0; + uint32_pl endIdx = 0; + + for i=[0:noOfTrees] + { + currentIndex = 0uL; + + (* Root level *) + currentSharedFeatureChoice[0] = modelFeatureChoice[i][0]; + currentSharedThreshold[0] = modelThresholdValue[i][0]; + accessElementOneD(query, currentSharedFeatureChoice[0], currentSharedQueryValue); + leftOrRight = (currentSharedThreshold[0] > currentSharedQueryValue[0])?1L:2L; + + for j=[1:maxDepthInt-1] + { + currentIndex = (currentIndex *_al 2uL) +_al leftOrRight; + startIdx = pow(2u, j)-1u; + endIdx = pow(2u, j+1)-1u; + accessElementTwoD(modelThresholdValue, i, currentIndex, currentSharedThreshold, startIdx, endIdx); + accessElementTwoD(modelFeatureChoice, i, currentIndex, currentSharedFeatureChoice, startIdx, endIdx); + accessElementOneD(query, currentSharedFeatureChoice[0], currentSharedQueryValue); + leftOrRight = (currentSharedThreshold[0] > currentSharedQueryValue[0])?1L:2L; + }; + + (* Leaf level *) + currentIndex = (currentIndex *_al 2uL) +_al leftOrRight; + startIdx = pow(2u, maxDepthInt-1)-1u; + endIdx = pow(2u, maxDepthInt)-1u; + accessElementTwoD(modelThresholdValue, i, currentIndex, currentSharedThreshold, startIdx, endIdx); + result[i] = currentSharedThreshold[0]; + }; +} + +def void main() +{ + (* Taking inputs as boolean shares because comparison are performed *) + uint64_bl[noOfFeatures] inferenceQuery; + uint64_bl[noOfTrees][noOfNodes] modelFeatureChoice; + (* Threshold value for leaves = inference output from that tree *) + uint64_bl[noOfTrees][noOfNodes] modelThresholdValue; + uint64_bl[noOfTrees] inferenceResult; + uint64_bl finalResult = 0uL; + + (* One party inputs the inference query *) + for i=[0:noOfFeatures] + { + inferenceQuery[i] = 20uL; + }; + + (* Other party inputs the trained model in the form of the feature choice + at each node as well as the threshold value of the feature at that node *) + for i=[0:noOfTrees] + { + for j=[0:noOfNodes] + { + modelFeatureChoice[i][j] = (((noOfNodes))%noOfFeatures)+0uL; + modelThresholdValue[i][j] = (noOfNodes)+351uL; + }; + }; + + infer(modelFeatureChoice, modelThresholdValue, inferenceQuery, inferenceResult); + + (* Take mean of all the values in inference result *) + for i=[0:noOfTrees] + { + finalResult = finalResult +_al inferenceResult[i]; + }; + (* finalResult = finalResult/(noOfTrees+0uL); *) + output(ALL, finalResult) +}