-
Notifications
You must be signed in to change notification settings - Fork 0
/
benchmark_plot.py
155 lines (111 loc) · 6.76 KB
/
benchmark_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#GMIT 2020
#Computational Thinking with Algorithms module
#Computer Science - Data Analytics course
#Noa P Prada Schnor G00364074
#Application used to benchmark 5 sorting algorithms: insertion sort, merge sort, counting sort, quicksort and timsort. Application's files:
#sorting_algorithms.py - contains the 5 sorting functions
#randomArrays.py - contains the functions that creates random arrays, and an array of arrays of different sizes
#benchmark_plot.py - run this to get the results: table and plot
#########################################################################################################
#Import functions and variables from other .py files
from sorting_algorithms import insertionSort, mergeSort,countingSort, quickSort, timSort
from randomArrays import list_arr, arr20, arr50, arr100, arr250, arr500, arr750, arr1000, arr1250, arr2500, arr3750, arr5000, arr6250, arr7500, arr8750, arr10000, size_n
import time #to ge the elapsed time (run time of each sorting function)
import copy # deepcopy of arrays
import numpy as np #convert to no arrapy
import pandas as pd #create a dataframe with all results (running time)
#Libraries to plot
from matplotlib import pyplot as plt
import seaborn as sns
sns.set() #setting seaborn default style
##################################################################################################################
#Benchmark
def timerRun(function, array): # function to get the avg run time. The arg array is a list of 10 different arrays with same number of elements
arr = copy.deepcopy(array) #avoid the original array to be sorted
results = [] # will store the running time of the 10 times
i = 0 #set i to zero. It will be used in the while loop.
while i < 10: #will run 10 times
start_time = time.time() #log the start time in seconds
function(arr[i]) # each time the function is called a different array is used as an arg
end_time = time.time() #log the end time in seconds
results.append(end_time-start_time) #add into the results array the time in seconds that the function run
i += 1 #increase i by 1
r = sum(results) # sum the results of 10 runs
l = len(results) # get the number of times that it run
# final result = total running time divided by number of times it run, and multiplied by 1000 (to convert the result to milliseconds)
avg_run = (r/l)*1000
# return the average of run time with 3 decimal places
return (round(avg_run, 3))
##################################################
#get the average running time of sorting algorithms to sort random arrays of different sizes
def sortRun(sort_function):
avg_time = [] #empty list to be populated with avg running time of the sorting function with different input sizes
global list_arr #list that contains 14 lists with different size and each list contains 10 nested arrays with same number of elements
new_list = copy.deepcopy(list_arr) #independent copy of list_arr. So, each time sortRun is called, the list_arr remains the same and an independent copy is make
index = 0 #index starts at 0 and will be increased to 14, then the while loop will stop.
while index < 15: #there are 15 different input sizes (check randomArrays.py for more info)
avg_run_test = timerRun(sort_function, new_list[index]) #get the avg run time of 10 times the function run with a specific input size
avg_time.append(avg_run_test) #populate the avg_time array with the results
index += 1 #increase index by 1
return avg_time #return the results after all the tests were done
##################################################
#Create the list with running time for each sorting algorithm
insertionSort_time = sortRun(insertionSort)
mergeSort_time = sortRun(mergeSort)
countingSort_time = sortRun(countingSort)
quickSort_time = sortRun(quickSort)
timSort_time = sortRun(timSort)
##################################################
#Console output containing the results
def dataframeResult():
sort_name = ["Insertion Sort", "Merge Sort", "Counting Sort","Quicksort", "Timsort"] #will be used as the index of the dataframe
input_size = ["20","50","100","250","500","750","1000","1250","2500","3750","5000","6250","7500","8750","10000"] #will be used as the header of the df
#Create the arrays with the avg running time
insertionSort_t = np.array(insertionSort_time)
mergeSort_t = np.array(mergeSort_time)
countingSort_t = np.array(countingSort_time)
quickSort_t = np.array(quickSort_time)
timSort_t = np.array(timSort_time)
#Tuple containing all the avg running time
time_result = (insertionSort_t,mergeSort_t,countingSort_t, quickSort_t,timSort_t)
#Append multiple arrays into an array of arrays
time_result = np.vstack(time_result)
#Create dataframe to get a neat console output containing the average time vs input size for all the sorting algorithms tested
time_output = pd.DataFrame(time_result, sort_name, input_size)
#Set options on how to display the dataframe
pd.set_option('display.max_rows', 20) #max number of rows to be displayed: 20
pd.set_option('display.max_columns', 20) #max number of columns to be displayed: 20
pd.set_option('display.width', 150)
return time_output
#################################################################
def plotResult():
#Graph of the test showing array size and average time of 10 runs per array size
#fig, ax = plt.subplots(nrows=1,ncols=2,figsize=(8,4))
#plt.style.use("dark_background")
plt.figure().canvas.manager.set_window_title(
"Sorting Algorithms - Time Complexity")
#axs.subplots_adjust(0.125, 0.1, 0.9, 0.9, 0.2, 0.2)
x = size_n
plot1 = plt.plot(x, insertionSort_time, '-g', marker='o',
label='Insertion sort') # insertionSort is solid green
plot2, = plt.plot(x, mergeSort_time, '-y', marker='o',
label='Merge sort') # mergeSort is solid yellow
plot3, = plt.plot(x, countingSort_time, '-b', marker='o',
label='Courting sort') # countingSort is solid blue
plot4, = plt.plot(x, quickSort_time, '-k', marker='o',
label='Quicksort') # quickSort is solid black
plot5, = plt.plot(x, timSort_time, '-m', marker='o',
label='Timsort') # timSort is solid magenta
plt.ylabel = 'Running time (in milliseconds)'
#ax.set_title = ('Complexity plot')
plt.legend()
plt.grid() # show grid
#plt.savefig("sortPerformance.png") #save the plot
plt.show()
####################################################################################################################
if __name__ == "__main__":
#show dataframe containing the results of running time with different input sizes
dataframe = dataframeResult()
print(dataframe)
#show the Complexity plot of chosen sorting algorithms
plotResult() #one plot containing the running time of all 5 sorting alg