-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathDimensionality_Reduction_Latent_Space.py
335 lines (281 loc) · 17.7 KB
/
Dimensionality_Reduction_Latent_Space.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
import numpy as np
import pacmap # will need to change numba version: pip install numba==0.53
import matplotlib.pyplot as plt
import matplotlib
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from smoothness_testing import smoothness
import cv2
from matplotlib.collections import LineCollection
########################################################################################################################
# Latent Feature Cluster for Training Data using PaCMAP
def PaCMAP_reduction(latent_points, latent_dimensionality, random_state=1):
    """Project latent points onto two dimensions with PaCMAP.

    Args:
        latent_points: Data handed to ``PaCMAP.fit_transform``.
        latent_dimensionality: Value shown in the plot title only.
        random_state: Seed forwarded to the PaCMAP constructor.

    Returns:
        Tuple ``(x, y, title, embedding)``: the two projected coordinate
        columns, a title string, and the fitted PaCMAP instance.
    """
    reducer = pacmap.PaCMAP(n_components=2, n_neighbors=None, MN_ratio=0.5, FP_ratio=2.0,
                            random_state=random_state)
    # Row i of the projection corresponds to row i of the input data.
    projected = reducer.fit_transform(latent_points, init="pca")
    title = f"PaCMAP with Predicted Points\nLatent Space Dimensionality: {latent_dimensionality!s}"
    return projected[:, 0], projected[:, 1], title, reducer
########################################################################################################################
# Latent Feature Cluster for Training Data using PCA and Predicted Latent Points
def PCA_reduction(latent_points, latent_dimensionality):
    """Project latent points onto their first two principal components.

    Args:
        latent_points: Data handed to ``PCA.fit_transform``.
        latent_dimensionality: Value shown in the plot title only.

    Returns:
        Tuple ``(x, y, title, embedding)``: the two projected coordinate
        columns, a title string, and the fitted PCA instance.
    """
    pca = PCA(n_components=2, random_state=0)
    projected = pca.fit_transform(latent_points)
    # The title previously contained a stray " + " left over from string
    # concatenation; it now follows the same layout as the PaCMAP title.
    title = "PCA with Predicted Points\nLatent Space Dimensionality: " + str(latent_dimensionality)
    return projected[:, 0], projected[:, 1], title, pca
########################################################################################################################
# Latent Feature Cluster for Training Data using T-SNE
def TSNE_reduction(latent_points, latent_dimensionality, perplexity=30, learning_rate=20):
    """Project latent points onto two dimensions with t-SNE.

    Perplexity (suggested range 5-50) balances the attention t-SNE gives to
    local and global aspects of the data; it is roughly a guess of the number
    of close neighbours each point has, and denser data calls for a higher
    value. Learning rate (suggested range 10-1000): too high and the data may
    look like a 'ball' with points roughly equidistant from their neighbours;
    too low and most points may look compressed into a dense cloud with few
    outliers.

    Args:
        latent_points: Data handed to ``TSNE.fit_transform``. With many
            samples, pass only the first few hundred so the fit stays fast.
        latent_dimensionality: Value shown in the plot title only.
        perplexity: Forwarded to ``TSNE``.
        learning_rate: Forwarded to ``TSNE``.

    Returns:
        Tuple ``(x, y, title, embedding)``: the two projected coordinate
        columns, a title string, and the fitted TSNE instance.

    NOTE(review): plot_interpolation_smoothness calls ``.transform`` on the
    returned embedding; scikit-learn's TSNE is not expected to provide that
    method - confirm before combining the two.
    """
    tsne = TSNE(n_components=2, random_state=0, perplexity=perplexity,
                learning_rate=learning_rate)
    projected = tsne.fit_transform(latent_points)
    title = (f"T-SNE of Data\nPerplexity: {perplexity!s}\nLearning Rate: {learning_rate!s}"
             f"\nLatent Space Dimensionality: {latent_dimensionality!s}")
    return projected[:, 0], projected[:, 1], title, tsne
########################################################################################################################
def plot_dimensionality_reduction(x, y, label_set, title):
    """Scatter-plot a 2-D embedding, coloured by label.

    Floating-point labels are drawn as one scatter with a colour bar; any
    other label type is treated as categorical and drawn as one series per
    distinct label with a legend.

    Args:
        x: Horizontal coordinates, one per point.
        y: Vertical coordinates, one per point.
        label_set: One label per point (assumed 1-D).
        title: Figure title.
    """
    x = np.array(x)
    y = np.array(y)
    labels = np.array(label_set)

    plt.title(title)
    # Inspect the dtype of the whole array so plain Python lists, empty input
    # and every float width (not only float64) are handled.
    if labels.dtype.kind == 'f':
        plt.scatter(x, y, c=labels)
        plt.colorbar()
        print("using scatter")
    else:
        # dict.fromkeys keeps first-appearance order, so the legend order and
        # series colours are reproducible (set() order is not for strings).
        for label in dict.fromkeys(labels.tolist()):
            # Compare against the label itself; comparing against str(label)
            # matched nothing when the labels were not strings.
            mask = labels == label
            plt.plot(x[mask], y[mask], marker='o', linestyle='none', label=label)
        plt.legend(numpoints=1)
    plt.show()
    plt.close()
########################################################################################################################
# Scatter with images instead of points
def imscatter(x, y, ax, imageData, image_size, zoom):
    """Draw one thumbnail per (x, y) position on ``ax`` in place of a marker.

    Args:
        x: Horizontal data coordinates, one per image.
        y: Vertical data coordinates, one per image.
        ax: Matplotlib axes that receives the thumbnails.
        imageData: Indexable collection; ``imageData[i]`` is multiplied by 255,
            cast to uint8 and reshaped to ``image_size`` x ``image_size``
            (presumably intensities in [0, 1] - confirm with callers).
        image_size: Side length used for the reshape.
        zoom: Scale factor passed to ``OffsetImage``.
    """
    for idx in range(len(x)):
        anchor = (x[idx], y[idx])
        # Scale to 8-bit and restore the square layout.
        pixels = (imageData[idx] * 255.).astype(np.uint8).reshape([image_size, image_size])
        # Expand the single grayscale channel to three channels for display.
        thumbnail = OffsetImage(cv2.cvtColor(pixels, cv2.COLOR_GRAY2RGB), zoom=zoom)
        ax.add_artist(AnnotationBbox(thumbnail, anchor, xycoords='data', frameon=False))
    # Artists added with add_artist do not extend the data limits on their own.
    ax.update_datalim(np.column_stack([x, y]))
    ax.autoscale()
# Plot images in latent space with respective reduction method
def Latent_Image_Proj(image_arrays, image_size, train_latent_points, latent_dimensionality, embedding):
    """Show each image at its position in a precomputed 2-D embedding.

    Args:
        image_arrays: Images, one per embedded point; reshaped to
            ``(-1, image_size, image_size)`` before plotting.
        image_size: Side length of each square image.
        train_latent_points: Unused; kept for interface compatibility.
        latent_dimensionality: Unused; kept for interface compatibility.
        embedding: Tuple ``(x, y, title, reduction_embedding)`` as returned by
            the ``*_reduction`` functions; only x, y and title are used.
    """
    x, y, title, _ = embedding
    images = np.array(image_arrays).reshape(-1, image_size, image_size)
    # Pad only the two spatial axes. A scalar pad width also pads the batch
    # axis, which prepends a blank image and shifts every image by one
    # position relative to its (x, y) coordinates.
    bordered = np.pad(images, ((0, 0), (1, 1), (1, 1)), mode='constant')
    fig, ax = plt.subplots()
    imscatter(x, y, imageData=bordered, ax=ax, zoom=0.6, image_size=image_size + 2)
    plt.title(title)
    plt.show()
########################################################################################################################
def plot_interpolation_smoothness(original_data_labels, interpolated_latent_points, embedding, image_arrays,
                                  image_size, number_of_interpolations, markersize=8,
                                  marker_color='black', mesh_predicted_interps=None,
                                  plot_points=True, color_bar_min=85, color_bar_max=100, title="",
                                  plot_row_segments=False, plot_col_segments=False, plot_lines=False):
    """Overlay interpolated latent points on a 2-D embedding of the training images.

    The interpolated points are projected with the already-fitted reduction and
    drawn on top of the training-image thumbnails. When ``mesh_predicted_interps``
    is given, a separate figure with per-row / per-column smoothness is shown
    first, the training thumbnails are replaced by gray boxes, the four corner
    images of the mesh are drawn, and optional line segments coloured by
    smoothness connect the ends of each mesh row and/or column.

    Args:
        original_data_labels: One label per training image.
        interpolated_latent_points: Latent vectors of the interpolated points.
        embedding: Tuple ``(x, y, title, reduction_embedding)`` as returned by
            the ``*_reduction`` functions. ``reduction_embedding`` must provide
            ``transform``.
        image_arrays: Training images, one per label; reshaped to
            ``(-1, image_size, image_size)``.
        image_size: Side length of each square image.
        number_of_interpolations: Mesh side length; the mesh images are
            reshaped to ``(n, n, image_size, image_size)``.
        markersize: Marker size of the predicted points.
        marker_color: Marker colour of the predicted points.
        mesh_predicted_interps: Optional predicted images of the mesh.
        plot_points: Draw the predicted points as markers.
        color_bar_min: Lower colour-bar bound, in percent.
        color_bar_max: Upper colour-bar bound, in percent.
        title: Figure title; a segment description is appended when segments
            are drawn.
        plot_row_segments: Draw one segment per mesh row (mesh only).
        plot_col_segments: Draw one segment per mesh column (mesh only).
        plot_lines: Connect the predicted points with a red line.
    """
    predicted_label = "Predicted Points"

    # Project the interpolated points with the fitted reduction and append
    # them after the training coordinates.
    x1, y1, _, reduction_embedding = embedding
    projected = reduction_embedding.transform(interpolated_latent_points)
    x1 = np.append(x1, projected[:, 0])
    y1 = np.append(y1, projected[:, 1])
    num_interpolated = len(interpolated_latent_points)

    has_mesh = mesh_predicted_interps is not None
    if has_mesh:
        # Reshape so the mesh images can be indexed by (row, column).
        mesh_predicted_interps = np.reshape(
            mesh_predicted_interps,
            (number_of_interpolations, number_of_interpolations, image_size, image_size))
        n_rows, n_cols = mesh_predicted_interps.shape[:2]

        # smoothness(...)[0] is taken to be the average smoothness in percent
        # (it is plotted on a "Smoothness (%)" axis and divided by 100 below).
        smoothness_line_row = [smoothness(mesh_predicted_interps[row, :])[0] for row in range(n_rows)]
        plt.scatter(list(range(n_rows)), smoothness_line_row, label="Row Smoothness")
        smoothness_line_col = [smoothness(mesh_predicted_interps[:, col])[0] for col in range(n_cols)]
        plt.scatter(list(range(n_cols)), smoothness_line_col, label="Column Smoothness")
        plt.legend(fontsize=20)
        plt.xlabel("Rows/Columns", fontsize=20)
        plt.ylabel("Smoothness (%)", fontsize=20)
        plt.title("Smoothness over mesh ", fontsize=16)
        plt.xticks(fontsize=14)
        plt.yticks(fontsize=14)
        plt.ylim([60, 100])
        plt.show()

    # One label per plotted point: training labels followed by the predicted
    # marker label, built in a single concatenation.
    combined_label = np.append(original_data_labels, np.full(num_interpolated, predicted_label))

    # Work on a copy so the caller's images are never modified.
    training_images = np.array(image_arrays).reshape(-1, image_size, image_size)
    if has_mesh:
        training_images[training_images < 2] = 0.5  # Replaces the training images with gray boxes
    # Black one-pixel frame around each image. Only the spatial axes are
    # padded; padding the batch axis as well would prepend a blank image and
    # shift every image by one relative to its coordinates.
    training_images = np.pad(training_images, ((0, 0), (1, 1), (1, 1)), mode='constant')

    fig, ax = plt.subplots()
    # dict.fromkeys keeps first-appearance order, giving a reproducible draw order.
    for label in dict.fromkeys(combined_label.tolist()):
        cond = np.where(combined_label == label)
        if label != predicted_label:
            # Plot the training images
            imscatter(x1[cond], y1[cond], imageData=training_images[cond], ax=ax, zoom=0.6,
                      image_size=image_size + 2)
        else:
            if plot_points:
                ax.plot(x1[cond], y1[cond], marker='o', c=marker_color, markersize=markersize,
                        linestyle='none', label=label, zorder=5)
            if plot_lines:
                ax.plot(x1[cond], y1[cond], 'ro-', zorder=10)

    line_segment_title = ""
    if has_mesh:
        # The interpolated points are the last entries; lay them out on the mesh grid.
        grid_shape = (n_rows, n_cols)
        interpolation_cords_x = np.reshape(x1[-num_interpolated:], grid_shape)
        interpolation_cords_y = np.reshape(y1[-num_interpolated:], grid_shape)

        # One segment per row / column, joining its first and last mesh point.
        row_lines = [[(interpolation_cords_x[row, 0], interpolation_cords_y[row, 0]),
                      (interpolation_cords_x[row, -1], interpolation_cords_y[row, -1])]
                     for row in range(n_rows)]
        col_lines = [[(interpolation_cords_x[0, col], interpolation_cords_y[0, col]),
                      (interpolation_cords_x[-1, col], interpolation_cords_y[-1, col])]
                     for col in range(n_cols)]

        # Percent -> fraction, the scale the segment plotters expect.
        row_fraction = np.array(smoothness_line_row) / 100
        col_fraction = np.array(smoothness_line_col) / 100

        if plot_row_segments and plot_col_segments:
            # Forward the colour-bar bounds here too, as the single-set
            # branches below already do.
            plot_line_segments_rows_columns(row_lines, col_lines, row_fraction, col_fraction, ax,
                                            "Row", "Column",
                                            color_bar_min=color_bar_min, color_bar_max=color_bar_max)
            line_segment_title = ": Smoothness of Rows and Columns Represented by Line Segments"
        elif plot_col_segments:
            plot_line_segments(col_lines, col_fraction, ax, color_bar_min=color_bar_min,
                               color_bar_max=color_bar_max)
            line_segment_title = ": Smoothness Columns Represented by Line Segments"
        elif plot_row_segments:
            plot_line_segments(row_lines, row_fraction, ax, color_bar_min=color_bar_min,
                               color_bar_max=color_bar_max)
            line_segment_title = ": Smoothness of Rows Represented by Line Segments"

        # Show the predicted images at the four corners of the mesh.
        corner_points = [(0, 0), (0, -1), (-1, 0), (-1, -1)]
        # Each corner is a single 2-D image, so a scalar pad width is correct here.
        images_corners = [np.pad(mesh_predicted_interps[point], 1, mode='constant')
                          for point in corner_points]
        x_corners = [interpolation_cords_x[point] for point in corner_points]
        y_corners = [interpolation_cords_y[point] for point in corner_points]
        imscatter(x_corners, y_corners, imageData=images_corners, ax=ax, zoom=1.5,
                  image_size=image_size + 2)

    # Predicted points, line segments, training images and corner images all
    # share this single figure.
    plt.legend(numpoints=1, fontsize=20)
    plt.title(title + line_segment_title)
    plt.show()
########################################################################################################################
def plot_line_segments(segments, smoothness_of_segment, ax, color_bar_min=85, color_bar_max=100):
    """Draw line segments coloured by smoothness and add a labelled colour bar.

    Args:
        segments: List of line coordinates, one ``[(x0, y0), (x1, y1)]`` per segment.
        smoothness_of_segment: Smoothness of the images over each segment, as
            a fraction (the tick labels multiply it by 100 to show percent).
        ax: The predefined axes the segments are added to.
        color_bar_min: Percent value mapped to the bottom of the colour scale.
        color_bar_max: Percent value mapped to the top of the colour scale.
    """
    cmap = matplotlib.colormaps['viridis']  # Maps a number in 0-1 to a colour
    # Rescales fractions so color_bar_min -> 0 and color_bar_max -> 1.
    norm = matplotlib.colors.Normalize(vmin=color_bar_min / 100, vmax=color_bar_max / 100)

    lowest = min(smoothness_of_segment)
    highest = max(smoothness_of_segment)

    # Plot the line segments
    line_collection = LineCollection(segments, colors=cmap(norm(smoothness_of_segment)),
                                     linestyles='solid', zorder=20, linewidths=4)
    ax.add_collection(line_collection)

    # Tick positions are on the normalized 0-1 scale; the labels below
    # translate them back to percent.
    fig = plt.gcf()
    cbar = fig.colorbar(line_collection, ticks=[0, norm(lowest), norm(highest), 1])
    cbar.set_label('Smoothness (%)', fontsize=20)
    cbar.ax.set_yticklabels(
        [str(color_bar_min),
         str(round(lowest * 100, 2)) + " - Min",
         str(round(highest * 100, 2)) + " - Max",
         # Top label follows color_bar_max instead of a hard-coded '100'.
         str(color_bar_max)], fontsize=16)
    ax.autoscale()
########################################################################################################################
def plot_line_segments_rows_columns(segments1, segments2, smoothness_of_segment1, smoothness_of_segment2, ax,
                                    name_segment_1="Segment Set 1", name_segment_2="Segment Set 2",
                                    color_bar_min=85, color_bar_max=100):
    """Draw two sets of line segments on one shared smoothness colour scale.

    Args:
        segments1: List of line coordinates for the first set.
        segments2: List of line coordinates for the second set.
        smoothness_of_segment1: Smoothness per segment of the first set, as a
            fraction (the tick labels multiply it by 100 to show percent).
        smoothness_of_segment2: Same, for the second set.
        ax: The predefined axes the segments are added to.
        name_segment_1: Name of the first set, used in the colour-bar labels.
        name_segment_2: Name of the second set, used in the colour-bar labels.
        color_bar_min: Percent value mapped to the bottom of the colour scale.
        color_bar_max: Percent value mapped to the top of the colour scale.
    """
    cmap = matplotlib.colormaps['viridis']  # Maps a number in 0-1 to a colour
    # Rescales fractions so color_bar_min -> 0 and color_bar_max -> 1.
    norm = matplotlib.colors.Normalize(vmin=color_bar_min / 100, vmax=color_bar_max / 100)

    low1, high1 = min(smoothness_of_segment1), max(smoothness_of_segment1)
    low2, high2 = min(smoothness_of_segment2), max(smoothness_of_segment2)

    # Combine both sets so they are drawn as a single collection.
    collective_segments = np.append(segments1, segments2, axis=0)
    collective_smoothness = np.append(smoothness_of_segment1, smoothness_of_segment2)

    # Plot the line segments
    line_collection = LineCollection(collective_segments, colors=cmap(norm(collective_smoothness)),
                                     linestyles='solid', zorder=20, linewidths=4)
    ax.add_collection(line_collection)

    # Tick positions are on the normalized 0-1 scale; the labels below
    # translate them back to percent.
    fig = plt.gcf()
    cbar = fig.colorbar(line_collection,
                        ticks=[0, norm(low1), norm(high1), norm(low2), norm(high2), 1])
    cbar.set_label('Smoothness (%)', fontsize=20)
    cbar.ax.set_yticklabels(
        [str(color_bar_min),
         str(round(low1 * 100, 2)) + " - Min of " + name_segment_1,
         str(round(high1 * 100, 2)) + " - Max of " + name_segment_1,
         str(round(low2 * 100, 2)) + " - Min of " + name_segment_2,
         str(round(high2 * 100, 2)) + " - Max of " + name_segment_2,
         # Top label follows color_bar_max instead of a hard-coded '100'.
         str(color_bar_max)], fontsize=16)
    ax.autoscale()