-
Notifications
You must be signed in to change notification settings - Fork 1
/
dataPro.py
97 lines (78 loc) · 3.67 KB
/
dataPro.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# coding=utf-8
import os
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
class DataPro:
    """Load per-view feature arrays and labels, and build train/validation index splits.

    ``FLAGS`` is any object exposing the attributes read by the methods below:
    ``path4Data`` (getData), ``path4Label`` (getLabels4Classification),
    ``num4Data`` and ``testSize`` (splitData2TrainAndVal).
    """

    def __init__(self, FLAGS):
        """Store ``FLAGS`` and initialise the feature-type / cell-line tables and counters."""
        self.FLAGS = FLAGS
        self.featureTypes = np.array(["DrugFingerPrint", "DrugPhy", "L1000"])
        self.cls = np.array(["A375", "HA1E", "HT29", "MCF7", "PC3"])
        self.num4FeatureTypes = self.featureTypes.shape[0]
        # Number of feature arrays loaded by getData(); each L1000 cell line counts once.
        self.num4Features4Instance = 0
        # Second-axis sizes of the loaded arrays, filled in by getData().
        self.num4Features4DrugFingerPrint = 0
        self.num4Features4DrugPhy = 0
        self.num4Features4L1000 = 0

    # ===== Get feature data =====
    def getData(self):
        """Load every feature array from ``FLAGS.path4Data`` and record its width.

        Sets ``data4DrugFingerPrint``, ``data4DrugPhy`` and one ``data4L1000<cellLine>``
        attribute per entry of ``self.cls`` (``data4L1000A375`` ... ``data4L1000PC3`` with
        the default table), together with the matching ``num4Features4*`` counters.
        Returns nothing.
        """
        path4Data = self.FLAGS.path4Data
        # Start from zero so that calling getData() again does not double the count.
        self.num4Features4Instance = 0
        for featureType in self.featureTypes:
            if featureType == "DrugFingerPrint":
                self.data4DrugFingerPrint = np.load(
                    os.path.join(path4Data, "DrugFingerPrint", "DrugFingerPrint_6052SAMPLE.npy"))
                self.num4Features4DrugFingerPrint = self.data4DrugFingerPrint.shape[1]
                self.num4Features4Instance += 1
            elif featureType == "DrugPhy":
                self.data4DrugPhy = np.load(
                    os.path.join(path4Data, "DrugPhy", "DrugPhy_6052SAMPLE.npy"))
                self.num4Features4DrugPhy = self.data4DrugPhy.shape[1]
                self.num4Features4Instance += 1
            elif featureType == "L1000":
                dir4L1000 = os.path.join(path4Data, "L1000")
                # One file per cell line, driven by self.cls so the loaded arrays stay
                # in step with the cell-line count used by getLabels4Discriminator().
                for cellLine in self.cls:
                    fileName = "L1000_{}_6052SAMPLE.npy".format(cellLine)
                    setattr(self, "data4L1000{}".format(cellLine),
                            np.load(os.path.join(dir4L1000, fileName)))
                    self.num4Features4Instance += 1
                # Width is taken from the first cell line (A375 with the default table).
                firstL1000 = getattr(self, "data4L1000{}".format(self.cls[0]))
                self.num4Features4L1000 = firstL1000.shape[1]

    # ===== Get feature types as label for discriminator =====
    def getLabels4Discriminator(self, num4BatchInIteration):
        """Build one-hot feature-type labels for one iteration.

        For each entry of ``self.featureTypes`` (in order) a block of rows is produced
        whose column ``ind`` is 1 and whose other columns are 0. "DrugFingerPrint" and
        "DrugPhy" contribute ``num4BatchInIteration`` rows each; "L1000" contributes
        ``num4BatchInIteration * len(self.cls)`` rows. Unrecognised feature types
        contribute no rows.

        The stacked array is stored in ``self.label4Discriminator`` and returned; its
        shape is ``(total_rows, self.num4FeatureTypes)``.
        """
        # How many batches' worth of rows each recognised feature type contributes.
        batches4FeatureType = {
            "DrugFingerPrint": 1,
            "DrugPhy": 1,
            "L1000": self.cls.shape[0],
        }
        blocks = []
        for ind, featureType in enumerate(self.featureTypes):
            num4Batches = batches4FeatureType.get(str(featureType))
            if num4Batches is None:
                continue
            tempLabel = np.zeros(
                [num4BatchInIteration * num4Batches, self.num4FeatureTypes])
            tempLabel[:, ind] = 1
            blocks.append(tempLabel)
        # Built from a local list so a previous call's result can never leak in.
        if blocks:
            self.label4Discriminator = np.vstack(blocks)
        else:
            self.label4Discriminator = np.zeros([0, self.num4FeatureTypes])
        return self.label4Discriminator

    # ===== Get label for classification =====
    def getLabels4Classification(self):
        """Load ``Label_6052SAMPLE.npy`` from ``FLAGS.path4Label`` into ``self.label4Classification``."""
        path4LabelNPY = os.path.join(self.FLAGS.path4Label, "Label_6052SAMPLE.npy")
        self.label4Classification = np.load(path4LabelNPY)

    # ===== Split data into train and validation set =====
    def splitData2TrainAndVal(self):
        """Split the sample indices ``0 .. FLAGS.num4Data - 1`` into train and test parts.

        Indices are split instead of the data itself because there are several feature
        arrays to slice. The same index array is passed as both inputs, so
        ``xTrainIndex``/``yTrainIndex`` (and ``xTestIndex``/``yTestIndex``) hold the
        same values.

        Returns:
            (xTrainIndex, xTestIndex, yTrainIndex, yTestIndex)
        """
        index = np.arange(self.FLAGS.num4Data)
        xTrainIndex, xTestIndex, yTrainIndex, yTestIndex = train_test_split(
            index,
            index,
            test_size=self.FLAGS.testSize,
            random_state=24)
        return xTrainIndex, xTestIndex, yTrainIndex, yTestIndex