updated documentations and added default values to the functions

xinglab-ai · Jul 18, 2023 · 2967d94 · 2967d94
1 parent cc511fe
commit 2967d94
Show file tree

Hide file tree

Showing 10 changed files with 86 additions and 13 deletions.
diff --git a/.idea/.gitignore b/.idea/.gitignore
diff --git a/.idea/aws.xml b/.idea/aws.xml
diff --git a/.idea/genomap.iml b/.idea/genomap.iml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/.idea/modules.xml b/.idea/modules.xml
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
diff --git a/README.md b/README.md
@@ -196,8 +196,8 @@ y = np.squeeze(dx['classLabel'])
 dx = sio.loadmat('batchLabel.mat')
 ybatch = np.squeeze(dx['batchLabel'])
 
-# Apply genoMOI
-resVis=gp.genoMOI(data, data2, data3, data4, data5, colNum=44, rowNum=44)
+# Apply genoMOI with genomap size of 44x44 and dimension of 32 for the returned integrated data
+resVis=gp.genoMOI(data, data2, data3, data4, data5, colNum=44, rowNum=44, n_dim=32)
 
 # Visualize the integrated data using UMAP
 embedding = umap.UMAP(n_neighbors=30,min_dist=0.3,n_epochs=200).fit_transform(resVis)
@@ -227,6 +227,7 @@ data=dx['X']
 # Load data labels
 label = pd.read_csv('groundTruth_divseq.csv',header=None)
 # Load gene names corresponding to the columns of the data
+# Here we create artificial gene names (Gene_1, Gene_2, ...). You can load your own gene names instead
 gene_names = ['Gene_' + str(i) for i in range(1, data.shape[1]+1)]
 gene_names=np.array(gene_names)
 

diff --git a/genomap/genoMOI/genoMOI.py b/genomap/genoMOI/genoMOI.py
@@ -14,11 +14,12 @@
 from genomap.utils.utils_MOI import * 
 from genomap.utils.util_Sig import select_n_features
 
-def genoMOI(*arrays,n_clusters=None, colNum, rowNum):  
+def genoMOI(*arrays, n_clusters=None, n_dim=32, colNum=32, rowNum=32):
 
-# arrays: number of arrays such as array1,array2
+# arrays: a number of arrays such as array1, array2 from different sources
 # n_clusters: number of data classes
-# colNum and rowNum: column are rwo number of genomaps
+# n_dim: number of dimensions of the returned integrated data
+# colNum and rowNum: column and row number of genomaps
 #
 # Pre-align data with bbknn
     batch_corrected_data=apply_bbknn_and_return_batch_corrected(*arrays)
@@ -34,12 +35,12 @@ def genoMOI(*arrays,n_clusters=None, colNum, rowNum):
         cluster_labels = adata.obs['louvain']
         n_clusters = len(np.unique(cluster_labels))        
 
-    resVis=extract_genoVis_features(dataDX,n_clusters=n_clusters, colNum=colNum,rowNum=rowNum)
+    resVis=extract_genoVis_features(dataDX, n_clusters=n_clusters, n_dim=n_dim, colNum=colNum,rowNum=rowNum)
     return resVis
 
 
-def extract_genoVis_features(data,n_clusters=20, colNum=32,rowNum=32,batch_size=64,verbose=1,
-                    pretrain_epochs=100,maxiter=300):
+def extract_genoVis_features(data,n_clusters=20, n_dim=32, colNum=32, rowNum=32, batch_size=64, verbose=1,
+                    pretrain_epochs=100, maxiter=300):
 # rowNum and colNum are the row and column numbers of constructed genomaps
 # n_clusters: number of data classes in the data
 # batch_size: number of samples in each mini batch while training the deep neural network
@@ -56,7 +57,7 @@ def extract_genoVis_features(data,n_clusters=20, colNum=32,rowNum=32,batch_size=
 
 # Deep learning-based dimensionality reduction and clustering
     optimizer = Adam()    
-    model = ConvIDEC(input_shape=genoMaps.shape[1:], filters=[32, 64, 128, 32], n_clusters=n_clusters)
+    model = ConvIDEC(input_shape=genoMaps.shape[1:], filters=[32, 64, 128, n_dim], n_clusters=n_clusters)
     model.compile(optimizer=optimizer, loss=['kld', 'mse'], loss_weights=[0.1, 1.0])
     pretrain_optimizer ='adam'
     update_interval=50

diff --git a/genomap/genoSig/genoSig.py b/genomap/genoSig/genoSig.py
@@ -79,6 +79,7 @@ def rgb2gray(image):
 import pandas as pd
 
 def arrays_to_dataframe(arrays, strings):
+    # converts numpy arrays to a pandas DataFrame
     # Check if the number of arrays is even
     if len(arrays) % 2 != 0:
         raise ValueError("The number of arrays should be even.")
@@ -105,6 +106,25 @@ def arrays_to_dataframe(arrays, strings):
 from sklearn.preprocessing import LabelEncoder
 
 def genoSig(genoMaps,T,label,userPD,gene_names, epochs=100):
+
+    """
+    Returns the gene names and their importance scores (in the range of 0 to 255) for the specified data classes
+
+    Parameters
+    ----------
+    genoMaps : ndarray, shape (cellNum, rowNum, colNum, 1)
+    T: numpy array, shape (geneNum, geneNum)
+        transfer function describing the 1D-to-2D transformation.
+    label : numpy array,
+         cell labels of the data
+    userPD : numpy array,
+         the classes for which gene signature should be computed
+
+    Returns
+    -------
+    result : pandas DataFrame containing the gene names and their importance scores in different classes
+    """
+
     genoMaps_3d = np.repeat(genoMaps, 3, axis=-1)
 
     # first, convert the strings to integer labels
@@ -117,14 +137,10 @@ def genoSig(genoMaps,T,label,userPD,gene_names, epochs=100):
         lc = np.append(lc, label_encoded[first_occurrence[0]])
 
     lc = np.array(lc)
-
     n_clusters = len(np.unique(label))
     y_train = to_categorical(label_encoded)
-    # meanI=compute_genoSig(X_train,y_train, [y_train[0],y_train[1]])
     meanI = compute_genoSig(genoMaps_3d, label_encoded, lc, epochs=epochs)
 
-
-
     result = pd.DataFrame()
 
     for ii in range(0, len(meanI)):