Skip to content

Commit

Permalink
updated documentations and added default values to the functions
Browse files Browse the repository at this point in the history
  • Loading branch information
tauhidstanford committed Jul 18, 2023
1 parent cc511fe commit 2967d94
Show file tree
Hide file tree
Showing 10 changed files with 86 additions and 13 deletions.
8 changes: 8 additions & 0 deletions .idea/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions .idea/aws.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions .idea/genomap.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/inspectionProfiles/profiles_settings.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 6 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ y = np.squeeze(dx['classLabel'])
dx = sio.loadmat('batchLabel.mat')
ybatch = np.squeeze(dx['batchLabel'])

# Apply genoMOI
resVis=gp.genoMOI(data, data2, data3, data4, data5, colNum=44, rowNum=44)
# Apply genoMOI with genomap size of 44x44 and dimension of 32 for the returned integrated data
resVis=gp.genoMOI(data, data2, data3, data4, data5, colNum=44, rowNum=44, n_dim=32)

# Visualize the integrated data using UMAP
embedding = umap.UMAP(n_neighbors=30,min_dist=0.3,n_epochs=200).fit_transform(resVis)
Expand Down Expand Up @@ -227,6 +227,7 @@ data=dx['X']
# Load data labels
label = pd.read_csv('groundTruth_divseq.csv',header=None)
# Load gene names corresponding to the columns of the data
# Here we create artificial gene names (Gene_1, Gene_2, ...). You can supply your own gene names instead
gene_names = ['Gene_' + str(i) for i in range(1, data.shape[1]+1)]
gene_names=np.array(gene_names)

Expand Down
15 changes: 8 additions & 7 deletions genomap/genoMOI/genoMOI.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@
from genomap.utils.utils_MOI import *
from genomap.utils.util_Sig import select_n_features

def genoMOI(*arrays,n_clusters=None, colNum, rowNum):
def genoMOI(*arrays, n_clusters=None, n_dim=32, colNum=32, rowNum=32):

# arrays: number of arrays such as array1,array2
# arrays: a number of arrays such as array1, array2 from different sources
# n_clusters: number of data classes
# colNum and rowNum: column are rwo number of genomaps
# n_dim: number of dimensions of the returned integrated data
# colNum and rowNum: column and row number of genomaps
#
# Pre-align data with bbknn
batch_corrected_data=apply_bbknn_and_return_batch_corrected(*arrays)
Expand All @@ -34,12 +35,12 @@ def genoMOI(*arrays,n_clusters=None, colNum, rowNum):
cluster_labels = adata.obs['louvain']
n_clusters = len(np.unique(cluster_labels))

resVis=extract_genoVis_features(dataDX,n_clusters=n_clusters, colNum=colNum,rowNum=rowNum)
resVis=extract_genoVis_features(dataDX, n_clusters=n_clusters, n_dim=n_dim, colNum=colNum,rowNum=rowNum)
return resVis


def extract_genoVis_features(data,n_clusters=20, colNum=32,rowNum=32,batch_size=64,verbose=1,
pretrain_epochs=100,maxiter=300):
def extract_genoVis_features(data,n_clusters=20, n_dim=32, colNum=32, rowNum=32, batch_size=64, verbose=1,
pretrain_epochs=100, maxiter=300):
# rowNum and colNum are the row and column numbers of constructed genomaps
# n_clusters: number of data classes in the data
# batch_size: number of samples in each mini batch while training the deep neural network
Expand All @@ -56,7 +57,7 @@ def extract_genoVis_features(data,n_clusters=20, colNum=32,rowNum=32,batch_size=

# Deep learning-based dimensionality reduction and clustering
optimizer = Adam()
model = ConvIDEC(input_shape=genoMaps.shape[1:], filters=[32, 64, 128, 32], n_clusters=n_clusters)
model = ConvIDEC(input_shape=genoMaps.shape[1:], filters=[32, 64, 128, n_dim], n_clusters=n_clusters)
model.compile(optimizer=optimizer, loss=['kld', 'mse'], loss_weights=[0.1, 1.0])
pretrain_optimizer ='adam'
update_interval=50
Expand Down
24 changes: 20 additions & 4 deletions genomap/genoSig/genoSig.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def rgb2gray(image):
import pandas as pd

def arrays_to_dataframe(arrays, strings):
# converts numpy arrays to a pandas DataFrame
# Check if the number of arrays is even
if len(arrays) % 2 != 0:
raise ValueError("The number of arrays should be even.")
Expand All @@ -105,6 +106,25 @@ def arrays_to_dataframe(arrays, strings):
from sklearn.preprocessing import LabelEncoder

def genoSig(genoMaps,T,label,userPD,gene_names, epochs=100):

"""
Returns the gene names and their importance scores (in the range of 0 to 255) for the specified data classes
Parameters
----------
genoMaps : ndarray, shape (cellNum, rowNum, colNum, 1)
T: numpy array, shape (geneNum, geneNum)
transfer function that defines the 1D-to-2D transformation used to construct the genomaps.
label : numpy array,
cell labels of the data
userPD : numpy array,
the classes for which gene signature should be computed
Returns
-------
result : pandas DataFrame containing the gene names and their importance scores in different classes
"""

genoMaps_3d = np.repeat(genoMaps, 3, axis=-1)

# first, convert the strings to integer labels
Expand All @@ -117,14 +137,10 @@ def genoSig(genoMaps,T,label,userPD,gene_names, epochs=100):
lc = np.append(lc, label_encoded[first_occurrence[0]])

lc = np.array(lc)

n_clusters = len(np.unique(label))
y_train = to_categorical(label_encoded)
# meanI=compute_genoSig(X_train,y_train, [y_train[0],y_train[1]])
meanI = compute_genoSig(genoMaps_3d, label_encoded, lc, epochs=epochs)



result = pd.DataFrame()

for ii in range(0, len(meanI)):
Expand Down

0 comments on commit 2967d94

Please sign in to comment.