Skip to content

Commit

Permalink
Merge pull request #39 from VasudhaJha/feature/export-all-files
Browse files Browse the repository at this point in the history
Feature/export all files
  • Loading branch information
VasudhaJha authored Jul 24, 2023
2 parents 28f75c3 + b984ca3 commit 7ee63fe
Show file tree
Hide file tree
Showing 10 changed files with 557 additions and 29 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ jobs:
steps:
- uses: rymndhng/release-on-push-action@master
with:
bump_version_scheme: patch # can be either "major", "minor", "patch" or "norelease"
bump_version_scheme: minor # can be either "major", "minor", "patch" or "norelease"
tag_prefix: v
81 changes: 67 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -171,12 +171,10 @@ plt.show()
```python
import scanpy as sc
import matplotlib.pyplot as plt
import genomap.genoMOI as gp
import scipy.io as sio
import numpy as np
import pandas as pd
import umap

from genomap.genoMOI import genoMOIvis, genoMOItraj

# Load five different pancreatic datasets
dx = sio.loadmat('dataBaronX.mat')
Expand All @@ -196,23 +194,79 @@ y = np.squeeze(dx['classLabel'])
dx = sio.loadmat('batchLabel.mat')
ybatch = np.squeeze(dx['batchLabel'])

# Apply genoMOI with genomap size of 44x44 and dimension of 32 for the returned integrated data
resVis=gp.genoMOI(data, data2, data3, data4, data5, colNum=44, rowNum=44, n_dim=32)
# Apply genomap-based multi omic integration and visualize the integrated data with local structure for cluster analysis
# returns 2D visualization, cluster labels, and integrated data
resVis,cli,int_data=genoMOIvis(data, data2, data3, data4, data5, colNum=12, rowNum=12, n_dim=32, epoch=10, prealign_method='scanorama')

# Visualize the integrated data using UMAP
embedding = umap.UMAP(n_neighbors=30,min_dist=0.3,n_epochs=200).fit_transform(resVis)

plt.figure(figsize=(15, 10))
plt.rcParams.update({'font.size': 28})
h1=plt.scatter(embedding[:, 0], embedding[:, 1], c=y,cmap='jet', marker='o', s=18) # ax = plt.subplot(3, n, i + 1*10+1)
plt.xlabel('UMAP1')
plt.ylabel('UMAP2')
h1=plt.scatter(resVis[:, 0], resVis[:, 1], c=y,cmap='jet', marker='o', s=18) # ax = plt.subplot(3, n, i + 1*10+1)
plt.xlabel('genoVis1')
plt.ylabel('genoVis2')
plt.tight_layout()
plt.colorbar(h1)
plt.show()

plt.figure(figsize=(15, 10))
plt.rcParams.update({'font.size': 28})
h1=plt.scatter(resVis[:, 0], resVis[:, 1], c=ybatch,cmap='jet', marker='o', s=18) # ax = plt.subplot(3, n, i + 1*10+1)
plt.xlabel('genoVis1')
plt.ylabel('genoVis2')
plt.tight_layout()
plt.colorbar(h1)
plt.show()
```

### Example 6 - Try genoSig for finding gene signatures for cell/data classes
```python
# Apply genomap-based multi omic integration and visualize the integrated data with global structure for trajectory analysis

# returns 2D embedding, cluster labels, and integrated data
resTraj,cli,int_data=genoMOItraj(data, data2, data3, data4, data5, colNum=12, rowNum=12, n_dim=32, epoch=10, prealign_method='scanorama')


plt.figure(figsize=(15, 10))
plt.rcParams.update({'font.size': 28})
h1=plt.scatter(resTraj[:, 0], resTraj[:, 1], c=y,cmap='jet', marker='o', s=18) # ax = plt.subplot(3, n, i + 1*10+1)
plt.xlabel('genoTraj1')
plt.ylabel('genoTraj2')
plt.tight_layout()
plt.colorbar(h1)
plt.show()

plt.figure(figsize=(15, 10))
plt.rcParams.update({'font.size': 28})
h1=plt.scatter(resTraj[:, 0], resTraj[:, 1], c=ybatch,cmap='jet', marker='o', s=18) # ax = plt.subplot(3, n, i + 1*10+1)
plt.xlabel('genoTraj1')
plt.ylabel('genoTraj2')
plt.tight_layout()
plt.colorbar(h1)
plt.show()
```

### Example 6 - Try genoAnnotate for cell annotation

```python
import scanpy as sc
import pandas as pd
import genomap.genoAnnotate as gp
#Load the PBMC dataset
adata = sc.read_10x_mtx("pbmc3k_filtered_gene_bc_matrices/")

# Input: adata: annData containing the raw gene counts
# tissue type: e.g. Immune system,Pancreas,Liver,Eye,Kidney,Brain,Lung,Adrenal,Heart,Intestine,Muscle,Placenta,Spleen,Stomach,Thymus

adataP = gp.genoAnnotate(adata,tissue_type="Immune system")


# Compute UMAP (requires neighborhood graph, see the previous code for Louvain clustering)
sc.tl.umap(adataP)
# Create a UMAP plot colored by cell type labels
cell_annotations=adataP.obs['cell_type']
sc.pl.umap(adataP, color='cell_type')
```

### Example 7 - Try genoSig for finding gene signatures for cell/data classes

```python
import numpy as np
Expand Down Expand Up @@ -244,7 +298,7 @@ result=gp.genoSig(genoMaps,T,label,userPD,gene_namesRe, epochs=50)
print(result.head())
```

### Example 7 - Try genoClassification for tabular data classification
### Example 8 - Try genoClassification for tabular data classification

```python
import pandas as pd
Expand Down Expand Up @@ -281,8 +335,7 @@ est=gp.genoClassification(training_data, training_labels, test_data, rowNum=rowN
print('Classification accuracy of genomap approach:'+str(np.sum(est==groundTruthTest) / est.shape[0]))
```


### Example 8 - Try genoRegression for tabular data regression
### Example 9 - Try genoRegression for tabular data regression

```python
import pandas as pd
Expand Down
3 changes: 1 addition & 2 deletions data/readme.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
Please download the data from https://drive.google.com/drive/u/3/folders/1QNJdPdXf1lfq0Mu5p5JrzMDhwJJCwgO7
and put it in this folder (data/)
Please download the data from https://drive.google.com/drive/folders/1xq3bBgVP0NCMD7bGTXit0qRkL8fbutZ6
1 change: 1 addition & 0 deletions genomap/genoAnnotate/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .genoAnnotate import *
92 changes: 92 additions & 0 deletions genomap/genoAnnotate/genoAnnotate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# -*- coding: utf-8 -*-
"""
Created on Sun Jul 23 15:18:43 2023
@author: Md Tauhidul Islam
# This code is inspired by scType (https://github.com/IanevskiAleksandr/sc-type)
# We are in the process of using an image matching technique for further enhancement
# of the cell annotation
"""

from genomap.genotype import *
import scanpy as sc

def genoAnnotate(adata, tissue_type, database=None):
    """Assign a cell-type label to every cell of an AnnData object.

    The cells are filtered, normalized, reduced with PCA and clustered with
    Leiden. Each cluster then receives the cell type whose summed marker
    score over the cluster's cells is highest; clusters whose best score is
    below a quarter of their cell count are labelled ``'Unknown'``.

    Parameters
    ----------
    adata
        AnnData containing the raw gene counts. The filtering and
        normalization calls below are issued directly on this object (no
        copy is requested), so the caller's object is presumably modified
        as well.
    tissue_type
        Tissue whose marker sets should be used, e.g. "Immune system",
        "Pancreas", "Liver", "Eye", "Kidney", "Brain", "Lung", "Adrenal",
        "Heart", "Intestine", "Muscle", "Placenta", "Spleen", "Stomach",
        "Thymus".
    database
        Optional marker database in Excel format. When ``None``, the
        default genoANN database URL is used.

    Returns
    -------
    The AnnData restricted to the highly variable genes, with the assigned
    labels stored in ``adata.obs['cell_type']``.
    """
    # Explicit local import: the module otherwise only gets ``pd`` through
    # the wildcard import from genomap.genotype.
    import pandas as pd

    # Database file
    if database is None:
        database = "https://raw.githubusercontent.com/xinglab-ai/self-consistent-expression-recovery-machine/master/demo/data/genoANN_db.xlsx"

    # Filter cells
    sc.pp.filter_cells(adata, min_genes=200)
    # Keep the filtered, un-normalized counts before normalizing
    adata.raw = adata
    sc.pp.normalize_total(adata, target_sum=10000)
    sc.pp.log1p(adata)
    sc.pp.highly_variable_genes(adata, n_top_genes=2000)
    adata = adata[:, adata.var['highly_variable']]
    # Scale data and run PCA
    sc.pp.scale(adata, max_value=10)
    sc.tl.pca(adata)

    # Prepare positive and negative gene sets
    result = gene_sets_prepare(database, tissue_type)
    gs = result['gs_positive']
    gs2 = result['gs_negative']
    cell_types = result['cell_types']

    # The raw matrix may be sparse or already dense; only densify if needed
    raw_x = adata.raw.X
    data = raw_x.toarray() if hasattr(raw_x, "toarray") else raw_x
    # Cell-by-gene expression table built from the raw matrix
    scRNAseqData = pd.DataFrame(data, index=adata.raw.obs_names, columns=adata.raw.var_names)

    # Compute cell-type score for each cell
    # NOTE(review): adata.raw presumably holds un-normalized counts, yet
    # scaled=True is passed here - confirm this is intended.
    es_max = sctype_score(scRNAseqData=scRNAseqData, scaled=True, gs=gs, gs2=gs2, cell_types=cell_types)
    es_max.columns = cell_types
    es_max.index = scRNAseqData.index

    # Calculate neighborhood graph of cells
    sc.pp.neighbors(adata, n_neighbors=10, use_rep='X_pca')
    # Perform clustering so that cell-type can be assigned to each cluster
    sc.tl.leiden(adata)
    # The cluster labels are stored in `adata.obs['leiden']`
    results = []
    for cl in adata.obs['leiden'].unique():
        cells_in_cluster = adata.obs_names[adata.obs['leiden'] == cl]
        # Sum the per-cell scores over the cluster and rank the cell types
        es_max_cl = es_max.loc[cells_in_cluster].sum().sort_values(ascending=False)
        # Keep only the top-ranked cell type for this cluster
        results.append(pd.DataFrame({
            'cluster': cl,
            'type': es_max_cl.index[:1],
            'scores': es_max_cl.values[:1],
            'ncells': len(cells_in_cluster)
        }))

    results = pd.concat(results)
    # Low-confidence clusters: best score below a quarter of the cluster size
    results.loc[results['scores'] < results['ncells'] / 4, 'type'] = 'Unknown'
    results.set_index('cluster', inplace=True)
    # Assign the cell type labels to the cells in the AnnData object
    adata.obs['cell_type'] = results.loc[adata.obs['leiden'], 'type'].values
    return adata




















Loading

0 comments on commit 7ee63fe

Please sign in to comment.