-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmap_accession_strings.py
36 lines (26 loc) · 1.13 KB
/
map_accession_strings.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import json
from Bio import Entrez
from django.conf import settings
from django.core.management import BaseCommand
class Command(BaseCommand):
    """Look up configured IDs in the NCBI nucleotide database and print them.

    The IDs are the values of ``settings.PROTEIN_ACCESSION_STRINGS_TO_IDS``.
    They are searched with the ``[accession]`` field tag via Entrez
    ``esearch``; the matching records are then summarised with ``esummary``
    and one ``<assemblyacc> => <caption>`` line is written per record.
    """

    # Implicit string concatenation instead of a backslash continuation
    # inside the literal, which dropped the space after the comma (or, in an
    # indented file, embedded the indentation in the help text).
    help = (
        'For every protein ID we want to search against, '
        'query the NCBI database for its corresponding accession string.'
    )

    def handle(self, *args, **options):
        """Run the lookup and write one line per summary record.

        Writes to ``self.stdout`` so the output can be captured when the
        command is invoked programmatically. Returns early, with a short
        message, when there is nothing to search for or nothing was found.
        """
        protein_ids = list(settings.PROTEIN_ACCESSION_STRINGS_TO_IDS.values())
        if not protein_ids:
            # An empty search term would only produce an NCBI error.
            self.stdout.write('No protein IDs configured; nothing to look up.')
            return

        # TODO: replace with API key stored as environment variable.
        # NOTE(review): the value below is not a usable e-mail address --
        # confirm the real contact address before running against NCBI.
        Entrez.email = '[email protected]'

        # ' OR ' needs a space on both sides; without the leading one the
        # operator is glued to the preceding '[accession]' tag.
        query = ' OR '.join(
            protein_id + '[accession]' for protein_id in protein_ids
        )
        # esearch returns at most 20 IDs by default, so ask for at least one
        # result per configured ID (never fewer than the default).
        search_handle = Entrez.esearch(
            db='nucleotide',
            term=query,
            retmax=max(len(protein_ids), 20),
        )
        try:
            ids = Entrez.read(search_handle)['IdList']
        finally:
            search_handle.close()

        if not ids:
            # esummary with an empty id list is an error, not an empty result.
            self.stdout.write('NCBI search returned no matching records.')
            return

        summary_response = Entrez.esummary(
            db='nucleotide', id=','.join(ids), retmode='json'
        )
        try:
            summary_dict = json.loads(
                summary_response.read().decode('UTF-8').strip()
            )
        finally:
            summary_response.close()

        for result_key, hit in summary_dict['result'].items():
            # 'uids' is the list of record keys, not a record itself.
            if result_key == 'uids':
                continue
            self.stdout.write(
                '{} => {}'.format(hit['assemblyacc'], hit['caption'])
            )