Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

#14199: catch subprocess.CalledProcessError in get_gpus() #14212

Merged
merged 2 commits into from
Mar 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions benchmark/python/control_flow/rnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,7 @@ def _array(shape, ctx):


def _get_gpus():
    """Return the indices of the GPUs reported by the MXNet runtime.

    Returns
    -------
    range
        ``range(n)`` where ``n`` is the value of ``mx.util.get_gpu_count()``;
        an empty range when that count is 0.
    """
    # Query the MXNet library instead of shelling out to ``nvidia-smi -L``.
    # The shell-out only handled OSError, so an nvidia-smi binary that exists
    # but exits non-zero raised subprocess.CalledProcessError (issue #14199),
    # and it left the runtime query below it unreachable.
    return range(mx.util.get_gpu_count())

def run_benchmark(cell_type, ctx, seq_len, batch_size, hidden_dim):
obj = {"foreach": ForeachRNN, "while_loop": WhileRNN}[args.benchmark]
Expand Down
8 changes: 3 additions & 5 deletions example/image-classification/common/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
import os
import errno

import mxnet as mx

def download_file(url, local_fname=None, force_write=False):
# requests is not default installed
import requests
Expand Down Expand Up @@ -49,8 +51,4 @@ def get_gpus():
"""
return a list of GPUs
"""
try:
re = subprocess.check_output(["nvidia-smi", "-L"], universal_newlines=True)
except OSError:
return []
return range(len([i for i in re.split('\n') if 'GPU' in i]))
return range(mx.util.get_gpu_count())
11 changes: 2 additions & 9 deletions python/mxnet/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import struct
import traceback
import numbers
import subprocess
import sys
import os
import errno
Expand Down Expand Up @@ -213,6 +212,7 @@ def _get_powerlaw_dataset_csr(num_rows, num_cols, density=0.1, dtype=None):
else:
return mx.nd.array(output_arr).tostype("csr")


def assign_each(the_input, function):
"""Return ndarray composed of passing each array value through some function"""
if function is None:
Expand Down Expand Up @@ -1391,14 +1391,7 @@ def list_gpus():
If there are n GPUs, then return a list [0,1,...,n-1]. Otherwise returns
[].
"""
re = ''
nvidia_smi = ['nvidia-smi', '/usr/bin/nvidia-smi', '/usr/local/nvidia/bin/nvidia-smi']
for cmd in nvidia_smi:
try:
re = subprocess.check_output([cmd, "-L"], universal_newlines=True)
except (subprocess.CalledProcessError, OSError):
pass
return range(len([i for i in re.split('\n') if 'GPU' in i]))
return range(mx.util.get_gpu_count())

def download(url, fname=None, dirname=None, overwrite=False, retries=5):
"""Download an given URL
Expand Down
16 changes: 16 additions & 0 deletions python/mxnet/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,12 @@
# under the License.
"""general utility functions"""

import ctypes
import os
import sys

from .base import _LIB, check_call


def makedirs(d):
"""Create directories recursively if they don't exist. os.makedirs(exist_ok=True) is not
Expand All @@ -28,3 +31,16 @@ def makedirs(d):
mkpath(d)
else:
os.makedirs(d, exist_ok=True) # pylint: disable=unexpected-keyword-arg


def get_gpu_count():
    """Return the GPU count reported by the backend library.

    Returns
    -------
    int
        The value written by ``_LIB.MXGetGPUCount`` into a C ``int``.
    """
    count = ctypes.c_int()
    # The backend fills ``count`` through the pointer; its status code is
    # handed to check_call (presumably raises on failure -- confirm in .base).
    status = _LIB.MXGetGPUCount(ctypes.byref(count))
    check_call(status)
    return count.value


def get_gpu_memory(gpu_dev_id):
    """Query the backend library for one GPU's free and total memory.

    Parameters
    ----------
    gpu_dev_id : int
        Device id forwarded unchanged to ``MXGetGPUMemoryInformation64``.

    Returns
    -------
    tuple of (int, int)
        ``(free, total)`` as written by the backend into two unsigned 64-bit
        integers. Units are presumably bytes -- confirm against the C API.
    """
    free = ctypes.c_uint64(0)
    total = ctypes.c_uint64(0)
    # Both outputs are filled through pointers; the status code goes to
    # check_call (presumably raises on failure -- confirm in .base).
    status = _LIB.MXGetGPUMemoryInformation64(
        gpu_dev_id, ctypes.byref(free), ctypes.byref(total))
    check_call(status)
    return free.value, total.value
10 changes: 4 additions & 6 deletions tools/bandwidth/test_measure.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,11 @@
from measure import run
import subprocess
import logging

import mxnet as mx

def get_gpus():
    """Return the GPU indices reported by MXNet as a comma-separated string.

    Returns
    -------
    str
        ``"0,1,...,n-1"`` where ``n`` is ``mx.util.get_gpu_count()``; the
        empty string when that count is 0.
    """
    # Query the MXNet library instead of parsing ``nvidia-smi -L`` output.
    # The shell-out only handled OSError, so a failing nvidia-smi raised
    # subprocess.CalledProcessError (issue #14199), and it left the runtime
    # query below it unreachable.
    return ','.join(str(i) for i in range(mx.util.get_gpu_count()))

def test_measure(**kwargs):
logging.info(kwargs)
Expand Down