kubernetes · dlorenc · Jul 9, 2018 · Jun 21, 2018 · Jun 26, 2018 · Jun 27, 2018
diff --git a/cmd/minikube/cmd/config/config.go b/cmd/minikube/cmd/config/config.go
@@ -212,6 +212,18 @@ var settings = []Setting{
 		validations: []setFn{IsValidAddon},
 		callbacks:   []setFn{EnableOrDisableAddon},
 	},
+	{
+		name:        "nvidia-driver-installer",
+		set:         SetBool,
+		validations: []setFn{IsValidAddon},
+		callbacks:   []setFn{EnableOrDisableAddon},
+	},
+	{
+		name:        "nvidia-gpu-device-plugin",
+		set:         SetBool,
+		validations: []setFn{IsValidAddon},
+		callbacks:   []setFn{EnableOrDisableAddon},
+	},
 	{
 		name: "hyperv-virtual-switch",
 		set:  SetString,

diff --git a/cmd/minikube/cmd/start.go b/cmd/minikube/cmd/start.go
@@ -73,6 +73,7 @@ const (
 	uuid                  = "uuid"
 	vpnkitSock            = "hyperkit-vpnkit-sock"
 	vsockPorts            = "hyperkit-vsock-ports"
+	gpu                   = "gpu"
 )
 
 var (
@@ -136,6 +137,10 @@ func runStart(cmd *cobra.Command, args []string) {
 		validateK8sVersion(k8sVersion)
 	}
 
+	if viper.GetBool(gpu) && viper.GetString(vmDriver) != "kvm2" {
+		glog.Exitf("--gpu is only supported with --vm-driver=kvm2")
+	}
+
 	config := cfg.MachineConfig{
 		MinikubeISO:         viper.GetString(isoURL),
 		Memory:              viper.GetInt(memory),
@@ -157,6 +162,7 @@ func runStart(cmd *cobra.Command, args []string) {
 		Downloader:          pkgutil.DefaultDownloader{},
 		DisableDriverMounts: viper.GetBool(disableDriverMounts),
 		UUID:                viper.GetString(uuid),
+		GPU:                 viper.GetBool(gpu),
 	}
 
 	fmt.Printf("Starting local Kubernetes %s cluster...\n", viper.GetString(kubernetesVersion))
@@ -419,6 +425,7 @@ func init() {
 	startCmd.Flags().String(uuid, "", "Provide VM UUID to restore MAC address (only supported with Hyperkit driver).")
 	startCmd.Flags().String(vpnkitSock, "", "Location of the VPNKit socket used for networking. If empty, disables Hyperkit VPNKitSock, if 'auto' uses Docker for Mac VPNKit connection, otherwise uses the specified VSock.")
 	startCmd.Flags().StringSlice(vsockPorts, []string{}, "List of guest VSock ports that should be exposed as sockets on the host (Only supported on with hyperkit now).")
+	startCmd.Flags().Bool(gpu, false, "Enable experimental NVIDIA GPU support in minikube (works only with kvm2 driver on Linux)")
 	viper.BindPFlags(startCmd.Flags())
 	RootCmd.AddCommand(startCmd)
 }

diff --git a/deploy/addons/gpu/nvidia-driver-installer.yaml b/deploy/addons/gpu/nvidia-driver-installer.yaml
@@ -0,0 +1,76 @@
+# Copyright 2018 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# The Dockerfile and other source for this daemonset are in
+# https://github.com/GoogleCloudPlatform/container-engine-accelerators/tree/master/nvidia-driver-installer/minikube
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: nvidia-driver-installer
+  namespace: kube-system
+  labels:
+    k8s-app: nvidia-driver-installer
+    kubernetes.io/minikube-addons: nvidia-driver-installer
+    addonmanager.kubernetes.io/mode: Reconcile
+spec:
+  selector:
+    matchLabels:
+      k8s-app: nvidia-driver-installer
+  updateStrategy:
+    type: RollingUpdate
+  template:
+    metadata:
+      labels:
+        k8s-app: nvidia-driver-installer
+    spec:
+      tolerations:
+      - key: "nvidia.com/gpu"
+        effect: "NoSchedule"
+        operator: "Exists"
+      volumes:
+      - name: dev
+        hostPath:
+          path: /dev
+      - name: nvidia-install-dir-host
+        hostPath:
+          path: /home/kubernetes/bin/nvidia
+      - name: root-mount
+        hostPath:
+          path: /
+      initContainers:
+      - image: k8s.gcr.io/minikube-nvidia-driver-installer@sha256:85cbeadb8bee62a96079823e81915955af0959063ff522ec01522e4edda28f33
+        name: nvidia-driver-installer
+        resources:
+          requests:
+            cpu: 0.15
+        securityContext:
+          privileged: true
+        env:
+          - name: NVIDIA_INSTALL_DIR_HOST
+            value: /home/kubernetes/bin/nvidia
+          - name: NVIDIA_INSTALL_DIR_CONTAINER
+            value: /usr/local/nvidia
+          - name: ROOT_MOUNT_DIR
+            value: /root
+        volumeMounts:
+        - name: nvidia-install-dir-host
+          mountPath: /usr/local/nvidia
+        - name: dev
+          mountPath: /dev
+        - name: root-mount
+          mountPath: /root
+      containers:
+      - image: "gcr.io/google-containers/pause:2.0"
+        name: pause
diff --git a/deploy/addons/gpu/nvidia-gpu-device-plugin.yaml b/deploy/addons/gpu/nvidia-gpu-device-plugin.yaml
@@ -0,0 +1,67 @@
+# Copyright 2018 The Kubernetes Authors All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+apiVersion: apps/v1
+kind: DaemonSet
+metadata:
+  name: nvidia-gpu-device-plugin
+  namespace: kube-system
+  labels:
+    k8s-app: nvidia-gpu-device-plugin
+    kubernetes.io/minikube-addons: nvidia-gpu-device-plugin
+    addonmanager.kubernetes.io/mode: Reconcile
+spec:
+  selector:
+    matchLabels:
+      k8s-app: nvidia-gpu-device-plugin
+  template:
+    metadata:
+      labels:
+        k8s-app: nvidia-gpu-device-plugin
+      annotations:
+        scheduler.alpha.kubernetes.io/critical-pod: ''
+    spec:
+      priorityClassName: system-node-critical
+      tolerations:
+      - operator: "Exists"
+        effect: "NoExecute"
+      - operator: "Exists"
+        effect: "NoSchedule"
+      volumes:
+      - name: device-plugin
+        hostPath:
+          path: /var/lib/kubelet/device-plugins
+      - name: dev
+        hostPath:
+          path: /dev
+      containers:
+      - image: "k8s.gcr.io/nvidia-gpu-device-plugin@sha256:0842734032018be107fa2490c98156992911e3e1f2a21e059ff0105b07dd8e9e"
+        command: ["/usr/bin/nvidia-gpu-device-plugin", "-logtostderr"]
+        name: nvidia-gpu-device-plugin
+        resources:
+          requests:
+            cpu: 50m
+            memory: 10Mi
+          limits:
+            cpu: 50m
+            memory: 10Mi
+        securityContext:
+          privileged: true
+        volumeMounts:
+        - name: device-plugin
+          mountPath: /device-plugin
+        - name: dev
+          mountPath: /dev
+  updateStrategy:
+    type: RollingUpdate
diff --git a/docs/README.md b/docs/README.md
@@ -12,6 +12,8 @@
 
 * **Caching Images** ([cache.md](cache.md)): Caching non-minikube images in minikube
 
+* **GPUs** ([gpu.md](gpu.md)): Using NVIDIA GPUs on minikube
+
 ### Installation and debugging
 
 * **Driver installation** ([drivers.md](drivers.md)): In depth instructions for installing the various hypervisor drivers

diff --git a/docs/addons.md b/docs/addons.md
@@ -15,6 +15,8 @@ $ minikube addons list
 - ingress: disabled
 - default-storageclass: enabled
 - storage-provisioner: enabled
+- nvidia-driver-installer: disabled
+- nvidia-gpu-device-plugin: disabled
 
 # minikube must be running for these commands to take effect
 $ minikube addons enable heapster
@@ -36,6 +38,8 @@ The currently supported addons include:
 * [CoreDNS](https://github.com/coredns/deployment/tree/master/kubernetes)
 * [Ingress](https://github.com/kubernetes/ingress-nginx)
 * [Freshpod](https://github.com/GoogleCloudPlatform/freshpod)
+* [nvidia-driver-installer](https://github.com/GoogleCloudPlatform/container-engine-accelerators/tree/master/nvidia-driver-installer/minikube)
+* [nvidia-gpu-device-plugin](https://github.com/GoogleCloudPlatform/container-engine-accelerators/tree/master/cmd/nvidia_gpu)
 
 If you would like to have minikube properly start/restart custom addons, place the addon(s) you wish to be launched with minikube in the `.minikube/addons` directory. Addons in this folder will be moved to the minikube VM and launched each time minikube is started/restarted.
 

diff --git a/docs/gpu.md b/docs/gpu.md
@@ -0,0 +1,116 @@
+# (Experimental) NVIDIA GPU support in minikube
+
+minikube has experimental support for using NVIDIA GPUs on Linux.
+
+## Using NVIDIA GPUs on minikube on Linux with `--vm-driver=kvm2`
+
+When using NVIDIA GPUs with the kvm2 vm-driver, we pass through spare GPUs on
+the host to the minikube VM. Doing so has a few prerequisites:
+
+- You must install the [kvm2 driver](drivers.md#kvm2-driver). If you already had
+  this installed make sure that you fetch the latest
+  `docker-machine-driver-kvm2` binary that has GPU support.
+
+- Your CPU must support IOMMU. Different vendors have different names for this
+  technology. Intel calls it Intel VT-d. AMD calls it AMD-Vi. Your motherboard
+  must also support IOMMU.
+
+- You must enable IOMMU in the kernel: add `intel_iommu=on` or `amd_iommu=on`
+  (depending on your CPU vendor) to the kernel command line. Also add `iommu=pt`
+  to the kernel command line.
+
+- You must have spare GPUs that are not used on the host and can be passed
+  through to the VM. These GPUs must not be controlled by the nvidia/nouveau
+  driver. You can ensure this by either not loading the nvidia/nouveau driver on
+  the host at all or assigning the spare GPU devices to stub kernel modules like
+  `vfio-pci` or `pci-stub` at boot time. You can do that by adding the
+  [vendorId:deviceId](https://pci-ids.ucw.cz/read/PC/10de) of your spare GPU to
+  the kernel command line. For example, for a Quadro M4000, add
+  `pci-stub.ids=10de:13f1` to the kernel command line. Note that you will have
+  to do this for all GPUs you want to pass through to the VM and all other
+  devices that are in the IOMMU group of these GPUs.
+
+- Once you reboot the system after doing the above, you should be ready to use
+  GPUs with kvm2. Run the following command to start minikube:
+  ```
+  minikube start --vm-driver kvm2 --gpu
+  ```
+  This command will check if all the above conditions are satisfied and
+  pass through spare GPUs found on the host to the VM.
+
+  If this succeeded, run the following commands:
+  ```
+  minikube addons enable nvidia-gpu-device-plugin
+  minikube addons enable nvidia-driver-installer
+  ```
+  This will install the NVIDIA driver (that works for GeForce/Quadro cards)
+  on the VM.
+
+- If everything succeeded, you should be able to see `nvidia.com/gpu` in the
+  capacity:
+  ```
+  kubectl get nodes -ojson | jq .items[].status.capacity
+  ```
+
+### Where can I learn more about GPU passthrough?
+See the excellent documentation at
+https://wiki.archlinux.org/index.php/PCI_passthrough_via_OVMF
+
+### Why are so many manual steps required to use GPUs with kvm2 on minikube?
+These steps require elevated privileges which minikube doesn't run with and they
+are disruptive to the host, so we decided to not do them automatically.
+
+
+## Using NVIDIA GPU on minikube on Linux with `--vm-driver=none`
+
+NOTE: The approach used to expose GPUs here is different from the approach used
+to expose GPUs with `--vm-driver=kvm2`. Please don't mix these instructions.
+
+- Install minikube.
+
+- Install the nvidia driver, nvidia-docker and configure docker with nvidia as
+  the default runtime. See instructions at
+  https://github.com/NVIDIA/nvidia-docker
+
+- Start minikube:
+  ```
+  minikube start --vm-driver=none --apiserver-ips 127.0.0.1 --apiserver-name localhost
+  ```
+
+- Install NVIDIA's device plugin:
+  ```
+  kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.10/nvidia-device-plugin.yml
+  ```
+
+
+## Why does minikube not support NVIDIA GPUs on macOS?
+VM drivers supported by minikube for macOS don't support GPU passthrough:
+- [mist64/xhyve#108](https://github.com/mist64/xhyve/issues/108)
+- [moby/hyperkit#159](https://github.com/moby/hyperkit/issues/159)
+- [VirtualBox docs](http://www.virtualbox.org/manual/ch09.html#pcipassthrough)
+
+Also:
+- For quite a while, all Mac hardware (both laptops and desktops) has come with
+  Intel or AMD GPUs (and not with NVIDIA GPUs). Recently, Apple added [support
+  for eGPUs](https://support.apple.com/en-us/HT208544), but even then all the
+  supported GPUs listed are AMD’s.
+
+- nvidia-docker [doesn't support
+  macOS](https://github.com/NVIDIA/nvidia-docker/issues/101) either.
+
+
+## Why does minikube not support NVIDIA GPUs on Windows?
+minikube supports Windows hosts through Hyper-V or VirtualBox.
+
+- VirtualBox doesn't support PCI passthrough for [Windows
+  host](http://www.virtualbox.org/manual/ch09.html#pcipassthrough).
+
+- Hyper-V supports DDA (discrete device assignment) but [only for Windows Server
+  2016](https://docs.microsoft.com/en-us/windows-server/virtualization/hyper-v/plan/plan-for-deploying-devices-using-discrete-device-assignment)
+
+Since the only possibility of supporting GPUs on minikube on Windows is on a
+server OS where users don't usually run minikube, we haven't invested time in
+trying to support NVIDIA GPUs on minikube on Windows.
+
+Also, nvidia-docker [doesn't support
+Windows](https://github.com/NVIDIA/nvidia-docker/issues/197) either.
diff --git a/pkg/drivers/kvm/domain.go b/pkg/drivers/kvm/domain.go
@@ -74,6 +74,9 @@ const domainTmpl = `
     <rng model='virtio'>
       <backend model='random'>/dev/random</backend>
     </rng>
+    {{if .GPU}}
+    {{.DevicesXML}}
+    {{end}}
   </devices>
 </domain>
 `