Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add OpenShift deployment support #94

Merged
merged 1 commit into from
Apr 13, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion controllers/nicclusterpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ func (r *NicClusterPolicyReconciler) Reconcile(ctx context.Context, req ctrl.Req

// Create a new State service catalog
sc := state.NewInfoCatalog()
if instance.Spec.OFEDDriver != nil || instance.Spec.NVPeerDriver != nil {
if instance.Spec.OFEDDriver != nil || instance.Spec.NVPeerDriver != nil ||
instance.Spec.RdmaSharedDevicePlugin != nil {
// Create node infoProvider and add to the service catalog
reqLogger.V(consts.LogLevelInfo).Info("Creating Node info provider")
nodeList := &corev1.NodeList{}
Expand Down
13 changes: 13 additions & 0 deletions deploy/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,13 @@ rules:
- get
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
- rolebindings
verbs:
- '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down Expand Up @@ -270,3 +277,9 @@ rules:
- update
- patch
- delete
- apiGroups:
- security.openshift.io
resources:
- securitycontextconstraints
verbs:
- '*'
13 changes: 13 additions & 0 deletions deployment/network-operator/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,13 @@ rules:
- get
- list
- watch
- apiGroups:
- rbac.authorization.k8s.io
resources:
- roles
- rolebindings
verbs:
- '*'
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down Expand Up @@ -272,3 +279,9 @@ rules:
- update
- patch
- delete
- apiGroups:
- security.openshift.io
resources:
- securitycontextconstraints
verbs:
- '*'
4 changes: 2 additions & 2 deletions example/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,12 @@ tools to test RDMA and GPU-Direct RDMA traffic.
##### RDMA
__Pod1:__ Run `ib_write_bw` as server
```bash
# ib_write_bw -d <RDMA device e.g mlx5_0> -a -F --report_gbits -R -q 2
# ib_write_bw -d <RDMA device e.g mlx5_0> -a -F --report_gbits -R
adrianchiris marked this conversation as resolved.
Show resolved Hide resolved
```

__Pod2:__ Run `ib_write_bw` as client
```bash
# ib_write_bw -d <RDMA device e.g mlx5_0> -a -F --report_gbits -R -q 2 <Pod1 IP address>
# ib_write_bw -d <RDMA device e.g mlx5_0> -a -F --report_gbits -R <Pod1 IP address>
```

##### GPU-Direct RDMA
Expand Down
45 changes: 45 additions & 0 deletions example/crs/mellanox.com_v1alpha1_nicclusterpolicy_cr-ocp.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Copyright 2021 NVIDIA
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: mellanox.com/v1alpha1
kind: NicClusterPolicy
metadata:
name: nic-cluster-policy
spec:
ofedDriver:
image: mofed
repository: mellanox
version: 5.3-1.0.0.1
devicePlugin:
image: k8s-rdma-shared-dev-plugin
repository: mellanox
version: v1.1.0
# The config below is passed as-is to the k8s-rdma-shared-device-plugin configuration.
# Replace the 'ifNames' entry with the name of your (RDMA-capable) netdevice.
config: |
{
"configList": [
{
"resourceName": "hca_shared_devices_a",
"rdmaHcaMax": 1000,
"selectors": {
"ifNames": ["ens2f0"]
}
}
]
}
nvPeerDriver:
image: nv-peer-mem-driver
repository: mellanox
version: 1.0-9
gpuDriverSourcePath: /run/nvidia/driver
14 changes: 7 additions & 7 deletions example/rdma-gpu-test-pod1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,16 @@ metadata:
annotations:
k8s.v1.cni.cncf.io/networks: rdma-net-ipam
# If a network with static IPAM is used, replace the network annotation with the one below.
#k8s.v1.cni.cncf.io/networks: '[
# { "name": "rmda-net",
# "ips": ["192.168.111.101/24"],
# "gateway": ["192.168.111.1"]
# }
#]'
# k8s.v1.cni.cncf.io/networks: '[
# { "name": "rdma-net",
# "ips": ["192.168.111.101/24"],
# "gateway": ["192.168.111.1"]
# }
# ]'
spec:
nodeSelector:
# Note: Replace hostname or remove selector altogether
kubernetes.io/hostname: ubuntu
kubernetes.io/hostname: worker01
sjug marked this conversation as resolved.
Show resolved Hide resolved
restartPolicy: OnFailure
containers:
- image: mellanox/cuda-perftest
Expand Down
6 changes: 3 additions & 3 deletions example/rdma-gpu-test-pod2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ metadata:
k8s.v1.cni.cncf.io/networks: rdma-net-ipam
# If a network with static IPAM is used, replace the network annotation with the one below.
#k8s.v1.cni.cncf.io/networks: '[
# { "name": "rmda-net",
# "ips": ["192.168.111.101/24"],
# { "name": "rdma-net",
# "ips": ["192.168.111.102/24"],
# "gateway": ["192.168.111.1"]
# }
#]'
spec:
nodeSelector:
# Note: Replace hostname or remove selector altogether
kubernetes.io/hostname: ubuntu00
kubernetes.io/hostname: worker02
restartPolicy: OnFailure
containers:
- image: mellanox/cuda-perftest
Expand Down
4 changes: 2 additions & 2 deletions example/rdma-test-pod1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ metadata:
k8s.v1.cni.cncf.io/networks: rdma-net-ipam
# If a network with static IPAM is used, replace the network annotation with the one below.
#k8s.v1.cni.cncf.io/networks: '[
# { "name": "rmda-net",
# { "name": "rdma-net",
# "ips": ["192.168.111.101/24"],
# "gateway": ["192.168.111.1"]
# }
#]'
spec:
nodeSelector:
# Note: Replace hostname or remove selector altogether
kubernetes.io/hostname: ubuntu
kubernetes.io/hostname: worker01
restartPolicy: OnFailure
containers:
- image: mellanox/rping-test
Expand Down
6 changes: 3 additions & 3 deletions example/rdma-test-pod2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@ metadata:
k8s.v1.cni.cncf.io/networks: rdma-net-ipam
# If a network with static IPAM is used, replace the network annotation with the one below.
#k8s.v1.cni.cncf.io/networks: '[
# { "name": "rmda-net",
# "ips": ["192.168.111.101/24"],
# { "name": "rdma-net",
# "ips": ["192.168.111.102/24"],
# "gateway": ["192.168.111.1"]
# }
#]'
spec:
nodeSelector:
# Note: Replace hostname or remove selector altogether
kubernetes.io/hostname: ubuntu00
kubernetes.io/hostname: worker02
restartPolicy: OnFailure
containers:
- image: mellanox/rping-test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{{if eq .RuntimeSpec.OSName "rhcos"}}
# ServiceAccount "nv-peer-mem-driver", created in the namespace supplied by
# the template's RuntimeSpec.Namespace value.
# The whole manifest is rendered only when RuntimeSpec.OSName equals "rhcos".
apiVersion: v1
kind: ServiceAccount
metadata:
name: nv-peer-mem-driver
namespace: {{ .RuntimeSpec.Namespace }}
{{end}}
16 changes: 16 additions & 0 deletions manifests/stage-nv-peer-mem-driver/0020_role.openshift.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{{if eq .RuntimeSpec.OSName "rhcos"}}
# Namespaced Role "nv-peer-mem-driver".
# Grants the "use" verb on the securitycontextconstraints resource of the
# security.openshift.io API group, restricted by resourceNames below.
# The whole manifest is rendered only when RuntimeSpec.OSName equals "rhcos".
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: nv-peer-mem-driver
namespace: {{ .RuntimeSpec.Namespace }}
rules:
- apiGroups:
- security.openshift.io
resources:
- securitycontextconstraints
verbs:
- use
# NOTE(review): this names the SCC "privileged", not the SCC called
# "nv-peer-mem-driver" shipped in 0040_scc.openshift.yaml - confirm intended.
resourceNames:
- privileged
{{end}}
18 changes: 18 additions & 0 deletions manifests/stage-nv-peer-mem-driver/0030_rolebinding.openshift.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{{if eq .RuntimeSpec.OSName "rhcos"}}
# RoleBinding "nv-peer-mem-driver": binds the nv-peer-mem-driver ServiceAccount
# to the Role of the same name in the namespace given by RuntimeSpec.Namespace.
# The whole manifest is rendered only when RuntimeSpec.OSName equals "rhcos".
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: nv-peer-mem-driver
  namespace: {{ .RuntimeSpec.Namespace }}
# roleRef accepts only apiGroup, kind and name in rbac.authorization.k8s.io/v1;
# a referenced Role is always looked up in the binding's own namespace, so no
# namespace key belongs here.
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: nv-peer-mem-driver
# The service account is identified through subjects; the v1 RoleBinding schema
# has no top-level userNames field, so that list is not carried here.
subjects:
- kind: ServiceAccount
  name: nv-peer-mem-driver
  namespace: {{ .RuntimeSpec.Namespace }}
{{end}}
49 changes: 49 additions & 0 deletions manifests/stage-nv-peer-mem-driver/0040_scc.openshift.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{{if eq .RuntimeSpec.OSName "rhcos"}}
# SecurityContextConstraints "nv-peer-mem-driver"; the whole manifest is
# rendered only when RuntimeSpec.OSName equals "rhcos".
# Allows: privileged containers, privilege escalation, host PID, host-dir
# volumes, every capability, every unsafe sysctl, every seccomp profile and
# every volume type, with RunAsAny for user, SELinux context, fsGroup and
# supplemental groups.
# Disallows: host IPC, host network and host ports.
# Granted to the nv-peer-mem-driver service account (users) and to the system
# groups listed under groups.
allowHostDirVolumePlugin: true
allowHostIPC: false
# NOTE(review): the DaemonSet template that uses serviceAccountName
# nv-peer-mem-driver sets hostNetwork: true, which this value would not
# admit - confirm which SCC is expected to admit that pod.
allowHostNetwork: false
allowHostPID: true
allowHostPorts: false
allowPrivilegeEscalation: true
allowPrivilegedContainer: true
allowedCapabilities:
- '*'
allowedUnsafeSysctls:
- '*'
apiVersion: security.openshift.io/v1
defaultAddCapabilities: null
fsGroup:
type: RunAsAny
groups:
- system:cluster-admins
- system:nodes
- system:masters
kind: SecurityContextConstraints
# NOTE(review): the description annotation below talks about "privileged",
# while this object is named nv-peer-mem-driver - looks copied; confirm wording.
metadata:
annotations:
kubernetes.io/description: 'privileged allows access to all privileged and host
features and the ability to run as any user, any group, any fsGroup, and with
any SELinux context. WARNING: this is the most relaxed SCC and should be used
only for cluster administration. Grant with caution.'

name: nv-peer-mem-driver
priority: null
readOnlyRootFilesystem: false
requiredDropCapabilities: null
runAsUser:
type: RunAsAny
seLinuxContext:
type: RunAsAny
seccompProfiles:
- '*'
supplementalGroups:
type: RunAsAny
# Service account user name, built from the runtime namespace.
users:
- system:serviceaccount:{{ .RuntimeSpec.Namespace }}:nv-peer-mem-driver
volumes:
- '*'
{{end}}
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ spec:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
{{if eq .RuntimeSpec.OSName "rhcos"}}
serviceAccountName: nv-peer-mem-driver
sjug marked this conversation as resolved.
Show resolved Hide resolved
{{end}}
hostNetwork: true
initContainers:
- name: gpu-driver-validation
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{{if eq .RuntimeSpec.OSName "rhcos"}}
# ServiceAccount "ofed-driver", created in the namespace supplied by the
# template's RuntimeSpec.Namespace value.
# The whole manifest is rendered only when RuntimeSpec.OSName equals "rhcos".
apiVersion: v1
kind: ServiceAccount
metadata:
name: ofed-driver
namespace: {{ .RuntimeSpec.Namespace }}
{{end}}
16 changes: 16 additions & 0 deletions manifests/stage-ofed-driver/0020_role.openshift.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{{if eq .RuntimeSpec.OSName "rhcos"}}
# Namespaced Role "ofed-driver".
# Grants the "use" verb on the securitycontextconstraints resource of the
# security.openshift.io API group, restricted by resourceNames below.
# The whole manifest is rendered only when RuntimeSpec.OSName equals "rhcos".
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
name: ofed-driver
namespace: {{ .RuntimeSpec.Namespace }}
rules:
- apiGroups:
- security.openshift.io
resources:
- securitycontextconstraints
verbs:
- use
# NOTE(review): this names the SCC "privileged", not the SCC called
# "ofed-driver" shipped in 0040_scc.openshift.yaml - confirm intended.
resourceNames:
- privileged
{{end}}
18 changes: 18 additions & 0 deletions manifests/stage-ofed-driver/0030_rolebinding.openshift.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{{if eq .RuntimeSpec.OSName "rhcos"}}
# RoleBinding "ofed-driver": binds the ofed-driver ServiceAccount to the Role
# of the same name in the namespace given by RuntimeSpec.Namespace.
# The whole manifest is rendered only when RuntimeSpec.OSName equals "rhcos".
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: ofed-driver
  namespace: {{ .RuntimeSpec.Namespace }}
# roleRef accepts only apiGroup, kind and name in rbac.authorization.k8s.io/v1;
# a referenced Role is always looked up in the binding's own namespace, so no
# namespace key belongs here.
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: ofed-driver
# The service account is identified through subjects; the v1 RoleBinding schema
# has no top-level userNames field, so that list is not carried here.
subjects:
- kind: ServiceAccount
  name: ofed-driver
  namespace: {{ .RuntimeSpec.Namespace }}
{{end}}
49 changes: 49 additions & 0 deletions manifests/stage-ofed-driver/0040_scc.openshift.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
{{if eq .RuntimeSpec.OSName "rhcos"}}
# SecurityContextConstraints "ofed-driver"; the whole manifest is rendered
# only when RuntimeSpec.OSName equals "rhcos".
# Allows: privileged containers, privilege escalation, host PID, host-dir
# volumes, every capability, every unsafe sysctl, every seccomp profile and
# every volume type, with RunAsAny for user, SELinux context, fsGroup and
# supplemental groups.
# Disallows: host IPC, host network and host ports.
# Granted to the ofed-driver service account (users) and to the system groups
# listed under groups.
allowHostDirVolumePlugin: true
allowHostIPC: false
allowHostNetwork: false
allowHostPID: true
allowHostPorts: false
allowPrivilegeEscalation: true
allowPrivilegedContainer: true
allowedCapabilities:
- '*'
allowedUnsafeSysctls:
- '*'
apiVersion: security.openshift.io/v1
defaultAddCapabilities: null
fsGroup:
type: RunAsAny
groups:
- system:cluster-admins
- system:nodes
- system:masters
kind: SecurityContextConstraints
# NOTE(review): the description annotation below talks about "privileged",
# while this object is named ofed-driver - looks copied; confirm wording.
metadata:
annotations:
kubernetes.io/description: 'privileged allows access to all privileged and host
features and the ability to run as any user, any group, any fsGroup, and with
any SELinux context. WARNING: this is the most relaxed SCC and should be used
only for cluster administration. Grant with caution.'

name: ofed-driver
priority: null
readOnlyRootFilesystem: false
requiredDropCapabilities: null
runAsUser:
type: RunAsAny
seLinuxContext:
type: RunAsAny
seccompProfiles:
- '*'
supplementalGroups:
type: RunAsAny
# Service account user name, built from the runtime namespace.
users:
- system:serviceaccount:{{ .RuntimeSpec.Namespace }}:ofed-driver
volumes:
- '*'
{{end}}
Loading