550.40.65

NVIDIA · Jun 29, 2024 · 91726f2 · 91726f2
1 parent 3750358
commit 91726f2
Show file tree

Hide file tree

Showing 194 changed files with 44,349 additions and 38,468 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## Release 550 Entries
 
+### [550.40.65] 2024-06-28
+
 ### [550.40.63] 2024-05-31
 
 ### [550.40.61] 2024-04-23

diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source
 
 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 550.40.63.
+version 550.40.65.
 
 
 ## How to Build
@@ -17,7 +17,7 @@ as root:
 
 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-550.40.63 driver release.  This can be achieved by installing
+550.40.65 driver release.  This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option.  E.g.,
 
@@ -188,7 +188,7 @@ encountered specific to them.
 For details on feature support and limitations, see the NVIDIA GPU driver
 end user README here:
 
-https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.63/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.65/README/kernel_open.html
 
 For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
 Package for more details.
@@ -757,6 +757,8 @@ Subsystem Device ID.
 | NVIDIA H100 80GB HBM3                           | 2330 10DE 16C0 |
 | NVIDIA H100 80GB HBM3                           | 2330 10DE 16C1 |
 | NVIDIA H100 PCIe                                | 2331 10DE 1626 |
+| NVIDIA H200                                     | 2335 10DE 18BE |
+| NVIDIA H200                                     | 2335 10DE 18BF |
 | NVIDIA H100                                     | 2339 10DE 17FC |
 | NVIDIA H800 NVL                                 | 233A 10DE 183A |
 | NVIDIA GH200 120GB                              | 2342 10DE 16EB |
@@ -873,6 +875,7 @@ Subsystem Device ID.
 | NVIDIA L40S                                     | 26B9 10DE 1851 |
 | NVIDIA L40S                                     | 26B9 10DE 18CF |
 | NVIDIA L20                                      | 26BA 10DE 1957 |
+| NVIDIA L20                                      | 26BA 10DE 1990 |
 | NVIDIA GeForce RTX 4080 SUPER                   | 2702           |
 | NVIDIA GeForce RTX 4080                         | 2704           |
 | NVIDIA GeForce RTX 4070 Ti SUPER                | 2705           |

diff --git a/kernel-open/Kbuild b/kernel-open/Kbuild
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.63\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.65\"
 
 ifneq ($(SYSSRCHOST1X),)
  EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

diff --git a/kernel-open/common/inc/nv-hypervisor.h b/kernel-open/common/inc/nv-hypervisor.h
@@ -37,13 +37,11 @@ typedef enum _HYPERVISOR_TYPE
     OS_HYPERVISOR_UNKNOWN
 } HYPERVISOR_TYPE;
 
-#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE         0
-#define CMD_VGPU_VFIO_INJECT_INTERRUPT        1
-#define CMD_VGPU_VFIO_REGISTER_MDEV           2
-#define CMD_VGPU_VFIO_PRESENT                 3
-#define CMD_VFIO_PCI_CORE_PRESENT             4
+#define CMD_VFIO_WAKE_REMOVE_GPU              1
+#define CMD_VGPU_VFIO_PRESENT                 2
+#define CMD_VFIO_PCI_CORE_PRESENT             3
 
-#define MAX_VF_COUNT_PER_GPU 64
+#define MAX_VF_COUNT_PER_GPU                  64
 
 typedef enum _VGPU_TYPE_INFO
 {
@@ -54,17 +52,11 @@ typedef enum _VGPU_TYPE_INFO
 
 typedef struct
 {
-    void  *vgpuVfioRef;
-    void  *waitQueue;
     void  *nv;
-    NvU32 *vgpuTypeIds;
-    NvU8 **vgpuNames;
-    NvU32  numVgpuTypes;
-    NvU32  domain;
-    NvU8   bus;
-    NvU8   slot;
-    NvU8   function;
-    NvBool is_virtfn;
+    NvU32 domain;
+    NvU32 bus;
+    NvU32 device;
+    NvU32 return_status;
 } vgpu_vfio_info;
 
 typedef struct

diff --git a/kernel-open/common/inc/nv-linux.h b/kernel-open/common/inc/nv-linux.h
@@ -1614,6 +1614,10 @@ typedef struct nv_linux_state_s {
     nv_kthread_q_t open_q;
     NvBool is_accepting_opens;
     struct semaphore open_q_lock;
+#if defined(NV_VGPU_KVM_BUILD)
+    wait_queue_head_t wait;
+    NvS32 return_status;
+#endif
 } nv_linux_state_t;
 
 extern nv_linux_state_t *nv_linux_devices;

diff --git a/kernel-open/common/inc/nv.h b/kernel-open/common/inc/nv.h
@@ -1041,13 +1041,12 @@ NV_STATUS  NV_API_CALL  nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, c
 NV_STATUS  NV_API_CALL  nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
 NV_STATUS  NV_API_CALL  nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
 NV_STATUS  NV_API_CALL  nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
-NV_STATUS  NV_API_CALL  nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *, NvBool *);
+NV_STATUS  NV_API_CALL  nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
+                                             NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
 NV_STATUS  NV_API_CALL  nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
-NV_STATUS  NV_API_CALL  nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
-NV_STATUS  NV_API_CALL  nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *);
 NV_STATUS  NV_API_CALL  nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
-NV_STATUS  NV_API_CALL  nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *);
 NV_STATUS  NV_API_CALL  nv_gpu_bind_event(nvidia_stack_t *);
+NV_STATUS  NV_API_CALL  nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);
 
 NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
 nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);

diff --git a/kernel-open/common/inc/nv_uvm_interface.h b/kernel-open/common/inc/nv_uvm_interface.h
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -1505,41 +1505,49 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
 void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);
 
 /*******************************************************************************
-    nvUvmInterfaceCslUpdateContext
+    nvUvmInterfaceCslRotateKey
 
-    Updates a context after a key rotation event and can only be called once per
-    key rotation event. Following a key rotation event, and before
-    nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
-    previous key can be decrypted with nvUvmInterfaceCslDecrypt.
+    Disables channels and rotates keys.
 
-    Locking: This function acquires an API lock.
-    Memory : This function does not dynamically allocate memory.
+    This function disables channels and rotates associated keys. The channels
+    associated with the given CSL contexts must be idled before this function is
+    called. To trigger key rotation, all allocated channels for a given key must
+    be present in contextList. If the function returns successfully, the CSL
+    contexts have been updated with the new key.
 
-    Arguments:
-        uvmCslContext[IN] - The CSL context associated with a channel.
+    Locking: This function attempts to acquire the GPU lock. If the lock cannot
+             be acquired, the return code is NV_ERR_STATE_IN_USE. The caller
+             must guarantee that no CSL function, including this one, is
+             invoked concurrently with the CSL contexts in contextList.
+    Memory : This function dynamically allocates memory.
 
+    Arguments:
+        contextList[IN/OUT]  - An array of pointers to CSL contexts.
+        contextListCount[IN] - Number of CSL contexts in contextList. Its value
+                               must be greater than 0.
     Error codes:
-        NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
+        NV_ERR_INVALID_ARGUMENT - contextList is NULL or contextListCount is 0.
+        NV_ERR_STATE_IN_USE     - Unable to acquire lock / resource. Caller
+                                  can retry at a later time.
+        NV_ERR_GENERIC          - A failure other than _STATE_IN_USE occurred
+                                  when attempting to acquire a lock.
 */
-NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
+NV_STATUS nvUvmInterfaceCslRotateKey(UvmCslContext *contextList[],
+                                     NvU32 contextListCount);
 
 /*******************************************************************************
     nvUvmInterfaceCslRotateIv
 
     Rotates the IV for a given channel and operation.
 
     This function will rotate the IV on both the CPU and the GPU.
-    Outstanding messages that have been encrypted by the GPU should first be
-    decrypted before calling this function with operation equal to
-    UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
-    encrypted by the CPU should first be decrypted before calling this function
-    with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
-    the channel must be idle before calling this function. This function can be
-    called regardless of the value of the IV's message counter.
-
-    Locking: This function attempts to acquire the GPU lock.
-             In case of failure to acquire the return code
-             is NV_ERR_STATE_IN_USE.
+    For a given operation the channel must be idle before calling this function.
+    This function can be called regardless of the value of the IV's message counter.
+
+    Locking: This function attempts to acquire the GPU lock. If the lock cannot be
+             acquired, the return code is NV_ERR_STATE_IN_USE. The caller must guarantee
+             that no CSL function, including this one, is invoked concurrently with
+             the same CSL context.
     Memory : This function does not dynamically allocate memory.
 
 Arguments:
@@ -1573,8 +1581,8 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
     However, it is optional. If it is NULL, the next IV in line will be used.
 
     Locking: This function does not acquire an API or GPU lock.
-             If called concurrently in different threads with the same UvmCslContext
-             the caller must guarantee exclusion.
+             The caller must guarantee that no CSL function, including this one,
+             is invoked concurrently with the same CSL context.
     Memory : This function does not dynamically allocate memory.
 
 Arguments:
@@ -1610,9 +1618,14 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
     maximized when the input and output buffers are 16-byte aligned. This is
     natural alignment for AES block.
 
+    During a key rotation event the previous key is stored in the CSL context.
+    This allows data encrypted by the GPU to be decrypted with the previous key.
+    The keyRotationId parameter identifies which key is used. Key rotation IDs
+    start at 0 and increment by one for each key rotation event.
+
     Locking: This function does not acquire an API or GPU lock.
-             If called concurrently in different threads with the same UvmCslContext
-             the caller must guarantee exclusion.
+             The caller must guarantee that no CSL function, including this one,
+             is invoked concurrently with the same CSL context.
     Memory : This function does not dynamically allocate memory.
 
     Arguments:
@@ -1622,6 +1635,8 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
         decryptIv[IN]         - IV used to decrypt the ciphertext. Its value can either be given by
                                 nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
                                 internal counter is used.
+        keyRotationId[IN]     - Specifies the key that is used for decryption.
+                                A value of NV_U32_MAX specifies the current key.
         inputBuffer[IN]       - Address of ciphertext input buffer.
         outputBuffer[OUT]     - Address of plaintext output buffer.
         addAuthData[IN]       - Address of the plaintext additional authenticated data used to
@@ -1642,6 +1657,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
                                    NvU32 bufferSize,
                                    NvU8 const *inputBuffer,
                                    UvmCslIv const *decryptIv,
+                                   NvU32 keyRotationId,
                                    NvU8 *outputBuffer,
                                    NvU8 const *addAuthData,
                                    NvU32 addAuthDataSize,
@@ -1656,8 +1672,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
     undefined behavior.
 
     Locking: This function does not acquire an API or GPU lock.
-             If called concurrently in different threads with the same UvmCslContext
-             the caller must guarantee exclusion.
+             The caller must guarantee that no CSL function, including this one,
+             is invoked concurrently with the same CSL context.
     Memory : This function does not dynamically allocate memory.
 
     Arguments:
@@ -1685,8 +1701,8 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
 
     Locking: This function does not acquire an API or GPU lock.
     Memory : This function does not dynamically allocate memory.
-             If called concurrently in different threads with the same UvmCslContext
-             the caller must guarantee exclusion.
+             The caller must guarantee that no CSL function, including this one,
+             is invoked concurrently with the same CSL context.
 
     Arguments:
         uvmCslContext[IN/OUT] - The CSL context.
@@ -1711,8 +1727,8 @@ NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
     the returned IV can be used in nvUvmInterfaceCslDecrypt.
 
     Locking: This function does not acquire an API or GPU lock.
-             If called concurrently in different threads with the same UvmCslContext
-             the caller must guarantee exclusion.
+             The caller must guarantee that no CSL function, including this one,
+             is invoked concurrently with the same CSL context.
     Memory : This function does not dynamically allocate memory.
 
 Arguments:
@@ -1734,28 +1750,41 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
                                        UvmCslIv *iv);
 
 /*******************************************************************************
-    nvUvmInterfaceCslLogExternalEncryption
+    nvUvmInterfaceCslLogEncryption
+
+    Checks and logs information about encryptions associated with the given
+    CSL context.
 
-    Checks and logs information about non-CSL encryptions, such as those that
-    originate from the GPU.
+    For contexts associated with channels, this function does not modify elements of
+    the UvmCslContext, and must be called for every CPU/GPU encryption.
 
-    This function does not modify elements of the UvmCslContext.
+    For the context associated with fault buffers, bufferSize can encompass multiple
+    encryption invocations, and the UvmCslContext will be updated following a key
+    rotation event.
+
+    In either case the IV remains unmodified after this function is called.
 
     Locking: This function does not acquire an API or GPU lock.
     Memory : This function does not dynamically allocate memory.
-             If called concurrently in different threads with the same UvmCslContext
-             the caller must guarantee exclusion.
+             The caller must guarantee that no CSL function, including this one,
+             is invoked concurrently with the same CSL context.
 
     Arguments:
         uvmCslContext[IN/OUT] - The CSL context.
-        bufferSize[OUT]       - The size of the buffer encrypted by the
+        operation[IN]         - If the CSL context is associated with a fault
+                                buffer, this argument is ignored. If it is
+                                associated with a channel, it must be either
+                                - UVM_CSL_OPERATION_ENCRYPT
+                                - UVM_CSL_OPERATION_DECRYPT
+        bufferSize[IN]        - The size of the buffer(s) encrypted by the
                                 external entity in units of bytes.
 
     Error codes:
-      NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
+      NV_ERR_INSUFFICIENT_RESOURCES - The encryption would cause a counter
                                       to overflow.
 */
-NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
-                                                 NvU32 bufferSize);
+NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
+                                         UvmCslOperation operation,
+                                         NvU32 bufferSize);
 
 #endif // _NV_UVM_INTERFACE_H_
diff --git a/kernel-open/common/inc/nv_uvm_types.h b/kernel-open/common/inc/nv_uvm_types.h
@@ -267,6 +267,7 @@ typedef struct UvmGpuChannelInfo_tag
 
     // The errorNotifier is filled out when the channel hits an RC error.
     NvNotification    *errorNotifier;
+    NvNotification    *keyRotationNotifier;
 
     NvU32              hwRunlistId;
     NvU32              hwChannelId;
@@ -292,13 +293,13 @@ typedef struct UvmGpuChannelInfo_tag
 
     // GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
     // so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
-    NvU64             gpFifoGpuVa;
-    NvU64             gpPutGpuVa;
-    NvU64             gpGetGpuVa;
+    NvU64              gpFifoGpuVa;
+    NvU64              gpPutGpuVa;
+    NvU64              gpGetGpuVa;
     // GPU VA of work submission offset is needed in Confidential Computing
     // so CE channels can ring doorbell of other channels as required for
     // WLC/LCIC work submission
-    NvU64             workSubmissionOffsetGpuVa;
+    NvU64              workSubmissionOffsetGpuVa;
 } UvmGpuChannelInfo;
 
 typedef enum
@@ -604,6 +605,8 @@ typedef struct UvmGpuConfComputeCaps_tag
 {
     // Out: GPU's confidential compute mode
     UvmGpuConfComputeMode mode;
+    // Whether key rotation is enabled for UVM keys
+    NvBool bKeyRotationEnabled;
 } UvmGpuConfComputeCaps;
 
 #define UVM_GPU_NAME_LENGTH 0x40
@@ -1086,4 +1089,21 @@ typedef enum UvmCslOperation
     UVM_CSL_OPERATION_DECRYPT
 } UvmCslOperation;
 
+typedef enum UVM_KEY_ROTATION_STATUS {
+    // Key rotation complete/not in progress
+    UVM_KEY_ROTATION_STATUS_IDLE = 0,
+    // RM is waiting for clients to report their channels are idle for key rotation
+    UVM_KEY_ROTATION_STATUS_PENDING = 1,
+    // Key rotation is in progress
+    UVM_KEY_ROTATION_STATUS_IN_PROGRESS = 2,
+    // Key rotation timeout failure, RM will RC non-idle channels.
+    // UVM should never see this status value.
+    UVM_KEY_ROTATION_STATUS_FAILED_TIMEOUT = 3,
+    // Key rotation failed because upper threshold was crossed, RM will RC non-idle channels
+    UVM_KEY_ROTATION_STATUS_FAILED_THRESHOLD = 4,
+    // Internal RM failure while rotating keys for a certain channel, RM will RC the channel.
+    UVM_KEY_ROTATION_STATUS_FAILED_ROTATION = 5,
+    UVM_KEY_ROTATION_STATUS_MAX_COUNT = 6,
+} UVM_KEY_ROTATION_STATUS;
+
 #endif // _NV_UVM_TYPES_H_