Skip to content

Commit

Permalink
550.40.65
Browse files Browse the repository at this point in the history
  • Loading branch information
russellcnv committed Jun 29, 2024
1 parent 3750358 commit 91726f2
Show file tree
Hide file tree
Showing 194 changed files with 44,349 additions and 38,468 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

## Release 550 Entries

### [550.40.65] 2024-06-28

### [550.40.63] 2024-05-31

### [550.40.61] 2024-04-23
Expand Down
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 550.40.63.
version 550.40.65.


## How to Build
Expand All @@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
550.40.63 driver release. This can be achieved by installing
550.40.65 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

Expand Down Expand Up @@ -188,7 +188,7 @@ encountered specific to them.
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.63/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/550.40.65/README/kernel_open.html

For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
Expand Down Expand Up @@ -757,6 +757,8 @@ Subsystem Device ID.
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H200 | 2335 10DE 18BE |
| NVIDIA H200 | 2335 10DE 18BF |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| NVIDIA GH200 120GB | 2342 10DE 16EB |
Expand Down Expand Up @@ -873,6 +875,7 @@ Subsystem Device ID.
| NVIDIA L40S | 26B9 10DE 1851 |
| NVIDIA L40S | 26B9 10DE 18CF |
| NVIDIA L20 | 26BA 10DE 1957 |
| NVIDIA L20 | 26BA 10DE 1990 |
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
Expand Down
2 changes: 1 addition & 1 deletion kernel-open/Kbuild
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.63\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.40.65\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
Expand Down
24 changes: 8 additions & 16 deletions kernel-open/common/inc/nv-hypervisor.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,11 @@ typedef enum _HYPERVISOR_TYPE
OS_HYPERVISOR_UNKNOWN
} HYPERVISOR_TYPE;

#define CMD_VGPU_VFIO_WAKE_WAIT_QUEUE 0
#define CMD_VGPU_VFIO_INJECT_INTERRUPT 1
#define CMD_VGPU_VFIO_REGISTER_MDEV 2
#define CMD_VGPU_VFIO_PRESENT 3
#define CMD_VFIO_PCI_CORE_PRESENT 4
#define CMD_VFIO_WAKE_REMOVE_GPU 1
#define CMD_VGPU_VFIO_PRESENT 2
#define CMD_VFIO_PCI_CORE_PRESENT 3

#define MAX_VF_COUNT_PER_GPU 64
#define MAX_VF_COUNT_PER_GPU 64

typedef enum _VGPU_TYPE_INFO
{
Expand All @@ -54,17 +52,11 @@ typedef enum _VGPU_TYPE_INFO

typedef struct
{
void *vgpuVfioRef;
void *waitQueue;
void *nv;
NvU32 *vgpuTypeIds;
NvU8 **vgpuNames;
NvU32 numVgpuTypes;
NvU32 domain;
NvU8 bus;
NvU8 slot;
NvU8 function;
NvBool is_virtfn;
NvU32 domain;
NvU32 bus;
NvU32 device;
NvU32 return_status;
} vgpu_vfio_info;

typedef struct
Expand Down
4 changes: 4 additions & 0 deletions kernel-open/common/inc/nv-linux.h
Original file line number Diff line number Diff line change
Expand Up @@ -1614,6 +1614,10 @@ typedef struct nv_linux_state_s {
nv_kthread_q_t open_q;
NvBool is_accepting_opens;
struct semaphore open_q_lock;
#if defined(NV_VGPU_KVM_BUILD)
wait_queue_head_t wait;
NvS32 return_status;
#endif
} nv_linux_state_t;

extern nv_linux_state_t *nv_linux_devices;
Expand Down
7 changes: 3 additions & 4 deletions kernel-open/common/inc/nv.h
Original file line number Diff line number Diff line change
Expand Up @@ -1041,13 +1041,12 @@ NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, c
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *, NvBool *);
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *,
NvU64 *, NvU64 *, NvU32 *, NvBool *, NvU8 *);
NV_STATUS NV_API_CALL nv_vgpu_get_hbm_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
NV_STATUS NV_API_CALL nv_vgpu_get_sparse_mmap(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 **, NvU64 **, NvU32 *);
NV_STATUS NV_API_CALL nv_vgpu_process_vf_info(nvidia_stack_t *, nv_state_t *, NvU8, NvU32, NvU8, NvU8, NvU8, NvBool, void *);
NV_STATUS NV_API_CALL nv_vgpu_update_request(nvidia_stack_t *, const NvU8 *, NvU32, NvU64 *, NvU64 *, const char *);
NV_STATUS NV_API_CALL nv_gpu_bind_event(nvidia_stack_t *);
NV_STATUS NV_API_CALL nv_gpu_unbind_event(nvidia_stack_t *, NvU32, NvBool *);

NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_access_params_t*);
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
Expand Down
115 changes: 72 additions & 43 deletions kernel-open/common/inc/nv_uvm_interface.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2013-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2013-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
Expand Down Expand Up @@ -1505,41 +1505,49 @@ NV_STATUS nvUvmInterfaceCslInitContext(UvmCslContext *uvmCslContext,
void nvUvmInterfaceDeinitCslContext(UvmCslContext *uvmCslContext);

/*******************************************************************************
nvUvmInterfaceCslUpdateContext
nvUvmInterfaceCslRotateKey
Updates a context after a key rotation event and can only be called once per
key rotation event. Following a key rotation event, and before
nvUvmInterfaceCslUpdateContext is called, data encrypted by the GPU with the
previous key can be decrypted with nvUvmInterfaceCslDecrypt.
Disables channels and rotates keys.
Locking: This function acquires an API lock.
Memory : This function does not dynamically allocate memory.
This function disables channels and rotates associated keys. The channels
associated with the given CSL contexts must be idled before this function is
called. To trigger key rotation all allocated channels for a given key must
be present in the list. If the function returns successfully then the CSL
contexts have been updated with the new key.
Arguments:
uvmCslContext[IN] - The CSL context associated with a channel.
Locking: This function attempts to acquire the GPU lock. In case of failure
to acquire the return code is NV_ERR_STATE_IN_USE. The caller must
guarantee that no CSL function, including this one, is invoked
concurrently with the CSL contexts in contextList.
Memory : This function dynamically allocates memory.
Arguments:
contextList[IN/OUT] - An array of pointers to CSL contexts.
contextListCount[IN] - Number of CSL contexts in contextList. Its value
must be greater than 0.
Error codes:
NV_ERR_INVALID_ARGUMENT - The CSL context is not associated with a channel.
NV_ERR_INVALID_ARGUMENT - contextList is NULL or contextListCount is 0.
NV_ERR_STATE_IN_USE - Unable to acquire lock / resource. Caller
can retry at a later time.
NV_ERR_GENERIC - A failure other than _STATE_IN_USE occurred
when attempting to acquire a lock.
*/
NV_STATUS nvUvmInterfaceCslUpdateContext(UvmCslContext *uvmCslContext);
NV_STATUS nvUvmInterfaceCslRotateKey(UvmCslContext *contextList[],
NvU32 contextListCount);

/*******************************************************************************
nvUvmInterfaceCslRotateIv
Rotates the IV for a given channel and operation.
This function will rotate the IV on both the CPU and the GPU.
Outstanding messages that have been encrypted by the GPU should first be
decrypted before calling this function with operation equal to
UVM_CSL_OPERATION_DECRYPT. Similarly, outstanding messages that have been
encrypted by the CPU should first be decrypted before calling this function
with operation equal to UVM_CSL_OPERATION_ENCRYPT. For a given operation
the channel must be idle before calling this function. This function can be
called regardless of the value of the IV's message counter.
Locking: This function attempts to acquire the GPU lock.
In case of failure to acquire the return code
is NV_ERR_STATE_IN_USE.
For a given operation the channel must be idle before calling this function.
This function can be called regardless of the value of the IV's message counter.
Locking: This function attempts to acquire the GPU lock. In case of failure to
acquire the return code is NV_ERR_STATE_IN_USE. The caller must guarantee
that no CSL function, including this one, is invoked concurrently with
the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
Expand Down Expand Up @@ -1573,8 +1581,8 @@ NV_STATUS nvUvmInterfaceCslRotateIv(UvmCslContext *uvmCslContext,
However, it is optional. If it is NULL, the next IV in line will be used.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
Expand Down Expand Up @@ -1610,9 +1618,14 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
maximized when the input and output buffers are 16-byte aligned. This is
natural alignment for AES block.
During a key rotation event the previous key is stored in the CSL context.
This allows data encrypted by the GPU to be decrypted with the previous key.
The keyRotationId parameter identifies which key is used. The first key rotation
ID has a value of 0 that increments by one for each key rotation event.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
Expand All @@ -1622,6 +1635,8 @@ NV_STATUS nvUvmInterfaceCslEncrypt(UvmCslContext *uvmCslContext,
decryptIv[IN] - IV used to decrypt the ciphertext. Its value can either be given by
nvUvmInterfaceCslIncrementIv, or, if NULL, the CSL context's
internal counter is used.
keyRotationId[IN] - Specifies the key that is used for decryption.
A value of NV_U32_MAX specifies the current key.
inputBuffer[IN] - Address of ciphertext input buffer.
outputBuffer[OUT] - Address of plaintext output buffer.
addAuthData[IN] - Address of the plaintext additional authenticated data used to
Expand All @@ -1642,6 +1657,7 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
NvU32 bufferSize,
NvU8 const *inputBuffer,
UvmCslIv const *decryptIv,
NvU32 keyRotationId,
NvU8 *outputBuffer,
NvU8 const *addAuthData,
NvU32 addAuthDataSize,
Expand All @@ -1656,8 +1672,8 @@ NV_STATUS nvUvmInterfaceCslDecrypt(UvmCslContext *uvmCslContext,
undefined behavior.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
Expand Down Expand Up @@ -1685,8 +1701,8 @@ NV_STATUS nvUvmInterfaceCslSign(UvmCslContext *uvmCslContext,
Locking: This function does not acquire an API or GPU lock.
Memory : This function does not dynamically allocate memory.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
Expand All @@ -1711,8 +1727,8 @@ NV_STATUS nvUvmInterfaceCslQueryMessagePool(UvmCslContext *uvmCslContext,
the returned IV can be used in nvUvmInterfaceCslDecrypt.
Locking: This function does not acquire an API or GPU lock.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Memory : This function does not dynamically allocate memory.
Arguments:
Expand All @@ -1734,28 +1750,41 @@ NV_STATUS nvUvmInterfaceCslIncrementIv(UvmCslContext *uvmCslContext,
UvmCslIv *iv);

/*******************************************************************************
nvUvmInterfaceCslLogExternalEncryption
nvUvmInterfaceCslLogEncryption
Checks and logs information about encryptions associated with the given
CSL context.
Checks and logs information about non-CSL encryptions, such as those that
originate from the GPU.
For contexts associated with channels, this function does not modify elements of
the UvmCslContext, and must be called for every CPU/GPU encryption.
This function does not modify elements of the UvmCslContext.
For the context associated with fault buffers, bufferSize can encompass multiple
encryption invocations, and the UvmCslContext will be updated following a key
rotation event.
In either case the IV remains unmodified after this function is called.
Locking: This function does not acquire an API or GPU lock.
Memory : This function does not dynamically allocate memory.
If called concurrently in different threads with the same UvmCslContext
the caller must guarantee exclusion.
The caller must guarantee that no CSL function, including this one,
is invoked concurrently with the same CSL context.
Arguments:
uvmCslContext[IN/OUT] - The CSL context.
bufferSize[OUT] - The size of the buffer encrypted by the
operation[IN] - If the CSL context is associated with a fault
buffer, this argument is ignored. If it is
associated with a channel, it must be either
- UVM_CSL_OPERATION_ENCRYPT
- UVM_CSL_OPERATION_DECRYPT
bufferSize[IN] - The size of the buffer(s) encrypted by the
external entity in units of bytes.
Error codes:
NV_ERR_INSUFFICIENT_RESOURCES - The device encryption would cause a counter
NV_ERR_INSUFFICIENT_RESOURCES - The encryption would cause a counter
to overflow.
*/
NV_STATUS nvUvmInterfaceCslLogExternalEncryption(UvmCslContext *uvmCslContext,
NvU32 bufferSize);
NV_STATUS nvUvmInterfaceCslLogEncryption(UvmCslContext *uvmCslContext,
UvmCslOperation operation,
NvU32 bufferSize);

#endif // _NV_UVM_INTERFACE_H_
28 changes: 24 additions & 4 deletions kernel-open/common/inc/nv_uvm_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ typedef struct UvmGpuChannelInfo_tag

// The errorNotifier is filled out when the channel hits an RC error.
NvNotification *errorNotifier;
NvNotification *keyRotationNotifier;

NvU32 hwRunlistId;
NvU32 hwChannelId;
Expand All @@ -292,13 +293,13 @@ typedef struct UvmGpuChannelInfo_tag

// GPU VAs of both GPFIFO and GPPUT are needed in Confidential Computing
// so a channel can be controlled via another channel (SEC2 or WLC/LCIC)
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
NvU64 gpFifoGpuVa;
NvU64 gpPutGpuVa;
NvU64 gpGetGpuVa;
// GPU VA of work submission offset is needed in Confidential Computing
// so CE channels can ring doorbell of other channels as required for
// WLC/LCIC work submission
NvU64 workSubmissionOffsetGpuVa;
NvU64 workSubmissionOffsetGpuVa;
} UvmGpuChannelInfo;

typedef enum
Expand Down Expand Up @@ -604,6 +605,8 @@ typedef struct UvmGpuConfComputeCaps_tag
{
// Out: GPU's confidential compute mode
UvmGpuConfComputeMode mode;
// Is key rotation enabled for UVM keys
NvBool bKeyRotationEnabled;
} UvmGpuConfComputeCaps;

#define UVM_GPU_NAME_LENGTH 0x40
Expand Down Expand Up @@ -1086,4 +1089,21 @@ typedef enum UvmCslOperation
UVM_CSL_OPERATION_DECRYPT
} UvmCslOperation;

typedef enum UVM_KEY_ROTATION_STATUS {
// Key rotation complete/not in progress
UVM_KEY_ROTATION_STATUS_IDLE = 0,
// RM is waiting for clients to report their channels are idle for key rotation
UVM_KEY_ROTATION_STATUS_PENDING = 1,
// Key rotation is in progress
UVM_KEY_ROTATION_STATUS_IN_PROGRESS = 2,
// Key rotation timeout failure, RM will RC non-idle channels.
// UVM should never see this status value.
UVM_KEY_ROTATION_STATUS_FAILED_TIMEOUT = 3,
// Key rotation failed because upper threshold was crossed, RM will RC non-idle channels
UVM_KEY_ROTATION_STATUS_FAILED_THRESHOLD = 4,
// Internal RM failure while rotating keys for a certain channel, RM will RC the channel.
UVM_KEY_ROTATION_STATUS_FAILED_ROTATION = 5,
UVM_KEY_ROTATION_STATUS_MAX_COUNT = 6,
} UVM_KEY_ROTATION_STATUS;

#endif // _NV_UVM_TYPES_H_
Loading

0 comments on commit 91726f2

Please sign in to comment.