diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 4d8c27eca96be..5dfe50355e191 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1200,6 +1200,27 @@ PAGE_SIZE multiple when read back. high limit is used and monitored properly, this limit's utility is limited to providing the final safety net. + memory.reclaim + A write-only nested-keyed file which exists for all cgroups. + + This is a simple interface to trigger memory reclaim in the + target cgroup. + + This file accepts a single key, the number of bytes to reclaim. + No nested keys are currently supported. + + Example:: + + echo "1G" > memory.reclaim + + The interface can be later extended with nested keys to + configure the reclaim behavior. For example, specify the + type of memory to reclaim from (anon, file, ..). + + Please note that the kernel can over or under reclaim from + the target cgroup. If fewer bytes are reclaimed than the + specified amount, -EAGAIN is returned. + memory.oom.group A read-write single value file which exists on non-root cgroups. The default value is "0". 
diff --git a/MSFT-Merge/log b/MSFT-Merge/log index 6182f9c7344ac..ae6ef1868589b 100644 --- a/MSFT-Merge/log +++ b/MSFT-Merge/log @@ -1,12 +1,13 @@ Name SHA1 ---- ---- -config/wsl a1edb1f45f33bee0960a1c82de4737227a8a5e72 +config/wsl efdbd7684bb9625e93871b347d3c7327fc9a63ac feature/arm64-hyperv-hypercall-interface/5.15 3e314b48254cb9c3eeac699356ac605193b4b6fa feature/arm64-hyperv-synthetic-clocks-timers/5.15 59db35e760b9bacc8596a3660a12420f1fa5318f -feature/dxgkrnl/5.15 411a9171ae715efe3817a05a1802f9367c04df33 +feature/dxgkrnl/5.15 26d29d8377a78f1b0014f6c9ba6df9ed5763f885 feature/hvlite_virtio_pmem/5.15 9194f84de8a58bc1a83125054286d649e35054be feature/page-reporting/5.15 ad427234defd6cdfdc0c21ca5b64ef589b82a421 feature/vpci/5.15 d19bb684cca000b65a3b001964a3cc820cbbf9b5 +feature/memory-reclaim/5.15 fed46d1f99d22a5a9efd06da0bf5baf6a04045d8 fix/vsock/5.15 c9e883dfbd7be1194e53133d888b435b277e2e5b product/wsl/readme/5.15 0e1ddcfdc9986e1bf420a3663011abd79752c642 product/wsl/security/5.15 ab2488a9f10a3b83b958103c9b3ed728eb57c564 diff --git a/Makefile b/Makefile index b2954abf44cf2..13c22981426db 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 5 PATCHLEVEL = 15 SUBLEVEL = 90 -EXTRAVERSION = .1 +EXTRAVERSION = .3 NAME = Trick or Treat # *DOCUMENTATION* diff --git a/arch/arm64/configs/config-wsl-arm64 b/arch/arm64/configs/config-wsl-arm64 index 47f23f6818484..bf6926751e417 100644 --- a/arch/arm64/configs/config-wsl-arm64 +++ b/arch/arm64/configs/config-wsl-arm64 @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. 
-# Linux/arm64 5.15.83.1 Kernel Configuration +# Linux/arm64 5.15.90.1 Kernel Configuration # CONFIG_CC_VERSION_TEXT="aarch64-msft-linux-gcc (GCC) 9.3.0" CONFIG_CC_IS_GCC=y @@ -1768,7 +1768,7 @@ CONFIG_BLK_DEV_BSG=y # CONFIG_CHR_DEV_SCH is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set -# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_SCAN_ASYNC=y # # SCSI Transports diff --git a/arch/x86/configs/config-wsl b/arch/x86/configs/config-wsl index affeefd363aae..4b02ff5c81a9d 100644 --- a/arch/x86/configs/config-wsl +++ b/arch/x86/configs/config-wsl @@ -1,6 +1,6 @@ # # Automatically generated file; DO NOT EDIT. -# Linux/x86 5.15.83.1 Kernel Configuration +# Linux/x86 5.15.90.1 Kernel Configuration # CONFIG_CC_VERSION_TEXT="x86_64-msft-linux-gcc (GCC) 9.3.0" CONFIG_CC_IS_GCC=y @@ -1777,7 +1777,7 @@ CONFIG_BLK_DEV_BSG=y # CONFIG_CHR_DEV_SCH is not set # CONFIG_SCSI_CONSTANTS is not set # CONFIG_SCSI_LOGGING is not set -# CONFIG_SCSI_SCAN_ASYNC is not set +CONFIG_SCSI_SCAN_ASYNC=y # # SCSI Transports diff --git a/drivers/hv/dxgkrnl/dxgadapter.c b/drivers/hv/dxgkrnl/dxgadapter.c index d9d45bd4a31eb..d4db4a129ea66 100644 --- a/drivers/hv/dxgkrnl/dxgadapter.c +++ b/drivers/hv/dxgkrnl/dxgadapter.c @@ -1025,8 +1025,7 @@ struct dxgprocess_adapter *dxgprocess_adapter_create(struct dxgprocess *process, } return adapter_info; cleanup: - if (adapter_info) - kfree(adapter_info); + kfree(adapter_info); return NULL; } @@ -1233,10 +1232,8 @@ struct dxgsyncobject *dxgsyncobject_create(struct dxgprocess *process, DXG_TRACE("Syncobj created: %p", syncobj); return syncobj; cleanup: - if (syncobj->host_event) - kfree(syncobj->host_event); - if (syncobj) - kfree(syncobj); + kfree(syncobj->host_event); + kfree(syncobj); return NULL; } @@ -1316,8 +1313,7 @@ void dxgsyncobject_release(struct kref *refcount) kref_put(&syncobj->shared_owner->ssyncobj_kref, dxgsharedsyncobj_release); } - if (syncobj->host_event) - kfree(syncobj->host_event); + 
kfree(syncobj->host_event); kfree(syncobj); } diff --git a/drivers/hv/dxgkrnl/dxgkrnl.h b/drivers/hv/dxgkrnl/dxgkrnl.h index 1b40d6e39085b..2a28c9ea3488e 100644 --- a/drivers/hv/dxgkrnl/dxgkrnl.h +++ b/drivers/hv/dxgkrnl/dxgkrnl.h @@ -47,10 +47,10 @@ struct dxghwqueue; * Driver private data. * A single /dev/dxg device is created per virtual machine. */ -struct dxgdriver{ +struct dxgdriver { struct dxgglobal *dxgglobal; - struct device *dxgdev; - struct pci_driver pci_drv; + struct device *dxgdev; + struct pci_driver pci_drv; struct hv_driver vmbus_drv; }; extern struct dxgdriver dxgdrv; @@ -386,6 +386,8 @@ struct dxgprocess { struct list_head plistentry; pid_t pid; pid_t tgid; + pid_t vpid; /* pid from the current namespace */ + struct pid_namespace *nspid; /* namespace id */ /* how many time the process was opened */ struct kref process_kref; /* protects the object memory */ @@ -478,6 +480,7 @@ struct dxgadapter { struct winluid luid; /* VM bus channel luid */ u16 device_description[80]; u16 device_instance_id[WIN_MAX_PATH]; + bool compute_only; bool stopping_adapter; }; @@ -954,7 +957,8 @@ int dxgvmb_send_query_alloc_residency(struct dxgprocess *process, *args); int dxgvmb_send_escape(struct dxgprocess *process, struct dxgadapter *adapter, - struct d3dkmt_escape *args); + struct d3dkmt_escape *args, + bool user_mode); int dxgvmb_send_query_vidmem_info(struct dxgprocess *process, struct dxgadapter *adapter, struct d3dkmt_queryvideomemoryinfo *args, @@ -984,7 +988,7 @@ int dxgvmb_send_get_stdalloc_data(struct dxgdevice *device, void *prive_alloc_data, u32 *res_priv_data_size, void *priv_res_data); -int dxgvmb_send_query_statistics(struct dxgprocess *process, +int dxgvmb_send_query_statistics(struct d3dkmthandle host_process_handle, struct dxgadapter *adapter, struct d3dkmt_querystatistics *args); int dxgvmb_send_async_msg(struct dxgvmbuschannel *channel, @@ -992,6 +996,11 @@ int 
dxgvmb_send_share_object_with_host(struct dxgprocess *process, struct d3dkmt_shareobjectwithhost *args); +int dxgvmb_send_invalidate_cache(struct dxgprocess *process, + struct dxgadapter *adapter, + struct d3dkmt_invalidatecache *args); +int dxgvmb_send_is_feature_enabled(struct dxgadapter *adapter, + struct d3dkmt_isfeatureenabled *args); void signal_host_cpu_event(struct dxghostevent *eventhdr); int ntstatus2int(struct ntstatus status); diff --git a/drivers/hv/dxgkrnl/dxgmodule.c b/drivers/hv/dxgkrnl/dxgmodule.c index f419597f711a1..0fafb6167229b 100644 --- a/drivers/hv/dxgkrnl/dxgmodule.c +++ b/drivers/hv/dxgkrnl/dxgmodule.c @@ -20,6 +20,7 @@ #define PCI_VENDOR_ID_MICROSOFT 0x1414 #define PCI_DEVICE_ID_VIRTUAL_RENDER 0x008E +#define PCI_DEVICE_ID_COMPUTE_ACCELERATOR 0x008A #undef pr_fmt #define pr_fmt(fmt) "dxgk: " fmt @@ -270,6 +271,8 @@ int dxgglobal_create_adapter(struct pci_dev *dev, guid_t *guid, adapter->adapter_state = DXGADAPTER_STATE_WAITING_VMBUS; adapter->host_vgpu_luid = host_vgpu_luid; + if (dev->device == PCI_DEVICE_ID_COMPUTE_ACCELERATOR) + adapter->compute_only = true; kref_init(&adapter->adapter_kref); init_rwsem(&adapter->core_lock); mutex_init(&adapter->device_creation_lock); @@ -622,6 +625,12 @@ static struct pci_device_id dxg_pci_id_table[] = { .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID }, + { + .vendor = PCI_VENDOR_ID_MICROSOFT, + .device = PCI_DEVICE_ID_COMPUTE_ACCELERATOR, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID + }, { 0 } }; @@ -962,4 +971,4 @@ module_exit(dxg_drv_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Microsoft Dxgkrnl virtual compute device Driver"); -MODULE_VERSION("2.0.1"); +MODULE_VERSION("2.0.2"); diff --git a/drivers/hv/dxgkrnl/dxgprocess.c b/drivers/hv/dxgkrnl/dxgprocess.c index fd51fd9680498..9bfd53df1a54f 100644 --- a/drivers/hv/dxgkrnl/dxgprocess.c +++ b/drivers/hv/dxgkrnl/dxgprocess.c @@ -12,6 +12,8 @@ */ #include "dxgkrnl.h" +#include "linux/sched.h" +#include #undef dev_fmt #define dev_fmt(fmt) 
"dxgk: " fmt @@ -31,6 +33,8 @@ struct dxgprocess *dxgprocess_create(void) DXG_TRACE("new dxgprocess created"); process->pid = current->pid; process->tgid = current->tgid; + process->vpid = task_pid_vnr(current); + process->nspid = task_active_pid_ns(current); ret = dxgvmb_send_create_process(process); if (ret < 0) { DXG_TRACE("send_create_process failed"); diff --git a/drivers/hv/dxgkrnl/dxgvmbus.c b/drivers/hv/dxgkrnl/dxgvmbus.c index 8c99f141482e2..9320bede3a0a5 100644 --- a/drivers/hv/dxgkrnl/dxgvmbus.c +++ b/drivers/hv/dxgkrnl/dxgvmbus.c @@ -22,6 +22,8 @@ #include "dxgkrnl.h" #include "dxgvmbus.h" +#pragma GCC diagnostic ignored "-Warray-bounds" + #undef dev_fmt #define dev_fmt(fmt) "dxgk: " fmt @@ -113,7 +115,6 @@ static int init_message(struct dxgvmbusmsg *msg, struct dxgadapter *adapter, static int init_message_res(struct dxgvmbusmsgres *msg, struct dxgadapter *adapter, - struct dxgprocess *process, u32 size, u32 result_size) { @@ -134,19 +135,20 @@ static int init_message_res(struct dxgvmbusmsgres *msg, if (use_ext_header) { msg->msg = (char *)&msg->hdr[1]; msg->hdr->command_offset = sizeof(msg->hdr[0]); - msg->hdr->vgpu_luid = adapter->host_vgpu_luid; + if (adapter) + msg->hdr->vgpu_luid = adapter->host_vgpu_luid; } else { msg->msg = (char *)msg->hdr; } msg->res = (char *)msg->hdr + msg->size; - if (dxgglobal->async_msg_enabled) - msg->channel = &dxgglobal->channel; - else + if (adapter && !dxgglobal->async_msg_enabled) msg->channel = &adapter->channel; + else + msg->channel = &dxgglobal->channel; return 0; } -static void free_message(struct dxgvmbusmsg *msg, struct dxgprocess *process) +static void free_message(struct dxgvmbusmsg *msg) { if (msg->hdr && (char *)msg->hdr != msg->msg_on_stack) vfree(msg->hdr); @@ -244,7 +246,7 @@ int dxgvmbuschannel_init(struct dxgvmbuschannel *ch, struct hv_device *hdev) goto cleanup; } -#if LINUX_VERSION_CODE >= KERNEL_VERSION(5,15,0) +#if KERNEL_VERSION(5, 15, 0) <= LINUX_VERSION_CODE hdev->channel->max_pkt_size = 
DXG_MAX_VM_BUS_PACKET_SIZE; #endif ret = vmbus_open(hdev->channel, RING_BUFSIZE, RING_BUFSIZE, @@ -418,6 +420,7 @@ int dxgvmb_send_sync_msg(struct dxgvmbuschannel *channel, struct dxgvmbuspacket *packet = NULL; struct dxgkvmb_command_vm_to_host *cmd1; struct dxgkvmb_command_vgpu_to_host *cmd2; + int try_count = 0; if (cmd_size > DXG_MAX_VM_BUS_PACKET_SIZE || result_size > DXG_MAX_VM_BUS_PACKET_SIZE) { @@ -451,9 +454,19 @@ int dxgvmb_send_sync_msg(struct dxgvmbuschannel *channel, list_add_tail(&packet->packet_list_entry, &channel->packet_list_head); spin_unlock_irq(&channel->packet_list_mutex); - ret = vmbus_sendpacket(channel->channel, command, cmd_size, - packet->request_id, VM_PKT_DATA_INBAND, - VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + do { + ret = vmbus_sendpacket(channel->channel, command, cmd_size, + packet->request_id, VM_PKT_DATA_INBAND, + VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); + /* + * -EAGAIN is returned when the VM bus ring buffer is full. + * Wait 2ms to allow the host to process messages and try again. 
+ */ + if (ret == -EAGAIN) { + usleep_range(1000, 2000); + try_count++; + } + } while (ret == -EAGAIN && try_count < 50); if (ret) { DXG_ERR("vmbus_sendpacket failed: %x", ret); spin_lock_irq(&channel->packet_list_mutex); @@ -646,7 +659,7 @@ int dxgvmb_send_set_iospace_region(u64 start, u64 len) dxgglobal_release_channel_lock(); cleanup: - free_message(&msg, NULL); + free_message(&msg); if (ret) DXG_TRACE("Error: %d", ret); return ret; @@ -699,7 +712,7 @@ int dxgvmb_send_create_process(struct dxgprocess *process) dxgglobal_release_channel_lock(); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -727,7 +740,7 @@ int dxgvmb_send_destroy_process(struct d3dkmthandle process) dxgglobal_release_channel_lock(); cleanup: - free_message(&msg, NULL); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -790,7 +803,7 @@ int dxgvmb_send_open_sync_object_nt(struct dxgprocess *process, } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -839,7 +852,7 @@ int dxgvmb_send_open_sync_object(struct dxgprocess *process, *syncobj = result.sync_object; cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -881,7 +894,7 @@ int dxgvmb_send_create_nt_shared_object(struct dxgprocess *process, } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -912,7 +925,7 @@ int dxgvmb_send_destroy_nt_shared_object(struct d3dkmthandle shared_handle) dxgglobal_release_channel_lock(); cleanup: - free_message(&msg, NULL); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -945,7 +958,7 @@ int dxgvmb_send_destroy_sync_object(struct dxgprocess *process, dxgglobal_release_channel_lock(); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -989,7 +1002,7 @@ int 
dxgvmb_send_share_object_with_host(struct dxgprocess *process, args->object_vail_nt_handle = result.vail_nt_handle; cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_ERR("err: %d", ret); return ret; @@ -1026,7 +1039,7 @@ int dxgvmb_send_open_adapter(struct dxgadapter *adapter) adapter->host_handle = result.host_adapter_handle; cleanup: - free_message(&msg, NULL); + free_message(&msg); if (ret) DXG_ERR("Failed to open adapter: %d", ret); return ret; @@ -1048,7 +1061,7 @@ int dxgvmb_send_close_adapter(struct dxgadapter *adapter) ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size, NULL, 0); - free_message(&msg, NULL); + free_message(&msg); if (ret) DXG_ERR("Failed to close adapter: %d", ret); return ret; @@ -1084,7 +1097,7 @@ int dxgvmb_send_get_internal_adapter_info(struct dxgadapter *adapter) sizeof(adapter->device_instance_id) / sizeof(u16)); dxgglobal->async_msg_enabled = result.async_msg_enabled != 0; } - free_message(&msg, NULL); + free_message(&msg); if (ret) DXG_ERR("Failed to get adapter info: %d", ret); return ret; @@ -1114,7 +1127,7 @@ struct d3dkmthandle dxgvmb_send_create_device(struct dxgadapter *adapter, &result, sizeof(result)); if (ret < 0) result.device.v = 0; - free_message(&msg, process); + free_message(&msg); cleanup: if (ret) DXG_TRACE("err: %d", ret); @@ -1140,7 +1153,7 @@ int dxgvmb_send_destroy_device(struct dxgadapter *adapter, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -1167,7 +1180,7 @@ int dxgvmb_send_flush_device(struct dxgdevice *device, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -1239,7 +1252,7 @@ dxgvmb_send_create_context(struct dxgadapter *adapter, } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) 
DXG_TRACE("err: %d", ret); return context; @@ -1265,7 +1278,7 @@ int dxgvmb_send_destroy_context(struct dxgadapter *adapter, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -1312,7 +1325,7 @@ int dxgvmb_send_create_paging_queue(struct dxgprocess *process, pqueue->handle = args->paging_queue; cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -1339,7 +1352,7 @@ int dxgvmb_send_destroy_paging_queue(struct dxgprocess *process, ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size, NULL, 0); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -1558,7 +1571,7 @@ int create_existing_sysmem(struct dxgdevice *device, cleanup: if (kmem) vunmap(kmem); - free_message(&msg, device->process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -1791,7 +1804,7 @@ create_local_allocations(struct dxgprocess *process, dxgdevice_release_alloc_list_lock(device); } - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -1916,7 +1929,7 @@ int dxgvmb_send_create_allocation(struct dxgprocess *process, if (result) vfree(result); - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); @@ -1958,7 +1971,7 @@ int dxgvmb_send_destroy_allocation(struct dxgprocess *process, cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -1972,14 +1985,16 @@ int dxgvmb_send_query_clock_calibration(struct dxgprocess *process, *__user inargs) { struct dxgkvmb_command_queryclockcalibration *command; - struct dxgkvmb_command_queryclockcalibration_return result; + struct dxgkvmb_command_queryclockcalibration_return *result; int ret; - struct dxgvmbusmsg msg = {.hdr = NULL}; + struct 
dxgvmbusmsgres msg = {.hdr = NULL}; - ret = init_message(&msg, adapter, process, sizeof(*command)); + ret = init_message_res(&msg, adapter, sizeof(*command), + sizeof(*result)); if (ret) goto cleanup; command = (void *)msg.msg; + result = msg.res; command_vgpu_to_host_init2(&command->hdr, DXGK_VMBCOMMAND_QUERYCLOCKCALIBRATION, @@ -1987,20 +2002,20 @@ int dxgvmb_send_query_clock_calibration(struct dxgprocess *process, command->args = *args; ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size, - &result, sizeof(result)); + result, sizeof(*result)); if (ret < 0) goto cleanup; - ret = copy_to_user(&inargs->clock_data, &result.clock_data, - sizeof(result.clock_data)); + ret = copy_to_user(&inargs->clock_data, &result->clock_data, + sizeof(result->clock_data)); if (ret) { DXG_ERR("failed to copy clock data"); ret = -EFAULT; goto cleanup; } - ret = ntstatus2int(result.status); + ret = ntstatus2int(result->status); cleanup: - free_message(&msg, process); + free_message((struct dxgvmbusmsg *)&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2023,7 +2038,83 @@ int dxgvmb_send_flush_heap_transitions(struct dxgprocess *process, process->host_handle); ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); + if (ret) + DXG_TRACE("err: %d", ret); + return ret; +} + +int dxgvmb_send_invalidate_cache(struct dxgprocess *process, + struct dxgadapter *adapter, + struct d3dkmt_invalidatecache *args) +{ + struct dxgkvmb_command_invalidatecache *command; + int ret; + struct dxgvmbusmsg msg = {.hdr = NULL}; + + ret = init_message(&msg, adapter, process, sizeof(*command)); + if (ret) + goto cleanup; + command = (void *)msg.msg; + command_vgpu_to_host_init2(&command->hdr, + DXGK_VMBCOMMAND_INVALIDATECACHE, + process->host_handle); + command->device = args->device; + command->allocation = args->allocation; + command->offset = args->offset; + command->length = args->length; + ret = 
dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); +cleanup: + free_message(&msg); + if (ret) + DXG_TRACE("err: %d", ret); + return ret; +} + +int dxgvmb_send_is_feature_enabled(struct dxgadapter *adapter, + struct d3dkmt_isfeatureenabled *args) +{ + int ret; + struct dxgkvmb_command_isfeatureenabled_return *result; + struct dxgvmbusmsgres msg = {.hdr = NULL}; + int res_size = sizeof(*result); + + if (adapter) { + struct dxgkvmb_command_isfeatureenabled *command; + + ret = init_message_res(&msg, adapter, sizeof(*command), + res_size); + if (ret) + goto cleanup; + command = (void *)msg.msg; + command->feature_id = args->feature_id; + result = msg.res; + command_vgpu_to_host_init1(&command->hdr, + DXGK_VMBCOMMAND_ISFEATUREENABLED); + } else { + struct dxgkvmb_command_isfeatureenabled_gbl *command; + + ret = init_message_res(&msg, adapter, sizeof(*command), + res_size); + if (ret) + goto cleanup; + command = (void *)msg.msg; + command->feature_id = args->feature_id; + result = msg.res; + command_vm_to_host_init1(&command->hdr, + DXGK_VMBCOMMAND_ISFEATUREENABLED_GLOBAL); + } + ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size, + result, res_size); + if (ret == 0) { + ret = ntstatus2int(result->status); + if (ret == 0) + args->result = result->result; + goto cleanup; + } + +cleanup: + free_message((struct dxgvmbusmsg *)&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2059,7 +2150,7 @@ int dxgvmb_send_query_alloc_residency(struct dxgprocess *process, } result_size += result_allocation_size; - ret = init_message_res(&msg, adapter, process, cmd_size, result_size); + ret = init_message_res(&msg, adapter, cmd_size, result_size); if (ret) goto cleanup; command = (void *)msg.msg; @@ -2096,7 +2187,7 @@ int dxgvmb_send_query_alloc_residency(struct dxgprocess *process, } cleanup: - free_message((struct dxgvmbusmsg *)&msg, process); + free_message((struct dxgvmbusmsg *)&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2104,7 +2195,8 @@ int 
dxgvmb_send_query_alloc_residency(struct dxgprocess *process, int dxgvmb_send_escape(struct dxgprocess *process, struct dxgadapter *adapter, - struct d3dkmt_escape *args) + struct d3dkmt_escape *args, + bool user_mode) { int ret; struct dxgkvmb_command_escape *command = NULL; @@ -2133,13 +2225,18 @@ int dxgvmb_send_escape(struct dxgprocess *process, command->priv_drv_data_size = args->priv_drv_data_size; command->context = args->context; if (args->priv_drv_data_size) { - ret = copy_from_user(command->priv_drv_data, - args->priv_drv_data, - args->priv_drv_data_size); - if (ret) { - DXG_ERR("failed to copy priv data"); - ret = -EFAULT; - goto cleanup; + if (user_mode) { + ret = copy_from_user(command->priv_drv_data, + args->priv_drv_data, + args->priv_drv_data_size); + if (ret) { + DXG_ERR("failed to copy priv data"); + ret = -EFAULT; + goto cleanup; + } + } else { + memcpy(command->priv_drv_data, args->priv_drv_data, + args->priv_drv_data_size); } } @@ -2150,17 +2247,23 @@ int dxgvmb_send_escape(struct dxgprocess *process, goto cleanup; if (args->priv_drv_data_size) { - ret = copy_to_user(args->priv_drv_data, - command->priv_drv_data, - args->priv_drv_data_size); - if (ret) { - DXG_ERR("failed to copy priv data"); - ret = -EINVAL; + if (user_mode) { + ret = copy_to_user(args->priv_drv_data, + command->priv_drv_data, + args->priv_drv_data_size); + if (ret) { + DXG_ERR("failed to copy priv data"); + ret = -EINVAL; + } + } else { + memcpy(args->priv_drv_data, + command->priv_drv_data, + args->priv_drv_data_size); } } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2224,7 +2327,7 @@ int dxgvmb_send_query_vidmem_info(struct dxgprocess *process, } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2269,7 +2372,7 @@ int dxgvmb_send_get_device_state(struct dxgprocess *process, args->execution_state = result.args.execution_state; cleanup: - 
free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2293,8 +2396,7 @@ int dxgvmb_send_open_resource(struct dxgprocess *process, sizeof(*result); struct dxgvmbusmsgres msg = {.hdr = NULL}; - ret = init_message_res(&msg, adapter, process, sizeof(*command), - result_size); + ret = init_message_res(&msg, adapter, sizeof(*command), result_size); if (ret) goto cleanup; command = msg.msg; @@ -2323,7 +2425,7 @@ int dxgvmb_send_open_resource(struct dxgprocess *process, alloc_handles[i] = handles[i]; cleanup: - free_message((struct dxgvmbusmsg *)&msg, process); + free_message((struct dxgvmbusmsg *)&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2348,7 +2450,7 @@ int dxgvmb_send_get_stdalloc_data(struct dxgdevice *device, result_size += *alloc_priv_driver_size; if (priv_res_data) result_size += *res_priv_data_size; - ret = init_message_res(&msg, device->adapter, device->process, + ret = init_message_res(&msg, device->adapter, sizeof(*command), result_size); if (ret) goto cleanup; @@ -2408,7 +2510,7 @@ int dxgvmb_send_get_stdalloc_data(struct dxgdevice *device, cleanup: - free_message((struct dxgvmbusmsg *)&msg, device->process); + free_message((struct dxgvmbusmsg *)&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2460,7 +2562,7 @@ int dxgvmb_send_make_resident(struct dxgprocess *process, cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2506,7 +2608,7 @@ int dxgvmb_send_evict(struct dxgprocess *process, cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2561,7 +2663,7 @@ int dxgvmb_send_submit_command(struct dxgprocess *process, cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2598,7 +2700,7 @@ int dxgvmb_send_map_gpu_va(struct dxgprocess *process, cleanup: - free_message(&msg, process); + free_message(&msg); if 
(ret) DXG_TRACE("err: %d", ret); return ret; @@ -2628,7 +2730,7 @@ int dxgvmb_send_reserve_gpu_va(struct dxgprocess *process, args->virtual_address = result.virtual_address; cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2655,7 +2757,7 @@ int dxgvmb_send_free_gpu_va(struct dxgprocess *process, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2711,7 +2813,7 @@ int dxgvmb_send_update_gpu_va(struct dxgprocess *process, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2797,7 +2899,7 @@ dxgvmb_send_create_sync_object(struct dxgprocess *process, } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2891,7 +2993,7 @@ int dxgvmb_send_signal_sync_object(struct dxgprocess *process, } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -2951,7 +3053,7 @@ int dxgvmb_send_wait_sync_object_cpu(struct dxgprocess *process, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3004,7 +3106,7 @@ int dxgvmb_send_wait_sync_object_gpu(struct dxgprocess *process, } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3084,7 +3186,7 @@ int dxgvmb_send_lock2(struct dxgprocess *process, hmgrtable_unlock(&process->handle_table, DXGLOCK_EXCL); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3111,7 +3213,7 @@ int dxgvmb_send_unlock2(struct dxgprocess *process, ret = 
dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3156,7 +3258,7 @@ int dxgvmb_send_update_alloc_property(struct dxgprocess *process, } } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3181,7 +3283,7 @@ int dxgvmb_send_mark_device_as_error(struct dxgprocess *process, command->args = *args; ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3251,7 +3353,7 @@ int dxgvmb_send_set_allocation_priority(struct dxgprocess *process, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3293,7 +3395,7 @@ int dxgvmb_send_get_allocation_priority(struct dxgprocess *process, } result_size = sizeof(*result) + priority_size; - ret = init_message_res(&msg, adapter, process, cmd_size, result_size); + ret = init_message_res(&msg, adapter, cmd_size, result_size); if (ret) goto cleanup; command = (void *)msg.msg; @@ -3333,7 +3435,7 @@ int dxgvmb_send_get_allocation_priority(struct dxgprocess *process, } cleanup: - free_message((struct dxgvmbusmsg *)&msg, process); + free_message((struct dxgvmbusmsg *)&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3362,7 +3464,7 @@ int dxgvmb_send_set_context_sch_priority(struct dxgprocess *process, command->in_process = in_process; ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3396,7 +3498,7 @@ int dxgvmb_send_get_context_sch_priority(struct dxgprocess *process, *priority = result.priority; } cleanup: - free_message(&msg, process); + free_message(&msg); if 
(ret) DXG_TRACE("err: %d", ret); return ret; @@ -3442,7 +3544,7 @@ int dxgvmb_send_offer_allocations(struct dxgprocess *process, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3467,7 +3569,7 @@ int dxgvmb_send_reclaim_allocations(struct dxgprocess *process, result_size += (args->allocation_count - 1) * sizeof(enum d3dddi_reclaim_result); - ret = init_message_res(&msg, adapter, process, cmd_size, result_size); + ret = init_message_res(&msg, adapter, cmd_size, result_size); if (ret) goto cleanup; command = (void *)msg.msg; @@ -3518,7 +3620,7 @@ int dxgvmb_send_reclaim_allocations(struct dxgprocess *process, } cleanup: - free_message((struct dxgvmbusmsg *)&msg, process); + free_message((struct dxgvmbusmsg *)&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3548,7 +3650,7 @@ int dxgvmb_send_change_vidmem_reservation(struct dxgprocess *process, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3687,7 +3789,7 @@ int dxgvmb_send_create_hwqueue(struct dxgprocess *process, dxgvmb_send_destroy_hwqueue(process, adapter, command->hwqueue); } - free_message(&msg, process); + free_message(&msg); return ret; } @@ -3712,7 +3814,7 @@ int dxgvmb_send_destroy_hwqueue(struct dxgprocess *process, ret = dxgvmb_send_sync_msg_ntstatus(msg.channel, msg.hdr, msg.size); cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3782,6 +3884,7 @@ int dxgvmb_send_query_adapter_info(struct dxgprocess *process, adapter_type->indirect_display_device = 0; adapter_type->acg_supported = 0; adapter_type->support_set_timings_from_vidpn = 0; + adapter_type->compute_only = !!adapter->compute_only; break; } default: @@ -3795,7 +3898,7 @@ int 
dxgvmb_send_query_adapter_info(struct dxgprocess *process, } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; @@ -3853,13 +3956,13 @@ int dxgvmb_send_submit_command_hwqueue(struct dxgprocess *process, } cleanup: - free_message(&msg, process); + free_message(&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; } -int dxgvmb_send_query_statistics(struct dxgprocess *process, +int dxgvmb_send_query_statistics(struct d3dkmthandle host_process_handle, struct dxgadapter *adapter, struct d3dkmt_querystatistics *args) { @@ -3868,7 +3971,7 @@ int dxgvmb_send_query_statistics(struct dxgprocess *process, int ret; struct dxgvmbusmsgres msg = {.hdr = NULL}; - ret = init_message_res(&msg, adapter, process, sizeof(*command), + ret = init_message_res(&msg, adapter, sizeof(*command), sizeof(*result)); if (ret) goto cleanup; @@ -3877,7 +3980,7 @@ int dxgvmb_send_query_statistics(struct dxgprocess *process, command_vgpu_to_host_init2(&command->hdr, DXGK_VMBCOMMAND_QUERYSTATISTICS, - process->host_handle); + host_process_handle); command->args = *args; ret = dxgvmb_send_sync_msg(msg.channel, msg.hdr, msg.size, @@ -3889,7 +3992,7 @@ int dxgvmb_send_query_statistics(struct dxgprocess *process, ret = ntstatus2int(result->status); cleanup: - free_message((struct dxgvmbusmsg *)&msg, process); + free_message((struct dxgvmbusmsg *)&msg); if (ret) DXG_TRACE("err: %d", ret); return ret; diff --git a/drivers/hv/dxgkrnl/dxgvmbus.h b/drivers/hv/dxgkrnl/dxgvmbus.h index b4a98f7c25222..a7e625b2f896a 100644 --- a/drivers/hv/dxgkrnl/dxgvmbus.h +++ b/drivers/hv/dxgkrnl/dxgvmbus.h @@ -48,6 +48,7 @@ enum dxgkvmb_commandtype_global { DXGK_VMBCOMMAND_SETIOSPACEREGION = 1010, DXGK_VMBCOMMAND_COMPLETETRANSACTION = 1011, DXGK_VMBCOMMAND_SHAREOBJECTWITHHOST = 1021, + DXGK_VMBCOMMAND_ISFEATUREENABLED_GLOBAL = 1022, DXGK_VMBCOMMAND_INVALID_VM_TO_HOST }; @@ -125,6 +126,8 @@ enum dxgkvmb_commandtype { DXGK_VMBCOMMAND_QUERYRESOURCEINFO = 64, 
DXGK_VMBCOMMAND_LOGEVENT = 65, DXGK_VMBCOMMAND_SETEXISTINGSYSMEMPAGES = 66, + DXGK_VMBCOMMAND_INVALIDATECACHE = 67, + DXGK_VMBCOMMAND_ISFEATUREENABLED = 68, DXGK_VMBCOMMAND_INVALID }; @@ -428,6 +431,16 @@ struct dxgkvmb_command_flushheaptransitions { struct dxgkvmb_command_vgpu_to_host hdr; }; +/* Returns ntstatus */ +struct dxgkvmb_command_invalidatecache { + struct dxgkvmb_command_vgpu_to_host hdr; + struct d3dkmthandle device; + struct d3dkmthandle allocation; + u64 offset; + u64 length; + u64 reserved; +}; + struct dxgkvmb_command_freegpuvirtualaddress { struct dxgkvmb_command_vgpu_to_host hdr; struct d3dkmt_freegpuvirtualaddress args; @@ -860,6 +873,35 @@ struct dxgkvmb_command_shareobjectwithhost_return { u64 vail_nt_handle; }; +struct dxgk_feature_desc { + u16 min_supported_version; + u16 max_supported_version; + struct { + u16 supported : 1; + u16 virtualization_mode : 3; + u16 global : 1; + u16 driver_feature : 1; + u16 internal : 1; + u16 reserved : 9; + }; +}; + +struct dxgkvmb_command_isfeatureenabled { + struct dxgkvmb_command_vgpu_to_host hdr; + enum dxgk_feature_id feature_id; +}; + +struct dxgkvmb_command_isfeatureenabled_gbl { + struct dxgkvmb_command_vm_to_host hdr; + enum dxgk_feature_id feature_id; +}; + +struct dxgkvmb_command_isfeatureenabled_return { + struct ntstatus status; + struct dxgk_feature_desc descriptor; + struct dxgk_isfeatureenabled_result result; +}; + int dxgvmb_send_sync_msg(struct dxgvmbuschannel *channel, void *command, u32 command_size, void *result, diff --git a/drivers/hv/dxgkrnl/ioctl.c b/drivers/hv/dxgkrnl/ioctl.c index 98350583943e0..d91af2e176e9c 100644 --- a/drivers/hv/dxgkrnl/ioctl.c +++ b/drivers/hv/dxgkrnl/ioctl.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "dxgkrnl.h" #include "dxgvmbus.h" @@ -147,6 +148,23 @@ static int dxgkio_open_adapter_from_luid(struct dxgprocess *process, return ret; } +static struct d3dkmthandle find_dxgprocess_handle(u64 pid) +{ + struct dxgglobal *dxgglobal = dxggbl(); 
+ struct dxgprocess *entry; + struct d3dkmthandle host_handle = {}; + + mutex_lock(&dxgglobal->plistmutex); + list_for_each_entry(entry, &dxgglobal->plisthead, plistentry) { + if (entry->vpid == pid) { + host_handle.v = entry->host_handle.v; + break; + } + } + mutex_unlock(&dxgglobal->plistmutex); + return host_handle; +} + static int dxgkio_query_statistics(struct dxgprocess *process, void __user *inargs) { @@ -156,6 +174,8 @@ static int dxgkio_query_statistics(struct dxgprocess *process, struct dxgadapter *adapter = NULL; struct winluid tmp; struct dxgglobal *dxgglobal = dxggbl(); + struct d3dkmthandle host_process_handle = process->host_handle; + u64 pid; args = vzalloc(sizeof(struct d3dkmt_querystatistics)); if (args == NULL) { @@ -170,6 +190,18 @@ static int dxgkio_query_statistics(struct dxgprocess *process, goto cleanup; } + /* Find the host process handle when needed */ + pid = args->process; + if (pid) { + host_process_handle = find_dxgprocess_handle(pid); + if (host_process_handle.v == 0) { + DXG_ERR("Invalid process ID is specified: %lld", pid); + ret = -EINVAL; + goto cleanup; + } + args->process = 0; + } + dxgglobal_acquire_adapter_list_lock(DXGLOCK_SHARED); list_for_each_entry(entry, &dxgglobal->adapter_list_head, adapter_list_entry) { @@ -186,7 +218,8 @@ static int dxgkio_query_statistics(struct dxgprocess *process, if (adapter) { tmp = args->adapter_luid; args->adapter_luid = adapter->host_adapter_luid; - ret = dxgvmb_send_query_statistics(process, adapter, args); + ret = dxgvmb_send_query_statistics(host_process_handle, adapter, + args); if (ret >= 0) { args->adapter_luid = tmp; ret = copy_to_user(inargs, args, sizeof(*args)); @@ -254,6 +287,8 @@ dxgkp_enum_adapters(struct dxgprocess *process, list_for_each_entry(entry, &dxgglobal->adapter_list_head, adapter_list_entry) { + if (entry->compute_only && !filter.include_compute_only) + continue; if (dxgadapter_acquire_lock_shared(entry) == 0) { struct d3dkmt_adapterinfo *inf = &info[adapter_count]; @@ 
-278,7 +313,10 @@ dxgkp_enum_adapters(struct dxgprocess *process, dxgglobal_release_adapter_list_lock(DXGLOCK_SHARED); if (adapter_count > adapter_count_max) { - ret = STATUS_BUFFER_TOO_SMALL; + struct ntstatus status; + + status.v = STATUS_BUFFER_TOO_SMALL; + ret = ntstatus2int(status); DXG_TRACE("Too many adapters"); ret = copy_to_user(adapter_count_out, &dxgglobal->num_adapters, sizeof(u32)); @@ -474,6 +512,8 @@ dxgkio_enum_adapters(struct dxgprocess *process, void *__user inargs) list_for_each_entry(entry, &dxgglobal->adapter_list_head, adapter_list_entry) { + if (entry->compute_only) + continue; if (dxgadapter_acquire_lock_shared(entry) == 0) { struct d3dkmt_adapterinfo *inf = &info[adapter_count]; @@ -3122,8 +3162,7 @@ dxgkio_signal_sync_object(struct dxgprocess *process, void *__user inargs) } if (event) eventfd_ctx_put(event); - if (host_event) - kfree(host_event); + kfree(host_event); } if (adapter) dxgadapter_release_lock_shared(adapter); @@ -3358,8 +3397,7 @@ dxgkio_signal_sync_object_gpu2(struct dxgprocess *process, void *__user inargs) } if (event) eventfd_ctx_put(event); - if (host_event) - kfree(host_event); + kfree(host_event); } if (adapter) dxgadapter_release_lock_shared(adapter); @@ -3537,8 +3575,7 @@ dxgkio_wait_sync_object_cpu(struct dxgprocess *process, void *__user inargs) } if (event) eventfd_ctx_put(event); - if (async_host_event) - kfree(async_host_event); + kfree(async_host_event); } DXG_TRACE_IOCTL_END(ret); @@ -4217,10 +4254,8 @@ dxgkio_change_vidmem_reservation(struct dxgprocess *process, void *__user inargs } ret = dxgadapter_acquire_lock_shared(adapter); - if (ret < 0) { - adapter = NULL; + if (ret < 0) goto cleanup; - } adapter_locked = true; args.adapter.v = 0; ret = dxgvmb_send_change_vidmem_reservation(process, adapter, @@ -4259,10 +4294,8 @@ dxgkio_query_clock_calibration(struct dxgprocess *process, void *__user inargs) } ret = dxgadapter_acquire_lock_shared(adapter); - if (ret < 0) { - adapter = NULL; + if (ret < 0) goto 
cleanup; - } adapter_locked = true; args.adapter = adapter->host_handle; @@ -4270,11 +4303,6 @@ dxgkio_query_clock_calibration(struct dxgprocess *process, void *__user inargs) &args, inargs); if (ret < 0) goto cleanup; - ret = copy_to_user(inargs, &args, sizeof(args)); - if (ret) { - DXG_ERR("failed to copy output args"); - ret = -EFAULT; - } cleanup: @@ -4282,6 +4310,8 @@ dxgkio_query_clock_calibration(struct dxgprocess *process, void *__user inargs) dxgadapter_release_lock_shared(adapter); if (adapter) kref_put(&adapter->adapter_kref, dxgadapter_release); + + DXG_TRACE_IOCTL_END(ret); return ret; } @@ -4307,10 +4337,8 @@ dxgkio_flush_heap_transitions(struct dxgprocess *process, void *__user inargs) } ret = dxgadapter_acquire_lock_shared(adapter); - if (ret < 0) { - adapter = NULL; + if (ret < 0) goto cleanup; - } adapter_locked = true; args.adapter = adapter->host_handle; @@ -4329,6 +4357,176 @@ dxgkio_flush_heap_transitions(struct dxgprocess *process, void *__user inargs) dxgadapter_release_lock_shared(adapter); if (adapter) kref_put(&adapter->adapter_kref, dxgadapter_release); + + DXG_TRACE_IOCTL_END(ret); + return ret; +} + +static int +dxgkio_invalidate_cache(struct dxgprocess *process, void *__user inargs) +{ + struct d3dkmt_invalidatecache args; + int ret; + struct dxgdevice *device = NULL; + + ret = copy_from_user(&args, inargs, sizeof(args)); + if (ret) { + DXG_ERR("failed to copy input args"); + ret = -EFAULT; + goto cleanup; + } + + device = dxgprocess_device_by_handle(process, args.device); + if (device == NULL) { + ret = -EINVAL; + goto cleanup; + } + + ret = dxgdevice_acquire_lock_shared(device); + if (ret < 0) { + kref_put(&device->device_kref, dxgdevice_release); + device = NULL; + goto cleanup; + } + + ret = dxgvmb_send_invalidate_cache(process, device->adapter, + &args); + +cleanup: + + if (device) { + dxgdevice_release_lock_shared(device); + kref_put(&device->device_kref, dxgdevice_release); + } + + DXG_TRACE_IOCTL_END(ret); + return ret; +} + 
+static int +build_test_command_buffer(struct dxgprocess *process, + struct dxgadapter *adapter, + struct d3dkmt_escape *args) +{ + int ret; + struct d3dddi_buildtestcommandbuffer cmd; + struct d3dkmt_escape newargs = *args; + u32 buf_size; + struct d3dddi_buildtestcommandbuffer *buf = NULL; + struct d3dddi_buildtestcommandbuffer *__user ucmd; + + ucmd = args->priv_drv_data; + if (args->priv_drv_data_size < + sizeof(struct d3dddi_buildtestcommandbuffer)) { + DXG_ERR("Invalid private data size"); + return -EINVAL; + } + ret = copy_from_user(&cmd, ucmd, sizeof(cmd)); + if (ret) { + DXG_ERR("Failed to copy private data"); + return -EFAULT; + } + + if (cmd.dma_buffer_size < sizeof(u32) || + cmd.dma_buffer_size > D3DDDI_MAXTESTBUFFERSIZE || + cmd.dma_buffer_priv_data_size > + D3DDDI_MAXTESTBUFFERPRIVATEDRIVERDATASIZE) { + DXG_ERR("Invalid DMA buffer or private data size"); + return -EINVAL; + } + /* Allocate a new buffer for the escape call */ + buf_size = sizeof(struct d3dddi_buildtestcommandbuffer) + + cmd.dma_buffer_size + + cmd.dma_buffer_priv_data_size; + buf = vzalloc(buf_size); + if (buf == NULL) { + ret = -ENOMEM; + goto cleanup; + } + *buf = cmd; + buf->dma_buffer = NULL; + buf->dma_buffer_priv_data = NULL; + + /* Replace private data in the escape arguments and call the host */ + newargs.priv_drv_data = buf; + newargs.priv_drv_data_size = buf_size; + ret = dxgvmb_send_escape(process, adapter, &newargs, false); + if (ret) { + DXG_ERR("Host failed escape"); + goto cleanup; + } + + ret = copy_to_user(&ucmd->dma_buffer_size, &buf->dma_buffer_size, + sizeof(u32)); + if (ret) { + DXG_ERR("Failed to dma size to user"); + ret = -EFAULT; + goto cleanup; + } + ret = copy_to_user(&ucmd->dma_buffer_priv_data_size, + &buf->dma_buffer_priv_data_size, + sizeof(u32)); + if (ret) { + DXG_ERR("Failed to dma private data size to user"); + ret = -EFAULT; + goto cleanup; + } + ret = copy_to_user(cmd.dma_buffer, (char *)buf + sizeof(*buf), + buf->dma_buffer_size); + if (ret) { + 
DXG_ERR("Failed to copy dma buffer to user"); + ret = -EFAULT; + goto cleanup; + } + if (buf->dma_buffer_priv_data_size) { + ret = copy_to_user(cmd.dma_buffer_priv_data, + (char *)buf + sizeof(*buf) + cmd.dma_buffer_size, + buf->dma_buffer_priv_data_size); + if (ret) { + DXG_ERR("Failed to copy private data to user"); + ret = -EFAULT; + goto cleanup; + } + } + +cleanup: + if (buf) + vfree(buf); + return ret; +} + +static int +driver_known_escape(struct dxgprocess *process, + struct dxgadapter *adapter, + struct d3dkmt_escape *args) +{ + enum d3dkmt_escapetype escape_type; + int ret = 0; + + if (args->priv_drv_data_size < sizeof(enum d3dddi_knownescapetype)) { + DXG_ERR("Invalid private data size"); + return -EINVAL; + } + ret = copy_from_user(&escape_type, args->priv_drv_data, + sizeof(escape_type)); + if (ret) { + DXG_ERR("Failed to read escape type"); + return -EFAULT; + } + switch (escape_type) { + case _D3DDDI_DRIVERESCAPETYPE_TRANSLATEALLOCATIONHANDLE: + case _D3DDDI_DRIVERESCAPETYPE_TRANSLATERESOURCEHANDLE: + /* + * The host and VM handles are the same + */ + break; + case _D3DDDI_DRIVERESCAPETYPE_BUILDTESTCOMMANDBUFFER: + ret = build_test_command_buffer(process, adapter, args); + break; + default: + ret = dxgvmb_send_escape(process, adapter, args, true); + break; + } return ret; } @@ -4353,14 +4551,17 @@ dxgkio_escape(struct dxgprocess *process, void *__user inargs) } ret = dxgadapter_acquire_lock_shared(adapter); - if (ret < 0) { - adapter = NULL; + if (ret < 0) goto cleanup; - } adapter_locked = true; args.adapter = adapter->host_handle; - ret = dxgvmb_send_escape(process, adapter, &args); + + if (args.type == _D3DKMT_ESCAPE_DRIVERPRIVATE && + args.flags.driver_known_escape) + ret = driver_known_escape(process, adapter, &args); + else + ret = dxgvmb_send_escape(process, adapter, &args, true); cleanup: @@ -4400,10 +4601,8 @@ dxgkio_query_vidmem_info(struct dxgprocess *process, void *__user inargs) } ret = dxgadapter_acquire_lock_shared(adapter); - if (ret < 
0) { - adapter = NULL; + if (ret < 0) goto cleanup; - } adapter_locked = true; args.adapter = adapter->host_handle; @@ -5154,6 +5353,129 @@ dxgkio_share_object_with_host(struct dxgprocess *process, void *__user inargs) return ret; } +static int +dxgkio_enum_processes(struct dxgprocess *process, void *__user inargs) +{ + struct d3dkmt_enumprocesses args; + struct d3dkmt_enumprocesses *__user input = inargs; + struct dxgadapter *adapter = NULL; + struct dxgadapter *entry; + struct dxgglobal *dxgglobal = dxggbl(); + struct dxgprocess_adapter *pentry; + int nump = 0; /* Current number of processes*/ + struct ntstatus status; + int ret; + + ret = copy_from_user(&args, inargs, sizeof(args)); + if (ret) { + DXG_ERR("failed to copy input args"); + ret = -EFAULT; + goto cleanup; + } + + if (args.buffer_count == 0) { + DXG_ERR("Invalid buffer count"); + ret = -EINVAL; + goto cleanup; + } + + dxgglobal_acquire_adapter_list_lock(DXGLOCK_SHARED); + dxgglobal_acquire_process_adapter_lock(); + + list_for_each_entry(entry, &dxgglobal->adapter_list_head, + adapter_list_entry) { + if (*(u64 *) &entry->luid == *(u64 *) &args.adapter_luid) { + adapter = entry; + break; + } + } + + if (adapter == NULL) { + DXG_ERR("Failed to find dxgadapter"); + ret = -EINVAL; + goto cleanup_locks; + } + + list_for_each_entry(pentry, &adapter->adapter_process_list_head, + adapter_process_list_entry) { + if (pentry->process->nspid != task_active_pid_ns(current)) + continue; + if (nump == args.buffer_count) { + status.v = STATUS_BUFFER_TOO_SMALL; + ret = ntstatus2int(status); + goto cleanup_locks; + } + ret = copy_to_user(&args.buffer[nump], &pentry->process->vpid, + sizeof(u32)); + if (ret) { + DXG_ERR("failed to copy data to user"); + ret = -EFAULT; + goto cleanup_locks; + } + nump++; + } + +cleanup_locks: + + dxgglobal_release_process_adapter_lock(); + dxgglobal_release_adapter_list_lock(DXGLOCK_SHARED); + + if (ret == 0) { + ret = copy_to_user(&input->buffer_count, &nump, sizeof(u32)); + if (ret) + 
DXG_ERR("failed to copy buffer count to user"); + } + +cleanup: + + DXG_TRACE_IOCTL_END(ret); + return ret; +} + +static int +dxgkio_is_feature_enabled(struct dxgprocess *process, void *__user inargs) +{ + struct d3dkmt_isfeatureenabled args; + struct dxgadapter *adapter = NULL; + struct d3dkmt_isfeatureenabled *__user uargs = inargs; + int ret; + bool adapter_locked = false; + + ret = copy_from_user(&args, inargs, sizeof(args)); + if (ret) { + DXG_ERR("failed to copy input args"); + ret = -EFAULT; + goto cleanup; + } + + adapter = dxgprocess_adapter_by_handle(process, args.adapter); + if (adapter == NULL) { + ret = -EINVAL; + goto cleanup; + } + + ret = dxgadapter_acquire_lock_shared(adapter); + if (ret < 0) + goto cleanup; + adapter_locked = true; + + ret = dxgvmb_send_is_feature_enabled(adapter, &args); + if (ret) + goto cleanup; + + ret = copy_to_user(&uargs->result, &args.result, sizeof(args.result)); + +cleanup: + + if (adapter_locked) + dxgadapter_release_lock_shared(adapter); + if (adapter) + kref_put(&adapter->adapter_kref, dxgadapter_release); + + DXG_TRACE_IOCTL_END(ret); + return ret; +} + static struct ioctl_desc ioctls[] = { /* 0x00 */ {}, /* 0x01 */ {dxgkio_open_adapter_from_luid, LX_DXOPENADAPTERFROMLUID}, @@ -5194,7 +5516,7 @@ static struct ioctl_desc ioctls[] = { /* 0x22 */ {dxgkio_get_context_scheduling_priority, LX_DXGETCONTEXTSCHEDULINGPRIORITY}, /* 0x23 */ {}, -/* 0x24 */ {}, +/* 0x24 */ {dxgkio_invalidate_cache, LX_DXINVALIDATECACHE}, /* 0x25 */ {dxgkio_lock2, LX_DXLOCK2}, /* 0x26 */ {dxgkio_mark_device_as_error, LX_DXMARKDEVICEASERROR}, /* 0x27 */ {dxgkio_offer_allocations, LX_DXOFFERALLOCATIONS}, @@ -5239,8 +5561,10 @@ static struct ioctl_desc ioctls[] = { /* 0x44 */ {dxgkio_share_object_with_host, LX_DXSHAREOBJECTWITHHOST}, /* 0x45 */ {dxgkio_create_sync_file, LX_DXCREATESYNCFILE}, /* 0x46 */ {dxgkio_wait_sync_file, LX_DXWAITSYNCFILE}, -/* 0x46 */ {dxgkio_open_syncobj_from_syncfile, +/* 0x47 */ {dxgkio_open_syncobj_from_syncfile, 
LX_DXOPENSYNCOBJECTFROMSYNCFILE}, +/* 0x48 */ {dxgkio_enum_processes, LX_DXENUMPROCESSES}, +/* 0x49 */ {dxgkio_is_feature_enabled, LX_ISFEATUREENABLED}, }; /* @@ -5298,10 +5622,8 @@ void dxgk_validate_ioctls(void) { int i; - for (i=0; i < ARRAY_SIZE(ioctls); i++) - { - if (ioctls[i].ioctl && _IOC_NR(ioctls[i].ioctl) != i) - { + for (i = 0; i < ARRAY_SIZE(ioctls); i++) { + if (ioctls[i].ioctl && _IOC_NR(ioctls[i].ioctl) != i) { DXG_ERR("Invalid ioctl"); DXGKRNL_ASSERT(0); } diff --git a/include/uapi/misc/d3dkmthk.h b/include/uapi/misc/d3dkmthk.h index 1eaa3f0383226..db40e8ff40b0b 100644 --- a/include/uapi/misc/d3dkmthk.h +++ b/include/uapi/misc/d3dkmthk.h @@ -237,6 +237,37 @@ struct d3dddi_destroypagingqueue { struct d3dkmthandle paging_queue; }; +enum d3dddi_knownescapetype { + _D3DDDI_DRIVERESCAPETYPE_TRANSLATEALLOCATIONHANDLE = 0, + _D3DDDI_DRIVERESCAPETYPE_TRANSLATERESOURCEHANDLE = 1, + _D3DDDI_DRIVERESCAPETYPE_CPUEVENTUSAGE = 2, + _D3DDDI_DRIVERESCAPETYPE_BUILDTESTCOMMANDBUFFER = 3, +}; + +struct d3dddi_translate_allocation_handle { + enum d3dddi_knownescapetype escape_type; + struct d3dkmthandle allocation; +}; + +struct d3dddi_testcommand { + char buffer[72]; +}; + +#define D3DDDI_MAXTESTBUFFERSIZE 4096 +#define D3DDDI_MAXTESTBUFFERPRIVATEDRIVERDATASIZE 1024 + +struct d3dddi_buildtestcommandbuffer { + enum d3dddi_knownescapetype escape_type; + struct d3dkmthandle device; + struct d3dkmthandle context; + __u32 flags; + struct d3dddi_testcommand command; + void *dma_buffer; + void *dma_buffer_priv_data; + __u32 dma_buffer_size; + __u32 dma_buffer_priv_data_size; +}; + enum d3dkmt_escapetype { _D3DKMT_ESCAPE_DRIVERPRIVATE = 0, _D3DKMT_ESCAPE_VIDMM = 1, @@ -1580,6 +1611,50 @@ struct d3dkmt_opensyncobjectfromsyncfile { __u64 fence_value_gpu_va; /* out */ }; +struct d3dkmt_enumprocesses { + struct winluid adapter_luid; +#ifdef __KERNEL__ + __u32 *buffer; +#else + __u64 buffer; +#endif + __u64 buffer_count; +}; + +enum dxgk_feature_id { + _DXGK_FEATURE_HWSCH = 0, + 
_DXGK_FEATURE_PAGE_BASED_MEMORY_MANAGER = 32, + _DXGK_FEATURE_KERNEL_MODE_TESTING = 33, + _DXGK_FEATURE_MAX +}; + +struct dxgk_isfeatureenabled_result { + __u16 version; + union { + struct { + __u16 enabled : 1; + __u16 known_feature : 1; + __u16 supported_by_driver : 1; + __u16 supported_on_config : 1; + __u16 reserved : 12; + }; + __u16 value; + }; +}; + +struct d3dkmt_isfeatureenabled { + struct d3dkmthandle adapter; + enum dxgk_feature_id feature_id; + struct dxgk_isfeatureenabled_result result; +}; + +struct d3dkmt_invalidatecache { + struct d3dkmthandle device; + struct d3dkmthandle allocation; + __u64 offset; + __u64 length; +}; + /* * Dxgkrnl Graphics Port Driver ioctl definitions * @@ -1647,6 +1722,8 @@ struct d3dkmt_opensyncobjectfromsyncfile { _IOWR(0x47, 0x21, struct d3dkmt_getcontextinprocessschedulingpriority) #define LX_DXGETCONTEXTSCHEDULINGPRIORITY \ _IOWR(0x47, 0x22, struct d3dkmt_getcontextschedulingpriority) +#define LX_DXINVALIDATECACHE \ + _IOWR(0x47, 0x24, struct d3dkmt_invalidatecache) #define LX_DXLOCK2 \ _IOWR(0x47, 0x25, struct d3dkmt_lock2) #define LX_DXMARKDEVICEASERROR \ @@ -1709,5 +1786,9 @@ struct d3dkmt_opensyncobjectfromsyncfile { _IOWR(0x47, 0x46, struct d3dkmt_waitsyncfile) #define LX_DXOPENSYNCOBJECTFROMSYNCFILE \ _IOWR(0x47, 0x47, struct d3dkmt_opensyncobjectfromsyncfile) +#define LX_DXENUMPROCESSES \ + _IOWR(0x47, 0x48, struct d3dkmt_enumprocesses) +#define LX_ISFEATUREENABLED \ + _IOWR(0x47, 0x49, struct d3dkmt_isfeatureenabled) #endif /* _D3DKMTHK_H */ diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 3d3364cd4ff19..3e92da4d977a3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6478,6 +6478,46 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of, return nbytes; } +static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_retries = MAX_RECLAIM_RETRIES; + unsigned long 
nr_to_reclaim, nr_reclaimed = 0; + int err; + + buf = strstrip(buf); + err = page_counter_memparse(buf, "", &nr_to_reclaim); + if (err) + return err; + + while (nr_reclaimed < nr_to_reclaim) { + unsigned long reclaimed; + + if (signal_pending(current)) + return -EINTR; + + /* + * This is the final attempt, drain percpu lru caches in the + * hope of introducing more evictable pages for + * try_to_free_mem_cgroup_pages(). + */ + if (!nr_retries) + lru_add_drain_all(); + + reclaimed = try_to_free_mem_cgroup_pages(memcg, + nr_to_reclaim - nr_reclaimed, + GFP_KERNEL, true); + + if (!reclaimed && !nr_retries--) + return -EAGAIN; + + nr_reclaimed += reclaimed; + } + + return nbytes; +} + static struct cftype memory_files[] = { { .name = "current", @@ -6536,6 +6576,11 @@ static struct cftype memory_files[] = { .seq_show = memory_oom_group_show, .write = memory_oom_group_write, }, + { + .name = "reclaim", + .flags = CFTYPE_NS_DELEGATABLE, + .write = memory_reclaim, + }, { } /* terminate */ }; diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c index 0cf7e90c0052e..ffcddc066eaac 100644 --- a/tools/testing/selftests/cgroup/cgroup_util.c +++ b/tools/testing/selftests/cgroup/cgroup_util.c @@ -19,6 +19,7 @@ #include "cgroup_util.h" #include "../clone3/clone3_selftests.h" +/* Returns read len on success, or -errno on failure. */ static ssize_t read_text(const char *path, char *buf, size_t max_len) { ssize_t len; @@ -26,35 +27,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len) fd = open(path, O_RDONLY); if (fd < 0) - return fd; + return -errno; len = read(fd, buf, max_len - 1); - if (len < 0) - goto out; - buf[len] = 0; -out: + if (len >= 0) + buf[len] = 0; + close(fd); - return len; + return len < 0 ? -errno : len; } +/* Returns written len on success, or -errno on failure. 
*/ static ssize_t write_text(const char *path, char *buf, ssize_t len) { int fd; fd = open(path, O_WRONLY | O_APPEND); if (fd < 0) - return fd; + return -errno; len = write(fd, buf, len); - if (len < 0) { - close(fd); - return len; - } - close(fd); - - return len; + return len < 0 ? -errno : len; } char *cg_name(const char *root, const char *name) @@ -87,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control) return ret; } +/* Returns 0 on success, or -errno on failure. */ int cg_read(const char *cgroup, const char *control, char *buf, size_t len) { char path[PATH_MAX]; + ssize_t ret; snprintf(path, sizeof(path), "%s/%s", cgroup, control); - if (read_text(path, buf, len) >= 0) - return 0; - - return -1; + ret = read_text(path, buf, len); + return ret >= 0 ? 0 : ret; } int cg_read_strcmp(const char *cgroup, const char *control, @@ -177,17 +172,15 @@ long cg_read_lc(const char *cgroup, const char *control) return cnt; } +/* Returns 0 on success, or -errno on failure. */ int cg_write(const char *cgroup, const char *control, char *buf) { char path[PATH_MAX]; - ssize_t len = strlen(buf); + ssize_t len = strlen(buf), ret; snprintf(path, sizeof(path), "%s/%s", cgroup, control); - - if (write_text(path, buf, len) == len) - return 0; - - return -1; + ret = write_text(path, buf, len); + return ret == len ? 0 : ret; } int cg_find_unified_root(char *root, size_t len) @@ -538,6 +531,7 @@ int set_oom_adj_score(int pid, int score) ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size) { char path[PATH_MAX]; + ssize_t ret; if (!pid) snprintf(path, sizeof(path), "/proc/%s/%s", @@ -545,7 +539,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t else snprintf(path, sizeof(path), "/proc/%d/%s", pid, item); - return read_text(path, buf, size); + ret = read_text(path, buf, size); + return ret < 0 ? 
-1 : ret; } int proc_read_strstr(int pid, bool thread, const char *item, const char *needle) diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c index c19a97dd02d49..94e16e383bcf8 100644 --- a/tools/testing/selftests/cgroup/test_memcontrol.c +++ b/tools/testing/selftests/cgroup/test_memcontrol.c @@ -210,13 +210,17 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg) static int alloc_anon_noexit(const char *cgroup, void *arg) { int ppid = getppid(); + size_t size = (unsigned long)arg; + char *buf, *ptr; - if (alloc_anon(cgroup, arg)) - return -1; + buf = malloc(size); + for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE) + *ptr = 0; while (getppid() == ppid) sleep(1); + free(buf); return 0; } @@ -679,6 +683,111 @@ static int test_memcg_max(const char *root) return ret; } +/* + * This test checks that memory.reclaim reclaims the given + * amount of memory (from both anon and file, if possible). + */ +static int test_memcg_reclaim(const char *root) +{ + int ret = KSFT_FAIL, fd, retries; + char *memcg; + long current, expected_usage, to_reclaim; + char buf[64]; + + memcg = cg_name(root, "memcg_test"); + if (!memcg) + goto cleanup; + + if (cg_create(memcg)) + goto cleanup; + + current = cg_read_long(memcg, "memory.current"); + if (current != 0) + goto cleanup; + + fd = get_temp_fd(); + if (fd < 0) + goto cleanup; + + cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd); + + /* + * If swap is enabled, try to reclaim from both anon and file, else try + * to reclaim from file only. + */ + if (is_swap_enabled()) { + cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50)); + expected_usage = MB(100); + } else + expected_usage = MB(50); + + /* + * Wait until current usage reaches the expected usage (or we run out of + * retries). 
+ */ + retries = 5; + while (!values_close(cg_read_long(memcg, "memory.current"), + expected_usage, 10)) { + if (retries--) { + sleep(1); + continue; + } else { + fprintf(stderr, + "failed to allocate %ld for memcg reclaim test\n", + expected_usage); + goto cleanup; + } + } + + /* + * Reclaim until current reaches 30M, this makes sure we hit both anon + * and file if swap is enabled. + */ + retries = 5; + while (true) { + int err; + + current = cg_read_long(memcg, "memory.current"); + to_reclaim = current - MB(30); + + /* + * We only keep looping if we get EAGAIN, which means we could + * not reclaim the full amount. + */ + if (to_reclaim <= 0) + goto cleanup; + + + snprintf(buf, sizeof(buf), "%ld", to_reclaim); + err = cg_write(memcg, "memory.reclaim", buf); + if (!err) { + /* + * If writing succeeds, then the written amount should have been + * fully reclaimed (and maybe more). + */ + current = cg_read_long(memcg, "memory.current"); + if (!values_close(current, MB(30), 3) && current > MB(30)) + goto cleanup; + break; + } + + /* The kernel could not reclaim the full amount, try again. */ + if (err == -EAGAIN && retries--) + continue; + + /* We got an unexpected error or ran out of retries. */ + goto cleanup; + } + + ret = KSFT_PASS; +cleanup: + cg_destroy(memcg); + free(memcg); + close(fd); + + return ret; +} + static int alloc_anon_50M_check_swap(const char *cgroup, void *arg) { long mem_max = (long)arg; @@ -1181,6 +1290,7 @@ struct memcg_test { T(test_memcg_low), T(test_memcg_high), T(test_memcg_max), + T(test_memcg_reclaim), T(test_memcg_oom_events), T(test_memcg_swap_max), T(test_memcg_sock),