diff --git a/cover.txt b/cover.txt
new file mode 100644
index 0000000000000..149b0cf659b79
--- /dev/null
+++ b/cover.txt
@@ -0,0 +1,82 @@
+mm: BPF OOM
+
+This patchset adds the ability to customize the out of memory
+handling using bpf.
+
+It focuses on two parts:
+1) OOM handling policy,
+2) PSI-based OOM invocation.
+
+The idea of using bpf to customize OOM handling is not new, but
+unlike the previous proposal [1], which augmented the existing task
+ranking policy, this one tries to be as generic as possible and
+leverage the full power of modern bpf.
+
+It provides a generic interface which is called before the existing OOM
+killer code and allows implementing any policy, e.g. picking a victim
+task or memory cgroup, or potentially even releasing memory in other
+ways, e.g. deleting tmpfs files (the last one might require some
+additional but relatively simple changes).
+
+The past attempt to implement a memory-cgroup aware policy [2] showed
+that there are multiple opinions on what the best policy is. As it's
+highly workload-dependent and specific to a concrete way of organizing
+workloads, the structure of the cgroup tree etc., a customizable
+bpf-based implementation is preferable over an in-kernel implementation
+with a dozen sysctls.
+
+The second part is related to the fundamental question of when to
+declare the OOM event. It's a trade-off between the risk of
+unnecessary OOM kills and the associated work losses, and the risk of
+infinite thrashing and effective soft lockups. In the last few years
+several PSI-based userspace solutions were developed (e.g. OOMd [3] or
+systemd-OOMd [4]). The common idea was to use userspace daemons to
+implement custom OOM logic as well as rely on PSI monitoring to avoid
+stalls. In this scenario the userspace daemon was supposed to handle
+the majority of OOMs, while the in-kernel OOM killer worked as the
+last-resort measure to guarantee that the system would never deadlock
+on memory. But this approach creates additional infrastructure
+churn: a userspace OOM daemon is a separate entity which needs to be
+deployed, updated and monitored. A completely different pipeline needs to
+be built to monitor both types of OOM events and collect associated
+logs. A userspace daemon is more restricted in terms of what data is
+available to it. Implementing a daemon which can work reliably under
+heavy memory pressure in the system is also tricky.
+
+This patchset includes the code, tests and many ideas from the patchset
+of JP Kobryn, which implemented bpf kfuncs to provide a faster method
+to access memcg data [5].
+
+[1]: https://lwn.net/ml/linux-kernel/20230810081319.65668-1-zhouchuyi@bytedance.com/
+[2]: https://lore.kernel.org/lkml/20171130152824.1591-1-guro@fb.com/
+[3]: https://github.com/facebookincubator/oomd
+[4]: https://www.freedesktop.org/software/systemd/man/latest/systemd-oomd.service.html
+[5]: https://lkml.org/lkml/2025/10/15/1554
+
+----
+v2:
+ 1) A single bpf_oom can be attached system-wide and a single bpf_oom per memcg.
+    (by Alexei Starovoitov)
+ 2) Initial support for attaching struct ops to cgroups (Martin KaFai Lau,
+    Andrii Nakryiko and others)
+ 3) bpf memcontrol kfunc enhancements and tests (co-developed by JP Kobryn)
+ 4) Many small-ish fixes and cleanups (suggested by Andrew Morton, Suren Baghdasaryan,
+    Andrii Nakryiko and Kumar Kartikeya Dwivedi)
+ 5) bpf_out_of_memory() now takes u64 flags instead of bool wait_on_oom_lock
+    (suggested by Kumar Kartikeya Dwivedi)
+ 6) bpf_get_mem_cgroup() got the KF_RCU flag (suggested by Kumar Kartikeya Dwivedi)
+ 7) cgroup online and offline callbacks for bpf_psi, cgroup offline for bpf_oom
+
+v1:
+ 1) Both OOM and PSI parts are now implemented using bpf struct ops,
+    providing a path for future extensions (suggested by Kumar Kartikeya Dwivedi,
+    Song Liu and Matt Bobrowski)
+ 2) It's possible to create PSI triggers from BPF, no need for an additional
+    userspace agent (suggested by Suren Baghdasaryan).
+    Also there is now a callback for the cgroup release event.
+ 3) Added the ability to block on oom_lock instead of bailing out (suggested by Michal Hocko)
+ 4) Added bpf_task_is_oom_victim (suggested by Michal Hocko)
+ 5) PSI callbacks are scheduled using a separate workqueue (suggested by Suren Baghdasaryan)
+
+RFC:
+  https://lwn.net/ml/all/20250428033617.3797686-1-roman.gushchin@linux.dev/
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e53cda0aabb68..4abef08b3ed90 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1837,6 +1837,13 @@ struct bpf_raw_tp_link {
 	u64 cookie;
 };
 
+struct bpf_struct_ops_link {
+	struct bpf_link link;
+	struct bpf_map __rcu *map;
+	wait_queue_head_t wait_hup;
+	u64 cgroup_id;
+};
+
 struct bpf_link_primer {
 	struct bpf_link *link;
 	struct file *file;
diff --git a/include/linux/bpf_oom.h b/include/linux/bpf_oom.h
new file mode 100644
index 0000000000000..d93dba501a006
--- /dev/null
+++ b/include/linux/bpf_oom.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef __BPF_OOM_H
+#define __BPF_OOM_H
+
+struct oom_control;
+
+#define BPF_OOM_NAME_MAX_LEN 64
+
+struct bpf_oom_ctx {
+	/*
+	 * If bpf_oom_ops is attached to a cgroup, id of this cgroup.
+	 * 0 otherwise.
+	 */
+	u64 cgroup_id;
+};
+
+struct bpf_oom_ops {
+	/**
+	 * @handle_out_of_memory: Out of memory bpf handler, called before
+	 * the in-kernel OOM killer.
+	 * @oc: OOM control structure
+	 * @ctx: Execution context
+	 *
+	 * Should return 1 if some memory was freed up, otherwise
+	 * the in-kernel OOM killer is invoked.
+	 */
+	int (*handle_out_of_memory)(struct oom_control *oc, struct bpf_oom_ctx *ctx);
+
+	/**
+	 * @handle_cgroup_offline: Cgroup offline callback
+	 * @cgroup_id: Id of the deleted cgroup
+	 *
+	 * Called if the cgroup with the attached bpf_oom_ops is deleted.
+	 */
+	void (*handle_cgroup_offline)(u64 cgroup_id, struct bpf_oom_ctx *ctx);
+
+	/**
+	 * @name: BPF OOM policy name
+	 */
+	char name[BPF_OOM_NAME_MAX_LEN];
+};
+
+#ifdef CONFIG_BPF_SYSCALL
+/**
+ * @bpf_handle_oom: handle out of memory condition using bpf
+ * @oc: OOM control structure
+ *
+ * Returns true if some memory was freed.
+ */
+bool bpf_handle_oom(struct oom_control *oc);
+
+
+/**
+ * @bpf_oom_memcg_offline: handle memcg offlining
+ * @memcg: Memory cgroup being offlined
+ *
+ * When a memory cgroup is about to be deleted and there is an
+ * attached BPF OOM structure, it has to be detached.
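+ * If the attached struct_ops implements handle_cgroup_offline(), that
+ * callback is notified with the id of the cgroup being deleted.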
+ */ +void bpf_oom_memcg_offline(struct mem_cgroup *memcg); + +#else /* CONFIG_BPF_SYSCALL */ +static inline bool bpf_handle_oom(struct oom_control *oc) +{ + return false; +} + +static inline void bpf_oom_memcg_offline(struct mem_cgroup *memcg) {} + +#endif /* CONFIG_BPF_SYSCALL */ + +#endif /* __BPF_OOM_H */ diff --git a/include/linux/bpf_psi.h b/include/linux/bpf_psi.h new file mode 100644 index 0000000000000..df00778e474ee --- /dev/null +++ b/include/linux/bpf_psi.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ + +#ifndef __BPF_PSI_H +#define __BPF_PSI_H + +#include +#include +#include +#include + +struct cgroup; +struct bpf_psi; +struct psi_trigger; +struct psi_trigger_params; + +#define BPF_PSI_FULL 0x80000000 + +struct bpf_psi_ops { + /** + * @init: Initialization callback, suited for creating psi triggers. + * @bpf_psi: bpf_psi pointer, can be passed to bpf_psi_create_trigger(). + * + * A non-0 return value means the initialization has been failed. + */ + int (*init)(struct bpf_psi *bpf_psi); + + /** + * @handle_psi_event: PSI event callback + * @t: psi_trigger pointer + */ + void (*handle_psi_event)(struct psi_trigger *t); + + /** + * @handle_cgroup_online: Cgroup online callback + * @cgroup_id: Id of the new cgroup + * + * Called every time a new cgroup is created. Can be used + * to create new psi triggers. + */ + void (*handle_cgroup_online)(u64 cgroup_id); + + /** + * @handle_cgroup_offline: Cgroup offline callback + * @cgroup_id: Id of offlined cgroup + * + * Called every time a cgroup with an attached bpf psi trigger is + * offlined. + */ + void (*handle_cgroup_offline)(u64 cgroup_id); + + /* private */ + struct bpf_psi *bpf_psi; +}; + +struct bpf_psi { + spinlock_t lock; + struct list_head triggers; + struct bpf_psi_ops *ops; + struct srcu_struct srcu; + struct list_head node; /* Protected by bpf_psi_lock */ +}; + +#ifdef CONFIG_BPF_SYSCALL +void bpf_psi_add_trigger(struct psi_trigger *t, + const struct psi_trigger_params *params); +void bpf_psi_remove_trigger(struct psi_trigger *t); +void bpf_psi_handle_event(struct psi_trigger *t); + +#else /* CONFIG_BPF_SYSCALL */ +static inline void bpf_psi_add_trigger(struct psi_trigger *t, + const struct psi_trigger_params *params) {} +static inline void bpf_psi_remove_trigger(struct psi_trigger *t) {} +static inline void bpf_psi_handle_event(struct psi_trigger *t) {} + +#endif /* CONFIG_BPF_SYSCALL */ + +#if (defined(CONFIG_CGROUPS) && defined(CONFIG_PSI) && defined(CONFIG_BPF_SYSCALL)) +void bpf_psi_cgroup_online(struct cgroup *cgroup); +void bpf_psi_cgroup_offline(struct cgroup *cgroup); + +#else /* CONFIG_CGROUPS && CONFIG_PSI && CONFIG_BPF_SYSCALL */ +static inline void bpf_psi_cgroup_online(struct cgroup *cgroup) {} +static inline void bpf_psi_cgroup_offline(struct cgroup *cgroup) {} + +#endif /* CONFIG_CGROUPS && CONFIG_PSI && CONFIG_BPF_SYSCALL */ + +#endif /* __BPF_PSI_H */ diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 6ed477338b166..1a99da44999ed 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -707,6 +707,10 @@ static inline bool task_under_cgroup_hierarchy(struct task_struct *task, static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) {} +static inline struct cgroup *cgroup_get_from_id(u64 id) +{ + return NULL; +} #endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 873e510d6f8d9..b9e08dddd7ada 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -29,6 
+29,7 @@ struct obj_cgroup; struct page; struct mm_struct; struct kmem_cache; +struct bpf_oom_ops; /* Cgroup-specific page state, on top of universal node page state */ enum memcg_stat_item { @@ -226,6 +227,10 @@ struct mem_cgroup { */ bool oom_group; +#ifdef CONFIG_BPF_SYSCALL + struct bpf_oom_ops *bpf_oom; +#endif + int swappiness; /* memory.events and memory.events.local */ @@ -832,9 +837,9 @@ static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg) { return memcg ? cgroup_ino(memcg->css.cgroup) : 0; } +#endif struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino); -#endif static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m) { @@ -948,7 +953,10 @@ static inline void mod_memcg_page_state(struct page *page, rcu_read_unlock(); } +unsigned long memcg_events(struct mem_cgroup *memcg, int event); +unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap); unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx); +unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item); unsigned long lruvec_page_state(struct lruvec *lruvec, enum node_stat_item idx); unsigned long lruvec_page_state_local(struct lruvec *lruvec, enum node_stat_item idx); @@ -1331,12 +1339,12 @@ static inline unsigned long mem_cgroup_ino(struct mem_cgroup *memcg) { return 0; } +#endif static inline struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino) { return NULL; } -#endif static inline struct mem_cgroup *mem_cgroup_from_seq(struct seq_file *m) { diff --git a/include/linux/oom.h b/include/linux/oom.h index 7b02bc1d0a7ea..704fc0e786c62 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -19,6 +19,12 @@ enum oom_constraint { CONSTRAINT_CPUSET, CONSTRAINT_MEMORY_POLICY, CONSTRAINT_MEMCG, + CONSTRAINT_BPF, +}; + +enum bpf_oom_flags { + BPF_OOM_FLAGS_WAIT_ON_OOM_LOCK = 1 << 0, + BPF_OOM_FLAGS_LAST = 1 << 1, }; /* @@ -51,6 +57,17 @@ struct oom_control { /* Used to print the constraint info. */ enum oom_constraint constraint; + +#ifdef CONFIG_BPF_SYSCALL + /* Used by the bpf oom implementation to mark the forward progress */ + bool bpf_memory_freed; + + /* Policy name */ + const char *bpf_policy_name; + + /* BPF-specific constraint name */ + const char *bpf_constraint; +#endif }; extern struct mutex oom_lock; diff --git a/include/linux/psi.h b/include/linux/psi.h index e0745873e3f26..8ffe84cd8571a 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -23,14 +23,23 @@ void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res); -struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, - enum psi_res res, struct file *file, - struct kernfs_open_file *of); +int psi_trigger_parse(struct psi_trigger_params *params, const char *buf); +struct psi_trigger *psi_trigger_create(struct psi_group *group, + const struct psi_trigger_params *param); void psi_trigger_destroy(struct psi_trigger *t); __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file, poll_table *wait); +static inline bool psi_file_privileged(struct file *file) +{ + /* + * Checking the privilege here on file->f_cred implies that a privileged user + * could open the file and delegate the write to an unprivileged one. 
+ */ + return cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE); +} + #ifdef CONFIG_CGROUPS static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) { @@ -41,6 +50,12 @@ int psi_cgroup_alloc(struct cgroup *cgrp); void psi_cgroup_free(struct cgroup *cgrp); void cgroup_move_task(struct task_struct *p, struct css_set *to); void psi_cgroup_restart(struct psi_group *group); + +#else +static inline struct psi_group *cgroup_psi(struct cgroup *cgrp) +{ + return &psi_system; +} #endif #else /* CONFIG_PSI */ diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index dd10c22299ab8..e551df9d6336c 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -119,7 +119,46 @@ struct psi_window { u64 prev_growth; }; +enum psi_trigger_type { + PSI_SYSTEM, + PSI_CGROUP, + PSI_BPF, +}; + +struct psi_trigger_params { + /* Trigger type */ + enum psi_trigger_type type; + + /* Resource to be monitored */ + enum psi_res res; + + /* True if all threads should be stalled to trigger */ + bool full; + + /* Threshold in us */ + u32 threshold_us; + + /* Window in us */ + u32 window_us; + + /* Privileged triggers are treated differently */ + bool privileged; + + union { + /* Link to kernfs open file, only for PSI_CGROUP */ + struct kernfs_open_file *of; + +#ifdef CONFIG_BPF_SYSCALL + /* Link to bpf_psi structure, only for BPF_PSI */ + struct bpf_psi *bpf_psi; +#endif + }; +}; + struct psi_trigger { + /* Trigger type */ + enum psi_trigger_type type; + /* PSI state being monitored by the trigger */ enum psi_states state; @@ -135,7 +174,7 @@ struct psi_trigger { /* Wait queue for polling */ wait_queue_head_t event_wait; - /* Kernfs file for cgroup triggers */ + /* Kernfs file for PSI_CGROUP triggers */ struct kernfs_open_file *of; /* Pending event flag */ @@ -155,6 +194,31 @@ struct psi_trigger { /* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */ enum psi_aggregators aggregator; + +#ifdef CONFIG_BPF_SYSCALL + /* Fields specific to PSI_BPF triggers */ + + /* Bpf psi structure for events handling */ + struct bpf_psi *bpf_psi; + + /* List node inside bpf_psi->triggers list */ + struct list_head bpf_psi_node; + + /* List node inside group->bpf_triggers list */ + struct list_head bpf_group_node; + + /* Work structure, used to execute event handlers */ + struct work_struct bpf_work; + + /* + * Whether the trigger is being pinned in memory. + * Protected by group->bpf_triggers_lock. 
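+	 * It is set once, under this lock, by whichever teardown path
+	 * (cgroup offline or struct_ops unreg) reaches the trigger first,
+	 * so the trigger is destroyed only once.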
+ */ + bool pinned; + + /* Cgroup Id */ + u64 cgroup_id; +#endif }; struct psi_group { @@ -203,6 +267,12 @@ struct psi_group { u64 rtpoll_total[NR_PSI_STATES - 1]; u64 rtpoll_next_update; u64 rtpoll_until; + +#ifdef CONFIG_BPF_SYSCALL + /* List of triggers owned by bpf and corresponding lock */ + spinlock_t bpf_triggers_lock; + struct list_head bpf_triggers; +#endif }; #else /* CONFIG_PSI */ diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index a41e6730edcf3..58664779a2b6f 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -13,6 +13,7 @@ #include #include #include +#include struct bpf_struct_ops_value { struct bpf_struct_ops_common_value common; @@ -55,12 +56,6 @@ struct bpf_struct_ops_map { struct bpf_struct_ops_value kvalue; }; -struct bpf_struct_ops_link { - struct bpf_link link; - struct bpf_map __rcu *map; - wait_queue_head_t wait_hup; -}; - static DEFINE_MUTEX(update_mutex); #define VALUE_PREFIX "bpf_struct_ops_" @@ -1365,6 +1360,18 @@ int bpf_struct_ops_link_create(union bpf_attr *attr) } bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL, attr->link_create.attach_type); +#ifdef CONFIG_CGROUPS + if (attr->link_create.cgroup.relative_fd) { + struct cgroup *cgrp; + + cgrp = cgroup_get_from_fd(attr->link_create.cgroup.relative_fd); + if (IS_ERR(cgrp)) + return PTR_ERR(cgrp); + + link->cgroup_id = cgroup_id(cgrp); + cgroup_put(cgrp); + } +#endif /* CONFIG_CGROUPS */ err = bpf_link_prime(&link->link, &link_primer); if (err) diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 248f517d66d04..4df4c49ba1793 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -557,9 +558,11 @@ static int cgroup_bpf_lifetime_notify(struct notifier_block *nb, switch (action) { case CGROUP_LIFETIME_ONLINE: + bpf_psi_cgroup_online(cgrp); ret = cgroup_bpf_inherit(cgrp); break; case CGROUP_LIFETIME_OFFLINE: + bpf_psi_cgroup_offline(cgrp); cgroup_bpf_offline(cgrp); break; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 6d175849e57ac..7ef954760078d 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -7101,6 +7101,10 @@ BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct) { struct file *vm_file; }; +BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct oom_control) { + struct mem_cgroup *memcg; +}; + static bool type_is_rcu(struct bpf_verifier_env *env, struct bpf_reg_state *reg, const char *field_name, u32 btf_id) @@ -7143,6 +7147,7 @@ static bool type_is_trusted_or_null(struct bpf_verifier_env *env, BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct socket)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct dentry)); BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct vm_area_struct)); + BTF_TYPE_EMIT(BTF_TYPE_SAFE_TRUSTED_OR_NULL(struct oom_control)); return btf_nested_type_is_trusted(&env->log, reg, field_name, btf_id, "__safe_trusted_or_null"); diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6ae5f48cf64e3..836b28676abcb 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -4000,6 +4000,12 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf, struct psi_trigger *new; struct cgroup *cgrp; struct psi_group *psi; + struct psi_trigger_params params; + int err; + + err = psi_trigger_parse(¶ms, buf); + if (err) + return err; cgrp = cgroup_kn_lock_live(of->kn, false); if (!cgrp) @@ -4015,7 +4021,13 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf, } 
psi = cgroup_psi(cgrp); - new = psi_trigger_create(psi, buf, res, of->file, of); + + params.type = PSI_CGROUP; + params.res = res; + params.privileged = psi_file_privileged(of->file); + params.of = of; + + new = psi_trigger_create(psi, ¶ms); if (IS_ERR(new)) { cgroup_put(cgrp); return PTR_ERR(new); diff --git a/kernel/sched/bpf_psi.c b/kernel/sched/bpf_psi.c new file mode 100644 index 0000000000000..952c7bd3ff3d2 --- /dev/null +++ b/kernel/sched/bpf_psi.c @@ -0,0 +1,392 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BPF PSI event handlers + * + * Author: Roman Gushchin + */ + +#include +#include + +struct bpf_struct_ops bpf_psi_bpf_ops; +static struct workqueue_struct *bpf_psi_wq; + +static DEFINE_MUTEX(bpf_psi_lock); +static LIST_HEAD(bpf_psi_notify_list); +static DEFINE_STATIC_KEY_FALSE(bpf_psi_notify_key); + +static struct bpf_psi *bpf_psi_create(struct bpf_psi_ops *ops) +{ + struct bpf_psi *bpf_psi; + + bpf_psi = kzalloc(sizeof(*bpf_psi), GFP_KERNEL); + if (!bpf_psi) + return NULL; + + if (init_srcu_struct(&bpf_psi->srcu)) { + kfree(bpf_psi); + return NULL; + } + + spin_lock_init(&bpf_psi->lock); + bpf_psi->ops = ops; + INIT_LIST_HEAD(&bpf_psi->triggers); + ops->bpf_psi = bpf_psi; + + if (ops->handle_cgroup_online) { + mutex_lock(&bpf_psi_lock); + list_add(&bpf_psi->node, &bpf_psi_notify_list); + mutex_unlock(&bpf_psi_lock); + static_branch_inc(&bpf_psi_notify_key); + } else { + INIT_LIST_HEAD(&bpf_psi->node); + } + + return bpf_psi; +} + +static void bpf_psi_handle_event_fn(struct work_struct *work) +{ + struct psi_trigger *t; + struct bpf_psi *bpf_psi; + int idx; + + t = container_of(work, struct psi_trigger, bpf_work); + bpf_psi = READ_ONCE(t->bpf_psi); + + if (likely(bpf_psi)) { + idx = srcu_read_lock(&bpf_psi->srcu); + bpf_psi->ops->handle_psi_event(t); + srcu_read_unlock(&bpf_psi->srcu, idx); + } +} + +void bpf_psi_add_trigger(struct psi_trigger *t, + const struct psi_trigger_params *params) +{ + t->bpf_psi = params->bpf_psi; + t->pinned = false; + INIT_WORK(&t->bpf_work, bpf_psi_handle_event_fn); + + spin_lock(&t->bpf_psi->lock); + list_add(&t->bpf_psi_node, &t->bpf_psi->triggers); + spin_unlock(&t->bpf_psi->lock); + + spin_lock(&t->group->bpf_triggers_lock); + list_add(&t->bpf_group_node, &t->group->bpf_triggers); + spin_unlock(&t->group->bpf_triggers_lock); +} + +void bpf_psi_remove_trigger(struct psi_trigger *t) +{ + spin_lock(&t->group->bpf_triggers_lock); + list_del(&t->bpf_group_node); + spin_unlock(&t->group->bpf_triggers_lock); + + spin_lock(&t->bpf_psi->lock); + list_del(&t->bpf_psi_node); + spin_unlock(&t->bpf_psi->lock); +} + +#ifdef CONFIG_CGROUPS +void bpf_psi_cgroup_online(struct cgroup *cgroup) +{ + struct bpf_psi *bpf_psi; + int idx; + + if (!static_branch_likely(&bpf_psi_notify_key)) + return; + + mutex_lock(&bpf_psi_lock); + list_for_each_entry(bpf_psi, &bpf_psi_notify_list, node) { + idx = srcu_read_lock(&bpf_psi->srcu); + if (bpf_psi->ops->handle_cgroup_online) + bpf_psi->ops->handle_cgroup_online(cgroup_id(cgroup)); + srcu_read_unlock(&bpf_psi->srcu, idx); + } + mutex_unlock(&bpf_psi_lock); +} + +void bpf_psi_cgroup_offline(struct cgroup *cgroup) +{ + struct psi_group *group = cgroup->psi; + u64 cgrp_id = cgroup_id(cgroup); + struct psi_trigger *t, *p; + struct bpf_psi *bpf_psi; + LIST_HEAD(to_destroy); + int idx; + + spin_lock(&group->bpf_triggers_lock); + list_for_each_entry_safe(t, p, &group->bpf_triggers, bpf_group_node) { + if (!t->pinned) { + t->pinned = true; + list_move(&t->bpf_group_node, &to_destroy); + } + } + 
spin_unlock(&group->bpf_triggers_lock); + + list_for_each_entry_safe(t, p, &to_destroy, bpf_group_node) { + bpf_psi = READ_ONCE(t->bpf_psi); + + idx = srcu_read_lock(&bpf_psi->srcu); + if (bpf_psi->ops->handle_cgroup_offline) + bpf_psi->ops->handle_cgroup_offline(cgrp_id); + srcu_read_unlock(&bpf_psi->srcu, idx); + + spin_lock(&bpf_psi->lock); + list_del(&t->bpf_psi_node); + spin_unlock(&bpf_psi->lock); + + WRITE_ONCE(t->bpf_psi, NULL); + flush_workqueue(bpf_psi_wq); + synchronize_srcu(&bpf_psi->srcu); + psi_trigger_destroy(t); + } +} +#endif + +void bpf_psi_handle_event(struct psi_trigger *t) +{ + queue_work(bpf_psi_wq, &t->bpf_work); +} + +/* BPF struct ops */ + +static int __bpf_psi_init(struct bpf_psi *bpf_psi) { return 0; } +static void __bpf_psi_handle_psi_event(struct psi_trigger *t) {} +static void __bpf_psi_handle_cgroup_online(u64 cgroup_id) {} +static void __bpf_psi_handle_cgroup_offline(u64 cgroup_id) {} + +static struct bpf_psi_ops __bpf_psi_ops = { + .init = __bpf_psi_init, + .handle_psi_event = __bpf_psi_handle_psi_event, + .handle_cgroup_online = __bpf_psi_handle_cgroup_online, + .handle_cgroup_offline = __bpf_psi_handle_cgroup_offline, +}; + +static const struct bpf_func_proto * +bpf_psi_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return tracing_prog_func_proto(func_id, prog); +} + +static bool bpf_psi_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static const struct bpf_verifier_ops bpf_psi_verifier_ops = { + .get_func_proto = bpf_psi_func_proto, + .is_valid_access = bpf_psi_ops_is_valid_access, +}; + +__bpf_kfunc_start_defs(); + +/** + * bpf_psi_create_trigger - Create a PSI trigger + * @bpf_psi: bpf_psi struct to attach the trigger to + * @cgroup_id: cgroup Id to attach the trigger; 0 for system-wide scope + * @resource: resource to monitor (PSI_MEM, PSI_IO, etc) and the full bit. + * @threshold_us: threshold in us + * @window_us: window in us + * + * Creates a PSI trigger and attached is to bpf_psi. The trigger will be + * active unless bpf struct ops is unloaded or the corresponding cgroup + * is deleted. + * + * Resource's most significant bit encodes whether "some" or "full" + * PSI state should be tracked. + * + * Returns 0 on success and the error code on failure. 
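+ *
+ * Example (illustrative): calling
+ * bpf_psi_create_trigger(bpf_psi, 0, PSI_MEM | BPF_PSI_FULL, 100000, 1000000)
+ * from the init() callback sets up a system-wide trigger firing after 100ms
+ * of full memory stall within a 1s window.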
+ */ +__bpf_kfunc int bpf_psi_create_trigger(struct bpf_psi *bpf_psi, + u64 cgroup_id, u32 resource, + u32 threshold_us, u32 window_us) +{ + enum psi_res res = resource & ~BPF_PSI_FULL; + bool full = resource & BPF_PSI_FULL; + struct psi_trigger_params params; + struct cgroup *cgroup __maybe_unused = NULL; + struct psi_group *group; + struct psi_trigger *t; + int ret = 0; + + if (res >= NR_PSI_RESOURCES) + return -EINVAL; + + if (IS_ENABLED(CONFIG_CGROUPS) && cgroup_id) { + cgroup = cgroup_get_from_id(cgroup_id); + if (IS_ERR_OR_NULL(cgroup)) + return PTR_ERR(cgroup); + + group = cgroup_psi(cgroup); + } else { + group = &psi_system; + } + + params.type = PSI_BPF; + params.bpf_psi = bpf_psi; + params.privileged = capable(CAP_SYS_RESOURCE); + params.res = res; + params.full = full; + params.threshold_us = threshold_us; + params.window_us = window_us; + + t = psi_trigger_create(group, ¶ms); + if (IS_ERR(t)) + ret = PTR_ERR(t); + else + t->cgroup_id = cgroup_id; + +#ifdef CONFIG_CGROUPS + if (cgroup) + cgroup_put(cgroup); +#endif + + return ret; +} +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_psi_kfuncs) +BTF_ID_FLAGS(func, bpf_psi_create_trigger, KF_TRUSTED_ARGS) +BTF_KFUNCS_END(bpf_psi_kfuncs) + +static int bpf_psi_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id) +{ + if (btf_id_set8_contains(&bpf_psi_kfuncs, kfunc_id) && + prog->aux->st_ops != &bpf_psi_bpf_ops) + return -EACCES; + + return 0; +} + +static const struct btf_kfunc_id_set bpf_psi_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_psi_kfuncs, + .filter = bpf_psi_kfunc_filter, +}; + +static int bpf_psi_ops_reg(void *kdata, struct bpf_link *link) +{ + struct bpf_psi_ops *ops = kdata; + struct bpf_psi *bpf_psi; + + bpf_psi = bpf_psi_create(ops); + if (!bpf_psi) + return -ENOMEM; + + return ops->init(bpf_psi); +} + +static void bpf_psi_ops_unreg(void *kdata, struct bpf_link *link) +{ + struct bpf_psi_ops *ops = kdata; + struct bpf_psi *bpf_psi = ops->bpf_psi; + struct psi_trigger *t, *p; + LIST_HEAD(to_destroy); + + spin_lock(&bpf_psi->lock); + list_for_each_entry_safe(t, p, &bpf_psi->triggers, bpf_psi_node) { + spin_lock(&t->group->bpf_triggers_lock); + if (!t->pinned) { + t->pinned = true; + list_move(&t->bpf_group_node, &to_destroy); + list_del(&t->bpf_psi_node); + + WRITE_ONCE(t->bpf_psi, NULL); + } + spin_unlock(&t->group->bpf_triggers_lock); + } + spin_unlock(&bpf_psi->lock); + + flush_workqueue(bpf_psi_wq); + synchronize_srcu(&bpf_psi->srcu); + + list_for_each_entry_safe(t, p, &to_destroy, bpf_group_node) + psi_trigger_destroy(t); + + if (!list_empty(&bpf_psi->node)) { + mutex_lock(&bpf_psi_lock); + list_del(&bpf_psi->node); + mutex_unlock(&bpf_psi_lock); + static_branch_dec(&bpf_psi_notify_key); + } + + cleanup_srcu_struct(&bpf_psi->srcu); + kfree(bpf_psi); +} + +static int bpf_psi_ops_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + u32 moff = __btf_member_bit_offset(t, member) / 8; + + switch (moff) { + case offsetof(struct bpf_psi_ops, init): + fallthrough; + case offsetof(struct bpf_psi_ops, handle_psi_event): + if (!prog) + return -EINVAL; + break; + } + + return 0; +} + +static int bpf_psi_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + +static int bpf_psi_ops_init(struct btf *btf) +{ + return 0; +} + +struct bpf_struct_ops bpf_psi_bpf_ops = { + .verifier_ops = &bpf_psi_verifier_ops, + .reg = bpf_psi_ops_reg, + .unreg = bpf_psi_ops_unreg, + .check_member = 
bpf_psi_ops_check_member, + .init_member = bpf_psi_ops_init_member, + .init = bpf_psi_ops_init, + .name = "bpf_psi_ops", + .owner = THIS_MODULE, + .cfi_stubs = &__bpf_psi_ops +}; + +static int __init bpf_psi_struct_ops_init(void) +{ + int wq_flags = WQ_MEM_RECLAIM | WQ_UNBOUND | WQ_HIGHPRI; + int err; + + bpf_psi_wq = alloc_workqueue("bpf_psi_wq", wq_flags, 0); + if (!bpf_psi_wq) + return -ENOMEM; + + err = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, + &bpf_psi_kfunc_set); + if (err) { + pr_warn("error while registering bpf psi kfuncs: %d", err); + goto err; + } + + err = register_bpf_struct_ops(&bpf_psi_bpf_ops, bpf_psi_ops); + if (err) { + pr_warn("error while registering bpf psi struct ops: %d", err); + goto err; + } + + return 0; + +err: + destroy_workqueue(bpf_psi_wq); + return err; +} +late_initcall(bpf_psi_struct_ops_init); diff --git a/kernel/sched/build_utility.c b/kernel/sched/build_utility.c index e2cf3b08d4e95..1f90781781a12 100644 --- a/kernel/sched/build_utility.c +++ b/kernel/sched/build_utility.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -91,6 +92,9 @@ #ifdef CONFIG_PSI # include "psi.c" +# ifdef CONFIG_BPF_SYSCALL +# include "bpf_psi.c" +# endif #endif #ifdef CONFIG_MEMBARRIER diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 59fdb7ebbf22a..26de772750e82 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c @@ -223,6 +223,10 @@ static void group_init(struct psi_group *group) init_waitqueue_head(&group->rtpoll_wait); timer_setup(&group->rtpoll_timer, poll_timer_fn, 0); rcu_assign_pointer(group->rtpoll_task, NULL); +#ifdef CONFIG_BPF_SYSCALL + spin_lock_init(&group->bpf_triggers_lock); + INIT_LIST_HEAD(&group->bpf_triggers); +#endif } void __init psi_init(void) @@ -511,10 +515,17 @@ static void update_triggers(struct psi_group *group, u64 now, /* Generate an event */ if (cmpxchg(&t->event, 0, 1) == 0) { - if (t->of) - kernfs_notify(t->of->kn); - else + switch (t->type) { + case PSI_SYSTEM: wake_up_interruptible(&t->event_wait); + break; + case PSI_CGROUP: + kernfs_notify(t->of->kn); + break; + case PSI_BPF: + bpf_psi_handle_event(t); + break; + } } t->last_event_time = now; /* Reset threshold breach flag once event got generated */ @@ -1292,74 +1303,91 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) return 0; } -struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, - enum psi_res res, struct file *file, - struct kernfs_open_file *of) +int psi_trigger_parse(struct psi_trigger_params *params, const char *buf) { - struct psi_trigger *t; - enum psi_states state; - u32 threshold_us; - bool privileged; - u32 window_us; + u32 threshold_us, window_us; if (static_branch_likely(&psi_disabled)) - return ERR_PTR(-EOPNOTSUPP); - - /* - * Checking the privilege here on file->f_cred implies that a privileged user - * could open the file and delegate the write to an unprivileged one. 
- */ - privileged = cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE); + return -EOPNOTSUPP; if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2) - state = PSI_IO_SOME + res * 2; + params->full = false; else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2) - state = PSI_IO_FULL + res * 2; + params->full = true; else - return ERR_PTR(-EINVAL); + return -EINVAL; + + params->threshold_us = threshold_us; + params->window_us = window_us; + return 0; +} + +struct psi_trigger *psi_trigger_create(struct psi_group *group, + const struct psi_trigger_params *params) +{ + struct psi_trigger *t; + enum psi_states state; + + if (static_branch_likely(&psi_disabled)) + return ERR_PTR(-EOPNOTSUPP); + + state = params->full ? PSI_IO_FULL : PSI_IO_SOME; + state += params->res * 2; #ifdef CONFIG_IRQ_TIME_ACCOUNTING - if (res == PSI_IRQ && --state != PSI_IRQ_FULL) + if (params->res == PSI_IRQ && --state != PSI_IRQ_FULL) return ERR_PTR(-EINVAL); #endif if (state >= PSI_NONIDLE) return ERR_PTR(-EINVAL); - if (window_us == 0 || window_us > WINDOW_MAX_US) + if (params->window_us == 0 || params->window_us > WINDOW_MAX_US) return ERR_PTR(-EINVAL); /* * Unprivileged users can only use 2s windows so that averages aggregation * work is used, and no RT threads need to be spawned. */ - if (!privileged && window_us % 2000000) + if (!params->privileged && params->window_us % 2000000) return ERR_PTR(-EINVAL); /* Check threshold */ - if (threshold_us == 0 || threshold_us > window_us) + if (params->threshold_us == 0 || params->threshold_us > params->window_us) return ERR_PTR(-EINVAL); t = kmalloc(sizeof(*t), GFP_KERNEL); if (!t) return ERR_PTR(-ENOMEM); + t->type = params->type; t->group = group; t->state = state; - t->threshold = threshold_us * NSEC_PER_USEC; - t->win.size = window_us * NSEC_PER_USEC; + t->threshold = params->threshold_us * NSEC_PER_USEC; + t->win.size = params->window_us * NSEC_PER_USEC; window_reset(&t->win, sched_clock(), group->total[PSI_POLL][t->state], 0); t->event = 0; t->last_event_time = 0; - t->of = of; - if (!of) + + switch (params->type) { + case PSI_SYSTEM: init_waitqueue_head(&t->event_wait); + t->of = NULL; + break; + case PSI_CGROUP: + t->of = params->of; + break; + case PSI_BPF: + bpf_psi_add_trigger(t, params); + break; + } + t->pending_event = false; - t->aggregator = privileged ? PSI_POLL : PSI_AVGS; + t->aggregator = params->privileged ? PSI_POLL : PSI_AVGS; - if (privileged) { + if (params->privileged) { mutex_lock(&group->rtpoll_trigger_lock); if (!rcu_access_pointer(group->rtpoll_task)) { @@ -1367,8 +1395,10 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf, task = kthread_create(psi_rtpoll_worker, group, "psimon"); if (IS_ERR(task)) { - kfree(t); mutex_unlock(&group->rtpoll_trigger_lock); + if (t->type == PSI_BPF) + bpf_psi_remove_trigger(t); + kfree(t); return ERR_CAST(task); } atomic_set(&group->rtpoll_wakeup, 0); @@ -1412,10 +1442,16 @@ void psi_trigger_destroy(struct psi_trigger *t) * being accessed later. Can happen if cgroup is deleted from under a * polling process. 
*/ - if (t->of) - kernfs_notify(t->of->kn); - else + switch (t->type) { + case PSI_SYSTEM: wake_up_interruptible(&t->event_wait); + break; + case PSI_CGROUP: + kernfs_notify(t->of->kn); + break; + case PSI_BPF: + break; + } if (t->aggregator == PSI_AVGS) { mutex_lock(&group->avgs_lock); @@ -1492,10 +1528,16 @@ __poll_t psi_trigger_poll(void **trigger_ptr, if (!t) return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI; - if (t->of) - kernfs_generic_poll(t->of, wait); - else + switch (t->type) { + case PSI_SYSTEM: poll_wait(file, &t->event_wait, wait); + break; + case PSI_CGROUP: + kernfs_generic_poll(t->of, wait); + break; + case PSI_BPF: + break; + } if (cmpxchg(&t->event, 1, 0) == 1) ret |= EPOLLPRI; @@ -1541,6 +1583,8 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, size_t buf_size; struct seq_file *seq; struct psi_trigger *new; + struct psi_trigger_params params; + int err; if (static_branch_likely(&psi_disabled)) return -EOPNOTSUPP; @@ -1554,6 +1598,10 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, buf[buf_size - 1] = '\0'; + err = psi_trigger_parse(¶ms, buf); + if (err) + return err; + seq = file->private_data; /* Take seq->lock to protect seq->private from concurrent writes */ @@ -1565,7 +1613,11 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf, return -EBUSY; } - new = psi_trigger_create(&psi_system, buf, res, file, NULL); + params.type = PSI_SYSTEM; + params.res = res; + params.privileged = psi_file_privileged(file); + + new = psi_trigger_create(&psi_system, ¶ms); if (IS_ERR(new)) { mutex_unlock(&seq->lock); return PTR_ERR(new); diff --git a/mm/Makefile b/mm/Makefile index 21abb33535501..2d8f9beb3c710 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -105,6 +105,10 @@ obj-$(CONFIG_MEMCG) += memcontrol.o vmpressure.o ifdef CONFIG_SWAP obj-$(CONFIG_MEMCG) += swap_cgroup.o endif +ifdef CONFIG_BPF_SYSCALL +obj-y += bpf_oom.o +obj-$(CONFIG_MEMCG) += bpf_memcontrol.o +endif obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_GUP_TEST) += gup_test.o obj-$(CONFIG_DMAPOOL_TEST) += dmapool_test.o diff --git a/mm/bpf_memcontrol.c b/mm/bpf_memcontrol.c new file mode 100644 index 0000000000000..458ad022b036f --- /dev/null +++ b/mm/bpf_memcontrol.c @@ -0,0 +1,176 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Memory Controller-related BPF kfuncs and auxiliary code + * + * Author: Roman Gushchin + */ + +#include +#include + +__bpf_kfunc_start_defs(); + +/** + * bpf_get_root_mem_cgroup - Returns a pointer to the root memory cgroup + * + * The function has KF_ACQUIRE semantics, even though the root memory + * cgroup is never destroyed after being created and doesn't require + * reference counting. And it's perfectly safe to pass it to + * bpf_put_mem_cgroup() + */ +__bpf_kfunc struct mem_cgroup *bpf_get_root_mem_cgroup(void) +{ + /* css_get() is not needed */ + return root_mem_cgroup; +} + +/** + * bpf_get_mem_cgroup - Get a reference to a memory cgroup + * @css: pointer to the css structure + * + * Returns a pointer to a mem_cgroup structure after bumping + * the corresponding css's reference counter. + * + * It's fine to pass a css which belongs to any cgroup controller, + * e.g. unified hierarchy's main css. + * + * Implements KF_ACQUIRE semantics. 
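+ *
+ * The returned reference is expected to be released with bpf_put_mem_cgroup()
+ * once the caller is done with the memcg.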
+ */ +__bpf_kfunc struct mem_cgroup * +bpf_get_mem_cgroup(struct cgroup_subsys_state *css) +{ + struct mem_cgroup *memcg = NULL; + bool rcu_unlock = false; + + if (!root_mem_cgroup) + return NULL; + + if (root_mem_cgroup->css.ss != css->ss) { + struct cgroup *cgroup = css->cgroup; + int ssid = root_mem_cgroup->css.ss->id; + + rcu_read_lock(); + rcu_unlock = true; + css = rcu_dereference_raw(cgroup->subsys[ssid]); + } + + if (css && css_tryget(css)) + memcg = container_of(css, struct mem_cgroup, css); + + if (rcu_unlock) + rcu_read_unlock(); + + return memcg; +} + +/** + * bpf_put_mem_cgroup - Put a reference to a memory cgroup + * @memcg: memory cgroup to release + * + * Releases a previously acquired memcg reference. + * Implements KF_RELEASE semantics. + */ +__bpf_kfunc void bpf_put_mem_cgroup(struct mem_cgroup *memcg) +{ + css_put(&memcg->css); +} + +/** + * bpf_mem_cgroup_vm_events - Read memory cgroup's vm event counter + * @memcg: memory cgroup + * @event: event id + * + * Allows to read memory cgroup event counters. + */ +__bpf_kfunc unsigned long bpf_mem_cgroup_vm_events(struct mem_cgroup *memcg, + enum vm_event_item event) +{ + return memcg_events(memcg, event); +} + +/** + * bpf_mem_cgroup_usage - Read memory cgroup's usage + * @memcg: memory cgroup + * + * Returns current memory cgroup size in bytes. + */ +__bpf_kfunc unsigned long bpf_mem_cgroup_usage(struct mem_cgroup *memcg) +{ + return page_counter_read(&memcg->memory); +} + +/** + * bpf_mem_cgroup_events - Read memory cgroup's page state counter + * bpf_mem_cgroup_memory_events - Read memory cgroup's memory event value + * @memcg: memory cgroup + * @event: memory event id + * + * Returns current memory event count. + */ +__bpf_kfunc unsigned long bpf_mem_cgroup_memory_events(struct mem_cgroup *memcg, + enum memcg_memory_event event) +{ + if (event >= MEMCG_NR_MEMORY_EVENTS) + return (unsigned long)-1; + + return atomic_long_read(&memcg->memory_events[event]); +} + +/** + * bpf_mem_cgroup_page_state - Read memory cgroup's page state counter + * @memcg: memory cgroup + * @idx: counter idx + * + * Allows to read memory cgroup statistics. The output is in bytes. + */ +__bpf_kfunc unsigned long bpf_mem_cgroup_page_state(struct mem_cgroup *memcg, int idx) +{ + if (idx < 0 || idx >= MEMCG_NR_STAT) + return (unsigned long)-1; + + return memcg_page_state_output(memcg, idx); +} + +/** + * bpf_mem_cgroup_flush_stats - Flush memory cgroup's statistics + * @memcg: memory cgroup + * + * Propagate memory cgroup's statistics up the cgroup tree. 
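+ * The kfunc is sleepable (registered with KF_SLEEPABLE); flushing before
+ * reading counters via bpf_mem_cgroup_page_state() typically yields more
+ * up-to-date values.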
+ */ +__bpf_kfunc void bpf_mem_cgroup_flush_stats(struct mem_cgroup *memcg) +{ + mem_cgroup_flush_stats(memcg); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_memcontrol_kfuncs) +BTF_ID_FLAGS(func, bpf_get_root_mem_cgroup, KF_ACQUIRE | KF_RET_NULL) +BTF_ID_FLAGS(func, bpf_get_mem_cgroup, KF_ACQUIRE | KF_RET_NULL | KF_RCU) +BTF_ID_FLAGS(func, bpf_put_mem_cgroup, KF_RELEASE) + +BTF_ID_FLAGS(func, bpf_mem_cgroup_vm_events, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_mem_cgroup_memory_events, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_mem_cgroup_usage, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_mem_cgroup_page_state, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_mem_cgroup_flush_stats, KF_TRUSTED_ARGS | KF_SLEEPABLE) + +BTF_KFUNCS_END(bpf_memcontrol_kfuncs) + +static const struct btf_kfunc_id_set bpf_memcontrol_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_memcontrol_kfuncs, +}; + +static int __init bpf_memcontrol_init(void) +{ + int err; + + err = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, + &bpf_memcontrol_kfunc_set); + if (err) + pr_warn("error while registering bpf memcontrol kfuncs: %d", err); + + return err; +} +late_initcall(bpf_memcontrol_init); diff --git a/mm/bpf_oom.c b/mm/bpf_oom.c new file mode 100644 index 0000000000000..a7e021c9db44b --- /dev/null +++ b/mm/bpf_oom.c @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * BPF-driven OOM killer customization + * + * Author: Roman Gushchin + */ + +#include +#include +#include +#include +#include +#include + +DEFINE_STATIC_SRCU(bpf_oom_srcu); +static struct bpf_oom_ops *system_bpf_oom; + +static int bpf_ops_handle_oom(struct bpf_oom_ops *bpf_oom_ops, + struct mem_cgroup *memcg, + struct oom_control *oc) +{ + struct bpf_oom_ctx exec_ctx; + int ret; + + if (memcg) + exec_ctx.cgroup_id = cgroup_id(memcg->css.cgroup); + else + exec_ctx.cgroup_id = 0; + + oc->bpf_policy_name = &bpf_oom_ops->name[0]; + oc->bpf_memory_freed = false; + ret = bpf_oom_ops->handle_out_of_memory(oc, &exec_ctx); + oc->bpf_policy_name = NULL; + + return ret; +} + +bool bpf_handle_oom(struct oom_control *oc) +{ + struct bpf_oom_ops *bpf_oom_ops = NULL; + struct mem_cgroup *memcg; + int idx, ret = 0; + + /* All bpf_oom_ops structures are protected using bpf_oom_srcu */ + idx = srcu_read_lock(&bpf_oom_srcu); + + /* Find the nearest bpf_oom_ops traversing the cgroup tree upwards */ + for (memcg = oc->memcg; memcg; memcg = parent_mem_cgroup(memcg)) { + bpf_oom_ops = READ_ONCE(memcg->bpf_oom); + if (!bpf_oom_ops) + continue; + + /* Call BPF OOM handler */ + ret = bpf_ops_handle_oom(bpf_oom_ops, memcg, oc); + if (ret && oc->bpf_memory_freed) + goto exit; + } + /* + * System-wide OOM or per-memcg BPF OOM handler wasn't successful? + * Try system_bpf_oom. 
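+	 * (a handler is considered successful only if it returned a non-zero
+	 * value and set oc->bpf_memory_freed)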
+ */ + bpf_oom_ops = READ_ONCE(system_bpf_oom); + if (!bpf_oom_ops) + goto exit; + + /* Call BPF OOM handler */ + ret = bpf_ops_handle_oom(bpf_oom_ops, NULL, oc); +exit: + srcu_read_unlock(&bpf_oom_srcu, idx); + return ret && oc->bpf_memory_freed; +} + +static int __handle_out_of_memory(struct oom_control *oc, + struct bpf_oom_ctx *exec_ctx) +{ + return 0; +} + +static void __handle_cgroup_offline(u64 cgroup_id, struct bpf_oom_ctx *exec_ctx) +{ +} + +static struct bpf_oom_ops __bpf_oom_ops = { + .handle_out_of_memory = __handle_out_of_memory, + .handle_cgroup_offline = __handle_cgroup_offline, +}; + +static const struct bpf_func_proto * +bpf_oom_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + return tracing_prog_func_proto(func_id, prog); +} + +static bool bpf_oom_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static const struct bpf_verifier_ops bpf_oom_verifier_ops = { + .get_func_proto = bpf_oom_func_proto, + .is_valid_access = bpf_oom_ops_is_valid_access, +}; + +static int bpf_oom_ops_reg(void *kdata, struct bpf_link *link) +{ + struct bpf_struct_ops_link *ops_link = container_of(link, struct bpf_struct_ops_link, link); + struct bpf_oom_ops **bpf_oom_ops_ptr = NULL; + struct bpf_oom_ops *bpf_oom_ops = kdata; + struct mem_cgroup *memcg = NULL; + int err = 0; + + if (ops_link->cgroup_id) { + /* Attach to a memory cgroup? */ + memcg = mem_cgroup_get_from_ino(ops_link->cgroup_id); + if (IS_ERR_OR_NULL(memcg)) + return PTR_ERR(memcg); + bpf_oom_ops_ptr = &memcg->bpf_oom; + } else { + /* System-wide OOM handler */ + bpf_oom_ops_ptr = &system_bpf_oom; + } + + /* Another struct ops attached? */ + if (READ_ONCE(*bpf_oom_ops_ptr)) { + err = -EBUSY; + goto exit; + } + + /* Expose bpf_oom_ops structure */ + WRITE_ONCE(*bpf_oom_ops_ptr, bpf_oom_ops); +exit: + mem_cgroup_put(memcg); + return err; +} + +static void bpf_oom_ops_unreg(void *kdata, struct bpf_link *link) +{ + struct bpf_struct_ops_link *ops_link = container_of(link, struct bpf_struct_ops_link, link); + struct bpf_oom_ops **bpf_oom_ops_ptr = NULL; + struct bpf_oom_ops *bpf_oom_ops = kdata; + struct mem_cgroup *memcg = NULL; + + if (ops_link->cgroup_id) { + /* Detach from a memory cgroup? 
*/ + memcg = mem_cgroup_get_from_ino(ops_link->cgroup_id); + if (IS_ERR_OR_NULL(memcg)) + goto exit; + bpf_oom_ops_ptr = &memcg->bpf_oom; + } else { + /* System-wide OOM handler */ + bpf_oom_ops_ptr = &system_bpf_oom; + } + + /* Hide bpf_oom_ops from new callers */ + if (!WARN_ON(READ_ONCE(*bpf_oom_ops_ptr) != bpf_oom_ops)) + WRITE_ONCE(*bpf_oom_ops_ptr, NULL); + + mem_cgroup_put(memcg); + +exit: + /* Release bpf_oom_ops after a srcu grace period */ + synchronize_srcu(&bpf_oom_srcu); +} + +void bpf_oom_memcg_offline(struct mem_cgroup *memcg) +{ + struct bpf_oom_ops *bpf_oom_ops; + struct bpf_oom_ctx exec_ctx; + u64 cgrp_id; + int idx; + + /* All bpf_oom_ops structures are protected using bpf_oom_srcu */ + idx = srcu_read_lock(&bpf_oom_srcu); + + bpf_oom_ops = READ_ONCE(memcg->bpf_oom); + WRITE_ONCE(memcg->bpf_oom, NULL); + + if (bpf_oom_ops && bpf_oom_ops->handle_cgroup_offline) { + cgrp_id = cgroup_id(memcg->css.cgroup); + exec_ctx.cgroup_id = cgrp_id; + bpf_oom_ops->handle_cgroup_offline(cgrp_id, &exec_ctx); + } + + srcu_read_unlock(&bpf_oom_srcu, idx); +} + +static int bpf_oom_ops_check_member(const struct btf_type *t, + const struct btf_member *member, + const struct bpf_prog *prog) +{ + u32 moff = __btf_member_bit_offset(t, member) / 8; + + switch (moff) { + case offsetof(struct bpf_oom_ops, handle_out_of_memory): + if (!prog) + return -EINVAL; + break; + } + + return 0; +} + +static int bpf_oom_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + const struct bpf_oom_ops *uops = udata; + struct bpf_oom_ops *ops = kdata; + u32 moff = __btf_member_bit_offset(t, member) / 8; + + switch (moff) { + case offsetof(struct bpf_oom_ops, name): + if (uops->name[0]) + strscpy_pad(ops->name, uops->name, sizeof(ops->name)); + else + strscpy_pad(ops->name, "bpf_defined_policy"); + return 1; + } + return 0; +} + +static int bpf_oom_ops_init(struct btf *btf) +{ + return 0; +} + +static struct bpf_struct_ops bpf_oom_bpf_ops = { + .verifier_ops = &bpf_oom_verifier_ops, + .reg = bpf_oom_ops_reg, + .unreg = bpf_oom_ops_unreg, + .check_member = bpf_oom_ops_check_member, + .init_member = bpf_oom_ops_init_member, + .init = bpf_oom_ops_init, + .name = "bpf_oom_ops", + .owner = THIS_MODULE, + .cfi_stubs = &__bpf_oom_ops +}; + +static int __init bpf_oom_struct_ops_init(void) +{ + return register_bpf_struct_ops(&bpf_oom_bpf_ops, bpf_oom_ops); +} +late_initcall(bpf_oom_struct_ops_init); diff --git a/mm/memcontrol-v1.h b/mm/memcontrol-v1.h index 6358464bb4160..a304ad418cdfe 100644 --- a/mm/memcontrol-v1.h +++ b/mm/memcontrol-v1.h @@ -27,7 +27,6 @@ unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap); void drain_all_stock(struct mem_cgroup *root_memcg); unsigned long memcg_events(struct mem_cgroup *memcg, int event); -unsigned long memcg_page_state_output(struct mem_cgroup *memcg, int item); int memory_stat_show(struct seq_file *m, void *v); void mem_cgroup_id_get_many(struct mem_cgroup *memcg, unsigned int n); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4deda33625f41..d44c1f293e168 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -63,6 +63,7 @@ #include #include #include +#include #include "internal.h" #include #include @@ -3618,7 +3619,6 @@ struct mem_cgroup *mem_cgroup_from_id(unsigned short id) return xa_load(&mem_cgroup_ids, id); } -#ifdef CONFIG_SHRINKER_DEBUG struct mem_cgroup *mem_cgroup_get_from_ino(unsigned long ino) { struct cgroup *cgrp; @@ -3639,7 +3639,6 @@ struct mem_cgroup *mem_cgroup_get_from_ino(unsigned 
long ino) return memcg; } -#endif static void free_mem_cgroup_per_node_info(struct mem_cgroup_per_node *pn) { @@ -3887,6 +3886,7 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css) zswap_memcg_offline_cleanup(memcg); + bpf_oom_memcg_offline(memcg); memcg_offline_kmem(memcg); reparent_shrinker_deferred(memcg); wb_memcg_offline(memcg); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c145b0feecc1f..65a3b4c1fc725 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include "internal.h" @@ -239,12 +240,35 @@ long oom_badness(struct task_struct *p, unsigned long totalpages) return points; } -static const char * const oom_constraint_text[] = { - [CONSTRAINT_NONE] = "CONSTRAINT_NONE", - [CONSTRAINT_CPUSET] = "CONSTRAINT_CPUSET", - [CONSTRAINT_MEMORY_POLICY] = "CONSTRAINT_MEMORY_POLICY", - [CONSTRAINT_MEMCG] = "CONSTRAINT_MEMCG", -}; +static const char *oom_policy_name(struct oom_control *oc) +{ +#ifdef CONFIG_BPF_SYSCALL + if (oc->bpf_policy_name) + return oc->bpf_policy_name; +#endif + return "default"; +} + +static const char *oom_constraint_text(struct oom_control *oc) +{ + switch (oc->constraint) { + case CONSTRAINT_NONE: + return "CONSTRAINT_NONE"; + case CONSTRAINT_CPUSET: + return "CONSTRAINT_CPUSET"; + case CONSTRAINT_MEMORY_POLICY: + return "CONSTRAINT_MEMORY_POLICY"; + case CONSTRAINT_MEMCG: + return "CONSTRAINT_MEMCG"; +#ifdef CONFIG_BPF_SYSCALL + case CONSTRAINT_BPF: + return oc->bpf_constraint ? : "CONSTRAINT_BPF"; +#endif + default: + WARN_ON_ONCE(1); + return ""; + } +} /* * Determine the type of allocation constraint. @@ -257,6 +281,9 @@ static enum oom_constraint constrained_alloc(struct oom_control *oc) bool cpuset_limited = false; int nid; + if (oc->constraint == CONSTRAINT_BPF) + return CONSTRAINT_BPF; + if (is_memcg_oom(oc)) { oc->totalpages = mem_cgroup_get_max(oc->memcg) ?: 1; return CONSTRAINT_MEMCG; @@ -448,7 +475,7 @@ static void dump_oom_victim(struct oom_control *oc, struct task_struct *victim) { /* one line summary of the oom killer context. */ pr_info("oom-kill:constraint=%s,nodemask=%*pbl", - oom_constraint_text[oc->constraint], + oom_constraint_text(oc), nodemask_pr_args(oc->nodemask)); cpuset_print_current_mems_allowed(); mem_cgroup_print_oom_context(oc->memcg, victim); @@ -458,9 +485,10 @@ static void dump_oom_victim(struct oom_control *oc, struct task_struct *victim) static void dump_header(struct oom_control *oc) { - pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\n", + pr_warn("%s invoked oom-killer: gfp_mask=%#x(%pGg), order=%d, oom_score_adj=%hd\noom_policy=%s\n", current->comm, oc->gfp_mask, &oc->gfp_mask, oc->order, - current->signal->oom_score_adj); + current->signal->oom_score_adj, + oom_policy_name(oc)); if (!IS_ENABLED(CONFIG_COMPACTION) && oc->order) pr_warn("COMPACTION is disabled!!!\n"); @@ -1167,6 +1195,13 @@ bool out_of_memory(struct oom_control *oc) return true; } + /* + * Let bpf handle the OOM first. If it was able to free up some memory, + * bail out. Otherwise fall back to the kernel OOM killer. + */ + if (bpf_handle_oom(oc)) + return true; + select_bad_process(oc); /* Found nothing?!?! 
*/ if (!oc->chosen) { @@ -1270,3 +1305,153 @@ SYSCALL_DEFINE2(process_mrelease, int, pidfd, unsigned int, flags) return -ENOSYS; #endif /* CONFIG_MMU */ } + +#ifdef CONFIG_BPF_SYSCALL + +__bpf_kfunc_start_defs(); +/** + * bpf_oom_kill_process - Kill a process as OOM killer + * @oc: pointer to oom_control structure, describes OOM context + * @task: task to be killed + * @message__str: message to print in dmesg + * + * Kill a process in a way similar to the kernel OOM killer. + * This means dump the necessary information to dmesg, adjust memcg + * statistics, leverage the oom reaper, respect memory.oom.group etc. + * + * bpf_oom_kill_process() marks the forward progress by setting + * oc->bpf_memory_freed. If the progress was made, the bpf program + * is free to decide if the kernel oom killer should be invoked. + * Otherwise it's enforced, so that a bad bpf program can't + * deadlock the machine on memory. + */ +__bpf_kfunc int bpf_oom_kill_process(struct oom_control *oc, + struct task_struct *task, + const char *message__str) +{ + if (oom_unkillable_task(task)) + return -EPERM; + + /* paired with put_task_struct() in oom_kill_process() */ + task = tryget_task_struct(task); + if (!task) + return -EINVAL; + + oc->chosen = task; + + oom_kill_process(oc, message__str); + + oc->chosen = NULL; + oc->bpf_memory_freed = true; + + return 0; +} + +/** + * bpf_out_of_memory - declare Out Of Memory state and invoke OOM killer + * @memcg__nullable: memcg or NULL for system-wide OOMs + * @order: order of page which wasn't allocated + * @flags: flags + * @constraint_text__nullable: custom constraint description for the OOM report + * + * Declares the Out Of Memory state and invokes the OOM killer. + * + * OOM handlers are synchronized using the oom_lock mutex. If wait_on_oom_lock + * is true, the function will wait on it. Otherwise it bails out with -EBUSY + * if oom_lock is contended. + * + * Generally it's advised to pass wait_on_oom_lock=false for global OOMs + * and wait_on_oom_lock=true for memcg-scoped OOMs. + * + * Returns 1 if the forward progress was achieved and some memory was freed. + * Returns a negative value if an error occurred. + */ +__bpf_kfunc int bpf_out_of_memory(struct mem_cgroup *memcg__nullable, + int order, u64 flags, + const char *constraint_text__nullable) +{ + struct oom_control oc = { + .memcg = memcg__nullable, + .order = order, + .constraint = CONSTRAINT_BPF, + .bpf_constraint = constraint_text__nullable, + }; + int ret; + + if (flags & ~(BPF_OOM_FLAGS_LAST - 1)) + return -EINVAL; + + if (oc.order < 0 || oc.order > MAX_PAGE_ORDER) + return -EINVAL; + + if (flags & BPF_OOM_FLAGS_WAIT_ON_OOM_LOCK) { + ret = mutex_lock_killable(&oom_lock); + if (ret) + return ret; + } else if (!mutex_trylock(&oom_lock)) + return -EBUSY; + + ret = out_of_memory(&oc); + + mutex_unlock(&oom_lock); + return ret; +} + +/** + * bpf_task_is_oom_victim - Check if the task has been marked as an OOM victim + * @task: task to check + * + * Returns true if the task has been previously selected by the OOM killer + * to be killed. It's expected that the task will be destroyed soon and some + * memory will be freed, so maybe no additional actions required. 
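+ *
+ * A typical use (illustrative) is to have a BPF OOM handler bail out early
+ * when a previously selected victim is still exiting, instead of killing
+ * another task.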
+ */ +__bpf_kfunc bool bpf_task_is_oom_victim(struct task_struct *task) +{ + return tsk_is_oom_victim(task); +} + +__bpf_kfunc_end_defs(); + +BTF_KFUNCS_START(bpf_oom_kfuncs) +BTF_ID_FLAGS(func, bpf_oom_kill_process, KF_SLEEPABLE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_out_of_memory, KF_SLEEPABLE | KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_is_oom_victim, KF_TRUSTED_ARGS) +BTF_KFUNCS_END(bpf_oom_kfuncs) + +BTF_SET_START(bpf_oom_declare_oom_kfuncs) +BTF_ID(func, bpf_out_of_memory) +BTF_SET_END(bpf_oom_declare_oom_kfuncs) + +extern struct bpf_struct_ops bpf_psi_bpf_ops; + +static int bpf_oom_kfunc_filter(const struct bpf_prog *prog, u32 kfunc_id) +{ + if (!btf_id_set_contains(&bpf_oom_declare_oom_kfuncs, kfunc_id)) + return 0; + + if (IS_ENABLED(CONFIG_PSI) && prog->aux->st_ops == &bpf_psi_bpf_ops) + return 0; + + return -EACCES; +} + +static const struct btf_kfunc_id_set bpf_oom_kfunc_set = { + .owner = THIS_MODULE, + .set = &bpf_oom_kfuncs, + .filter = bpf_oom_kfunc_filter, +}; + +static int __init bpf_oom_init(void) +{ + int err; + + err = register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, + &bpf_oom_kfunc_set); + if (err) + pr_warn("error while registering bpf oom kfuncs: %d", err); + + return err; +} +late_initcall(bpf_oom_init); + +#endif diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 339b197972374..4c8944f8d6ba5 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -883,6 +883,14 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, cgroup)) return libbpf_err(-EINVAL); break; + case BPF_STRUCT_OPS: + relative_fd = OPTS_GET(opts, cgroup.relative_fd, 0); + attr.link_create.cgroup.relative_fd = relative_fd; + attr.link_create.cgroup.expected_revision = + OPTS_GET(opts, cgroup.expected_revision, 0); + if (!OPTS_ZEROED(opts, cgroup)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index b90574f39d1c7..be56a5dee5050 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -13196,12 +13196,19 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link) return close(link->fd); } -struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) +struct bpf_link *bpf_map__attach_struct_ops_opts(const struct bpf_map *map, + const struct bpf_struct_ops_opts *opts) { + DECLARE_LIBBPF_OPTS(bpf_link_create_opts, link_opts); struct bpf_link_struct_ops *link; __u32 zero = 0; int err, fd; + if (!OPTS_VALID(opts, bpf_struct_ops_opts)) { + pr_warn("map '%s': invalid opts\n", map->name); + return libbpf_err_ptr(-EINVAL); + } + if (!bpf_map__is_struct_ops(map)) { pr_warn("map '%s': can't attach non-struct_ops map\n", map->name); return libbpf_err_ptr(-EINVAL); @@ -13237,7 +13244,9 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) return &link->link; } - fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, NULL); + link_opts.cgroup.relative_fd = OPTS_GET(opts, relative_fd, 0); + + fd = bpf_link_create(map->fd, 0, BPF_STRUCT_OPS, &link_opts); if (fd < 0) { free(link); return libbpf_err_ptr(fd); @@ -13249,6 +13258,11 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) return &link->link; } +struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map) +{ + return bpf_map__attach_struct_ops_opts(map, NULL); +} + /* * Swap the back struct_ops of a link with a new struct_ops map. 
 */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 5118d0a90e243..dc84898715cfc 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -922,6 +922,20 @@ bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd,
 struct bpf_map;
 
 LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map);
+
+struct bpf_struct_ops_opts {
+	/* size of this struct, for forward/backward compatibility */
+	size_t sz;
+	__u32 flags;
+	__u32 relative_fd;
+	__u64 expected_revision;
+	size_t :0;
+};
+#define bpf_struct_ops_opts__last_field expected_revision
+
+LIBBPF_API struct bpf_link *
+bpf_map__attach_struct_ops_opts(const struct bpf_map *map,
+				const struct bpf_struct_ops_opts *opts);
 LIBBPF_API int bpf_link__update_map(struct bpf_link *link, const struct bpf_map *map);
 
 struct bpf_iter_attach_opts {
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 8ed8749907d47..bc00089343ce4 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -451,4 +451,5 @@ LIBBPF_1.7.0 {
 	global:
 		bpf_map__set_exclusive_program;
 		bpf_map__exclusive_program;
+		bpf_map__attach_struct_ops_opts;
 } LIBBPF_1.6.0;
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 20cede4db3cee..8fb02fe4c4aaa 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -126,6 +126,45 @@ int enable_controllers(const char *relative_path, const char *controllers)
 	return __enable_controllers(cgroup_path, controllers);
 }
 
+static size_t __read_cgroup_file(const char *cgroup_path, const char *file,
+				 char *buf, size_t size)
+{
+	char file_path[PATH_MAX + 1];
+	size_t ret;
+	int fd;
+
+	snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file);
+	fd = open(file_path, O_RDONLY);
+	if (fd < 0) {
+		log_err("Opening %s", file_path);
+		return -1;
+	}
+
+	ret = read(fd, buf, size);
+	close(fd);
+	return ret;
+}
+
+/**
+ * read_cgroup_file() - Read from a cgroup file
+ * @relative_path: The cgroup path, relative to the workdir
+ * @file: The name of the file in cgroupfs to read from
+ * @buf: Buffer to read the file contents into
+ * @size: Size of the buffer
+ *
+ * Read from a file in the given cgroup's directory.
+ *
+ * If successful, the number of read bytes is returned.
+ */ +size_t read_cgroup_file(const char *relative_path, const char *file, + char *buf, size_t size) +{ + char cgroup_path[PATH_MAX - 24]; + + format_cgroup_path(cgroup_path, relative_path); + return __read_cgroup_file(cgroup_path, file, buf, size); +} + static int __write_cgroup_file(const char *cgroup_path, const char *file, const char *buf) { diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 3857304be8741..9f9bb6b5d9928 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -11,6 +11,8 @@ /* cgroupv2 related */ int enable_controllers(const char *relative_path, const char *controllers); +size_t read_cgroup_file(const char *relative_path, const char *file, + char *buf, size_t size); int write_cgroup_file(const char *relative_path, const char *file, const char *buf); int write_cgroup_file_parent(const char *relative_path, const char *file, diff --git a/tools/testing/selftests/bpf/cgroup_iter_memcg.h b/tools/testing/selftests/bpf/cgroup_iter_memcg.h new file mode 100644 index 0000000000000..3f59b127943ba --- /dev/null +++ b/tools/testing/selftests/bpf/cgroup_iter_memcg.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#ifndef __CGROUP_ITER_MEMCG_H +#define __CGROUP_ITER_MEMCG_H + +struct memcg_query { + /* some node_stat_item's */ + unsigned long nr_anon_mapped; + unsigned long nr_shmem; + unsigned long nr_file_pages; + unsigned long nr_file_mapped; + /* some memcg_stat_item */ + unsigned long memcg_kmem; + /* some vm_event_item */ + unsigned long pgfault; +}; + +#endif /* __CGROUP_ITER_MEMCG_H */ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 70b28c1e653ea..178c840c844bc 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -110,6 +110,7 @@ CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y CONFIG_PACKET=y +CONFIG_PSI=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c new file mode 100644 index 0000000000000..215e4c98c76f1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter_memcg.c @@ -0,0 +1,223 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include +#include +#include +#include +#include "cgroup_helpers.h" +#include "cgroup_iter_memcg.h" +#include "cgroup_iter_memcg.skel.h" + +static int read_stats(struct bpf_link *link) +{ + int fd, ret = 0; + ssize_t bytes; + + fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(fd, "bpf_iter_create")) + return 1; + + /* + * Invoke iter program by reading from its fd. We're not expecting any + * data to be written by the bpf program so the result should be zero. + * Results will be read directly through the custom data section + * accessible through skel->data_query.memcg_query. + */ + bytes = read(fd, NULL, 0); + if (!ASSERT_EQ(bytes, 0, "read fd")) + ret = 1; + + close(fd); + return ret; +} + +static void test_anon(struct bpf_link *link, struct memcg_query *memcg_query) +{ + void *map; + size_t len; + + len = sysconf(_SC_PAGESIZE) * 1024; + + /* + * Increase memcg anon usage by mapping and writing + * to a new anon region. 
+ */ + map = mmap(NULL, len, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap anon")) + return; + + memset(map, 1, len); + + if (!ASSERT_OK(read_stats(link), "read stats")) + goto cleanup; + + ASSERT_GT(memcg_query->nr_anon_mapped, 0, "final anon mapped val"); + +cleanup: + munmap(map, len); +} + +static void test_file(struct bpf_link *link, struct memcg_query *memcg_query) +{ + void *map; + size_t len; + char *path; + int fd; + + len = sysconf(_SC_PAGESIZE) * 1024; + path = "/tmp/test_cgroup_iter_memcg"; + + /* + * Increase memcg file usage by creating and writing + * to a mapped file. + */ + fd = open(path, O_CREAT | O_RDWR, 0644); + if (!ASSERT_OK_FD(fd, "open fd")) + return; + if (!ASSERT_OK(ftruncate(fd, len), "ftruncate")) + goto cleanup_fd; + + map = mmap(NULL, len, PROT_WRITE, MAP_SHARED, fd, 0); + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap file")) + goto cleanup_fd; + + memset(map, 1, len); + + if (!ASSERT_OK(read_stats(link), "read stats")) + goto cleanup_map; + + ASSERT_GT(memcg_query->nr_file_pages, 0, "final file value"); + ASSERT_GT(memcg_query->nr_file_mapped, 0, "final file mapped value"); + +cleanup_map: + munmap(map, len); +cleanup_fd: + close(fd); + unlink(path); +} + +static void test_shmem(struct bpf_link *link, struct memcg_query *memcg_query) +{ + size_t len; + int fd; + + len = sysconf(_SC_PAGESIZE) * 1024; + + /* + * Increase memcg shmem usage by creating and writing + * to a shmem object. + */ + fd = shm_open("/tmp_shmem", O_CREAT | O_RDWR, 0644); + if (!ASSERT_OK_FD(fd, "shm_open")) + return; + + if (!ASSERT_OK(fallocate(fd, 0, 0, len), "fallocate")) + goto cleanup; + + if (!ASSERT_OK(read_stats(link), "read stats")) + goto cleanup; + + ASSERT_GT(memcg_query->nr_shmem, 0, "final shmem value"); + +cleanup: + close(fd); + shm_unlink("/tmp_shmem"); +} + +#define NR_PIPES 64 +static void test_kmem(struct bpf_link *link, struct memcg_query *memcg_query) +{ + int fds[NR_PIPES][2], i; + + /* + * Increase kmem value by creating pipes which will allocate some + * kernel buffers. + */ + for (i = 0; i < NR_PIPES; i++) { + if (!ASSERT_OK(pipe(fds[i]), "pipe")) + goto cleanup; + } + + if (!ASSERT_OK(read_stats(link), "read stats")) + goto cleanup; + + ASSERT_GT(memcg_query->memcg_kmem, 0, "kmem value"); + +cleanup: + for (i = 0; i < NR_PIPES; i++) { + close(fds[i][0]); + close(fds[i][1]); + } +} + +static void test_pgfault(struct bpf_link *link, struct memcg_query *memcg_query) +{ + void *map; + size_t len; + + len = sysconf(_SC_PAGESIZE) * 1024; + + /* Create region to use for triggering a page fault. */ + map = mmap(NULL, len, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + if (!ASSERT_NEQ(map, MAP_FAILED, "mmap anon")) + return; + + /* Trigger page fault. 
*/ + memset(map, 1, len); + + if (!ASSERT_OK(read_stats(link), "read stats")) + goto cleanup; + + ASSERT_GT(memcg_query->pgfault, 0, "final pgfault val"); + +cleanup: + munmap(map, len); +} + +void test_cgroup_iter_memcg(void) +{ + char *cgroup_rel_path = "/cgroup_iter_memcg_test"; + struct cgroup_iter_memcg *skel; + struct bpf_link *link; + int cgroup_fd; + + cgroup_fd = cgroup_setup_and_join(cgroup_rel_path); + if (!ASSERT_OK_FD(cgroup_fd, "cgroup_setup_and_join")) + return; + + skel = cgroup_iter_memcg__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_iter_memcg__open_and_load")) + goto cleanup_cgroup_fd; + + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + union bpf_iter_link_info linfo = { + .cgroup.cgroup_fd = cgroup_fd, + .cgroup.order = BPF_CGROUP_ITER_SELF_ONLY, + }; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + + link = bpf_program__attach_iter(skel->progs.cgroup_memcg_query, &opts); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto cleanup_skel; + + if (test__start_subtest("cgroup_iter_memcg__anon")) + test_anon(link, &skel->data_query->memcg_query); + if (test__start_subtest("cgroup_iter_memcg__shmem")) + test_shmem(link, &skel->data_query->memcg_query); + if (test__start_subtest("cgroup_iter_memcg__file")) + test_file(link, &skel->data_query->memcg_query); + if (test__start_subtest("cgroup_iter_memcg__kmem")) + test_kmem(link, &skel->data_query->memcg_query); + if (test__start_subtest("cgroup_iter_memcg__pgfault")) + test_pgfault(link, &skel->data_query->memcg_query); + + bpf_link__destroy(link); +cleanup_skel: + cgroup_iter_memcg__destroy(skel); +cleanup_cgroup_fd: + close(cgroup_fd); + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_oom.c b/tools/testing/selftests/bpf/prog_tests/test_oom.c new file mode 100644 index 0000000000000..6126d961aba3d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_oom.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +#include "cgroup_helpers.h" +#include "test_oom.skel.h" + +struct cgroup_desc { + const char *path; + int fd; + unsigned long long id; + int pid; + size_t target; + size_t max; + int oom_score_adj; + bool victim; +}; + +#define MB (1024 * 1024) +#define OOM_SCORE_ADJ_MIN (-1000) +#define OOM_SCORE_ADJ_MAX 1000 + +static struct cgroup_desc cgroups[] = { + { .path = "/oom_test", .max = 80 * MB}, + { .path = "/oom_test/cg1", .target = 10 * MB, + .oom_score_adj = OOM_SCORE_ADJ_MAX }, + { .path = "/oom_test/cg2", .target = 40 * MB, + .oom_score_adj = OOM_SCORE_ADJ_MIN }, + { .path = "/oom_test/cg3" }, + { .path = "/oom_test/cg3/cg4", .target = 30 * MB, + .victim = true }, + { .path = "/oom_test/cg3/cg5", .target = 20 * MB }, +}; + +static int spawn_task(struct cgroup_desc *desc) +{ + char *ptr; + int pid; + + pid = fork(); + if (pid < 0) + return pid; + + if (pid > 0) { + /* parent */ + desc->pid = pid; + return 0; + } + + /* child */ + if (desc->oom_score_adj) { + char buf[64]; + int fd = open("/proc/self/oom_score_adj", O_WRONLY); + + if (fd < 0) + return -1; + + snprintf(buf, sizeof(buf), "%d", desc->oom_score_adj); + write(fd, buf, sizeof(buf)); + close(fd); + } + + ptr = (char *)malloc(desc->target); + if (!ptr) + return -ENOMEM; + + memset(ptr, 'a', desc->target); + + while (1) + sleep(1000); + + return 0; +} + +static void setup_environment(void) +{ + int i, err; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "setup_cgroup_environment")) + goto cleanup; + + for (i = 0; i < 
ARRAY_SIZE(cgroups); i++) { + cgroups[i].fd = create_and_get_cgroup(cgroups[i].path); + if (!ASSERT_GE(cgroups[i].fd, 0, "create_and_get_cgroup")) + goto cleanup; + + cgroups[i].id = get_cgroup_id(cgroups[i].path); + if (!ASSERT_GT(cgroups[i].id, 0, "get_cgroup_id")) + goto cleanup; + + /* Freeze the top-level cgroup */ + if (i == 0) { + /* Freeze the top-level cgroup */ + err = write_cgroup_file(cgroups[i].path, "cgroup.freeze", "1"); + if (!ASSERT_OK(err, "freeze cgroup")) + goto cleanup; + } + + /* Recursively enable the memory controller */ + if (!cgroups[i].target) { + + err = write_cgroup_file(cgroups[i].path, "cgroup.subtree_control", + "+memory"); + if (!ASSERT_OK(err, "enable memory controller")) + goto cleanup; + } + + /* Set memory.max */ + if (cgroups[i].max) { + char buf[256]; + + snprintf(buf, sizeof(buf), "%lu", cgroups[i].max); + err = write_cgroup_file(cgroups[i].path, "memory.max", buf); + if (!ASSERT_OK(err, "set memory.max")) + goto cleanup; + + snprintf(buf, sizeof(buf), "0"); + write_cgroup_file(cgroups[i].path, "memory.swap.max", buf); + + } + + /* Spawn tasks creating memory pressure */ + if (cgroups[i].target) { + char buf[256]; + + err = spawn_task(&cgroups[i]); + if (!ASSERT_OK(err, "spawn task")) + goto cleanup; + + snprintf(buf, sizeof(buf), "%d", cgroups[i].pid); + err = write_cgroup_file(cgroups[i].path, "cgroup.procs", buf); + if (!ASSERT_OK(err, "put child into a cgroup")) + goto cleanup; + } + } + + return; + +cleanup: + cleanup_cgroup_environment(); +} + +static int run_and_wait_for_oom(void) +{ + int ret = -1; + bool first = true; + char buf[4096] = {}; + size_t size; + + /* Unfreeze the top-level cgroup */ + ret = write_cgroup_file(cgroups[0].path, "cgroup.freeze", "0"); + if (!ASSERT_OK(ret, "freeze cgroup")) + return -1; + + for (;;) { + int i, status; + pid_t pid = wait(&status); + + if (pid == -1) { + if (errno == EINTR) + continue; + /* ECHILD */ + break; + } + + if (!first) + continue; + + first = false; + + /* Check which process was terminated first */ + for (i = 0; i < ARRAY_SIZE(cgroups); i++) { + if (!ASSERT_OK(cgroups[i].victim != + (pid == cgroups[i].pid), + "correct process was killed")) { + ret = -1; + break; + } + + if (!cgroups[i].victim) + continue; + + /* Check the memcg oom counter */ + size = read_cgroup_file(cgroups[i].path, + "memory.events", + buf, sizeof(buf)); + if (!ASSERT_OK(size <= 0, "read memory.events")) { + ret = -1; + break; + } + + if (!ASSERT_OK(strstr(buf, "oom_kill 1") == NULL, + "oom_kill count check")) { + ret = -1; + break; + } + } + + /* Kill all remaining tasks */ + for (i = 0; i < ARRAY_SIZE(cgroups); i++) + if (cgroups[i].pid && cgroups[i].pid != pid) + kill(cgroups[i].pid, SIGKILL); + } + + return ret; +} + +void test_oom(void) +{ + DECLARE_LIBBPF_OPTS(bpf_struct_ops_opts, opts); + struct test_oom *skel; + struct bpf_link *link1, *link2; + int err = 0; + + setup_environment(); + + skel = test_oom__open_and_load(); + if (!skel) { + err = -errno; + CHECK_FAIL(err); + goto cleanup; + } + + opts.relative_fd = cgroups[0].fd; + link1 = bpf_map__attach_struct_ops_opts(skel->maps.test_bpf_oom, &opts); + if (!link1) { + err = -errno; + CHECK_FAIL(err); + goto cleanup; + } + + opts.relative_fd = 0; /* attach system-wide */ + link2 = bpf_map__attach_struct_ops_opts(skel->maps.test_bpf_oom, &opts); + if (!link2) { + err = -errno; + CHECK_FAIL(err); + goto cleanup; + } + + /* Unfreeze all child tasks and create the memory pressure */ + err = run_and_wait_for_oom(); + CHECK_FAIL(err); + +cleanup: + 
cleanup_cgroup_environment(); + test_oom__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_psi.c b/tools/testing/selftests/bpf/prog_tests/test_psi.c new file mode 100644 index 0000000000000..b294cea0a6fe2 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_psi.c @@ -0,0 +1,238 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include + +#include "cgroup_helpers.h" +#include "test_psi.skel.h" + +enum psi_res { + PSI_IO, + PSI_MEM, + PSI_CPU, + PSI_IRQ, + NR_PSI_RESOURCES, +}; + +struct cgroup_desc { + const char *path; + unsigned long long id; + int pid; + int fd; + size_t target; + size_t high; + bool victim; +}; + +#define MB (1024 * 1024) + +static struct cgroup_desc cgroups[] = { + { .path = "/psi_test" }, + { .path = "/psi_test/cg1" }, + { .path = "/psi_test/cg2", .target = 500 * MB, + .high = 40 * MB, .victim = true }, +}; + +static int spawn_task(struct cgroup_desc *desc) +{ + char *ptr; + int pid; + + pid = fork(); + if (pid < 0) + return pid; + + if (pid > 0) { + /* parent */ + desc->pid = pid; + return 0; + } + + /* child */ + ptr = (char *)malloc(desc->target); + if (!ptr) + return -ENOMEM; + + memset(ptr, 'a', desc->target); + + while (1) + sleep(1000); + + return 0; +} + +static void setup_environment(void) +{ + int i, err; + + err = setup_cgroup_environment(); + if (!ASSERT_OK(err, "setup_cgroup_environment")) + goto cleanup; + + for (i = 0; i < ARRAY_SIZE(cgroups); i++) { + cgroups[i].fd = create_and_get_cgroup(cgroups[i].path); + if (!ASSERT_GE(cgroups[i].fd, 0, "create_and_get_cgroup")) + goto cleanup; + + cgroups[i].id = get_cgroup_id(cgroups[i].path); + if (!ASSERT_GT(cgroups[i].id, 0, "get_cgroup_id")) + goto cleanup; + + /* Freeze the top-level cgroup and enable the memory controller */ + if (i == 0) { + err = write_cgroup_file(cgroups[i].path, "cgroup.freeze", "1"); + if (!ASSERT_OK(err, "freeze cgroup")) + goto cleanup; + + err = write_cgroup_file(cgroups[i].path, "cgroup.subtree_control", + "+memory"); + if (!ASSERT_OK(err, "enable memory controller")) + goto cleanup; + } + + /* Set memory.high */ + if (cgroups[i].high) { + char buf[256]; + + snprintf(buf, sizeof(buf), "%lu", cgroups[i].high); + err = write_cgroup_file(cgroups[i].path, "memory.high", buf); + if (!ASSERT_OK(err, "set memory.high")) + goto cleanup; + + snprintf(buf, sizeof(buf), "0"); + write_cgroup_file(cgroups[i].path, "memory.swap.max", buf); + } + + /* Spawn tasks creating memory pressure */ + if (cgroups[i].target) { + char buf[256]; + + err = spawn_task(&cgroups[i]); + if (!ASSERT_OK(err, "spawn task")) + goto cleanup; + + snprintf(buf, sizeof(buf), "%d", cgroups[i].pid); + err = write_cgroup_file(cgroups[i].path, "cgroup.procs", buf); + if (!ASSERT_OK(err, "put child into a cgroup")) + goto cleanup; + } + } + + return; + +cleanup: + cleanup_cgroup_environment(); +} + +static int run_and_wait_for_oom(void) +{ + int ret = -1; + bool first = true; + char buf[4096] = {}; + size_t size; + + /* Unfreeze the top-level cgroup */ + ret = write_cgroup_file(cgroups[0].path, "cgroup.freeze", "0"); + if (!ASSERT_OK(ret, "unfreeze cgroup")) + return -1; + + for (;;) { + int i, status; + pid_t pid = wait(&status); + + if (pid == -1) { + if (errno == EINTR) + continue; + /* ECHILD */ + break; + } + + if (!first) + continue; + first = false; + + /* Check which process was terminated first */ + for (i = 0; i < ARRAY_SIZE(cgroups); i++) { + if (!ASSERT_OK(cgroups[i].victim != + (pid == cgroups[i].pid), + "correct process was killed")) { + ret = -1; + break; + 
} + + if (!cgroups[i].victim) + continue; + + /* Check the memcg oom counter */ + size = read_cgroup_file(cgroups[i].path, "memory.events", + buf, sizeof(buf)); + if (!ASSERT_OK(size <= 0, "read memory.events")) { + ret = -1; + break; + } + + if (!ASSERT_OK(strstr(buf, "oom_kill 1") == NULL, + "oom_kill count check")) { + ret = -1; + break; + } + } + + /* Kill all remaining tasks */ + for (i = 0; i < ARRAY_SIZE(cgroups); i++) + if (cgroups[i].pid && cgroups[i].pid != pid) + kill(cgroups[i].pid, SIGKILL); + } + + return ret; +} + +void test_psi(void) +{ + struct test_psi *skel; + u64 deleted_cgroup_id; + int new_cgroup_fd; + u64 new_cgroup_id; + int err; + + setup_environment(); + + skel = test_psi__open_and_load(); + err = libbpf_get_error(skel); + if (CHECK_FAIL(err)) + goto cleanup; + + skel->bss->deleted_cgroup_id = cgroups[1].id; + skel->bss->high_pressure_cgroup_id = cgroups[2].id; + + err = test_psi__attach(skel); + if (CHECK_FAIL(err)) + goto cleanup; + + /* Delete the first cgroup, it should trigger handle_cgroup_offline() */ + remove_cgroup(cgroups[1].path); + + new_cgroup_fd = create_and_get_cgroup("/psi_test_new"); + if (!ASSERT_GE(new_cgroup_fd, 0, "create_and_get_cgroup")) + goto cleanup; + + new_cgroup_id = get_cgroup_id("/psi_test_new"); + if (!ASSERT_GT(new_cgroup_id, 0, "get_cgroup_id")) + goto cleanup; + + /* Unfreeze all child tasks and create the memory pressure */ + err = run_and_wait_for_oom(); + CHECK_FAIL(err); + + /* Check the result of the handle_cgroup_offline() handler */ + deleted_cgroup_id = skel->bss->deleted_cgroup_id; + ASSERT_EQ(deleted_cgroup_id, cgroups[1].id, "deleted cgroup id"); + + /* Check the result of the handle_cgroup_online() handler */ + ASSERT_EQ(skel->bss->new_cgroup_id, new_cgroup_id, + "new cgroup id"); + +cleanup: + cleanup_cgroup_environment(); + test_psi__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c new file mode 100644 index 0000000000000..92db5fd11391d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_iter_memcg.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include +#include +#include "cgroup_iter_memcg.h" + +char _license[] SEC("license") = "GPL"; + +/* The latest values read are stored here. 
*/ +struct memcg_query memcg_query SEC(".data.query"); + +SEC("iter.s/cgroup") +int cgroup_memcg_query(struct bpf_iter__cgroup *ctx) +{ + struct cgroup *cgrp = ctx->cgroup; + struct cgroup_subsys_state *css; + struct mem_cgroup *memcg; + + if (!cgrp) + return 1; + + css = container_of(cgrp, struct cgroup_subsys_state, cgroup); + if (!css) + return 1; + + memcg = bpf_get_mem_cgroup(css); + if (!memcg) + return 1; + + bpf_mem_cgroup_flush_stats(memcg); + + memcg_query.nr_anon_mapped = bpf_mem_cgroup_page_state(memcg, NR_ANON_MAPPED); + memcg_query.nr_shmem = bpf_mem_cgroup_page_state(memcg, NR_SHMEM); + memcg_query.nr_file_pages = bpf_mem_cgroup_page_state(memcg, NR_FILE_PAGES); + memcg_query.nr_file_mapped = bpf_mem_cgroup_page_state(memcg, NR_FILE_MAPPED); + memcg_query.memcg_kmem = bpf_mem_cgroup_page_state(memcg, MEMCG_KMEM); + memcg_query.pgfault = bpf_mem_cgroup_vm_events(memcg, PGFAULT); + + bpf_put_mem_cgroup(memcg); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_oom.c b/tools/testing/selftests/bpf/progs/test_oom.c new file mode 100644 index 0000000000000..352b522ae584c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_oom.c @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +#define OOM_SCORE_ADJ_MIN (-1000) + +static bool mem_cgroup_killable(struct mem_cgroup *memcg) +{ + struct task_struct *task; + bool ret = true; + + bpf_for_each(css_task, task, &memcg->css, CSS_TASK_ITER_PROCS) + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) + return false; + + return ret; +} + +/* + * Find the largest leaf cgroup (ignoring page cache) without unkillable tasks + * and kill all belonging tasks. + */ +SEC("struct_ops.s/handle_out_of_memory") +int BPF_PROG(test_out_of_memory, struct oom_control *oc, struct bpf_oom_ctx *exec_ctx) +{ + struct task_struct *task; + struct mem_cgroup *root_memcg = oc->memcg; + struct mem_cgroup *memcg, *victim = NULL; + struct cgroup_subsys_state *css_pos; + unsigned long usage, max_usage = 0; + unsigned long pagecache = 0; + int ret = 0; + + /* Pass to the system-level bpf_oom ops */ + if (exec_ctx->cgroup_id) + return 0; + + if (root_memcg) + root_memcg = bpf_get_mem_cgroup(&root_memcg->css); + else + root_memcg = bpf_get_root_mem_cgroup(); + + if (!root_memcg) + return 0; + + bpf_rcu_read_lock(); + bpf_for_each(css, css_pos, &root_memcg->css, BPF_CGROUP_ITER_DESCENDANTS_POST) { + if (css_pos->cgroup->nr_descendants + css_pos->cgroup->nr_dying_descendants) + continue; + + memcg = bpf_get_mem_cgroup(css_pos); + if (!memcg) + continue; + + usage = bpf_mem_cgroup_usage(memcg); + pagecache = bpf_mem_cgroup_page_state(memcg, NR_FILE_PAGES); + + if (usage > pagecache) + usage -= pagecache; + else + usage = 0; + + if ((usage > max_usage) && mem_cgroup_killable(memcg)) { + max_usage = usage; + if (victim) + bpf_put_mem_cgroup(victim); + victim = bpf_get_mem_cgroup(&memcg->css); + } + + bpf_put_mem_cgroup(memcg); + } + bpf_rcu_read_unlock(); + + if (!victim) + goto exit; + + bpf_for_each(css_task, task, &victim->css, CSS_TASK_ITER_PROCS) { + struct task_struct *t = bpf_task_acquire(task); + + if (t) { + /* + * If the task is already an OOM victim, it will + * quit soon and release some memory. 
+ */ + if (bpf_task_is_oom_victim(task)) { + bpf_task_release(t); + ret = 1; + break; + } + + bpf_oom_kill_process(oc, task, "bpf oom test"); + bpf_task_release(t); + ret = 1; + } + } + + bpf_put_mem_cgroup(victim); +exit: + bpf_put_mem_cgroup(root_memcg); + + return ret; +} + +SEC("struct_ops.s/handle_cgroup_offline") +int BPF_PROG(test_cgroup_offline, u64 cgroup_id, struct bpf_oom_ctx *exec_ctx) +{ + return 0; +} + +SEC(".struct_ops.link") +struct bpf_oom_ops test_bpf_oom = { + .name = "bpf_test_policy", + .handle_out_of_memory = (void *)test_out_of_memory, + .handle_cgroup_offline = (void *)test_cgroup_offline, +}; diff --git a/tools/testing/selftests/bpf/progs/test_psi.c b/tools/testing/selftests/bpf/progs/test_psi.c new file mode 100644 index 0000000000000..4e5cdb5242d1f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_psi.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "vmlinux.h" +#include +#include + +char _license[] SEC("license") = "GPL"; + +#define PSI_FULL 0x80000000 + +/* cgroup which will experience the high memory pressure */ +u64 high_pressure_cgroup_id; + +/* cgroup which will be deleted */ +u64 deleted_cgroup_id; + +/* cgroup which will be created */ +u64 new_cgroup_id; + +/* cgroup which was deleted */ +u64 deleted_cgroup_id; + +char constraint_name[] = "CONSTRAINT_BPF_PSI_MEM"; + +SEC("struct_ops.s/init") +int BPF_PROG(psi_init, struct bpf_psi *bpf_psi) +{ + int ret; + + ret = bpf_psi_create_trigger(bpf_psi, high_pressure_cgroup_id, + PSI_MEM | PSI_FULL, 100000, 1000000); + if (ret) + return ret; + + return bpf_psi_create_trigger(bpf_psi, deleted_cgroup_id, + PSI_IO, 100000, 1000000); +} + +SEC("struct_ops.s/handle_psi_event") +void BPF_PROG(handle_psi_event, struct psi_trigger *t) +{ + u64 cgroup_id = t->cgroup_id; + struct mem_cgroup *memcg; + struct cgroup *cgroup; + + cgroup = bpf_cgroup_from_id(cgroup_id); + if (!cgroup) + return; + + memcg = bpf_get_mem_cgroup(&cgroup->self); + if (!memcg) { + bpf_cgroup_release(cgroup); + return; + } + + bpf_out_of_memory(memcg, 0, BPF_OOM_FLAGS_WAIT_ON_OOM_LOCK, + constraint_name); + + bpf_put_mem_cgroup(memcg); + bpf_cgroup_release(cgroup); +} + +SEC("struct_ops.s/handle_cgroup_online") +void BPF_PROG(handle_cgroup_online, u64 cgroup_id) +{ + new_cgroup_id = cgroup_id; +} + +SEC("struct_ops.s/handle_cgroup_offline") +void BPF_PROG(handle_cgroup_offline, u64 cgroup_id) +{ + deleted_cgroup_id = cgroup_id; +} + +SEC(".struct_ops.link") +struct bpf_psi_ops test_bpf_psi = { + .init = (void *)psi_init, + .handle_psi_event = (void *)handle_psi_event, + .handle_cgroup_online = (void *)handle_cgroup_online, + .handle_cgroup_offline = (void *)handle_cgroup_offline, +};
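
----
Usage sketch (illustrative only, not part of the patch): attaching a
bpf_oom_ops struct_ops map to a particular cgroup from userspace via the
new bpf_map__attach_struct_ops_opts() API. The object file name
("oom_policy.bpf.o"), the map name ("bpf_oom_policy") and the cgroup path
are hypothetical; error cleanup is trimmed for brevity.

	#include <fcntl.h>
	#include <bpf/libbpf.h>

	/* Attach a bpf_oom_ops policy to the cgroup at cgroup_path. */
	static struct bpf_link *attach_oom_policy(const char *cgroup_path)
	{
		DECLARE_LIBBPF_OPTS(bpf_struct_ops_opts, opts);
		struct bpf_object *obj;
		struct bpf_map *map;
		int cgroup_fd;

		/* hypothetical BPF object implementing struct bpf_oom_ops */
		obj = bpf_object__open_file("oom_policy.bpf.o", NULL);
		if (!obj || bpf_object__load(obj))
			return NULL;

		/* hypothetical SEC(".struct_ops.link") map name */
		map = bpf_object__find_map_by_name(obj, "bpf_oom_policy");
		if (!map)
			return NULL;

		cgroup_fd = open(cgroup_path, O_RDONLY | O_DIRECTORY);
		if (cgroup_fd < 0)
			return NULL;

		/* relative_fd == 0 would attach the policy system-wide instead */
		opts.relative_fd = cgroup_fd;
		return bpf_map__attach_struct_ops_opts(map, &opts);
	}

As with other bpf links, the returned link presumably has to be kept alive
(or pinned) for the policy to stay attached.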