Commit b9f1706
scx_mitosis: move tick() to timer
If cpu 0 is not running any scx_mitosis tasks, we can miss configuration updates. This was due to a misunderstanding of how tick() works: tick() only fires on a CPU while that CPU is running an scx task, so gating the update logic on cpu 0 meant it could stall indefinitely. The intended behavior is just to run this logic periodically on some core, and a bpf_timer is the appropriate way to do that, so this commit makes that change. There's an additional small fix to the freeing logic, ensuring that cpus are given back to the root cgroup.

Signed-off-by: Dan Schatzberg <[email protected]>
1 parent 504facd commit b9f1706
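For context, the fix uses the standard self-re-arming bpf_timer pattern: a timer embedded in a single-entry array map, initialized and armed once from a sleepable init path, with a callback that re-arms itself before doing the periodic work. Below is a minimal sketch of that pattern (generic BPF C, not scx_mitosis code; timer_map, timer_cb, start_periodic_timer, and the 100 ms interval are illustrative):

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>

/* clockid value from include/uapi/linux/time.h; the macro is typically
 * not visible to vmlinux.h-based BPF code, hence the local define. */
#define CLOCK_BOOTTIME 7
#define INTERVAL_NS 100000000ULL /* 100 ms */

/* A bpf_timer must live in map memory, so wrap it in a map value type. */
struct timer_wrapper {
        struct bpf_timer timer;
};

struct {
        __uint(type, BPF_MAP_TYPE_ARRAY);
        __uint(max_entries, 1);
        __type(key, u32);
        __type(value, struct timer_wrapper);
} timer_map SEC(".maps");

static int timer_cb(void *map, int *key, struct bpf_timer *timer)
{
        /* Re-arm first, so the timer keeps firing even if the work bails early. */
        bpf_timer_start(timer, INTERVAL_NS, 0);

        /* ... periodic work goes here; it runs on whichever CPU the timer fires ... */
        return 0;
}

/* One-time setup, e.g. from a sleepable init path such as ops.init(). */
static int start_periodic_timer(void)
{
        u32 key = 0;
        struct bpf_timer *timer = bpf_map_lookup_elem(&timer_map, &key);

        if (!timer)
                return -1; /* timer slot must exist in the array map */
        bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME);
        bpf_timer_set_callback(timer, timer_cb);
        return bpf_timer_start(timer, INTERVAL_NS, 0);
}

Re-arming at the top of the callback is what lets every early exit in the real update_timer_cb below simply return 0 without losing the periodic cadence.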

2 files changed: +60 -41

scheds/rust/scx_mitosis/src/bpf/intf.h

Lines changed: 2 additions & 0 deletions
@@ -25,6 +25,8 @@ enum consts {
 	MAX_CPUS_U8 = MAX_CPUS / 8,
 	MAX_CELLS = 16,
 	USAGE_HALF_LIFE = 100000000, /* 100ms */
+	TIMER_INTERVAL_NS = 100000000, /* 100 ms */
+	CLOCK_BOOTTIME = 7,
 
 	PCPU_BASE = 0x80000000,
 	MAX_CG_DEPTH = 256,
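(TIMER_INTERVAL_NS matches USAGE_HALF_LIFE at 100 ms. CLOCK_BOOTTIME is the clockid value from include/uapi/linux/time.h, presumably defined locally because the uapi macro is not available to the vmlinux.h-based BPF code.)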

scheds/rust/scx_mitosis/src/bpf/mitosis.bpf.c

Lines changed: 58 additions & 41 deletions
@@ -43,6 +43,17 @@ const volatile u64 root_cgid = 1;
 u32 configuration_seq;
 u32 applied_configuration_seq;
 
+struct update_timer {
+	struct bpf_timer timer;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, u32);
+	__type(value, struct update_timer);
+} update_timer SEC(".maps") __weak;
+
 private(all_cpumask) struct bpf_cpumask __kptr *all_cpumask;
 private(root_cgrp) struct cgroup __kptr *root_cgrp;
 
@@ -756,56 +767,51 @@ static inline int get_cgroup_cpumask(struct cgroup *cgrp,
  * cgroup hierarchy.
  */
 u32 level_cells[MAX_CG_DEPTH];
-int running;
 
 /*
  * On tick, we identify new cells and apply CPU assignment
  */
-void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
+static int update_timer_cb(void *map, int *key, struct bpf_timer *timer)
 {
-	/*
-	 * We serialize tick() on core 0 and ensure only one tick running at a time
-	 * to ensure this can only happen once.
-	 */
-	if (bpf_get_smp_processor_id())
-		return;
+	int ret;
+	if ((ret = bpf_timer_start(timer, TIMER_INTERVAL_NS, 0))) {
+		scx_bpf_error("Failed to arm update timer: %d", ret);
+		return 0;
+	}
 
 	u32 local_configuration_seq = READ_ONCE(configuration_seq);
 	if (local_configuration_seq == READ_ONCE(applied_configuration_seq))
-		return;
-
-	int zero = 0;
-	if (!__atomic_compare_exchange_n(&running, &zero, 1, false,
-					 __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST))
-		return;
+		return 0;
 
 	DECLARE_CPUMASK_ENTRY(entry) = allocate_cpumask_entry();
 	if (!entry)
-		return;
+		return 0;
 
 	/* Get the root cell (cell 0) and its cpumask */
+	int zero = 0;
 	struct cell_cpumask_wrapper *root_cell_cpumaskw;
 	if (!(root_cell_cpumaskw =
		      bpf_map_lookup_elem(&cell_cpumasks, &zero))) {
 		scx_bpf_error("Failed to find root cell cpumask");
-		return;
+		return 0;
 	}
 
 	struct bpf_cpumask *root_bpf_cpumask;
 	root_bpf_cpumask =
 		bpf_kptr_xchg(&root_cell_cpumaskw->tmp_cpumask, NULL);
 	if (!root_bpf_cpumask) {
 		scx_bpf_error("tmp_cpumask should never be null");
-		return;
+		return 0;
 	}
 	if (!root_cell_cpumaskw->cpumask) {
 		scx_bpf_error("root cpumasks should never be null");
 		goto out;
 	}
 
+	bpf_rcu_read_lock();
 	if (!all_cpumask) {
 		scx_bpf_error("NULL all_cpumask");
-		goto out;
+		goto out_rcu_unlock;
 	}
 
 	/*
@@ -819,25 +825,24 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
 
 	if (!root_cgrp) {
 		scx_bpf_error("root_cgrp should not be null");
-		goto out;
+		goto out_rcu_unlock;
 	}
 
 	struct cgrp_ctx *root_cgrp_ctx;
 	if (!(root_cgrp_ctx = lookup_cgrp_ctx(root_cgrp)))
-		goto out;
+		goto out_rcu_unlock;
 
 	if (!root_cgrp) {
 		scx_bpf_error("root_cgrp should not be null");
-		goto out;
+		goto out_rcu_unlock;
 	}
 
 	if (!(root_cgrp_ref = bpf_cgroup_acquire(root_cgrp))) {
 		scx_bpf_error("Failed to acquire reference to root_cgrp");
-		goto out;
+		goto out_rcu_unlock;
 	}
 	root_css = &root_cgrp_ref->self;
 
-	bpf_rcu_read_lock();
 	/*
 	 * Iterate over all cgroups, check if any have a cpumask and populate them
 	 * as a separate cell.
@@ -867,7 +872,7 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
 			scx_bpf_error(
 				"Cgroup hierarchy is too deep: %d",
 				level);
-			goto out_rcu_unlock;
+			goto out_root_cgrp;
 		}
 		/*
 		 * This is a janky way of getting the parent cell, ideally we'd
@@ -889,7 +894,7 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
 			}
 			continue;
 		} else if (rc < 0)
-			goto out_rcu_unlock;
+			goto out_root_cgrp;
 
 		/*
 		 * cgroup has a cpumask, allocate a new cell if needed, and assign cpus
@@ -898,7 +903,7 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
 		if (!cgrp_ctx->cell_owner) {
 			cell_idx = allocate_cell();
 			if (cell_idx < 0)
-				goto out_rcu_unlock;
+				goto out_root_cgrp;
 			cgrp_ctx->cell_owner = true;
 		}
 
@@ -907,14 +912,14 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
			      bpf_map_lookup_elem(&cell_cpumasks, &cell_idx))) {
 			scx_bpf_error("Failed to find cell cpumask: %d",
 				      cell_idx);
-			goto out_rcu_unlock;
+			goto out_root_cgrp;
 		}
 
 		struct bpf_cpumask *bpf_cpumask;
 		bpf_cpumask = bpf_kptr_xchg(&cell_cpumaskw->tmp_cpumask, NULL);
 		if (!bpf_cpumask) {
 			scx_bpf_error("tmp_cpumask should never be null");
-			goto out_rcu_unlock;
+			goto out_root_cgrp;
 		}
 		bpf_cpumask_copy(bpf_cpumask,
 				 (const struct cpumask *)&entry->cpumask);
@@ -927,7 +932,7 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
 			struct cpu_ctx *cpu_ctx;
 			if (!(cpu_ctx = lookup_cpu_ctx(cpu_idx))) {
 				bpf_cpumask_release(bpf_cpumask);
-				goto out_rcu_unlock;
+				goto out_root_cgrp;
 			}
 			cpu_ctx->cell = cell_idx;
 			bpf_cpumask_clear_cpu(cpu_idx,
@@ -938,15 +943,15 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
 			bpf_kptr_xchg(&cell_cpumaskw->cpumask, bpf_cpumask);
 		if (!bpf_cpumask) {
 			scx_bpf_error("cpumask should never be null");
-			goto out_rcu_unlock;
+			goto out_root_cgrp;
 		}
 
 		bpf_cpumask =
 			bpf_kptr_xchg(&cell_cpumaskw->tmp_cpumask, bpf_cpumask);
 		if (bpf_cpumask) {
 			scx_bpf_error("tmp_cpumask should be null");
 			bpf_cpumask_release(bpf_cpumask);
-			goto out_rcu_unlock;
+			goto out_root_cgrp;
 		}
 
 		barrier();
@@ -955,11 +960,10 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
 		if (level <= 0 || level >= MAX_CG_DEPTH) {
 			scx_bpf_error("Cgroup hierarchy is too deep: %d",
 				      level);
-			goto out_rcu_unlock;
+			goto out_root_cgrp;
 		}
 		level_cells[level] = cell_idx;
 	}
-	bpf_rcu_read_unlock();
 
 	/*
 	 * assign root cell cpus that are left over
@@ -968,21 +972,21 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
 	bpf_for(cpu_idx, 0, nr_possible_cpus)
 	{
 		if (bpf_cpumask_test_cpu(
-			    cpu_idx, (const struct cpumask *)&entry->cpumask)) {
+			    cpu_idx, (const struct cpumask *)root_bpf_cpumask)) {
 			struct cpu_ctx *cpu_ctx;
 			if (!(cpu_ctx = lookup_cpu_ctx(cpu_idx)))
 				goto out_root_cgrp;
 			cpu_ctx->cell = 0;
-			bpf_cpumask_clear_cpu(cpu_idx, root_bpf_cpumask);
 		}
 	}
 
 	root_bpf_cpumask =
 		bpf_kptr_xchg(&root_cell_cpumaskw->cpumask, root_bpf_cpumask);
 	if (!root_bpf_cpumask) {
 		scx_bpf_error("root cpumask should never be null");
+		bpf_rcu_read_unlock();
 		bpf_cgroup_release(root_cgrp_ref);
-		return;
+		return 0;
 	}
 
 	root_bpf_cpumask = bpf_kptr_xchg(&root_cell_cpumaskw->tmp_cpumask,
@@ -994,16 +998,17 @@ void BPF_STRUCT_OPS(mitosis_tick, struct task_struct *p_run)
 
 	barrier();
 	WRITE_ONCE(applied_configuration_seq, local_configuration_seq);
-	WRITE_ONCE(running, 0);
 
-	bpf_cgroup_release(root_cgrp_ref);
-	return;
-out_rcu_unlock:
 	bpf_rcu_read_unlock();
+	bpf_cgroup_release(root_cgrp_ref);
+	return 0;
 out_root_cgrp:
 	bpf_cgroup_release(root_cgrp_ref);
+out_rcu_unlock:
+	bpf_rcu_read_unlock();
 out:
 	bpf_cpumask_release(root_bpf_cpumask);
+	return 0;
 }
 
 void BPF_STRUCT_OPS(mitosis_running, struct task_struct *p)
@@ -1299,6 +1304,19 @@ s32 BPF_STRUCT_OPS_SLEEPABLE(mitosis_init)
 	u32 i;
 	s32 ret;
 
+	u32 key = 0;
+	struct bpf_timer *timer = bpf_map_lookup_elem(&update_timer, &key);
+	if (!timer) {
+		scx_bpf_error("Failed to lookup update timer");
+		return -ESRCH;
+	}
+	bpf_timer_init(timer, &update_timer, CLOCK_BOOTTIME);
+	bpf_timer_set_callback(timer, update_timer_cb);
+	if ((ret = bpf_timer_start(timer, TIMER_INTERVAL_NS, 0))) {
+		scx_bpf_error("Failed to arm update timer");
+		return ret;
+	}
+
 	struct cgroup *rootcg;
 	if (!(rootcg = bpf_cgroup_from_id(root_cgid)))
 		return -ENOENT;
@@ -1387,7 +1405,6 @@ struct sched_ext_ops mitosis = {
 	.select_cpu = (void *)mitosis_select_cpu,
 	.enqueue = (void *)mitosis_enqueue,
 	.dispatch = (void *)mitosis_dispatch,
-	.tick = (void *)mitosis_tick,
 	.running = (void *)mitosis_running,
 	.stopping = (void *)mitosis_stopping,
 	.set_cpumask = (void *)mitosis_set_cpumask,
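Two details of the reworked error handling are worth noting. First, the cleanup labels now unwind in reverse order of acquisition: out_root_cgrp drops the cgroup reference, then falls through to out_rcu_unlock and finally to out, which releases the cpumask. Second, the leftover-CPU pass now tests root_bpf_cpumask directly (rather than entry->cpumask) and no longer clears bits out of it, so CPUs not claimed by any cell remain in the root cell's mask; this is the freeing-logic fix from the commit message that gives those cpus back to the root cgroup.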
