Skip to content

Commit be76ea6

Browse files
committed
s390/idle: remove arch_cpu_idle_time() and corresponding code
arch_cpu_idle_time() returns the idle time of any given cpu if it is in idle, or zero if not. All if this is racy and partially incorrect. Time stamps taken with store clock extended and store clock fast from different cpus are compared, while the architecture states that this is nothing which can be relied on (see Principles of Operation; Chapter 4, "Setting and Inspecting the Clock"). A more fundamental problem is that the timestamp when a cpu is leaving idle is taken early in the assembler part of the interrupt handler, and this value is only transferred many cycles later to the cpu's per-cpu idle data structure. This per cpu data structure is read by arch_cpu_idle() to tell for which period of time a remote cpu is idle: if only an idle_enter value is present, the assumed idle time of the cpu is calculated by taking a local timestamp and returning the difference of the local timestamp and the idle_enter value. This is potentially incorrect, since the remote cpu may have already left idle, but the taken timestamp may not have been transferred to the per-cpu data structure. This in turn means that too much idle time may be reported for a cpu, and a subsequent calculation of system idle time may result in a smaller value. Instead of coming up with even more complex code trying to fix this, just remove this code, and only account idle time of a cpu, after idle state is left. Another minor bug is that it is assumed that timestamps are non-zero, which is not necessarily the case for timestamps taken with store clock fast. This however is just a very minor problem, since this can only happen when the epoch increases. Reviewed-by: Sven Schnelle <[email protected]> Signed-off-by: Heiko Carstens <[email protected]>
1 parent a02d584 commit be76ea6

File tree

3 files changed

+11
-74
lines changed

3 files changed

+11
-74
lines changed

arch/s390/include/asm/cputime.h

-4
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@
1616
*/
1717
#define cputime_to_nsecs(cputime) tod_to_ns(cputime)
1818

19-
u64 arch_cpu_idle_time(int cpu);
20-
21-
#define arch_idle_time(cpu) arch_cpu_idle_time(cpu)
22-
2319
void account_idle_time_irq(void);
2420

2521
#endif /* _S390_CPUTIME_H */

arch/s390/include/asm/idle.h

-4
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,12 @@
1010

1111
#include <linux/types.h>
1212
#include <linux/device.h>
13-
#include <linux/seqlock.h>
1413

1514
struct s390_idle_data {
16-
seqcount_t seqcount;
1715
unsigned long idle_count;
1816
unsigned long idle_time;
1917
unsigned long clock_idle_enter;
20-
unsigned long clock_idle_exit;
2118
unsigned long timer_idle_enter;
22-
unsigned long timer_idle_exit;
2319
unsigned long mt_cycles_enter[8];
2420
};
2521

arch/s390/kernel/idle.c

+11-66
Original file line numberDiff line numberDiff line change
@@ -35,24 +35,18 @@ void account_idle_time_irq(void)
3535
this_cpu_add(mt_cycles[i], cycles_new[i] - idle->mt_cycles_enter[i]);
3636
}
3737

38-
idle->clock_idle_exit = S390_lowcore.int_clock;
39-
idle->timer_idle_exit = S390_lowcore.sys_enter_timer;
38+
idle_time = S390_lowcore.int_clock - idle->clock_idle_enter;
4039

4140
S390_lowcore.steal_timer += idle->clock_idle_enter - S390_lowcore.last_update_clock;
42-
S390_lowcore.last_update_clock = idle->clock_idle_exit;
41+
S390_lowcore.last_update_clock = S390_lowcore.int_clock;
4342

4443
S390_lowcore.system_timer += S390_lowcore.last_update_timer - idle->timer_idle_enter;
45-
S390_lowcore.last_update_timer = idle->timer_idle_exit;
44+
S390_lowcore.last_update_timer = S390_lowcore.sys_enter_timer;
4645

4746
/* Account time spent with enabled wait psw loaded as idle time. */
48-
raw_write_seqcount_begin(&idle->seqcount);
49-
idle_time = idle->clock_idle_exit - idle->clock_idle_enter;
50-
idle->clock_idle_enter = 0;
51-
idle->clock_idle_exit = 0;
52-
idle->idle_time += idle_time;
53-
idle->idle_count++;
47+
WRITE_ONCE(idle->idle_time, READ_ONCE(idle->idle_time) + idle_time);
48+
WRITE_ONCE(idle->idle_count, READ_ONCE(idle->idle_count) + 1);
5449
account_idle_time(cputime_to_nsecs(idle_time));
55-
raw_write_seqcount_end(&idle->seqcount);
5650
}
5751

5852
void noinstr arch_cpu_idle(void)
@@ -71,71 +65,22 @@ void noinstr arch_cpu_idle(void)
7165
}
7266

7367
static ssize_t show_idle_count(struct device *dev,
74-
struct device_attribute *attr, char *buf)
68+
struct device_attribute *attr, char *buf)
7569
{
7670
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
77-
unsigned long idle_count;
78-
unsigned int seq;
79-
80-
do {
81-
seq = read_seqcount_begin(&idle->seqcount);
82-
idle_count = READ_ONCE(idle->idle_count);
83-
if (READ_ONCE(idle->clock_idle_enter))
84-
idle_count++;
85-
} while (read_seqcount_retry(&idle->seqcount, seq));
86-
return sprintf(buf, "%lu\n", idle_count);
71+
72+
return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_count));
8773
}
8874
DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
8975

9076
static ssize_t show_idle_time(struct device *dev,
91-
struct device_attribute *attr, char *buf)
77+
struct device_attribute *attr, char *buf)
9278
{
93-
unsigned long now, idle_time, idle_enter, idle_exit, in_idle;
9479
struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
95-
unsigned int seq;
96-
97-
do {
98-
seq = read_seqcount_begin(&idle->seqcount);
99-
idle_time = READ_ONCE(idle->idle_time);
100-
idle_enter = READ_ONCE(idle->clock_idle_enter);
101-
idle_exit = READ_ONCE(idle->clock_idle_exit);
102-
} while (read_seqcount_retry(&idle->seqcount, seq));
103-
in_idle = 0;
104-
now = get_tod_clock();
105-
if (idle_enter) {
106-
if (idle_exit) {
107-
in_idle = idle_exit - idle_enter;
108-
} else if (now > idle_enter) {
109-
in_idle = now - idle_enter;
110-
}
111-
}
112-
idle_time += in_idle;
113-
return sprintf(buf, "%lu\n", idle_time >> 12);
114-
}
115-
DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
11680

117-
u64 arch_cpu_idle_time(int cpu)
118-
{
119-
struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
120-
unsigned long now, idle_enter, idle_exit, in_idle;
121-
unsigned int seq;
122-
123-
do {
124-
seq = read_seqcount_begin(&idle->seqcount);
125-
idle_enter = READ_ONCE(idle->clock_idle_enter);
126-
idle_exit = READ_ONCE(idle->clock_idle_exit);
127-
} while (read_seqcount_retry(&idle->seqcount, seq));
128-
in_idle = 0;
129-
now = get_tod_clock();
130-
if (idle_enter) {
131-
if (idle_exit) {
132-
in_idle = idle_exit - idle_enter;
133-
} else if (now > idle_enter) {
134-
in_idle = now - idle_enter;
135-
}
136-
}
137-
return cputime_to_nsecs(in_idle);
81+
return sysfs_emit(buf, "%lu\n", READ_ONCE(idle->idle_time) >> 12);
13882
}
83+
DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
13984

14085
void arch_cpu_idle_enter(void)
14186
{

0 commit comments

Comments
 (0)