forked from ceph/cbt
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathradosbenchparser.py
executable file
·224 lines (184 loc) · 7.49 KB
/
radosbenchparser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/python
import argparse
import math
def parse_args(argv=None):
    """Parse the command line of the rados bench output parser.

    Args:
        argv: optional list of argument strings. When None (the default)
            argparse falls back to sys.argv[1:], so existing callers that
            pass nothing behave as before.

    Returns:
        argparse.Namespace carrying ``interval``, ``divisor``, ``full``,
        ``allstats``, ``average``, ``sum``, ``type`` and ``FILE`` (a list
        holding one or more paths).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--interval', required=False, type=int, default=1000,
                        help='interval of time in seconds.')
    parser.add_argument('-d', '--divisor', required=False, type=int, default=1,
                        help='divide the results by this value.')
    parser.add_argument('-f', '--full', dest='full', action='store_true', default=False,
                        help='print full output.')
    parser.add_argument('-A', '--all', dest='allstats', action='store_true', default=False,
                        help='print all stats for each interval.')
    parser.add_argument('-a', '--average', dest='average', action='store_true', default=False,
                        help='print the average for each interval.')
    parser.add_argument('-s', '--sum', dest='sum', action='store_true', default=False,
                        help='print the sum for each interval.')
    parser.add_argument('-t', '--type', dest='type', default='curmb',
                        choices=['curops', 'started', 'finished', 'avgmb', 'curmb', 'avglat'],
                        help='select the rados bench output data to work with (default: %(default)s)')
    # The help text used to say "collectl log output files", a leftover from
    # another tool; this script parses rados bench output.
    parser.add_argument("FILE", help="rados bench output files to parse", nargs="+")
    return parser.parse_args(argv)
class Interval():
    """A time window ``start``..``end`` evaluated over a list of time series.

    Each series must offer ``get_samples(start, end)`` and a ``last``
    attribute whose ``end`` is the end time of its final sample; each sample
    must offer ``value`` and ``get_weight(start, end)``.
    """

    def __init__(self, ctx, start, end, series):
        self.ctx = ctx
        self.start = start
        self.end = end
        self.series = series

    def get_samples(self):
        """Return one flat list with the samples of every series in the window."""
        return [sample
                for ts in self.series
                for sample in ts.get_samples(self.start, self.end)]

    def get_min(self):
        """Return the smallest sample value (ValueError if there are no samples)."""
        return min(sample.value for sample in self.get_samples())

    def get_max(self):
        """Return the largest sample value (ValueError if there are no samples)."""
        return max(sample.value for sample in self.get_samples())

    def get_wa(self, samples, weight):
        """Return sum(value * weight-in-window) over ``samples``, divided by ``weight``."""
        total = 0
        for sample in samples:
            total += sample.value * sample.get_weight(self.start, self.end)
        return total / weight

    def get_wa_list(self):
        """Return the weighted average of each series, in series order."""
        return [self.get_wa(ts.get_samples(self.start, self.end), 1)
                for ts in self.series]

    def get_wa_sum(self):
        """Return the sum of the per-series weighted averages."""
        return sum(self.get_wa_list())

    def get_wa_avg(self):
        """Return the mean of the per-series weighted averages."""
        return self.get_wa_sum() / len(self.series)

    def get_wp(self, p):
        """Return the weighted percentile ``p`` (a fraction, e.g. 0.95).

        Samples are ordered by value and their weights accumulated until the
        boundary ``len(self.series) * p`` is reached; the result blends the
        last two samples consumed.

        Raises:
            ValueError: if the window contains no samples.
        """
        samples = self.get_samples()
        if not samples:
            raise ValueError("no samples between %s and %s" % (self.start, self.end))
        samples.sort(key=lambda x: x.value)
        boundary = len(self.series) * p
        weight = 0
        last = None
        cur = None
        # first find the two samples that straddle the percentile based on weight
        for sample in samples:
            if weight >= boundary:
                break
            weight += sample.get_weight(self.start, self.end)
            last = cur
            cur = sample
        if cur is None:
            # boundary <= 0: nothing was consumed, the lowest value is the answer
            return samples[0].value
        if last is None:
            # Only one sample was consumed, so there is nothing to blend with.
            # This used to fail with AttributeError on None.
            return cur.value
        # next find weights based inversely on the distance to the percentile boundary
        # NOTE(review): ld is measured from len(self.series), not from
        # len(self.series) * p; kept as-is, confirm this is intended.
        ld = len(self.series) - weight + cur.get_weight(self.start, self.end)
        cd = weight - boundary
        lw = 1 - (ld / (ld + cd))
        cw = 1 - (cd / (ld + cd))
        return last.value * lw + cur.value * cw

    @staticmethod
    def get_ftime(series):
        """Return the smallest ``last.end`` among ``series`` (0 for an empty list).

        0 doubles as the "not set yet" marker, so every series needs a
        non-None ``last``.
        """
        ftime = 0
        for ts in series:
            if ftime == 0 or ts.last.end < ftime:
                ftime = ts.last.end
        return ftime

    @staticmethod
    def get_intervals(series, itime, ctx=None):
        """Split 0..get_ftime(series) into consecutive windows of ``itime``.

        The final window is clipped to the finish time.

        Args:
            series: list of time series objects.
            itime: window length; must be positive when there is data.
            ctx: context stored on each Interval. Defaults to the ``ctx`` of
                the first series, so no module-level global is required.

        Raises:
            ValueError: if ``itime`` is not positive (the loop below would
                never terminate).
        """
        ftime = Interval.get_ftime(series)
        if ftime > 0 and itime <= 0:
            raise ValueError("interval must be positive, got %r" % (itime,))
        if ctx is None and series:
            ctx = getattr(series[0], 'ctx', None)
        intervals = []
        start = 0
        end = itime
        while start < ftime:
            end = min(end, ftime)
            intervals.append(Interval(ctx, start, end, series))
            start += itime
            end += itime
        return intervals
class TimeSeries():
    """The samples of one rados bench output file for the column ``ctx.type``.

    Attributes:
        ctx: parsed options; ``ctx.type`` names the column to extract.
        last: the sample with the greatest end time, or None if none was read.
        samples: every sample, in file order.
    """

    def __init__(self, ctx, fn):
        self.ctx = ctx
        self.last = None
        self.samples = []
        self.read_data(fn)

    def read_data(self, fn):
        """Read file ``fn`` and add one sample per data line.

        A sample spans from the previous line's time (0 for the first line)
        to the current line's time.
        """
        names = ('time', 'curops', 'started', 'finished', 'avgmb', 'curmb', 'lastlat', 'avglat')
        p_time = 0
        # ``with`` closes the file even if a line fails to parse
        with open(fn, 'r') as f:
            for line in f:
                # First cleanup whitespace
                line = ' '.join(line.split())
                # Throw away blank lines (indexing them used to raise
                # IndexError) and lines that don't start with a digit
                if not line or not line[0].isdigit():
                    continue
                # Throw away the periodic summary lines
                if 'min lat' in line:
                    continue
                # Throw away the first second with no data
                if line[0] == '0':
                    continue
                valuesdict = dict(zip(names, line.split()))
                # self.ctx rather than a module-level ``ctx`` so the class
                # also works when the file is imported
                value = float(valuesdict[self.ctx.type])
                time = int(valuesdict['time'])
                self.add_sample(p_time, time, value)
                p_time = time

    def add_sample(self, start, end, value):
        """Append a Sample for start..end and track the latest one in ``last``."""
        sample = Sample(self.ctx, start, end, value)
        if self.last is None or self.last.end < end:
            self.last = sample
        self.samples.append(sample)

    def get_samples(self, start, end):
        """Return the samples whose weight within start..end is greater than 0."""
        return [s for s in self.samples if s.get_weight(start, end) > 0]
class Sample():
    """A single measurement ``value`` that applies to the span start..end."""

    def __init__(self, ctx, start, end, value):
        self.ctx, self.value = ctx, value
        self.start, self.end = start, end

    def get_weight(self, start, end):
        """Return the fraction of the window start..end covered by this sample.

        Returns the integer 0 when the window lies entirely before or after
        the sample, otherwise a float. The division is by ``end - start``, so
        an overlapping window with start == end raises ZeroDivisionError.
        """
        overlaps = self.start <= end and start <= self.end
        if not overlaps:
            return 0
        # clip the window to the sample's own span
        lo = max(start, self.start)
        hi = min(end, self.end)
        return float(hi - lo) / (end - start)
class Printer():
    """Write per-interval statistics of a list of time series to stdout.

    Every method reads its options from ``self.ctx`` and its data from
    ``self.series``; no module-level globals are consulted.
    """

    def __init__(self, ctx, series):
        self.ctx = ctx
        self.series = series
        self.ffmt = "%0.3f"

    def format(self, data):
        """Return numeric ``data`` divided by ``ctx.divisor`` as a "%0.3f" string.

        Anything that is not an int or float is returned unchanged.
        """
        if isinstance(data, (float, int)):
            # float() keeps integer input from being floor-divided on Python 2
            return self.ffmt % (float(data) / self.ctx.divisor)
        return data

    def _intervals(self):
        """Return the intervals of length ``ctx.interval`` over the series."""
        return Interval.get_intervals(self.series, self.ctx.interval)

    def print_full(self):
        """Print, per interval, the end time and every series' weighted average."""
        for i in self._intervals():
            value = ', '.join(self.format(j) for j in i.get_wa_list())
            # A single parenthesised argument prints the same on Python 2 and 3.
            print("%s, %s" % (self.ffmt % i.end, value))

    def print_sums(self):
        """Print, per interval, the end time and the sum of the weighted averages."""
        for i in self._intervals():
            print("%s, %s" % (self.ffmt % i.end, self.format(i.get_wa_sum())))

    def print_averages(self):
        """Print, per interval, the end time and the mean of the weighted averages."""
        for i in self._intervals():
            print("%s, %s" % (self.ffmt % i.end, self.format(i.get_wa_avg())))

    def print_all_stats(self):
        """Print a header, then count/min/avg/percentiles/max for each interval."""
        print('end-time, samples, min, avg, median, 90%, 95%, 99%, max')
        for i in self._intervals():
            print(', '.join([
                self.ffmt % i.end,
                "%d" % len(i.get_samples()),
                self.format(i.get_min()),
                self.format(i.get_wa_avg()),
                self.format(i.get_wp(0.5)),
                self.format(i.get_wp(0.9)),
                self.format(i.get_wp(0.95)),
                self.format(i.get_wp(0.99)),
                self.format(i.get_max())
            ]))

    def print_default(self):
        """Print the weighted-average sum of one interval spanning 0..finish time."""
        ftime = Interval.get_ftime(self.series)
        interval = Interval.get_intervals(self.series, ftime)[0]
        print(self.format(interval.get_wa_sum()))
if __name__ == '__main__':
    # `ctx` and `series` stay module-level names; code elsewhere in the file
    # may look them up as globals.
    ctx = parse_args()
    series = [TimeSeries(ctx, fn) for fn in ctx.FILE]
    printer = Printer(ctx, series)

    # The first flag that is set wins, in this order; with none set the
    # default report is printed.
    modes = (
        (ctx.sum, printer.print_sums),
        (ctx.average, printer.print_averages),
        (ctx.full, printer.print_full),
        (ctx.allstats, printer.print_all_stats),
    )
    for selected, emit in modes:
        if selected:
            emit()
            break
    else:
        printer.print_default()