-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathlima-memtester-genchart
executable file
·289 lines (256 loc) · 12.8 KB
/
lima-memtester-genchart
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
#!/usr/bin/env ruby
#
# Copyright © 2015 Siarhei Siamashka <[email protected]>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice (including the next
# paragraph) shall be included in all copies or substantial portions of the
# Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.
###############################################################################
# Assuming Gaussian distribution of the DRAM clock speeds where the #
# lima-memtester test starts to detect reliability problems due to variances #
# between different board samples (but the same board model!), predict how #
# many boards of the same type are expected to pass/fail the test at each #
# DRAM clock speed. #
# #
# If a "good" DRAM clock frequency (passes the test) is 672 MHz and a "bad" #
# clock frequency is 696 MHz, then we are assuming that the cross over point #
# is somewhere in the middle. So we want to pass the "(672 + 696) / 2" value #
# into this script. In order to make this easier, there is the <freq_delta> #
# command line argument, which can be used to add a 12 MHz shift to the #
# "good" value or subtract 12 MHz from the "bad" value. It is a convenience #
# feature to avoid maual adjustment of the DRAM clock frequency lists #
# used as input for this script. #
###############################################################################
require 'tempfile'
if ARGV.size < 1
printf("Usage: #{$PROGRAM_NAME} <freq_delta> <png_file>\n")
printf("Where:\n")
printf(" freq_delta - adjustment for the frequency.\n")
printf(" png_file - resulting PNG file with the plot.\n")
printf("\n")
printf("This script reads the list of space separated DRAM clock\n")
printf("frequency values from STDIN, adds <freq_delta> to them and\n")
printf("generates a gnuplot chart. Several groups of DRAM clock\n")
printf("frequencies can be also comma separated and shown on the\n")
printf("same chart using different colors (blue, green, red, pink).\n")
printf("\n")
printf("Example:\n")
printf(" echo '672 696, 648 648 672' | lima-memtester-genchart 12 out.png\n")
printf("\n")
printf("In this example the cumulative distribution function for the data\n")
printf("set [684 708] will be shown as a blue line and the cumulative\n")
printf("distribution function for the set [660 660 684] will be shown\n")
printf("as a green line in the 'out.png' output file.\n")
exit(1)
end
freq_delta = ARGV[0].to_f
$binning_interval = (freq_delta.abs * 2).to_i
###############################################################################
# Parse html page from mediawiki #
###############################################################################
def html_parse(data)
out = []
data.scan(/(<table\s+class=\"wikitable\">)(.*?)(<\/table>)/m) do |_, tbl, _|
result1 = []
tbl.scan(/(<tr>)(.*?)(<\/tr>)/m) do |_, row, _|
tmp = []
row.scan(/(<th>)(.*?)(<\/th>)/m) { |_, x, _| tmp.push(x.strip) }
row.scan(/(<td>)(.*?)(<\/td>)/m) { |_, x, _| tmp.push(x.strip) }
result1.push(tmp)
end
if result1.size > 1 && ((result1[0][2] =~ /lima\-memtester passes/) &&
(result1[0][3] =~ /lima\-memtester fails/))
good_table = true
# Select only rows with exactly 5 columns
result2 = result1.select { |x| x.size == 5 }
# The first sanity check
good_table = false if result2.size != result1.size
# Remove the table header
result2.shift if good_table
# Extract the DRAM clock frequencies (the cross over point)
result3 = result2.map do |x|
lo = x[2].gsub(/<[\/]?b>/, "").to_i
hi = x[3].gsub(/<[\/]?b>/, "").to_i
$binning_interval = (hi - lo) if $binning_interval == 0
if lo == 0 || hi == 0 || lo >= hi || (hi - lo) != $binning_interval
printf("Problems parsing row %s\n", x)
good_table = false
end
(lo + hi).to_f / 2
end
out.push(result3) if good_table
end
end
return out
end
###############################################################################
# Chebyshev's inequality for lower semivariance #
###############################################################################
# https://en.wikipedia.org/wiki/Chebyshev%27s_inequality#Semivariances
def chebyshev_lower_semivariance_p(samples, val)
sample_mean = samples.reduce(:+) / samples.size.to_f
lower_semivariance = samples.select {|x| x < sample_mean }.reduce(0) {|sum, x|
sum + (x - sample_mean) ** 2} / (samples.size - 1)
a = (sample_mean - val) / Math.sqrt(lower_semivariance)
if val < sample_mean
[1.0 / (a * a), 1.0].min
else
1.0
end
end
###############################################################################
# Multinomial test (by calling an external R programming language interpreter #
# with XNomial library). If there are two sets of DRAM clock frequencies #
# provided then use the first set for training and the second set for testing #
###############################################################################
def multinomial_test(multinomial_data)
if multinomial_data.size >= 1
cutoff_prob = 0.0001
train_idx = 0
test_idx = (multinomial_data.size >= 2 ? 1 : 0)
tmp = {}
multinomial_data[train_idx][:prob].each do |freq, prob|
tmp[freq] = [prob, 0] if prob >= cutoff_prob
end
multinomial_data[test_idx][:observed].each do |freq, observed|
prob = (multinomial_data[train_idx][:prob][freq] || 0.0)
next if observed == 0 && prob < cutoff_prob
tmp[freq] = [prob, observed]
end
tmp = tmp.sort
# https://cran.r-project.org/web/packages/XNomial/vignettes/XNomial.html
r_input = sprintf("library(XNomial)\n\n")
r_input += sprintf("prob <- c(%s)\n", tmp.map {|x| x[1][0]}.join(", "))
r_input += sprintf("observed <- c(%s)\n", tmp.map {|x| x[1][1]}.join(", "))
r_input += sprintf("out <- xmulti(observed, prob, detail=3)\n")
r_output = `echo '#{r_input}' | R --vanilla 2>/dev/null`
if r_output
x_output = ""
r_output.each_line do |l|
x_output += " " + l.strip + "\n" if l =~ /^P value/
end
return x_output if x_output != ""
end
end
end
###############################################################################
# Cumulative distribution function #
###############################################################################
# https://en.wikipedia.org/wiki/Normal_distribution#Cumulative_distribution_function
def cumulative_distribution_function(x, sample_mean, sample_stddev)
return 0.5 * (1 + Math.erf((x - sample_mean) / (sample_stddev * Math.sqrt(2))))
end
###############################################################################
# Analyze data, calculate probabilities #
###############################################################################
if ARGV[0] =~ /^http/
require 'open-uri'
html_data = open(ARGV[0], &:read)
data_sets = html_parse(html_data)
else
data_sets = STDIN.read.split(",").map {|s| s.split.map {|x| x.to_f + freq_delta }}
end
printf("Frequency shift : %d MHz\n", freq_delta.to_i)
printf("Binning interval : %d MHz\n", $binning_interval)
multinomial_data = []
data_sets.each do |samples|
printf("Processing data : %s\n", samples.map {|x| x.to_i })
abort "As least two DRAM clock speed samples are needed" if samples.size < 2
sample_mean = samples.reduce(:+) / samples.size.to_f
sample_stddev = Math.sqrt(samples.reduce(0) {|sum, x| sum + (x - sample_mean) ** 2} /
(samples.size - 1))
printf("Sample mean : %f\n", sample_mean)
printf("Sample stddev : %f\n", sample_stddev)
# Analyze the frequencies range plus/minus six standard deviations from the mean
mid_freq = sample_mean.to_i / $binning_interval * $binning_interval
min_freq = mid_freq - (sample_stddev * 6).to_i / $binning_interval * $binning_interval
max_freq = mid_freq + (sample_stddev * 6).to_i / $binning_interval * $binning_interval
tmp = {}
tmp[:mean] = sample_mean
tmp[:stddev] = sample_stddev
tmp[:observed] = {}
tmp[:prob] = {}
(min_freq..max_freq).step($binning_interval) do |freq|
cdf2 = cumulative_distribution_function(freq + $binning_interval,
sample_mean, sample_stddev)
cdf1 = cumulative_distribution_function(freq, sample_mean, sample_stddev)
tmp[:prob][freq] = cdf2 - cdf1
tmp[:observed][freq] = samples.reduce(0) do |sum, x|
sum += ((x >= freq && x < freq + $binning_interval) ? 1 : 0)
end
end
multinomial_data.push(tmp)
multinomial_results = multinomial_test([multinomial_data[-1]])
printf("\n")
printf("{| class=\"wikitable\"\n")
printf("! rowspan=2 | DRAM clock speed\n")
printf("! colspan=2 | Percentage of boards failing the lima-memtester test\n")
printf("! rowspan=2 | Theoretical pessimistic upper bound of the failure " +
"percentage using Chebyshev's inequality for lower semivariance <ref name=\"chebyshev\">\n")
printf("If nothing is known about the distribution of samples, then at least\n")
printf("[https://en.wikipedia.org/wiki/Chebyshev's_inequality Chebyshev's inequality]\n")
printf("can be used to get a rough idea about the probabilities of encountering reliability\n")
printf("problems at different DRAM clock speeds. But this method is <u>very conservative</u>\n")
printf("and substantially overestimates probabilities (being too generic has its price).\n")
printf("</ref>\n")
printf("! rowspan=2 | Histogram\n")
printf("|-\n")
printf("! Experimental results\n")
printf("! Theoretical prediction (assuming Gaussian distribution) <ref name=\"gaussian\">\n")
printf("We can assume that the [https://en.wikipedia.org/wiki/Normal_distribution Gaussian distribution]\n")
printf("is a good approximation for our experimental data, calculate theoretical probabilities and do an\n")
printf("[http://www.biostathandbook.com/exactgof.html exact test of goodness-of-fit]\n")
printf("to see if the experimental data does not contradict with the theory.\n")
printf("There is a nice [https://cran.r-project.org/web/packages/XNomial/vignettes/XNomial.html XNomial]\n")
printf("library for R, which can do the job")
if multinomial_results
printf(":<pre>\n#{multinomial_results}</pre>\n")
printf("If the [https://en.wikipedia.org/wiki/P-value p-values] listed above happen to be too low\n")
printf("(less than 0.05) and reject our [https://en.wikipedia.org/wiki/Null_hypothesis null hypothesis]\n")
printf("about having the Gaussian distribution, then Chebyshev's inequality estimates still can be used.\n")
else
printf(" (it was not installed on the system where this report had been generated).\n")
end
printf("</ref>\n")
freq = max_freq
while true
p = cumulative_distribution_function(freq, sample_mean, sample_stddev)
pc = chebyshev_lower_semivariance_p(samples, freq)
break if (p < 0.0001 && pc < 0.01) || freq <= min_freq
freq -= $binning_interval
end
prev_observed = 0
while freq <= max_freq
p = cumulative_distribution_function(freq, sample_mean, sample_stddev)
pc = chebyshev_lower_semivariance_p(samples, freq)
observed = samples.reduce(0) {|sum, x| x <= freq ? sum + 1 : sum }
histogram = "*" * (observed - prev_observed)
prev_observed = observed
color = ""
color = "style=\"color: orange;\"" if p >= 0.01 || pc >= 0.05
color = "style=\"color: red;\"" if observed > 0
printf("|- %s\n| %.0f MHz || %.2f %% (%d/%d) || %.2f %% || %.2f %% || #{histogram}\n",
color,
freq, observed * 100.0 / samples.size, observed, samples.size,
p * 100, pc * 100)
break if p >= 0.99
freq += $binning_interval
end
printf("|}\n")
printf("<references/>\n")
end