Skip to content

Commit 1ee5520

Browse files
committed
[cocopp] two budget crosses in runtime distributions
1 parent 2de4fac commit 1ee5520

File tree

2 files changed

+122
-40
lines changed

2 files changed

+122
-40
lines changed

code-postprocessing/cocopp/compall/pprldmany.py

+55-40
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,9 @@
5252
save_zoom = False # save zoom into left and right part of the figures
5353
perfprofsamplesize = genericsettings.simulated_runlength_bootstrap_sample_size # number of bootstrap samples drawn for each fct+target in the performance profile
5454
nbperdecade = 1
55-
median_max_evals_marker_format = ['x', 12.5, 1] # [symbol, size, edgewidth]
55+
max_evals_marker_format = ['x', 12.5, 1] # [symbol, size, edgewidth]
56+
max_evals_single_marker_format = ['+', 14, 1] # [symbol, size, edgewidth]
57+
max_evals_percentile = 90
5658
label_fontsize = 15 # was 17
5759
xticks_fontsize = 16
5860
yticks_fontsize = 14
@@ -195,8 +197,7 @@ def beautify():
195197
plt.ylim(-0.0, 1.0)
196198

197199

198-
199-
def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
200+
def plotdata(data, maxval=None, maxevals=None, CrE=0., maxevals2=None, **kwargs):
200201
"""Draw a normalized ECDF. What means normalized?
201202
202203
:param seq data: data set, a 1-D ndarray of runlengths
@@ -206,7 +207,9 @@ def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
206207
:param seq maxevals: if provided, will plot the median of this
207208
sequence as a single cross marker
208209
:param float CrE: Crafting effort the data will be multiplied by
209-
the exponential of this value.
210+
the exponential of this value
211+
:param maxevals2: a single value or values to be plotted as median(maxevals2)
212+
with its own marker (``max_evals_single_marker_format``), in the same color as maxevals
210213
:param kwargs: optional arguments provided to plot function.
211214
212215
"""
@@ -250,33 +253,37 @@ def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
250253
logscale=False, clip_on=False, **kwargs)
251254
# res = plotUnifLogXMarkers(x2, y2, nbperdecade, logscale=False, **kwargs)
252255

253-
if maxevals: # Should cover the case where maxevals is None or empty
254-
x3 = np.median(maxevals) # change it only here
255-
if (x3 <= maxval and
256-
# np.any(x2 <= x3) and # maxval < median(maxevals)
257-
not plt.getp(res[-1], 'label').startswith('best')
258-
): # TODO: HACK for not considering a "best" algorithm line
259-
# Setting y3
260-
if n == 0:
261-
y3 = 0
262-
else:
263-
try:
264-
y3 = y2[x2 <= x3][-1] # find right y-value for x3==median(maxevals)
265-
except IndexError: # median(maxevals) is smaller than any data, can only happen because of CrE?
266-
y3 = y2[0]
267-
h = plt.plot((x3,), (y3,),
268-
marker=median_max_evals_marker_format[0],
269-
markersize=median_max_evals_marker_format[1] * size_correction_from_n_foreground**0.85,
270-
markeredgewidth=median_max_evals_marker_format[2],
271-
# marker='x', markersize=24, markeredgewidth=3,
272-
markeredgecolor=plt.getp(res[0], 'color'),
273-
ls=plt.getp(res[0], 'linestyle'),
274-
color=plt.getp(res[0], 'color'),
275-
# zorder=1.6 # zorder=0;1;1.5 is behind the grid lines, 2 covers other lines, 1.6 is between
276-
)
277-
h.extend(res)
278-
res = h # so the last element in res still has the label.
279-
# Only take sequences for x and y!
256+
for maxeval_, format in ((maxevals, max_evals_marker_format),
257+
(maxevals2, max_evals_single_marker_format)):
258+
if not maxeval_: # cover the case where maxevals is None or empty
259+
continue
260+
x3 = np.median(maxeval_) # change it only here
261+
if (x3 <= maxval and
262+
# np.any(x2 <= x3) and # maxval < median(maxevals)
263+
not plt.getp(res[-1], 'label').startswith('best')
264+
): # TODO: HACK for not considering a "best" algorithm line
265+
# Setting y3
266+
if n == 0:
267+
y3 = 0
268+
else:
269+
try:
270+
y3 = y2[x2 <= x3][-1] # find right y-value for x3==median(maxevals)
271+
except IndexError: # median(maxevals) is smaller than any data, can only happen because of CrE?
272+
y3 = y2[0]
273+
h = plt.plot((x3,), (y3,),
274+
marker=format[0],
275+
markersize=format[1] * size_correction_from_n_foreground**0.85,
276+
markeredgewidth=format[2],
277+
# marker='x', markersize=24, markeredgewidth=3,
278+
markeredgecolor=plt.getp(res[0], 'color'),
279+
ls=plt.getp(res[0], 'linestyle'),
280+
color=plt.getp(res[0], 'color'),
281+
# zorder=1.6 # zorder=0;1;1.5 is behind the grid lines, 2 covers other lines, 1.6 is between
282+
)
283+
# h.extend(res)
284+
# res = h # so the last element in res still has the label.
285+
286+
# Only take sequences for x and y!
280287

281288
return res
282289

@@ -644,7 +651,8 @@ def main(dictAlg, order=None, outputdir='.', info='default',
644651
print('Crafting effort for', alg, 'is', CrE)
645652

646653
dictData = {} # list of (ert per function) per algorithm
647-
dictMaxEvals = collections.defaultdict(list) # list of (maxevals per function) per algorithm
654+
dictMaxEvals = collections.defaultdict(list) # sum(maxevals) / max(1, #success) per instance
655+
dictMaxEvals2 = collections.defaultdict(list) # max of successful and unsuccessful 90th-percentile runtimes over all instances
648656

649657
# funcsolved = [set()] * len(targets) # number of functions solved per target
650658
xbest = []
@@ -690,7 +698,8 @@ def main(dictAlg, order=None, outputdir='.', info='default',
690698
samplesize = int(samplesize)
691699
for f, dictAlgperFunc in sorted(dictFunc.items()):
692700
# print(target_values((f, dim)))
693-
for j, t in enumerate(target_values((f, dim))):
701+
targets = target_values((f, dim))
702+
for j, t in enumerate(targets):
694703
# for j, t in enumerate(testbedsettings.current_testbed.ecdf_target_values(1e2, f)):
695704
# funcsolved[j].add(f)
696705

@@ -737,11 +746,17 @@ def main(dictAlg, order=None, outputdir='.', info='default',
737746
keyValue = 'f%d' % (f)
738747
dictData.setdefault(keyValue, []).extend(x)
739748
# dictMaxEvals.setdefault(keyValue, []).extend(runlengthunsucc)
740-
if len(runlengthunsucc):
741-
maxmed = np.median(runlengthunsucc)
742-
if len(runlengthsucc):
743-
maxmed = max((maxmed, np.median(runlengthsucc)))
744-
dictMaxEvals[keyValue].append(maxmed)
749+
if len(runlengthunsucc) and t == min(targets): # only once, not for each target as it was before June 2024
750+
def percentile(vals, which=max_evals_percentile):
751+
return toolsstats.prctile(vals, [which])[0]
752+
if 1 < 3: # budget_estimates gives the cross somewhat too far to the right
753+
if 'entry' in locals():
754+
dictMaxEvals[keyValue].append(percentile(list(entry.budget_effective_estimates.values())))
755+
if 1 < 3:
756+
maxmed = percentile(runlengthunsucc)
757+
if len(runlengthsucc):
758+
maxmed = max((maxmed, percentile(runlengthsucc)))
759+
dictMaxEvals2[keyValue].append(maxmed)
745760

746761
displaybest = plotType == PlotType.ALG
747762
if displaybest:
@@ -806,7 +821,6 @@ def algname_to_label(algname, dirname=None):
806821
for i, alg in enumerate(plotting_style.algorithm_list):
807822
try:
808823
data = dictData[alg]
809-
maxevals = dictMaxEvals[alg]
810824
except KeyError:
811825
continue
812826

@@ -837,7 +851,8 @@ def algname_to_label(algname, dirname=None):
837851

838852
args.update(plotting_style.pprldmany_styles) # no idea what this does, maybe update for background algorithms?
839853

840-
lines.append(plotdata(np.array(data), x_limit, maxevals,
854+
lines.append(plotdata(np.array(data), x_limit,
855+
dictMaxEvals[alg], maxevals2=dictMaxEvals2[alg],
841856
CrE=CrEperAlg[alg], **args))
842857

843858
if 11 < 3:

code-postprocessing/cocopp/pproc.py

+67
Original file line numberDiff line numberDiff line change
@@ -584,6 +584,7 @@ class DataSet(object):
584584
algId
585585
algs
586586
bootstrap_sample_size
587+
budget_effective_estimates
587588
comment
588589
...
589590
dim
@@ -598,6 +599,7 @@ class DataSet(object):
598599
...
599600
info
600601
info_str
602+
instance_index_lists
601603
instance_multipliers
602604
instancenumbers
603605
isBiobjective
@@ -619,8 +621,10 @@ class DataSet(object):
619621
reference_values
620622
splitByTrials
621623
success_ratio
624+
successes_by_instance
622625
suite_name
623626
target
627+
trial_count_by_instance
624628
>>> all(ds.evals[:, 0] == ds.target) # first column of ds.evals is the "target" f-value
625629
True
626630
>>> # investigate row 0,10,20,... and of the result columns 0,5,6, index 0 is ftarget
@@ -2098,6 +2102,69 @@ def median_evals(self, target_values=None, append_instances=True):
20982102
m[~np.isfinite(m)] = np.nan
20992103
return m
21002104

2105+
def instance_index_lists(self, raw_values=True):
2106+
"""return `OrderedDict` of index lists for each instance.
2107+
2108+
The index starts with 0 conforming with ``instancenumbers``,
2109+
``maxevals``, `detEvals` and others. However in the ``evals``
2110+
array, column 0 contains f-values and the instance indices start
2111+
with 1.
2112+
"""
2113+
if raw_values is not True:
2114+
raise NotImplementedError('instance index lists is not implemented for expanded evals')
2115+
res = collections.OrderedDict()
2116+
for index, i in enumerate(self.instancenumbers):
2117+
if i not in res:
2118+
res[i] = []
2119+
res[i] += [index]
2120+
return res
2121+
2122+
@property
2123+
def _budget_estimates(self):
2124+
"""return `OrderedDict` of sum(maxevals) for each (raw data) instance.
2125+
2126+
This was implemented but never used.
2127+
"""
2128+
res = collections.OrderedDict()
2129+
for instance, indices in self.instance_index_lists(raw_values=True).items():
2130+
res[instance] = sum(self.maxevals[i] for i in indices)
2131+
# res[instance] = max((max(s), sum(u)))
2132+
# res[instance] = sum(u) + (np.median(s) if s else 0)
2133+
# res[instance] = sum(u) + (max(s) if s else 0)
2134+
return res
2135+
2136+
@property
2137+
def budget_effective_estimates(self):
2138+
"""return `OrderedDict` of ``sum(maxevals) / max(1, #successes)``
2139+
2140+
for each instance. This is similar to the budget of the
2141+
within-trial restarted algorithm and also equals to the
2142+
within-instance ERT for the most difficult target
2143+
``self.precision`` when #successes > 0.
2144+
"""
2145+
res = collections.OrderedDict()
2146+
successes = self.successes_by_instance()
2147+
for instance, indices in self.instance_index_lists(raw_values=True).items():
2148+
res[instance] = sum(self.maxevals[i] for i in indices
2149+
) / max((1, successes[instance]))
2150+
return res
2151+
2152+
def successes_by_instance(self, target=None, raw_values=True):
2153+
"""return `OrderedDict` with number of successes for each instance"""
2154+
res = collections.OrderedDict()
2155+
try:
2156+
target = self.precision
2157+
except AttributeError: # biobj case
2158+
target = 1e-8 # FIXME: is there a smarter way here?
2159+
evals = self.detEvals([target])[0]
2160+
for instance, indices in self.instance_index_lists(raw_values).items():
2161+
res[instance] = sum(np.isfinite(evals[i]) for i in indices)
2162+
return res
2163+
2164+
def trial_count_by_instance(self, target=None):
2165+
"""return `Counter` `dict` with number of trials for each instance"""
2166+
return collections.Counter(self.instancenumbers)
2167+
21012168
def _data_differ(self, ds):
21022169
"""return a list of targets for which `ds` differs from `self`
21032170

0 commit comments

Comments
 (0)