[cocopp] two budget crosses in runtime distributions
also addressing issues #2287 and #2258
nikohansen committed Jul 2, 2024
1 parent 1217a52 commit b05f92b
Showing 2 changed files with 128 additions and 41 deletions.
98 changes: 57 additions & 41 deletions code-postprocessing/cocopp/compall/pprldmany.py
@@ -52,7 +52,10 @@
save_zoom = False # save zoom into left and right part of the figures
perfprofsamplesize = genericsettings.simulated_runlength_bootstrap_sample_size # number of bootstrap samples drawn for each fct+target in the performance profile
nbperdecade = 1
-median_max_evals_marker_format = ['x', 12.5, 1] # [symbol, size, edgewidth]
+max_evals_marker_format = ['x', 12.5, 1] # [symbol, size, edgewidth]
+max_evals_single_marker_format = ['+', 14, 1] # [symbol, size, edgewidth]
+max_evals_percentile = 90
+budget_cross_always = True # was False before June 2024
label_fontsize = 15 # was 17
xticks_fontsize = 16
yticks_fontsize = 14
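
For orientation, each [symbol, size, edgewidth] triple above unpacks directly into matplotlib marker keywords. A minimal standalone sketch (invented data point, not part of this commit) of how such a format list is applied:

    import matplotlib.pyplot as plt

    max_evals_marker_format = ['x', 12.5, 1]  # [symbol, size, edgewidth], as above
    symbol, size, edgewidth = max_evals_marker_format
    # draw one budget cross at x=100 evaluations, y=0.5 (fraction of problems solved)
    plt.plot([100.0], [0.5], marker=symbol, markersize=size,
             markeredgewidth=edgewidth, linestyle='', color='b')
    plt.show()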
@@ -195,8 +198,7 @@ def beautify():
plt.ylim(-0.0, 1.0)


-
-def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
+def plotdata(data, maxval=None, maxevals=None, CrE=0., maxevals2=None, **kwargs):
"""Draw a normalized ECDF. What means normalized?
:param seq data: data set, a 1-D ndarray of runlengths
@@ -206,7 +208,9 @@ def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
:param seq maxevals: if provided, will plot the median of this
sequence as a single cross marker
:param float CrE: Crafting effort the data will be multiplied by
-    the exponential of this value.
+    the exponential of this value
+:param maxevals2: a single value or values to be plotted as median(maxevals2)
+    with the same marker as maxevals
:param kwargs: optional arguments provided to plot function.
"""
@@ -250,33 +254,37 @@ def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
logscale=False, clip_on=False, **kwargs)
# res = plotUnifLogXMarkers(x2, y2, nbperdecade, logscale=False, **kwargs)

-if maxevals:  # Should cover the case where maxevals is None or empty
-    x3 = np.median(maxevals)  # change it only here
-    if (x3 <= maxval and
-        # np.any(x2 <= x3) and  # maxval < median(maxevals)
-        not plt.getp(res[-1], 'label').startswith('best')
-        ):  # TODO: HACK for not considering a "best" algorithm line
-        # Setting y3
-        if n == 0:
-            y3 = 0
-        else:
-            try:
-                y3 = y2[x2 <= x3][-1]  # find right y-value for x3==median(maxevals)
-            except IndexError:  # median(maxevals) is smaller than any data, can only happen because of CrE?
-                y3 = y2[0]
-        h = plt.plot((x3,), (y3,),
-                     marker=median_max_evals_marker_format[0],
-                     markersize=median_max_evals_marker_format[1] * size_correction_from_n_foreground**0.85,
-                     markeredgewidth=median_max_evals_marker_format[2],
-                     # marker='x', markersize=24, markeredgewidth=3,
-                     markeredgecolor=plt.getp(res[0], 'color'),
-                     ls=plt.getp(res[0], 'linestyle'),
-                     color=plt.getp(res[0], 'color'),
-                     # zorder=1.6  # zorder=0;1;1.5 is behind the grid lines, 2 covers other lines, 1.6 is between
-                     )
-        h.extend(res)
-        res = h  # so the last element in res still has the label.
-# Only take sequences for x and y!
+for maxeval_, format in ((maxevals, max_evals_marker_format),
+                         (maxevals2, max_evals_single_marker_format)):
+    if not maxeval_:  # cover the case where maxevals is None or empty
+        continue
+    x3 = np.median(maxeval_)  # change it only here
+    if ((budget_cross_always or x3 <= maxval) and
+        # np.any(x2 <= x3) and  # maxval < median(maxevals)
+        not plt.getp(res[-1], 'label').startswith('best')
+        ):  # TODO: HACK for not considering a "best" algorithm line
+        # Setting y3
+        if n == 0:
+            y3 = 0
+        else:
+            try:
+                y3 = y2[x2 <= x3][-1]  # find right y-value for x3==median(maxevals)
+            except IndexError:  # median(maxevals) is smaller than any data, can only happen because of CrE?
+                y3 = y2[0]
+        h = plt.plot((x3,), (y3,),
+                     marker=format[0],
+                     markersize=format[1] * size_correction_from_n_foreground**0.85,
+                     markeredgewidth=format[2],
+                     # marker='x', markersize=24, markeredgewidth=3,
+                     markeredgecolor=plt.getp(res[0], 'color'),
+                     ls=plt.getp(res[0], 'linestyle'),
+                     color=plt.getp(res[0], 'color'),
+                     # zorder=1.6  # zorder=0;1;1.5 is behind the grid lines, 2 covers other lines, 1.6 is between
+                     )
+        # h.extend(res)
+        # res = h  # so the last element in res still has the label.
+
+# Only take sequences for x and y!
+
return res

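The cross position is thus found in two steps: x3 is the median (or, for the second cross, a percentile) of the budget values, and y3 is the ECDF value just left of x3. A self-contained sketch of that lookup with invented numbers (not the commit's code):

    import numpy as np

    x2 = np.array([1., 10., 100., 1000.])  # sorted runlengths (x-axis of the ECDF)
    y2 = np.array([0.1, 0.4, 0.7, 0.9])    # proportion of solved problems at x2
    x3 = np.median([50., 200., 800.])      # e.g. median of maxevals -> 200.0
    try:
        y3 = y2[x2 <= x3][-1]              # last ECDF value at or below x3 -> 0.7
    except IndexError:                     # x3 is smaller than all plotted runlengths
        y3 = y2[0]
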
@@ -644,7 +652,8 @@ def main(dictAlg, order=None, outputdir='.', info='default',
print('Crafting effort for', alg, 'is', CrE)

dictData = {} # list of (ert per function) per algorithm
-dictMaxEvals = collections.defaultdict(list) # list of (maxevals per function) per algorithm
+dictMaxEvals = collections.defaultdict(list) # sum(maxevals) / max(1, #successes) per instance
+dictMaxEvals2 = collections.defaultdict(list) # max of the successful and unsuccessful 90th-percentile runtimes over all instances

# funcsolved = [set()] * len(targets) # number of functions solved per target
xbest = []
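
The two dictionaries hold different per-instance budget statistics: dictMaxEvals collects, for each instance, sum(maxevals) / max(1, #successes) (the budget_effective_estimates property added to pproc.py below), while dictMaxEvals2 collects one value aggregated over all instances. A toy computation of the first statistic (invented numbers, not library code):

    maxevals = [1000, 1200, 900]  # evaluations spent in three trials of one instance
    n_successes = 2               # trials that reached the final target precision
    budget_effective = sum(maxevals) / max(1, n_successes)  # -> 1550.0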
@@ -690,13 +699,14 @@ def main(dictAlg, order=None, outputdir='.', info='default',
samplesize = int(samplesize)
for f, dictAlgperFunc in sorted(dictFunc.items()):
# print(target_values((f, dim)))
-for j, t in enumerate(target_values((f, dim))):
+targets = target_values((f, dim))
+for j, t in enumerate(targets):
# for j, t in enumerate(testbedsettings.current_testbed.ecdf_target_values(1e2, f)):
# funcsolved[j].add(f)

for alg in algorithms_with_data:
x = [np.inf] * samplesize
-runlengthunsucc = []
+runlengthunsucc = []  # this should be a DataSet method
try:
entry = dictAlgperFunc[alg][0] # one element per fun and per dim.
evals = entry.detEvals([t])[0]
@@ -737,11 +747,17 @@ def main(dictAlg, order=None, outputdir='.', info='default',
keyValue = 'f%d' % (f)
dictData.setdefault(keyValue, []).extend(x)
# dictMaxEvals.setdefault(keyValue, []).extend(runlengthunsucc)
-if len(runlengthunsucc):
-    maxmed = np.median(runlengthunsucc)
-    if len(runlengthsucc):
-        maxmed = max((maxmed, np.median(runlengthsucc)))
-    dictMaxEvals[keyValue].append(maxmed)
+if len(runlengthunsucc) and t == min(targets):  # only once, not for each target as it was before June 2024
+    def percentile(vals, which=max_evals_percentile):
+        return toolsstats.prctile(vals, [which])[0]
+    if 1 < 3:
+        if 'entry' in locals():  # entry was assigned under a try
+            dictMaxEvals[keyValue].append(percentile(entry.budget_effective_estimates.values()))
+    if 1 < 3:
+        maxmed = percentile(runlengthunsucc)
+        if len(runlengthsucc):
+            maxmed = max((maxmed, percentile(runlengthsucc)))
+        dictMaxEvals2[keyValue].append(maxmed)

displaybest = plotType == PlotType.ALG
if displaybest:
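
The local percentile helper wraps toolsstats.prctile; assuming it follows the usual percentile semantics (an assumption here, with np.percentile as a stand-in), the statistic stored in dictMaxEvals2 amounts to the following (invented numbers):

    import numpy as np

    def percentile(vals, which=90):  # stand-in for toolsstats.prctile(vals, [which])[0]
        return np.percentile(list(vals), which)

    runlengthunsucc = [900., 1000., 1200.]  # budgets of unsuccessful trials
    runlengthsucc = [300., 450.]            # runtimes of successful trials
    maxmed = percentile(runlengthunsucc)    # -> 1160.0
    if len(runlengthsucc):
        maxmed = max((maxmed, percentile(runlengthsucc)))  # -> max(1160.0, 435.0)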
@@ -806,7 +822,6 @@ def algname_to_label(algname, dirname=None):
for i, alg in enumerate(plotting_style.algorithm_list):
try:
data = dictData[alg]
-maxevals = dictMaxEvals[alg]
except KeyError:
continue

@@ -837,7 +852,8 @@ def algname_to_label(algname, dirname=None):

args.update(plotting_style.pprldmany_styles) # no idea what this does, maybe update for background algorithms?

-lines.append(plotdata(np.array(data), x_limit, maxevals,
+lines.append(plotdata(np.array(data), x_limit,
+                      dictMaxEvals[alg], maxevals2=dictMaxEvals2[alg],
CrE=CrEperAlg[alg], **args))

if 11 < 3:
71 changes: 71 additions & 0 deletions code-postprocessing/cocopp/pproc.py
@@ -584,6 +584,7 @@ class DataSet(object):
algId
algs
bootstrap_sample_size
+budget_effective_estimates
comment
...
dim
@@ -598,6 +599,7 @@ class DataSet(object):
...
info
info_str
+instance_index_lists
instance_multipliers
instancenumbers
isBiobjective
@@ -619,8 +621,10 @@ class DataSet(object):
reference_values
splitByTrials
success_ratio
+successes_by_instance
suite_name
target
+trial_count_by_instance
>>> all(ds.evals[:, 0] == ds.target) # first column of ds.evals is the "target" f-value
True
>>> # investigate row 0,10,20,... and of the result columns 0,5,6, index 0 is ftarget
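
The four names added to this attribute list are implemented further down in this diff. Assuming a DataSet instance ds obtained in the usual way (the archive name below is hypothetical), they would be used roughly like this:

    import cocopp

    dsl = cocopp.load('bbob/2009/BFGS_ros_noiseless.tgz')  # hypothetical archive name
    ds = dsl[0]
    ds.instance_index_lists()      # OrderedDict: instance -> list of trial indices
    ds.successes_by_instance()     # OrderedDict: instance -> number of successful trials
    ds.budget_effective_estimates  # property: instance -> sum(maxevals) / max(1, #successes)
    ds.trial_count_by_instance     # property: Counter of trials per instance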
@@ -2149,6 +2153,73 @@ def median_evals(self, target_values=None, append_instances=True):
m[~np.isfinite(m)] = np.nan
return m

+    def instance_index_lists(self, raw_values=True):
+        """return `OrderedDict` of index lists for each instance.
+
+        `raw_values` means no instance balancing, otherwise the indices
+        refer to `instancenumbers_balanced` whose first indices are the
+        same as in `instancenumbers`.
+
+        The index starts with 0 conforming with `instancenumbers`,
+        `maxevals`, `detEvals` and others. However in the `evals`
+        array, column 0 contains f-values and the instance indices start
+        with 1.
+        """
+        res = collections.OrderedDict()
+        for index, i in enumerate(self.instancenumbers if raw_values
+                                  else self.instancenumbers_balanced):
+            if i not in res:
+                res[i] = []
+            res[i] += [index]
+        return res
+
+    @property
+    def _budget_estimates(self):
+        """return `OrderedDict` of sum(maxevals) for each (raw data) instance.
+
+        This was implemented but never used.
+        """
+        res = collections.OrderedDict()
+        for instance, indices in self.instance_index_lists(raw_values=True).items():
+            res[instance] = sum(self.maxevals[i] for i in indices)
+            # res[instance] = max((max(s), sum(u)))
+            # res[instance] = sum(u) + (np.median(s) if s else 0)
+            # res[instance] = sum(u) + (max(s) if s else 0)
+        return res
+
+    @property
+    def budget_effective_estimates(self):
+        """return `OrderedDict` of ``sum(maxevals) / max(1, #successes)``
+        for each instance. This is similar to the budget of the
+        within-trial restarted algorithm and also equals the
+        within-instance ERT for the most difficult target
+        ``self.precision`` when #successes > 0.
+        """
+        res = collections.OrderedDict()
+        successes = self.successes_by_instance()
+        for instance, indices in self.instance_index_lists(raw_values=True).items():
+            res[instance] = sum(self.maxevals[i] for i in indices
+                                ) / max((1, successes[instance]))
+        return res
+
+    def successes_by_instance(self, target=None, raw_values=True):
+        """return `OrderedDict` with the number of successes for each instance"""
+        res = collections.OrderedDict()
+        try:
+            target = self.precision
+        except AttributeError:  # biobj case
+            target = 1e-8  # FIXME: is there a smarter way here?
+        evals = self.detEvals([target])[0]
+        for instance, indices in self.instance_index_lists(raw_values).items():
+            res[instance] = sum(np.isfinite(evals[i]) for i in indices)
+        return res
+
+    @property
+    def trial_count_by_instance(self):
+        """return `Counter` `dict` with the number of trials (actually) done for each instance"""
+        return collections.Counter(self.instancenumbers)
def _data_differ(self, ds):
"""return a list of targets for which `ds` differs from `self`
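The docstring's claim that budget_effective_estimates equals the within-instance ERT for the final target (when there is at least one success) can be checked by hand, since ERT is the total number of evaluations spent divided by the number of successes. A toy verification (invented numbers, not library code):

    maxevals = [500, 800, 700]  # evaluations spent by three trials of one instance
    successes = 2               # trials that reached self.precision
    estimate = sum(maxevals) / max(1, successes)  # budget_effective_estimates -> 1000.0
    ert = sum(maxevals) / successes               # within-instance ERT -> 1000.0
    assert estimate == ert                        # holds whenever successes > 0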
