Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Development #2329

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
3882ef6
[cocopp] budget crosses are now at max(median(rt_s), median(rt_u))
nikohansen Jun 26, 2024
af9e249
[cocopp] support numpy 2.0
nikohansen Jun 26, 2024
353eef4
[cocopp] adapt pyproject.toml
nikohansen Jun 26, 2024
103aa78
[cocopp] minor grid in scaling plots
nikohansen Jun 27, 2024
167b471
[cocopp] add --budget-based option
nikohansen Jun 27, 2024
0acc93d
[cocopp] minor polishing of code, improve some docstring, minor fix
nikohansen Jun 28, 2024
134500c
[cocopp] polish DataSet.plot
nikohansen Jun 30, 2024
692a037
[cocopp] implement DataSet.instancenumbers_balanced property
nikohansen Jun 29, 2024
a0d9c72
[cocopp] implement detEvals_by_instance for getting runtimes by instance
nikohansen Jun 29, 2024
1217a52
[cocopp] revise DataSet.evals_with_simulated_restarts and add instanc…
nikohansen Jun 30, 2024
b05f92b
[cocopp] two budget crosses in runtime distributions
nikohansen Jun 27, 2024
9fffa5a
[cocopp] improve error message addressing #2312
nikohansen Jul 3, 2024
60b0415
[cocopp] remove transient consistency check
nikohansen Jul 3, 2024
87fd426
[cocopp] remove pprocold.py (which was an old version of readalign.py)
nikohansen Jul 10, 2024
067bed1
[cocoex] polish docstrings and error message
nikohansen Jul 11, 2024
04737f0
add publish-cocopp-howto.md
nikohansen Jul 12, 2024
72d5ac2
[cocopp] fix test of pproc.DataSet due to instancenumbers_balanced pr…
nikohansen Jul 17, 2024
12c2e37
[cocoex] fix array(copy=True) for numpy 2.0
nikohansen Jul 22, 2024
2f90d59
Update conda update command
olafmersmann Jul 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Say you already have a conda environment named `foo` that you want to reuse beca
Then, once you've activated the environment, you can run

```sh
conda update -f env.yaml
conda update --file env.yaml
```

and it will install all the required development dependencies into your existing environment.
Expand Down
8 changes: 4 additions & 4 deletions code-experiments/build/python/src/cocoex/interface.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ cdef class Problem:
# or should we return `[]` for zero constraints?
# `[]` is more likely to produce quietly unexpected result?
cdef np.ndarray[double, ndim=1, mode="c"] _x
x = np.array(x, copy=False, dtype=np.double, order='C')
x = np.asarray(x, dtype=np.double, order='C')
if np.size(x) != self.number_of_variables:
raise ValueError(
"Dimension, `np.size(x)==%d`, of input `x` does " % np.size(x) +
Expand All @@ -596,7 +596,7 @@ cdef class Problem:
for the assessment of the algorithm.
"""
cdef np.ndarray[double, ndim=1, mode="c"] _x
x = np.array(arx, copy=False, dtype=np.double, order='C')
x = np.asarray(arx, dtype=np.double, order='C')
if np.size(x) != self.number_of_variables:
raise ValueError(
"Dimension, `np.size(x)==%d`, of input `x` does " % np.size(x) +
Expand All @@ -618,7 +618,7 @@ cdef class Problem:
"""
cdef size_t _evaluation = evaluation # "conversion" to size_t
cdef np.ndarray[double, ndim=1, mode="c"] _y
y = np.array(y, copy=False, dtype=np.double, order='C')
y = np.asarray(y, dtype=np.double, order='C')
if np.size(y) != self.number_of_objectives:
raise ValueError(
"Dimension, `np.size(y)==%d`, of input `y` does " % np.size(y) +
Expand Down Expand Up @@ -815,7 +815,7 @@ cdef class Problem:
"""return objective function value of input `x`"""
cdef np.ndarray[double, ndim=1, mode="c"] _x
assert self.initialized
x = np.array(x, copy=False, dtype=np.double, order='C')
x = np.asarray(x, dtype=np.double, order='C')
if np.size(x) != self.number_of_variables:
raise ValueError(
"Dimension, `np.size(x)==%d`, of input `x` does " % np.size(x) +
Expand Down
38 changes: 20 additions & 18 deletions code-experiments/build/python/src/cocoex/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ class ExperimentRepeater:
implements safe access to this dictionary.

When problem instances are repeated in a single suite, they may be
_partially_ skipped _after_ the first full sweep. That is, the
*partially* skipped *after* the first full sweep. That is, the
configuration ``1-5,1-5,1-5`` can also lead to four trials of each
instance 1-5, because all instances have been repeated the same number
of times.
Expand Down Expand Up @@ -840,7 +840,7 @@ def done(self, problem=None, message=True):
Details
-------
``done()`` without argument gives only consistent results before or
after a _full_ first sweep. In particular, _during_ the first sweep
after a *full* first sweep. In particular, *during* the first sweep
it cannot account for problems that have not yet been run once.

Calling ``done()`` increments the sweep counter iff it returns
Expand Down Expand Up @@ -902,15 +902,14 @@ def initial_solution_proposal(self, problem, nonzero_odds=14):
return problem.initial_solution_proposal(trials if trials % (nonzero_odds + 1) else 0)

class BatchScheduler:
"""Facilitate running a benchmarking experiment on a `cocoex.Suite` in
several independent batches.
"""Facilitate to run a benchmarking experiment in independent batches.

The batch scheduler crucially assumes that in each batch the same
problems are given _in the same order_ when calling `is_in_batch`.
problems are given *in the same order* when calling `is_in_batch`.

Pseudo code example::

batch_to_execute = 0 # set current batch to execute in [0, 3]
batch_to_execute = 0 # set current batch to execute

suite = cocoex.Suite('bbob', '', '')
batcher = cocoex.BatchScheduler(4, batch_to_execute)
Expand All @@ -919,18 +918,21 @@ class BatchScheduler:
continue
# ... run optimizer on problem ...

needs to be run four times overall (e.g., in parallel) with
``batch_to_execute`` = 0..3 to generate the full experimental data.
needs to be run, in accordance with the first argument to
`BatchScheduler`, four times overall (e.g., in parallel) with
``batch_to_execute in (0,1,2,3)`` to generate the full experimental
data.

Details: to get a more even time distribution over all batches, it
seems advisable that the number of functions is not divisible by the
number of batches. That is, 4 (or 6 or 8 or 12) batches is not likely
to be ideal on the `'bbob'` testbed of 24 functions.
"""
def __init__(self, number_of_batches, batch_to_execute):
"""distribute over `number_of_batches` batches and executed here
"""distribute over `number_of_batches` batches and execute

the batch with number `batch_to_execute`.
the batch with number `batch_to_execute` which must obey
``0 <= batch_to_execute < number_of_batches``.
"""
self.params = {n: v for (n, v) in locals().items() if n != 'self'}
self.first_problem = None
Expand All @@ -939,18 +941,18 @@ def __init__(self, number_of_batches, batch_to_execute):
if self.params['number_of_batches'] == 1 and self.params['batch_to_execute'] in (0, 1, None):
print("number_of_batches == 1, hence running the full suite")
self.params['batch_to_execute'] = 0
elif self.params['number_of_batches'] <= self.params['batch_to_execute']:
raise ValueError("number of batches == {} <= {} == batch to execute,"
" however > is required."
elif (self.params['batch_to_execute'] < 0 or
self.params['batch_to_execute'] >= self.params['number_of_batches']):
raise ValueError("batch_to_execute == {} must be >= 0 and < {} == number_of_batches."
"\n The first argument is the number of batches (nob),"
"\n the second argument needs to sweep from 0...nob-1."
.format(self.params['number_of_batches'],
self.params['batch_to_execute']))
"\n the second argument needs to 'sweep' from 0...nob-1."
.format(self.params['batch_to_execute'],
self.params['number_of_batches']))
def is_in_batch(self, problem):
"""return `True` iff the batch number for `problem` equals `batch_to_execute`

which was given as a constructor argument. Assumes that
``id_function`` and ``dimension`` are attributes of `problem`.
as given as constructor argument. Assumes that ``id_function`` and
``dimension`` are attributes of `problem`.

The batch number for `problem` is attributed using
``(problem.id_function, problem.dimension)`` by order of
Expand Down
6 changes: 5 additions & 1 deletion code-postprocessing/cocopp/archiving.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,11 @@ def get_extended(self, args, remote=True):
warnings.warn('COCODataArchive failed to locate "%s".\n'
'Will try again after updating from %s'
% (name, self.remote_data_path))
self.update()
try:
self.update()
except Exception as e:
warnings.warn("Updating archive definitions failed with \n\n {}: {}\n\n You may want to check your WWW connectivity."
.format(str(type(e)).split("'")[1].split("'")[0], e))
res.extend(more)
if len(args) != len(set(args)):
warnings.warn("Several data arguments point to the very same "
Expand Down
2 changes: 1 addition & 1 deletion code-postprocessing/cocopp/bestalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ def extractBestAlgorithms(args=algs2009, f_factor=2,

# add second best or all algorithms that have an ERT
# within a factor of f_factor of the best:
secondbest_ERT = np.infty
secondbest_ERT = np.inf
secondbest_str = ''
secondbest_included = False
for astring in j:
Expand Down
4 changes: 2 additions & 2 deletions code-postprocessing/cocopp/compall/ppfigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,13 +353,13 @@ def beautify(legend=False, rightlegend=False):
set_trace()

# Grid options
axisHandle.yaxis.grid(True)
axisHandle.yaxis.grid(True, which='both')

ymin, ymax = plt.ylim()

# quadratic slanted "grid"
if 1 < 3:
for i in range(-2, 7, 1 if ymax < 1e5 else 2):
for i in range(-2, 7, 1 if ymax/(ymin+1e-6) < 1e6 else 2):
plt.plot((0.2, 20000), (10**i, 10**(i + 5)), 'k:',
linewidth=0.5) # grid should be on top
else: # to be removed
Expand Down
97 changes: 59 additions & 38 deletions code-postprocessing/cocopp/compall/pprldmany.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

import os
import warnings
from pdb import set_trace
import collections
import numpy as np
import matplotlib.pyplot as plt
from .. import toolsstats, bestalg, genericsettings, testbedsettings
Expand All @@ -52,7 +52,10 @@
save_zoom = False # save zoom into left and right part of the figures
perfprofsamplesize = genericsettings.simulated_runlength_bootstrap_sample_size # number of bootstrap samples drawn for each fct+target in the performance profile
nbperdecade = 1
median_max_evals_marker_format = ['x', 12.5, 1] # [symbol, size, edgewidth]
max_evals_marker_format = ['x', 12.5, 1] # [symbol, size, edgewidth]
max_evals_single_marker_format = ['+', 14, 1] # [symbol, size, edgewidth]
max_evals_percentile = 90
budget_cross_always = True # was False before June 2024
label_fontsize = 15 # was 17
xticks_fontsize = 16
yticks_fontsize = 14
Expand Down Expand Up @@ -195,8 +198,7 @@ def beautify():
plt.ylim(-0.0, 1.0)



def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
def plotdata(data, maxval=None, maxevals=None, CrE=0., maxevals2=None, **kwargs):
"""Draw a normalized ECDF. What means normalized?

:param seq data: data set, a 1-D ndarray of runlengths
Expand All @@ -206,7 +208,9 @@ def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
:param seq maxevals: if provided, will plot the median of this
sequence as a single cross marker
:param float CrE: Crafting effort the data will be multiplied by
the exponential of this value.
the exponential of this value
:param maxevals2: a single value or values to be plotted as median(maxevals2)
with the same marker as maxevals
:param kwargs: optional arguments provided to plot function.

"""
Expand Down Expand Up @@ -250,33 +254,37 @@ def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
logscale=False, clip_on=False, **kwargs)
# res = plotUnifLogXMarkers(x2, y2, nbperdecade, logscale=False, **kwargs)

if maxevals: # Should cover the case where maxevals is None or empty
x3 = np.median(maxevals) # change it only here
if (x3 <= maxval and
# np.any(x2 <= x3) and # maxval < median(maxevals)
not plt.getp(res[-1], 'label').startswith('best')
): # TODO: HACK for not considering a "best" algorithm line
# Setting y3
if n == 0:
y3 = 0
else:
try:
y3 = y2[x2 <= x3][-1] # find right y-value for x3==median(maxevals)
except IndexError: # median(maxevals) is smaller than any data, can only happen because of CrE?
y3 = y2[0]
h = plt.plot((x3,), (y3,),
marker=median_max_evals_marker_format[0],
markersize=median_max_evals_marker_format[1] * size_correction_from_n_foreground**0.85,
markeredgewidth=median_max_evals_marker_format[2],
# marker='x', markersize=24, markeredgewidth=3,
markeredgecolor=plt.getp(res[0], 'color'),
ls=plt.getp(res[0], 'linestyle'),
color=plt.getp(res[0], 'color'),
# zorder=1.6 # zorder=0;1;1.5 is behind the grid lines, 2 covers other lines, 1.6 is between
)
h.extend(res)
res = h # so the last element in res still has the label.
# Only take sequences for x and y!
for maxeval_, format in ((maxevals, max_evals_marker_format),
(maxevals2, max_evals_single_marker_format)):
if not maxeval_: # cover the case where maxevals is None or empty
continue
x3 = np.median(maxeval_) # change it only here
if ((budget_cross_always or x3 <= maxval) and
# np.any(x2 <= x3) and # maxval < median(maxevals)
not plt.getp(res[-1], 'label').startswith('best')
): # TODO: HACK for not considering a "best" algorithm line
# Setting y3
if n == 0:
y3 = 0
else:
try:
y3 = y2[x2 <= x3][-1] # find right y-value for x3==median(maxevals)
except IndexError: # median(maxevals) is smaller than any data, can only happen because of CrE?
y3 = y2[0]
h = plt.plot((x3,), (y3,),
marker=format[0],
markersize=format[1] * size_correction_from_n_foreground**0.85,
markeredgewidth=format[2],
# marker='x', markersize=24, markeredgewidth=3,
markeredgecolor=plt.getp(res[0], 'color'),
ls=plt.getp(res[0], 'linestyle'),
color=plt.getp(res[0], 'color'),
# zorder=1.6 # zorder=0;1;1.5 is behind the grid lines, 2 covers other lines, 1.6 is between
)
# h.extend(res)
# res = h # so the last element in res still has the label.

# Only take sequences for x and y!

return res

Expand Down Expand Up @@ -644,7 +652,8 @@ def main(dictAlg, order=None, outputdir='.', info='default',
print('Crafting effort for', alg, 'is', CrE)

dictData = {} # list of (ert per function) per algorithm
dictMaxEvals = {} # list of (maxevals per function) per algorithm
dictMaxEvals = collections.defaultdict(list) # sum(maxevals) / max(1, #success) per instance
dictMaxEvals2 = collections.defaultdict(list) # max of successf and unsucc 90%tile runtime over all instances

# funcsolved = [set()] * len(targets) # number of functions solved per target
xbest = []
Expand Down Expand Up @@ -690,13 +699,14 @@ def main(dictAlg, order=None, outputdir='.', info='default',
samplesize = int(samplesize)
for f, dictAlgperFunc in sorted(dictFunc.items()):
# print(target_values((f, dim)))
for j, t in enumerate(target_values((f, dim))):
targets = target_values((f, dim))
for j, t in enumerate(targets):
# for j, t in enumerate(testbedsettings.current_testbed.ecdf_target_values(1e2, f)):
# funcsolved[j].add(f)

for alg in algorithms_with_data:
x = [np.inf] * samplesize
runlengthunsucc = []
runlengthunsucc = [] # this should be a DataSet method
try:
entry = dictAlgperFunc[alg][0] # one element per fun and per dim.
evals = entry.detEvals([t])[0]
Expand Down Expand Up @@ -736,7 +746,18 @@ def main(dictAlg, order=None, outputdir='.', info='default',
elif plotType == PlotType.FUNC:
keyValue = 'f%d' % (f)
dictData.setdefault(keyValue, []).extend(x)
dictMaxEvals.setdefault(keyValue, []).extend(runlengthunsucc)
# dictMaxEvals.setdefault(keyValue, []).extend(runlengthunsucc)
if len(runlengthunsucc) and t == min(targets): # only once, not for each target as it was before June 2024
def percentile(vals, which=max_evals_percentile):
return toolsstats.prctile(vals, [which])[0]
if 1 < 3:
if 'entry' in locals(): # entry was assigned under a try
dictMaxEvals[keyValue].append(percentile(entry.budget_effective_estimates.values()))
if 1 < 3:
maxmed = percentile(runlengthunsucc)
if len(runlengthsucc):
maxmed = max((maxmed, percentile(runlengthsucc)))
dictMaxEvals2[keyValue].append(maxmed)

displaybest = plotType == PlotType.ALG
if displaybest:
Expand Down Expand Up @@ -801,7 +822,6 @@ def algname_to_label(algname, dirname=None):
for i, alg in enumerate(plotting_style.algorithm_list):
try:
data = dictData[alg]
maxevals = dictMaxEvals[alg]
except KeyError:
continue

Expand Down Expand Up @@ -832,7 +852,8 @@ def algname_to_label(algname, dirname=None):

args.update(plotting_style.pprldmany_styles) # no idea what this does, maybe update for background algorithms?

lines.append(plotdata(np.array(data), x_limit, maxevals,
lines.append(plotdata(np.array(data), x_limit,
dictMaxEvals[alg], maxevals2=dictMaxEvals2[alg],
CrE=CrEperAlg[alg], **args))

if 11 < 3:
Expand Down
6 changes: 3 additions & 3 deletions code-postprocessing/cocopp/pplogloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def plot(xdata, ydata):
if len(y) == 0:
continue

res.extend(plt.plot([xdata[i]]*len(y), 10**np.array(y),
res.extend(plt.plot([xdata[i]]*len(y), 10**np.asarray(y),
marker='+', color=flierscolor,
ls='', markersize=20, markeredgewidth=3))

Expand Down Expand Up @@ -440,7 +440,7 @@ def plot(xdata, ydata):
verticalalignment='bottom'))
y = y[np.isfinite(y)]

dictboxwhisker = boxplot(list(10**np.array(i) for i in ydata),
dictboxwhisker = boxplot(list(10**np.asarray(i) for i in ydata),
sym='', notch=0, widths=None,
positions=xdata)
#'medians', 'fliers', 'whiskers', 'boxes', 'caps'
Expand Down Expand Up @@ -775,7 +775,7 @@ def generateFigure(dsList, CrE=0., isStoringXRange=True, outputdir='.',
#Aggregate over functions.
ydata.append(np.log10(list(data[f][i] for f in data)))

xdata = np.log10(np.array(EVALS)/d)
xdata = np.log10(np.asarray(EVALS)/d)
xticklabels = ['']
xticklabels.extend('%d' % i for i in xdata[1:])
plot(xdata, ydata)
Expand Down
Loading
Loading