Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Development #2329

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
3882ef6
[cocopp] budget crosses are now at max(median(rt_s), median(rt_u))
nikohansen Jun 26, 2024
af9e249
[cocopp] support numpy 2.0
nikohansen Jun 26, 2024
353eef4
[cocopp] adapt pyproject.toml
nikohansen Jun 26, 2024
103aa78
[cocopp] minor grid in scaling plots
nikohansen Jun 27, 2024
167b471
[cocopp] add --budget-based option
nikohansen Jun 27, 2024
0acc93d
[cocopp] minor polishing of code, improve some docstring, minor fix
nikohansen Jun 28, 2024
134500c
[cocopp] polish DataSet.plot
nikohansen Jun 30, 2024
692a037
[cocopp] implement DataSet.instancenumbers_balanced property
nikohansen Jun 29, 2024
a0d9c72
[cocopp] implement detEvals_by_instance for getting runtimes by instance
nikohansen Jun 29, 2024
1217a52
[cocopp] revise DataSet.evals_with_simulated_restarts and add instanc…
nikohansen Jun 30, 2024
b05f92b
[cocopp] two budget crosses in runtime distributions
nikohansen Jun 27, 2024
9fffa5a
[cocopp] improve error message addressing #2312
nikohansen Jul 3, 2024
60b0415
[cocopp] remove transient consistency check
nikohansen Jul 3, 2024
87fd426
[cocopp] remove pprocold.py (which was an old version of readalign.py)
nikohansen Jul 10, 2024
067bed1
[cocoex] polish docstrings and error message
nikohansen Jul 11, 2024
04737f0
add publish-cocopp-howto.md
nikohansen Jul 12, 2024
72d5ac2
[cocopp] fix test of pproc.DataSet due to instancenumbers_balanced pr…
nikohansen Jul 17, 2024
12c2e37
[cocoex] fix array(copy=True) for numpy 2.0
nikohansen Jul 22, 2024
2f90d59
Update conda update command
olafmersmann Jul 25, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ Say you already have a conda environment named `foo` that you want to reuse beca
Then, once you've activated the environment, you can run

```sh
conda update -f env.yaml
conda update --file env.yaml
```

and it will install all the required development dependencies into your existing environment.
Expand Down
8 changes: 4 additions & 4 deletions code-experiments/build/python/src/cocoex/interface.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ cdef class Problem:
# or should we return `[]` for zero constraints?
# `[]` is more likely to produce quietly unexpected result?
cdef np.ndarray[double, ndim=1, mode="c"] _x
x = np.array(x, copy=False, dtype=np.double, order='C')
x = np.asarray(x, dtype=np.double, order='C')
if np.size(x) != self.number_of_variables:
raise ValueError(
"Dimension, `np.size(x)==%d`, of input `x` does " % np.size(x) +
Expand All @@ -596,7 +596,7 @@ cdef class Problem:
for the assessment of the algorithm.
"""
cdef np.ndarray[double, ndim=1, mode="c"] _x
x = np.array(arx, copy=False, dtype=np.double, order='C')
x = np.asarray(arx, dtype=np.double, order='C')
if np.size(x) != self.number_of_variables:
raise ValueError(
"Dimension, `np.size(x)==%d`, of input `x` does " % np.size(x) +
Expand All @@ -618,7 +618,7 @@ cdef class Problem:
"""
cdef size_t _evaluation = evaluation # "conversion" to size_t
cdef np.ndarray[double, ndim=1, mode="c"] _y
y = np.array(y, copy=False, dtype=np.double, order='C')
y = np.asarray(y, dtype=np.double, order='C')
if np.size(y) != self.number_of_objectives:
raise ValueError(
"Dimension, `np.size(y)==%d`, of input `y` does " % np.size(y) +
Expand Down Expand Up @@ -815,7 +815,7 @@ cdef class Problem:
"""return objective function value of input `x`"""
cdef np.ndarray[double, ndim=1, mode="c"] _x
assert self.initialized
x = np.array(x, copy=False, dtype=np.double, order='C')
x = np.asarray(x, dtype=np.double, order='C')
if np.size(x) != self.number_of_variables:
raise ValueError(
"Dimension, `np.size(x)==%d`, of input `x` does " % np.size(x) +
Expand Down
38 changes: 20 additions & 18 deletions code-experiments/build/python/src/cocoex/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,7 @@ class ExperimentRepeater:
implements safe access to this dictionary.

When problem instances are repeated in a single suite, they may be
_partially_ skipped _after_ the first full sweep. That is, the
*partially* skipped *after* the first full sweep. That is, the
configuration ``1-5,1-5,1-5`` can also lead to four trials of each
instance 1-5, because all instances have been repeated the same number
of times.
Expand Down Expand Up @@ -840,7 +840,7 @@ def done(self, problem=None, message=True):
Details
-------
``done()`` without argument gives only consistent results before or
after a _full_ first sweep. In particular, _during_ the first sweep
after a *full* first sweep. In particular, *during* the first sweep
it cannot account for problems that have not yet been run once.

Calling ``done()`` increments the sweep counter iff it returns
Expand Down Expand Up @@ -902,15 +902,14 @@ def initial_solution_proposal(self, problem, nonzero_odds=14):
return problem.initial_solution_proposal(trials if trials % (nonzero_odds + 1) else 0)

class BatchScheduler:
"""Facilitate running a benchmarking experiment on a `cocoex.Suite` in
several independent batches.
"""Facilitate to run a benchmarking experiment in independent batches.

The batch scheduler crucially assumes that in each batch the same
problems are given _in the same order_ when calling `is_in_batch`.
problems are given *in the same order* when calling `is_in_batch`.

Pseudo code example::

batch_to_execute = 0 # set current batch to execute in [0, 3]
batch_to_execute = 0 # set current batch to execute

suite = cocoex.Suite('bbob', '', '')
batcher = cocoex.BatchScheduler(4, batch_to_execute)
Expand All @@ -919,18 +918,21 @@ class BatchScheduler:
continue
# ... run optimizer on problem ...

needs to be run four times overall (e.g., in parallel) with
``batch_to_execute`` = 0..3 to generate the full experimental data.
needs to be run, in accordance with the first argument to
`BatchScheduler`, four times overall (e.g., in parallel) with
``batch_to_execute in (0,1,2,3)`` to generate the full experimental
data.

Details: to get a more even time distribution over all batches, it
seems advisable that the number of functions is not divisible by the
number of batches. That is, 4 (or 6 or 8 or 12) batches is not likely
to be ideal on the `'bbob'` testbed of 24 functions.
"""
def __init__(self, number_of_batches, batch_to_execute):
"""distribute over `number_of_batches` batches and executed here
"""distribute over `number_of_batches` batches and execute

the batch with number `batch_to_execute`.
the batch with number `batch_to_execute` which must obey
``0 <= batch_to_execute < number_of_batches``.
"""
self.params = {n: v for (n, v) in locals().items() if n != 'self'}
self.first_problem = None
Expand All @@ -939,18 +941,18 @@ def __init__(self, number_of_batches, batch_to_execute):
if self.params['number_of_batches'] == 1 and self.params['batch_to_execute'] in (0, 1, None):
print("number_of_batches == 1, hence running the full suite")
self.params['batch_to_execute'] = 0
elif self.params['number_of_batches'] <= self.params['batch_to_execute']:
raise ValueError("number of batches == {} <= {} == batch to execute,"
" however > is required."
elif (self.params['batch_to_execute'] < 0 or
self.params['batch_to_execute'] >= self.params['number_of_batches']):
raise ValueError("batch_to_execute == {} must be >= 0 and < {} == number_of_batches."
"\n The first argument is the number of batches (nob),"
"\n the second argument needs to sweep from 0...nob-1."
.format(self.params['number_of_batches'],
self.params['batch_to_execute']))
"\n the second argument needs to 'sweep' from 0...nob-1."
.format(self.params['batch_to_execute'],
self.params['number_of_batches']))
def is_in_batch(self, problem):
"""return `True` iff the batch number for `problem` equals `batch_to_execute`

which was given as a constructor argument. Assumes that
``id_function`` and ``dimension`` are attributes of `problem`.
as given as constructor argument. Assumes that ``id_function`` and
``dimension`` are attributes of `problem`.

The batch number for `problem` is attributed using
``(problem.id_function, problem.dimension)`` by order of
Expand Down
6 changes: 5 additions & 1 deletion code-postprocessing/cocopp/archiving.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,11 @@ def get_extended(self, args, remote=True):
warnings.warn('COCODataArchive failed to locate "%s".\n'
'Will try again after updating from %s'
% (name, self.remote_data_path))
self.update()
try:
self.update()
except Exception as e:
warnings.warn("Updating archive definitions failed with \n\n {}: {}\n\n You may want to check your WWW connectivity."
.format(str(type(e)).split("'")[1].split("'")[0], e))
res.extend(more)
if len(args) != len(set(args)):
warnings.warn("Several data arguments point to the very same "
Expand Down
2 changes: 1 addition & 1 deletion code-postprocessing/cocopp/bestalg.py
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ def extractBestAlgorithms(args=algs2009, f_factor=2,

# add second best or all algorithms that have an ERT
# within a factor of f_factor of the best:
secondbest_ERT = np.infty
secondbest_ERT = np.inf
secondbest_str = ''
secondbest_included = False
for astring in j:
Expand Down
4 changes: 2 additions & 2 deletions code-postprocessing/cocopp/compall/ppfigs.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,13 +353,13 @@ def beautify(legend=False, rightlegend=False):
set_trace()

# Grid options
axisHandle.yaxis.grid(True)
axisHandle.yaxis.grid(True, which='both')

ymin, ymax = plt.ylim()

# quadratic slanted "grid"
if 1 < 3:
for i in range(-2, 7, 1 if ymax < 1e5 else 2):
for i in range(-2, 7, 1 if ymax/(ymin+1e-6) < 1e6 else 2):
plt.plot((0.2, 20000), (10**i, 10**(i + 5)), 'k:',
linewidth=0.5) # grid should be on top
else: # to be removed
Expand Down
97 changes: 59 additions & 38 deletions code-postprocessing/cocopp/compall/pprldmany.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@

import os
import warnings
from pdb import set_trace
import collections
import numpy as np
import matplotlib.pyplot as plt
from .. import toolsstats, bestalg, genericsettings, testbedsettings
Expand All @@ -52,7 +52,10 @@
save_zoom = False # save zoom into left and right part of the figures
perfprofsamplesize = genericsettings.simulated_runlength_bootstrap_sample_size # number of bootstrap samples drawn for each fct+target in the performance profile
nbperdecade = 1
median_max_evals_marker_format = ['x', 12.5, 1] # [symbol, size, edgewidth]
max_evals_marker_format = ['x', 12.5, 1] # [symbol, size, edgewidth]
max_evals_single_marker_format = ['+', 14, 1] # [symbol, size, edgewidth]
max_evals_percentile = 90
budget_cross_always = True # was False before June 2024
label_fontsize = 15 # was 17
xticks_fontsize = 16
yticks_fontsize = 14
Expand Down Expand Up @@ -195,8 +198,7 @@ def beautify():
plt.ylim(-0.0, 1.0)



def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
def plotdata(data, maxval=None, maxevals=None, CrE=0., maxevals2=None, **kwargs):
"""Draw a normalized ECDF. What means normalized?

:param seq data: data set, a 1-D ndarray of runlengths
Expand All @@ -206,7 +208,9 @@ def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
:param seq maxevals: if provided, will plot the median of this
sequence as a single cross marker
:param float CrE: Crafting effort the data will be multiplied by
the exponential of this value.
the exponential of this value
:param maxevals2: a single value or values to be plotted as median(maxevals2)
with the same marker as maxevals
:param kwargs: optional arguments provided to plot function.

"""
Expand Down Expand Up @@ -250,33 +254,37 @@ def plotdata(data, maxval=None, maxevals=None, CrE=0., **kwargs):
logscale=False, clip_on=False, **kwargs)
# res = plotUnifLogXMarkers(x2, y2, nbperdecade, logscale=False, **kwargs)

if maxevals: # Should cover the case where maxevals is None or empty
x3 = np.median(maxevals) # change it only here
if (x3 <= maxval and
# np.any(x2 <= x3) and # maxval < median(maxevals)
not plt.getp(res[-1], 'label').startswith('best')
): # TODO: HACK for not considering a "best" algorithm line
# Setting y3
if n == 0:
y3 = 0
else:
try:
y3 = y2[x2 <= x3][-1] # find right y-value for x3==median(maxevals)
except IndexError: # median(maxevals) is smaller than any data, can only happen because of CrE?
y3 = y2[0]
h = plt.plot((x3,), (y3,),
marker=median_max_evals_marker_format[0],
markersize=median_max_evals_marker_format[1] * size_correction_from_n_foreground**0.85,
markeredgewidth=median_max_evals_marker_format[2],
# marker='x', markersize=24, markeredgewidth=3,
markeredgecolor=plt.getp(res[0], 'color'),
ls=plt.getp(res[0], 'linestyle'),
color=plt.getp(res[0], 'color'),
# zorder=1.6 # zorder=0;1;1.5 is behind the grid lines, 2 covers other lines, 1.6 is between
)
h.extend(res)
res = h # so the last element in res still has the label.
# Only take sequences for x and y!
for maxeval_, format in ((maxevals, max_evals_marker_format),
(maxevals2, max_evals_single_marker_format)):
if not maxeval_: # cover the case where maxevals is None or empty
continue
x3 = np.median(maxeval_) # change it only here
if ((budget_cross_always or x3 <= maxval) and
# np.any(x2 <= x3) and # maxval < median(maxevals)
not plt.getp(res[-1], 'label').startswith('best')
): # TODO: HACK for not considering a "best" algorithm line
# Setting y3
if n == 0:
y3 = 0
else:
try:
y3 = y2[x2 <= x3][-1] # find right y-value for x3==median(maxevals)
except IndexError: # median(maxevals) is smaller than any data, can only happen because of CrE?
y3 = y2[0]
h = plt.plot((x3,), (y3,),
marker=format[0],
markersize=format[1] * size_correction_from_n_foreground**0.85,
markeredgewidth=format[2],
# marker='x', markersize=24, markeredgewidth=3,
markeredgecolor=plt.getp(res[0], 'color'),
ls=plt.getp(res[0], 'linestyle'),
color=plt.getp(res[0], 'color'),
# zorder=1.6 # zorder=0;1;1.5 is behind the grid lines, 2 covers other lines, 1.6 is between
)
# h.extend(res)
# res = h # so the last element in res still has the label.

# Only take sequences for x and y!

return res

Expand Down Expand Up @@ -644,7 +652,8 @@ def main(dictAlg, order=None, outputdir='.', info='default',
print('Crafting effort for', alg, 'is', CrE)

dictData = {} # list of (ert per function) per algorithm
dictMaxEvals = {} # list of (maxevals per function) per algorithm
dictMaxEvals = collections.defaultdict(list) # sum(maxevals) / max(1, #success) per instance
dictMaxEvals2 = collections.defaultdict(list) # max of successf and unsucc 90%tile runtime over all instances

# funcsolved = [set()] * len(targets) # number of functions solved per target
xbest = []
Expand Down Expand Up @@ -690,13 +699,14 @@ def main(dictAlg, order=None, outputdir='.', info='default',
samplesize = int(samplesize)
for f, dictAlgperFunc in sorted(dictFunc.items()):
# print(target_values((f, dim)))
for j, t in enumerate(target_values((f, dim))):
targets = target_values((f, dim))
for j, t in enumerate(targets):
# for j, t in enumerate(testbedsettings.current_testbed.ecdf_target_values(1e2, f)):
# funcsolved[j].add(f)

for alg in algorithms_with_data:
x = [np.inf] * samplesize
runlengthunsucc = []
runlengthunsucc = [] # this should be a DataSet method
try:
entry = dictAlgperFunc[alg][0] # one element per fun and per dim.
evals = entry.detEvals([t])[0]
Expand Down Expand Up @@ -736,7 +746,18 @@ def main(dictAlg, order=None, outputdir='.', info='default',
elif plotType == PlotType.FUNC:
keyValue = 'f%d' % (f)
dictData.setdefault(keyValue, []).extend(x)
dictMaxEvals.setdefault(keyValue, []).extend(runlengthunsucc)
# dictMaxEvals.setdefault(keyValue, []).extend(runlengthunsucc)
if len(runlengthunsucc) and t == min(targets): # only once, not for each target as it was before June 2024
def percentile(vals, which=max_evals_percentile):
return toolsstats.prctile(vals, [which])[0]
if 1 < 3:
if 'entry' in locals(): # entry was assigned under a try
dictMaxEvals[keyValue].append(percentile(entry.budget_effective_estimates.values()))
if 1 < 3:
maxmed = percentile(runlengthunsucc)
if len(runlengthsucc):
maxmed = max((maxmed, percentile(runlengthsucc)))
dictMaxEvals2[keyValue].append(maxmed)

displaybest = plotType == PlotType.ALG
if displaybest:
Expand Down Expand Up @@ -801,7 +822,6 @@ def algname_to_label(algname, dirname=None):
for i, alg in enumerate(plotting_style.algorithm_list):
try:
data = dictData[alg]
maxevals = dictMaxEvals[alg]
except KeyError:
continue

Expand Down Expand Up @@ -832,7 +852,8 @@ def algname_to_label(algname, dirname=None):

args.update(plotting_style.pprldmany_styles) # no idea what this does, maybe update for background algorithms?

lines.append(plotdata(np.array(data), x_limit, maxevals,
lines.append(plotdata(np.array(data), x_limit,
dictMaxEvals[alg], maxevals2=dictMaxEvals2[alg],
CrE=CrEperAlg[alg], **args))

if 11 < 3:
Expand Down
6 changes: 3 additions & 3 deletions code-postprocessing/cocopp/pplogloss.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ def plot(xdata, ydata):
if len(y) == 0:
continue

res.extend(plt.plot([xdata[i]]*len(y), 10**np.array(y),
res.extend(plt.plot([xdata[i]]*len(y), 10**np.asarray(y),
marker='+', color=flierscolor,
ls='', markersize=20, markeredgewidth=3))

Expand Down Expand Up @@ -440,7 +440,7 @@ def plot(xdata, ydata):
verticalalignment='bottom'))
y = y[np.isfinite(y)]

dictboxwhisker = boxplot(list(10**np.array(i) for i in ydata),
dictboxwhisker = boxplot(list(10**np.asarray(i) for i in ydata),
sym='', notch=0, widths=None,
positions=xdata)
#'medians', 'fliers', 'whiskers', 'boxes', 'caps'
Expand Down Expand Up @@ -775,7 +775,7 @@ def generateFigure(dsList, CrE=0., isStoringXRange=True, outputdir='.',
#Aggregate over functions.
ydata.append(np.log10(list(data[f][i] for f in data)))

xdata = np.log10(np.array(EVALS)/d)
xdata = np.log10(np.asarray(EVALS)/d)
xticklabels = ['']
xticklabels.extend('%d' % i for i in xdata[1:])
plot(xdata, ydata)
Expand Down
Loading
Loading