From a070da4610b3e4aab79d15151511eada2f5c874f Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Wed, 9 Nov 2022 18:52:21 +0100 Subject: [PATCH 1/9] Show progress bar while fitting to training data --- autosklearn/automl.py | 5 +++++ autosklearn/estimators.py | 3 +++ autosklearn/util/progress_bar.py | 38 ++++++++++++++++++++++++++++++++ 3 files changed, 46 insertions(+) create mode 100644 autosklearn/util/progress_bar.py diff --git a/autosklearn/automl.py b/autosklearn/automl.py index e242fbbc08..4a7bd4f0bb 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -120,6 +120,7 @@ warnings_to, ) from autosklearn.util.parallel import preload_modules +from autosklearn.util.progress_bar import ProgressBar from autosklearn.util.smac_wrap import SMACCallback, SmacRunCallback from autosklearn.util.stopwatch import StopWatch @@ -239,6 +240,7 @@ def __init__( get_trials_callback: SMACCallback | None = None, dataset_compression: bool | Mapping[str, Any] = True, allow_string_features: bool = True, + disable_progress_bar: bool = False, ): super().__init__() @@ -295,6 +297,7 @@ def __init__( self.logging_config = logging_config self.precision = precision self.allow_string_features = allow_string_features + self.disable_progress_bar = disable_progress_bar self._initial_configurations_via_metalearning = ( initial_configurations_via_metalearning ) @@ -597,6 +600,7 @@ def fit( ------- self """ + progress_bar = ProgressBar(total=self._time_for_task, disable=self.disable_progress_bar) if (X_test is not None) ^ (y_test is not None): raise ValueError("Must provide both X_test and y_test together") @@ -961,6 +965,7 @@ def fit( self._logger.exception(e) raise e finally: + progress_bar.stop() self._fit_cleanup() self.fitted = True diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 1a094d2582..130651a334 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -76,6 +76,7 @@ def __init__( get_trials_callback: SMACCallback | None = None, 
dataset_compression: Union[bool, Mapping[str, Any]] = True, allow_string_features: bool = True, + disable_progress_bar: bool = False, ): """ Parameters @@ -475,6 +476,7 @@ def __init__( self.get_trials_callback = get_trials_callback self.dataset_compression = dataset_compression self.allow_string_features = allow_string_features + self.disable_progress_bar = disable_progress_bar self.automl_ = None # type: Optional[AutoML] @@ -525,6 +527,7 @@ def build_automl(self): get_trials_callback=self.get_trials_callback, dataset_compression=self.dataset_compression, allow_string_features=self.allow_string_features, + disable_progress_bar=self.disable_progress_bar ) return automl diff --git a/autosklearn/util/progress_bar.py b/autosklearn/util/progress_bar.py new file mode 100644 index 0000000000..d40d3bdfc5 --- /dev/null +++ b/autosklearn/util/progress_bar.py @@ -0,0 +1,38 @@ +import time + +from threading import Thread +from tqdm import trange + + +class ProgressBar(Thread): + """A Thread that displays a tqdm progress bar in the console.""" + + def __init__(self, total: float, update_interval: float = 1.0, disable: bool = False): + """ + Parameters + ---------- + total: the total amount that the progress bar should reach + update_interval: reduce this to update the progress bar more frequently + disable: flag that turns on or off the progress bar. If false, then no thread is started or created. 
+ """ + self.disable = disable + if not disable: + super().__init__(name="_progressbar_") + self.total = total + self.update_interval = update_interval + self.terminated: bool = False + self.start() + + def run(self): + if not self.disable: + for _ in trange(self.total, colour="green"): + if not self.terminated: + time.sleep(self.update_interval) + else: + pass # max out the bar + + def stop(self): + if not self.disable: + self.terminated = True + super().join() + From 3431de211844c8fff080f5a202364e561db69e48 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Thu, 10 Nov 2022 18:11:44 +0100 Subject: [PATCH 2/9] Minor fixes for progress bar --- autosklearn/automl.py | 6 +++- autosklearn/estimators.py | 2 +- autosklearn/experimental/askl2.py | 2 ++ autosklearn/util/progress_bar.py | 52 +++++++++++++++++++++---------- requirements.txt | 17 +++++++--- 5 files changed, 56 insertions(+), 23 deletions(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 4a7bd4f0bb..a2e1ee913f 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -600,7 +600,6 @@ def fit( ------- self """ - progress_bar = ProgressBar(total=self._time_for_task, disable=self.disable_progress_bar) if (X_test is not None) ^ (y_test is not None): raise ValueError("Must provide both X_test and y_test together") @@ -648,6 +647,11 @@ def fit( self._backend.save_start_time(self._seed) self._stopwatch = StopWatch() + progress_bar = ProgressBar( + total=self._time_for_task, + disable=self.disable_progress_bar, + final_message="Running cleanup...", + ) # Make sure that input is valid # Performs Ordinal one hot encoding to the target diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 130651a334..6039476b7a 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -527,7 +527,7 @@ def build_automl(self): get_trials_callback=self.get_trials_callback, dataset_compression=self.dataset_compression, allow_string_features=self.allow_string_features, - 
disable_progress_bar=self.disable_progress_bar + disable_progress_bar=self.disable_progress_bar, ) return automl diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py index 317f0be5b1..9649d5a371 100644 --- a/autosklearn/experimental/askl2.py +++ b/autosklearn/experimental/askl2.py @@ -166,6 +166,7 @@ def __init__( load_models: bool = True, dataset_compression: Union[bool, Mapping[str, Any]] = True, allow_string_features: bool = True, + disable_progress_bar: bool = False, ): """ @@ -337,6 +338,7 @@ def __init__( scoring_functions=scoring_functions, load_models=load_models, allow_string_features=allow_string_features, + disable_progress_bar=disable_progress_bar, ) def train_selectors(self, selected_metric=None): diff --git a/autosklearn/util/progress_bar.py b/autosklearn/util/progress_bar.py index d40d3bdfc5..b9566ea46c 100644 --- a/autosklearn/util/progress_bar.py +++ b/autosklearn/util/progress_bar.py @@ -1,38 +1,58 @@ import time - from threading import Thread + from tqdm import trange class ProgressBar(Thread): - """A Thread that displays a tqdm progress bar in the console.""" + """ + A Thread that displays a tqdm progress bar in the console. - def __init__(self, total: float, update_interval: float = 1.0, disable: bool = False): - """ - Parameters - ---------- - total: the total amount that the progress bar should reach - update_interval: reduce this to update the progress bar more frequently - disable: flag that turns on or off the progress bar. If false, then no thread is started or created. - """ + Parameters + ---------- + total : float + The total amount that should be reached by the progress bar once it finishes + update_interval : float + Specifies how frequently the progress bar is updated (in seconds) + disable : bool + Turns on or off the progress bar. If True, this thread won't be started or initialized + final_message : str + Optional message, which is printed out on a new line once the bar is maxed out. 
+ """ + + def __init__( + self, + total: float, + update_interval: float = 1.0, + disable: bool = False, + final_message: str = None, + ): self.disable = disable if not disable: super().__init__(name="_progressbar_") self.total = total self.update_interval = update_interval + self.final_message = final_message self.terminated: bool = False self.start() - def run(self): + def run(self) -> None: + """ + Overrides the run method of Thread. It displays a tqdm progress bar in the console. + + """ if not self.disable: - for _ in trange(self.total, colour="green"): + for _ in trange( + self.total, colour="green", desc="Fitting to the training data" + ): if not self.terminated: time.sleep(self.update_interval) - else: - pass # max out the bar + print(self.final_message) - def stop(self): + def stop(self) -> None: + """ + Terminates the thread. + """ if not self.disable: self.terminated = True super().join() - diff --git a/requirements.txt b/requirements.txt index 76af7f4a06..a96928e9a9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,21 +1,28 @@ -setuptools +setuptools~=63.4.1 typing_extensions -distro +distro~=1.8.0 numpy>=1.9.0 scipy>=1.7.0 -joblib +joblib~=1.2.0 scikit-learn>=0.24.0,<0.25.0 dask>=2021.12 distributed>=2012.12 -pyyaml +pyyaml~=6.0 pandas>=1.0 liac-arff -threadpoolctl +threadpoolctl~=3.1.0 ConfigSpace>=0.4.21,<0.5 pynisher>=0.6.3,<0.7 pyrfr>=0.8.1,<0.9 smac>=1.2,<1.3 + +pytest~=7.1.2 +filelock~=3.6.0 +psutil~=5.9.2 +openml~=0.12.2 +matplotlib~=3.6.1 +tqdm~=4.64.1 \ No newline at end of file From 292a6048c82c71f60183ee75989bac3212da14fe Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Thu, 10 Nov 2022 18:18:27 +0100 Subject: [PATCH 3/9] Revert accidental changes to requirements.txt --- requirements.txt | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/requirements.txt b/requirements.txt index a96928e9a9..d47fb91474 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,28 +1,22 @@ -setuptools~=63.4.1 
+setuptools typing_extensions -distro~=1.8.0 +distro numpy>=1.9.0 scipy>=1.7.0 -joblib~=1.2.0 +joblib scikit-learn>=0.24.0,<0.25.0 dask>=2021.12 distributed>=2012.12 -pyyaml~=6.0 +pyyaml pandas>=1.0 liac-arff -threadpoolctl~=3.1.0 +threadpoolctl +tqdm ConfigSpace>=0.4.21,<0.5 pynisher>=0.6.3,<0.7 pyrfr>=0.8.1,<0.9 -smac>=1.2,<1.3 - -pytest~=7.1.2 -filelock~=3.6.0 -psutil~=5.9.2 -openml~=0.12.2 -matplotlib~=3.6.1 -tqdm~=4.64.1 \ No newline at end of file +smac>=1.2,<1.3 \ No newline at end of file From 12d37186384db0095a25aeff564151c5204f4c18 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Thu, 10 Nov 2022 22:52:13 +0100 Subject: [PATCH 4/9] Document changes --- autosklearn/automl.py | 8 +++----- autosklearn/estimators.py | 4 ++++ autosklearn/experimental/askl2.py | 4 ++++ autosklearn/util/progress_bar.py | 29 ++++++++++++++++++----------- 4 files changed, 29 insertions(+), 16 deletions(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index a2e1ee913f..3c46e69c84 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -629,6 +629,9 @@ def fit( # By default try to use the TCP logging port or get a new port self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT + progress_bar = ProgressBar( + total=self._time_for_task, disable=self.disable_progress_bar + ) # Once we start the logging server, it starts in a new process # If an error occurs then we want to make sure that we exit cleanly # and shut it down, else it might hang @@ -647,11 +650,6 @@ def fit( self._backend.save_start_time(self._seed) self._stopwatch = StopWatch() - progress_bar = ProgressBar( - total=self._time_for_task, - disable=self.disable_progress_bar, - final_message="Running cleanup...", - ) # Make sure that input is valid # Performs Ordinal one hot encoding to the target diff --git a/autosklearn/estimators.py b/autosklearn/estimators.py index 6039476b7a..577265239e 100644 --- a/autosklearn/estimators.py +++ b/autosklearn/estimators.py @@ -382,6 +382,10 @@ def __init__( 
Whether autosklearn should process string features. By default the textpreprocessing is enabled. + disable_progress_bar: bool = False + Whether to disable the progress bar that is displayed in the console + while fitting to the training data. + Attributes ---------- cv_results_ : dict of numpy (masked) ndarrays diff --git a/autosklearn/experimental/askl2.py b/autosklearn/experimental/askl2.py index 9649d5a371..b712ba484e 100644 --- a/autosklearn/experimental/askl2.py +++ b/autosklearn/experimental/askl2.py @@ -285,6 +285,10 @@ def __init__( load_models : bool, optional (True) Whether to load the models after fitting Auto-sklearn. + disable_progress_bar: bool = False + Whether to disable the progress bar that is displayed in the console + while fitting to the training data. + Attributes ---------- diff --git a/autosklearn/util/progress_bar.py b/autosklearn/util/progress_bar.py index b9566ea46c..5f739cf813 100644 --- a/autosklearn/util/progress_bar.py +++ b/autosklearn/util/progress_bar.py @@ -1,53 +1,60 @@ +import datetime import time from threading import Thread -from tqdm import trange +from tqdm import trange # type: ignore class ProgressBar(Thread): """ A Thread that displays a tqdm progress bar in the console. + It is specialized to display information relevant to fitting to the training data + with auto-sklearn. + Parameters ---------- - total : float + total : int The total amount that should be reached by the progress bar once it finishes update_interval : float Specifies how frequently the progress bar is updated (in seconds) disable : bool - Turns on or off the progress bar. If True, this thread won't be started or initialized - final_message : str - Optional message, which is printed out on a new line once the bar is maxed out. + Turns on or off the progress bar. If True, this thread won't be started or + initialized. 
""" def __init__( self, - total: float, + total: int, update_interval: float = 1.0, disable: bool = False, - final_message: str = None, ): self.disable = disable if not disable: super().__init__(name="_progressbar_") self.total = total self.update_interval = update_interval - self.final_message = final_message self.terminated: bool = False + # start this thread self.start() def run(self) -> None: """ - Overrides the run method of Thread. It displays a tqdm progress bar in the console. + Overrides the run method of Thread. It displays a tqdm progress bar in the + console with useful descriptions about the task. """ if not self.disable: for _ in trange( - self.total, colour="green", desc="Fitting to the training data" + self.total, + colour="green", + desc="Fitting to the training data", + postfix=f"The total time budget for this task is" + f" {datetime.timedelta(seconds=self.total)}", ): if not self.terminated: time.sleep(self.update_interval) - print(self.final_message) + print("Finishing up the task...") def stop(self) -> None: """ From 62a7d7a99cb1e42f8a28634951353ca32c03729b Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Fri, 11 Nov 2022 23:46:59 +0100 Subject: [PATCH 5/9] Skip type checks for tqdm --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 40ea854030..a696c0fb46 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -155,7 +155,8 @@ module = [ "setuptools.*", "pkg_resources.*", "yaml.*", - "psutil.*" + "psutil.*", + "tqdm.*", ] ignore_missing_imports = true From 1781d49096c79fe0f92d9b3e79159337276d5f09 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Fri, 11 Nov 2022 23:47:41 +0100 Subject: [PATCH 6/9] Make progress bar more flexible with kwargs --- autosklearn/automl.py | 5 ++++- autosklearn/util/progress_bar.py | 31 +++++++++++++++++-------------- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/autosklearn/automl.py b/autosklearn/automl.py index 
3c46e69c84..93fde84330 100644 --- a/autosklearn/automl.py +++ b/autosklearn/automl.py @@ -630,7 +630,10 @@ def fit( self._logger_port = logging.handlers.DEFAULT_TCP_LOGGING_PORT progress_bar = ProgressBar( - total=self._time_for_task, disable=self.disable_progress_bar + total=self._time_for_task, + disable=self.disable_progress_bar, + desc="Fitting to the training data", + colour="green", ) # Once we start the logging server, it starts in a new process # If an error occurs then we want to make sure that we exit cleanly diff --git a/autosklearn/util/progress_bar.py b/autosklearn/util/progress_bar.py index 5f739cf813..7653efa60c 100644 --- a/autosklearn/util/progress_bar.py +++ b/autosklearn/util/progress_bar.py @@ -1,13 +1,14 @@ +from typing import Any + import datetime import time from threading import Thread -from tqdm import trange # type: ignore +from tqdm import trange class ProgressBar(Thread): - """ - A Thread that displays a tqdm progress bar in the console. + """A Thread that displays a tqdm progress bar in the console. It is specialized to display information relevant to fitting to the training data with auto-sklearn. @@ -21,6 +22,10 @@ class ProgressBar(Thread): disable : bool Turns on or off the progress bar. If True, this thread won't be started or initialized. + kwargs : Any + Keyword arguments that are passed on to tqdm, refer to: + https://tqdm.github.io/docs/tqdm/. Note that postfix can not be specified + as a kwarg since it is already passed to tqdm by this class. """ def __init__( @@ -28,6 +33,7 @@ def __init__( total: int, update_interval: float = 1.0, disable: bool = False, + **kwargs: Any, ): self.disable = disable if not disable: @@ -35,31 +41,28 @@ def __init__( self.total = total self.update_interval = update_interval self.terminated: bool = False + self.kwargs = kwargs # start this thread self.start() def run(self) -> None: - """ - Overrides the run method of Thread. 
It displays a tqdm progress bar in the - console with useful descriptions about the task. + """Display a tqdm progress bar in the console. + Additionally, it shows useful information related to the task. This method + overrides the run method of Thread. """ if not self.disable: for _ in trange( self.total, - colour="green", - desc="Fitting to the training data", - postfix=f"The total time budget for this task is" - f" {datetime.timedelta(seconds=self.total)}", + postfix=f"The total time budget for this task is " + f"{datetime.timedelta(seconds=self.total)}", + **self.kwargs, ): if not self.terminated: time.sleep(self.update_interval) - print("Finishing up the task...") def stop(self) -> None: - """ - Terminates the thread. - """ + """Terminates the thread.""" if not self.disable: self.terminated = True super().join() From e5b187174e4261a9566e2462612cb9d88f60e426 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Fri, 11 Nov 2022 23:53:42 +0100 Subject: [PATCH 7/9] Fix link checker make command in CONTRIBUTE.md --- CONTRIBUTING.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 73ce781618..dfffc2fcf1 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -252,10 +252,11 @@ Lastly, if the feature really is a game changer or you're very proud of it, cons make doc ``` * If you're unfamiliar with sphinx, it's a documentation generator which can read comments and docstrings from within the code and generate html documentation. - * If you've added documentation, we also has a command `linkcheck` for making sure all the links correctly go to some destination. + * If you've added documentation, we also have a command `links` for making + sure all the links correctly go to some destination. This helps tests for dead links or accidental typos. 
```bash - make linkcheck + make links ``` * We also use sphinx-gallery which can take python files (such as those in the `examples` folder) and run them, creating html which shows the code and the output it generates. ```bash @@ -396,7 +397,7 @@ Lastly, if the feature really is a game changer or you're very proud of it, cons # If you changed documentation: # This will generate all documentation and check links make doc - make linkcheck + make links make examples # mainly needed if you modified some examples # ... fix any issues From 3cf305b637579a70810d725ee2e4efe72fafce89 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Sat, 12 Nov 2022 00:25:59 +0100 Subject: [PATCH 8/9] Update doc link to be sphinx compatible --- autosklearn/util/progress_bar.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/autosklearn/util/progress_bar.py b/autosklearn/util/progress_bar.py index 7653efa60c..7ccd3bc153 100644 --- a/autosklearn/util/progress_bar.py +++ b/autosklearn/util/progress_bar.py @@ -23,9 +23,9 @@ class ProgressBar(Thread): Turns on or off the progress bar. If True, this thread won't be started or initialized. kwargs : Any - Keyword arguments that are passed on to tqdm, refer to: - https://tqdm.github.io/docs/tqdm/. Note that postfix can not be specified - as a kwarg since it is already passed to tqdm by this class. + Keyword arguments that are passed into tqdm's constructor. Refer to: + `tqdm <https://tqdm.github.io/docs/tqdm/>`_. Note that postfix can not be + specified in the kwargs since it is already passed into tqdm by this class.
""" def __init__( From 143b7c458f36eea27d950639aaced24bb0159ee2 Mon Sep 17 00:00:00 2001 From: Aron Bahram Date: Sat, 12 Nov 2022 00:28:30 +0100 Subject: [PATCH 9/9] Switch to pytest-forked from pytest-xdist --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index aa6e42669e..6e37e0e711 100644 --- a/setup.py +++ b/setup.py @@ -32,7 +32,7 @@ "test": [ "pytest>=4.6", "pytest-cov", - "pytest-xdist", + "pytest-forked", "pytest-timeout", "pytest-cases>=3.6.11", "mypy",