From 6d7ebd9cb66a1211c25014b65c0455bdca79b9a7 Mon Sep 17 00:00:00 2001 From: Ethan Harris Date: Tue, 23 Nov 2021 12:41:57 +0000 Subject: [PATCH] GPU CI Improvements (#992) --- .azure-pipelines/gpu-example-tests.yml | 16 +++++ .../{gpu-tests.yml => gpu-special-tests.yml} | 24 +++---- .azure-pipelines/testing-template.yml | 69 +++++++++++++++++++ .gitignore | 1 + CHANGELOG.md | 4 ++ flash/core/finetuning.py | 2 - flash/core/model.py | 9 +++ flash/core/trainer.py | 3 + flash/image/classification/model.py | 2 +- flash/text/classification/data.py | 2 +- flash/text/classification/model.py | 2 +- flash/text/seq2seq/summarization/model.py | 2 +- flash/text/seq2seq/translation/model.py | 12 ++-- .../image_classification_imagenette_mini.py | 6 +- flash_examples/summarization.py | 2 +- flash_examples/text_classification.py | 2 +- flash_examples/translation.py | 2 +- tests/examples/test_integrations.py | 9 ++- tests/examples/test_scripts.py | 8 +-- tests/examples/utils.py | 2 +- 20 files changed, 138 insertions(+), 41 deletions(-) create mode 100644 .azure-pipelines/gpu-example-tests.yml rename .azure-pipelines/{gpu-tests.yml => gpu-special-tests.yml} (87%) create mode 100644 .azure-pipelines/testing-template.yml diff --git a/.azure-pipelines/gpu-example-tests.yml b/.azure-pipelines/gpu-example-tests.yml new file mode 100644 index 0000000000..58db4918d8 --- /dev/null +++ b/.azure-pipelines/gpu-example-tests.yml @@ -0,0 +1,16 @@ +trigger: + branches: + include: ["master"] +pr: + branches: + include: ["master"] + autoCancel: true + drafts: true + +jobs: +- template: testing-template.yml + parameters: + configs: + - "image" + - "text" + - "tabular" diff --git a/.azure-pipelines/gpu-tests.yml b/.azure-pipelines/gpu-special-tests.yml similarity index 87% rename from .azure-pipelines/gpu-tests.yml rename to .azure-pipelines/gpu-special-tests.yml index 3f8ba3652f..d18c237efe 100644 --- a/.azure-pipelines/gpu-tests.yml +++ b/.azure-pipelines/gpu-special-tests.yml @@ -2,19 +2,19 @@ # Create and test a Python package on multiple Python versions. 
# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: # https://docs.microsoft.com/azure/devops/pipelines/languages/python -trigger: none - -pr: none - -schedules: -- cron: "0 0 * * *" - displayName: Daily midnight testing +trigger: + tags: + include: + - '*' branches: include: - - master + - "master" + - "refs/tags/*" +pr: + - "master" jobs: - - job: pytest + - job: special # how long to run the job before automatically cancelling timeoutInMinutes: 45 # how much time to give 'run always even if cancelled tasks' before stopping them @@ -50,15 +50,11 @@ jobs: - bash: | # python -m pip install "pip==20.1" - pip install '.[all]' + pip install '.[image]' learn2learn pip install '.[test]' --upgrade-strategy only-if-needed pip list displayName: 'Install dependencies' - - bash: | - python -m coverage run --source flash -m pytest flash tests/examples/test_scripts.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30 - displayName: 'Testing' - - bash: | bash tests/special_tests.sh displayName: 'Testing: special' diff --git a/.azure-pipelines/testing-template.yml b/.azure-pipelines/testing-template.yml new file mode 100644 index 0000000000..50e9f540cb --- /dev/null +++ b/.azure-pipelines/testing-template.yml @@ -0,0 +1,69 @@ +jobs: +- ${{ each config in parameters.configs }}: + - job: + displayName: ${{config}} + # how long to run the job before automatically cancelling + timeoutInMinutes: 45 + # how much time to give 'run always even if cancelled tasks' before stopping them + cancelTimeoutInMinutes: 2 + + pool: azure-gpus-spot + # this need to have installed docker in the base image... + container: + # base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04 + image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.8" + # image: "pytorch/pytorch:1.8.1-cuda11.0-cudnn8-runtime" + options: "-it --rm --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g" + + workspace: + clean: all + steps: + + - bash: | + lspci | egrep 'VGA|3D' + whereis nvidia + nvidia-smi + python --version + pip --version + pip list + df -kh /dev/shm + displayName: 'Image info & NVIDIA' + + - bash: | + python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'" + displayName: 'Sanity check' + + - bash: | + # python -m pip install "pip==20.1" + pip install '.[${{config}}]' + pip install '.[test]' --upgrade-strategy only-if-needed + pip list + displayName: 'Install dependencies' + + - bash: | + python -m coverage run --source flash -m pytest flash tests/examples/test_scripts.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30 + displayName: 'Testing' + + - bash: | + python -m coverage report + python -m coverage xml + python -m coverage html + python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure + ls -l + displayName: 'Statistics' + + - task: PublishTestResults@2 + displayName: 'Publish test results' + inputs: + testResultsFiles: '$(Build.StagingDirectory)/test-results.xml' + testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)' + condition: succeededOrFailed() + + - task: PublishCodeCoverageResults@1 + displayName: 'Publish coverage report' + inputs: + codeCoverageTool: 'cobertura' + summaryFileLocation: 'coverage.xml' + reportDirectory: '$(Build.SourcesDirectory)/htmlcov' + testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - 
Python $(python.version)' + condition: succeededOrFailed() diff --git a/.gitignore b/.gitignore index ce993720d2..c4ebcfd822 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,4 @@ urban8k_images/ __MACOSX *-v2.0.json cifar-10* +mini-imagenet* diff --git a/CHANGELOG.md b/CHANGELOG.md index d26cdcd97c..45ebfa166e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/). - Fixed a bug where Flash could not be used with IceVision 0.11.0 ([#989](https://github.com/PyTorchLightning/lightning-flash/pull/989)) +- Fixed a bug where backbone weights were sometimes not frozen correctly ([#992](https://github.com/PyTorchLightning/lightning-flash/pull/992)) + +- Fixed a bug where translation metrics were not computed correctly ([#992](https://github.com/PyTorchLightning/lightning-flash/pull/992)) + ### Removed - Removed `OutputMapping` ([#939](https://github.com/PyTorchLightning/lightning-flash/pull/939)) diff --git a/flash/core/finetuning.py b/flash/core/finetuning.py index 8e08e095ba..12bfe7907d 100644 --- a/flash/core/finetuning.py +++ b/flash/core/finetuning.py @@ -158,8 +158,6 @@ def finetune_function( self._freeze_unfreeze_function(pl_module, epoch, optimizer, opt_idx, self.strategy_metadata) elif self.strategy == FinetuningStrategies.UNFREEZE_MILESTONES: self._unfreeze_milestones_function(pl_module, epoch, optimizer, opt_idx, self.strategy_metadata) - else: - pass # Used for properly verifying input and providing neat and helpful error messages for users. diff --git a/flash/core/model.py b/flash/core/model.py index 5ed9b99a63..073c9baa1c 100644 --- a/flash/core/model.py +++ b/flash/core/model.py @@ -535,6 +535,15 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A batch = torch.stack(batch) return self(batch) + def modules_to_freeze(self) -> Optional[Union[nn.Module]]: + """By default, we try to get the ``backbone`` attribute from the task and return it or ``None`` if not + present. + + Returns: + The backbone ``Module`` to freeze or ``None`` if this task does not have a ``backbone`` attribute. 
+ """ + return getattr(self, "backbone", None) + def _get_optimizer_class_from_registry(self, optimizer_key: str) -> Optimizer: if optimizer_key.lower() not in self.available_optimizers(): raise KeyError( diff --git a/flash/core/trainer.py b/flash/core/trainer.py index 3217a98b72..34cce32db1 100644 --- a/flash/core/trainer.py +++ b/flash/core/trainer.py @@ -85,6 +85,9 @@ def __init__(self, *args, serve_sanity_check: bool = False, **kwargs): kwargs["fast_dev_run"] = False else: kwargs["fast_dev_run"] = True + kwargs["gpus"] = None + kwargs["accelerator"] = None + kwargs["precision"] = 32 super().__init__(*args, **kwargs) self.serve_sanity_check = serve_sanity_check diff --git a/flash/image/classification/model.py b/flash/image/classification/model.py index 90e0181523..8fd6ec742b 100644 --- a/flash/image/classification/model.py +++ b/flash/image/classification/model.py @@ -152,6 +152,6 @@ def available_pretrained_weights(cls, backbone: str): def _ci_benchmark_fn(self, history: List[Dict[str, Any]]): """This function is used only for debugging usage with CI.""" if self.hparams.multi_label: - assert history[-1]["val_f1"] > 0.40, history[-1]["val_f1"] + assert history[-1]["val_f1"] > 0.30, history[-1]["val_f1"] else: assert history[-1]["val_accuracy"] > 0.85, history[-1]["val_accuracy"] diff --git a/flash/text/classification/data.py b/flash/text/classification/data.py index b0e4ca5841..6309d951c9 100644 --- a/flash/text/classification/data.py +++ b/flash/text/classification/data.py @@ -259,7 +259,7 @@ def __init__( val_transform: Optional[Dict[str, Callable]] = None, test_transform: Optional[Dict[str, Callable]] = None, predict_transform: Optional[Dict[str, Callable]] = None, - backbone: str = "prajjwal1/bert-tiny", + backbone: str = "prajjwal1/bert-medium", max_length: int = 128, ): self.backbone = backbone diff --git a/flash/text/classification/model.py b/flash/text/classification/model.py index 1e0b8f3abb..4a468295c4 100644 --- a/flash/text/classification/model.py +++ b/flash/text/classification/model.py @@ -57,7 +57,7 @@ class TextClassifier(ClassificationTask): def __init__( self, num_classes: int, - backbone: str = "prajjwal1/bert-tiny", + backbone: str = "prajjwal1/bert-medium", loss_fn: LOSS_FN_TYPE = None, optimizer: OPTIMIZER_TYPE = "Adam", lr_scheduler: LR_SCHEDULER_TYPE = None, diff --git a/flash/text/seq2seq/summarization/model.py b/flash/text/seq2seq/summarization/model.py index 6067eb5ceb..3b0147465f 100644 --- a/flash/text/seq2seq/summarization/model.py +++ b/flash/text/seq2seq/summarization/model.py @@ -89,4 +89,4 @@ def compute_metrics(self, generated_tokens: torch.Tensor, batch: Dict, prefix: s @staticmethod def _ci_benchmark_fn(history: List[Dict[str, Any]]): """This function is used only for debugging usage with CI.""" - assert history[-1]["rouge1_recall"] > 0.2 + assert history[-1]["rouge1_recall"] > 0.18, history[-1]["rouge1_recall"] diff --git a/flash/text/seq2seq/translation/model.py b/flash/text/seq2seq/translation/model.py index 553adb6b7a..d93e03ee04 100644 --- a/flash/text/seq2seq/translation/model.py +++ b/flash/text/seq2seq/translation/model.py @@ -81,13 +81,17 @@ def task(self) -> str: return "translation" def compute_metrics(self, generated_tokens, batch, prefix): - tgt_lns = self.tokenize_labels(batch["labels"]) + reference_corpus = self.tokenize_labels(batch["labels"]) # wrap targets in list as score expects a list of potential references - tgt_lns = [[reference] for reference in tgt_lns] - result = self.bleu(self._output_transform.uncollate(generated_tokens), 
tgt_lns) + reference_corpus = [[reference] for reference in reference_corpus] + + translate_corpus = self._output_transform.uncollate(generated_tokens) + translate_corpus = [line for line in translate_corpus] + + result = self.bleu(reference_corpus, translate_corpus) self.log(f"{prefix}_bleu_score", result, on_step=False, on_epoch=True, prog_bar=True) @staticmethod def _ci_benchmark_fn(history: List[Dict[str, Any]]): """This function is used only for debugging usage with CI.""" - assert history[-1]["val_bleu_score"] > 0.6 + assert history[-1]["val_bleu_score"] > 0.6, history[-1]["val_bleu_score"] diff --git a/flash_examples/integrations/learn2learn/image_classification_imagenette_mini.py b/flash_examples/integrations/learn2learn/image_classification_imagenette_mini.py index 00890af201..1459acca63 100644 --- a/flash_examples/integrations/learn2learn/image_classification_imagenette_mini.py +++ b/flash_examples/integrations/learn2learn/image_classification_imagenette_mini.py @@ -33,7 +33,6 @@ # download MiniImagenet train_dataset = l2l.vision.datasets.MiniImagenet(root="data", mode="train", download=True) val_dataset = l2l.vision.datasets.MiniImagenet(root="data", mode="validation", download=True) -test_dataset = l2l.vision.datasets.MiniImagenet(root="data", mode="test", download=True) train_transform = { "to_tensor_transform": nn.Sequential( @@ -69,9 +68,6 @@ train_targets=torch.from_numpy(train_dataset.y.astype(int)), val_data=val_dataset.x, val_targets=torch.from_numpy(val_dataset.y.astype(int)), - test_data=test_dataset.x, - test_targets=torch.from_numpy(test_dataset.y.astype(int)), - num_workers=4, train_transform=train_transform, ) @@ -90,7 +86,7 @@ "test_queries": 15, }, optimizer=torch.optim.Adam, - optimizer_kwargs={"lr": 0.001}, + learning_rate=0.001, ) trainer = flash.Trainer( diff --git a/flash_examples/summarization.py b/flash_examples/summarization.py index c032258fbd..5433805be3 100644 --- a/flash_examples/summarization.py +++ b/flash_examples/summarization.py @@ -30,7 +30,7 @@ # 3. Create the trainer and finetune the model trainer = Trainer(max_epochs=3) -trainer.finetune(model, datamodule=datamodule) +trainer.finetune(model, datamodule=datamodule, strategy="freeze") # 4. Summarize some text! predictions = model.predict( diff --git a/flash_examples/text_classification.py b/flash_examples/text_classification.py index bdeedbeb94..9e8a0b6856 100644 --- a/flash_examples/text_classification.py +++ b/flash_examples/text_classification.py @@ -25,7 +25,7 @@ "sentiment", train_file="data/imdb/train.csv", val_file="data/imdb/valid.csv", - backbone="prajjwal1/bert-tiny", + backbone="prajjwal1/bert-medium", ) # 2. Build the task diff --git a/flash_examples/translation.py b/flash_examples/translation.py index fc82bb767a..30f7c3053a 100644 --- a/flash_examples/translation.py +++ b/flash_examples/translation.py @@ -33,7 +33,7 @@ # 3. Create the trainer and finetune the model trainer = flash.Trainer(max_epochs=3, gpus=torch.cuda.device_count()) -trainer.finetune(model, datamodule=datamodule) +trainer.finetune(model, datamodule=datamodule, strategy="freeze") # 4. Translate something! 
predictions = model.predict( diff --git a/tests/examples/test_integrations.py b/tests/examples/test_integrations.py index 4923099df4..c7f66f8207 100644 --- a/tests/examples/test_integrations.py +++ b/tests/examples/test_integrations.py @@ -17,7 +17,7 @@ import pytest -from flash.core.utilities.imports import _BAAL_AVAILABLE, _FIFTYONE_AVAILABLE, _IMAGE_AVAILABLE +from flash.core.utilities.imports import _BAAL_AVAILABLE, _FIFTYONE_AVAILABLE, _IMAGE_AVAILABLE, _LEARN2LEARN_AVAILABLE from tests.examples.utils import run_test root = Path(__file__).parent.parent.parent @@ -39,6 +39,13 @@ "image_classification_active_learning.py", marks=pytest.mark.skipif(not (_IMAGE_AVAILABLE and _BAAL_AVAILABLE), reason="baal library isn't installed"), ), + pytest.param( + "learn2learn", + "image_classification_imagenette_mini.py", + marks=pytest.mark.skipif( + not (_IMAGE_AVAILABLE and _LEARN2LEARN_AVAILABLE), reason="learn2learn isn't installed" + ), + ), ], ) def test_integrations(tmpdir, folder, file): diff --git a/tests/examples/test_scripts.py b/tests/examples/test_scripts.py index 6593863fc9..033ee35b3d 100644 --- a/tests/examples/test_scripts.py +++ b/tests/examples/test_scripts.py @@ -17,7 +17,7 @@ import pytest -from flash.core.utilities.imports import _LEARN2LEARN_AVAILABLE, _SKLEARN_AVAILABLE +from flash.core.utilities.imports import _SKLEARN_AVAILABLE from tests.examples.utils import run_test from tests.helpers.utils import ( _AUDIO_TESTING, @@ -52,12 +52,6 @@ "image_classification_multi_label.py", marks=pytest.mark.skipif(not _IMAGE_TESTING, reason="image libraries aren't installed"), ), - pytest.param( - "image_classification_meta_learning.py.py", - marks=pytest.mark.skipif( - not (_IMAGE_TESTING and _LEARN2LEARN_AVAILABLE), reason="image/learn2learn libraries aren't installed" - ), - ), # pytest.param("finetuning", "object_detection.py"), # TODO: takes too long. pytest.param( "question_answering.py", diff --git a/tests/examples/utils.py b/tests/examples/utils.py index cf713fcbd1..6a8ef4dbb3 100644 --- a/tests/examples/utils.py +++ b/tests/examples/utils.py @@ -51,4 +51,4 @@ def run_test(filepath): code, stdout, stderr = call_script(filepath) print(f"{filepath} STDOUT: {stdout}") print(f"{filepath} STDERR: {stderr}") - assert not code + assert not code, code
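
---

Note (illustrative, not part of the patch): the new ``modules_to_freeze()`` hook added to ``flash/core/model.py`` is what a freeze-style finetuning strategy can rely on to locate the module whose weights should stop training, which is what the updated examples exercise by passing ``strategy="freeze"`` to ``trainer.finetune(...)``. The sketch below shows that contract under this assumption; ``DemoTask`` and ``freeze_modules`` are hypothetical names used only for illustration and are not part of the Flash API.

    from typing import Optional

    from torch import nn


    class DemoTask(nn.Module):
        """Hypothetical stand-in for a Flash ``Task`` with a backbone and a head."""

        def __init__(self) -> None:
            super().__init__()
            self.backbone = nn.Linear(8, 8)  # pretend pretrained feature extractor
            self.head = nn.Linear(8, 2)      # task-specific head that should keep training

        def modules_to_freeze(self) -> Optional[nn.Module]:
            # Mirrors the hook added in this patch: return the ``backbone``
            # attribute if the task has one, otherwise ``None``.
            return getattr(self, "backbone", None)


    def freeze_modules(task: DemoTask) -> None:
        """Freeze whatever the task reports via ``modules_to_freeze()``."""
        module = task.modules_to_freeze()
        if module is not None:
            for param in module.parameters():
                param.requires_grad = False


    task = DemoTask()
    freeze_modules(task)
    # Backbone parameters are frozen, head parameters still train.
    assert all(not p.requires_grad for p in task.backbone.parameters())
    assert all(p.requires_grad for p in task.head.parameters())

With this hook in place, the user-facing change in the examples is simply ``trainer.finetune(model, datamodule=datamodule, strategy="freeze")``, as shown in the updated ``summarization.py`` and ``translation.py``.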