This repository has been archived by the owner on Oct 9, 2023. It is now read-only.

GPU CI Improvements #992

Merged · 21 commits · Nov 23, 2021
16 changes: 16 additions & 0 deletions .azure-pipelines/gpu-example-tests.yml
@@ -0,0 +1,16 @@
trigger:
branches:
include: ["master"]
pr:
branches:
include: ["master"]
autoCancel: true
drafts: true

jobs:
- template: testing-template.yml
parameters:
configs:
- "image"
- "text"
- "tabular"
@@ -2,19 +2,19 @@
# Create and test a Python package on multiple Python versions.
# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more:
# https://docs.microsoft.com/azure/devops/pipelines/languages/python
trigger: none

pr: none

schedules:
- cron: "0 0 * * *"
displayName: Daily midnight testing
trigger:
tags:
include:
- '*'
branches:
include:
- master
- "master"
- "refs/tags/*"
pr:
- "master"

jobs:
- job: pytest
- job: special
# how long to run the job before automatically cancelling
timeoutInMinutes: 45
# how much time to give 'run always even if cancelled tasks' before stopping them
@@ -50,15 +50,11 @@ jobs:
- bash: |
# python -m pip install "pip==20.1"
pip install '.[all]'
pip install '.[image]' learn2learn
Contributor comment on the line above: neat !
pip install '.[test]' --upgrade-strategy only-if-needed
pip list
displayName: 'Install dependencies'
- bash: |
python -m coverage run --source flash -m pytest flash tests/examples/test_scripts.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30
displayName: 'Testing'
- bash: |
bash tests/special_tests.sh
displayName: 'Testing: special'
69 changes: 69 additions & 0 deletions .azure-pipelines/testing-template.yml
@@ -0,0 +1,69 @@
jobs:
- ${{ each config in parameters.configs }}:
- job:
displayName: ${{config}}
# how long to run the job before automatically cancelling
timeoutInMinutes: 45
# how much time to give 'run always even if cancelled tasks' before stopping them
cancelTimeoutInMinutes: 2

pool: azure-gpus-spot
# this needs to have docker installed in the base image...
container:
# base ML image: mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.2-cudnn8-ubuntu18.04
image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.8"
# image: "pytorch/pytorch:1.8.1-cuda11.0-cudnn8-runtime"
options: "-it --rm --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"

workspace:
clean: all
steps:

- bash: |
lspci | egrep 'VGA|3D'
whereis nvidia
nvidia-smi
python --version
pip --version
pip list
df -kh /dev/shm
displayName: 'Image info & NVIDIA'

- bash: |
python -c "import torch ; mgpu = torch.cuda.device_count() ; assert mgpu >= 2, f'GPU: {mgpu}'"
displayName: 'Sanity check'

- bash: |
# python -m pip install "pip==20.1"
pip install '.[${{config}}]'
pip install '.[test]' --upgrade-strategy only-if-needed
pip list
displayName: 'Install dependencies'

- bash: |
python -m coverage run --source flash -m pytest flash tests/examples/test_scripts.py -v --junitxml=$(Build.StagingDirectory)/test-results.xml --durations=30
displayName: 'Testing'

- bash: |
python -m coverage report
python -m coverage xml
python -m coverage html
python -m codecov --token=$(CODECOV_TOKEN) --commit=$(Build.SourceVersion) --flags=gpu,pytest --name="GPU-coverage" --env=linux,azure
ls -l
displayName: 'Statistics'

- task: PublishTestResults@2
displayName: 'Publish test results'
inputs:
testResultsFiles: '$(Build.StagingDirectory)/test-results.xml'
testRunTitle: '$(Agent.OS) - $(Build.DefinitionName) - Python $(python.version)'
condition: succeededOrFailed()

- task: PublishCodeCoverageResults@1
displayName: 'Publish coverage report'
inputs:
codeCoverageTool: 'cobertura'
summaryFileLocation: 'coverage.xml'
reportDirectory: '$(Build.SourcesDirectory)/htmlcov'
testRunTitle: '$(Agent.OS) - $(Build.BuildNumber)[$(Agent.JobName)] - Python $(python.version)'
condition: succeededOrFailed()
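The `${{ each config in parameters.configs }}` expression is evaluated at template-expansion time, so the job above is stamped out once per entry and the three configs from `gpu-example-tests.yml` become three independent spot-GPU jobs. Roughly, the expansion for the `"image"` entry is equivalent to the following (abbreviated sketch, not the literal generated YAML):

```yaml
jobs:
  - job:
    displayName: image
    timeoutInMinutes: 45
    pool: azure-gpus-spot
    container:
      image: "pytorchlightning/pytorch_lightning:base-cuda-py3.9-torch1.8"
      options: "-it --rm --runtime=nvidia -e NVIDIA_VISIBLE_DEVICES=all --shm-size=32g"
    steps:
      - bash: |
          pip install '.[image]'
          pip install '.[test]' --upgrade-strategy only-if-needed
        displayName: 'Install dependencies'
      - bash: |
          python -m coverage run --source flash -m pytest flash tests/examples/test_scripts.py -v
        displayName: 'Testing'
```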
4 changes: 4 additions & 0 deletions CHANGELOG.md
@@ -44,6 +44,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed a bug where Flash could not be used with IceVision 0.11.0 ([#989](https://github.com/PyTorchLightning/lightning-flash/pull/989))

- Fixed a bug where backbone weights were sometimes not frozen correctly ([#992](https://github.com/PyTorchLightning/lightning-flash/pull/992))

- Fixed a bug where translation metrics were not computed correctly ([#992](https://github.com/PyTorchLightning/lightning-flash/pull/992))

### Removed

- Removed `OutputMapping` ([#939](https://github.com/PyTorchLightning/lightning-flash/pull/939))
9 changes: 9 additions & 0 deletions flash/core/model.py
@@ -535,6 +535,15 @@ def predict_step(self, batch: Any, batch_idx: int, dataloader_idx: int = 0) -> A
batch = torch.stack(batch)
return self(batch)

def modules_to_freeze(self) -> Optional[Union[nn.Module]]:
"""By default, we try to get the ``backbone`` attribute from the task and return it or ``None`` if not
present.
Returns:
The backbone ``Module`` to freeze or ``None`` if this task does not have a ``backbone`` attribute.
"""
return getattr(self, "backbone", None)

def _get_optimizer_class_from_registry(self, optimizer_key: str) -> Optimizer:
if optimizer_key.lower() not in self.available_optimizers():
raise KeyError(
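`modules_to_freeze()` gives finetuning strategies a single hook for asking a task which module should stay fixed. A minimal sketch of a consumer, built on Lightning's `BaseFinetuning` callback; the `FreezeBackbone` name is illustrative and not part of this PR:

```python
import torch.nn as nn
from pytorch_lightning.callbacks import BaseFinetuning


class FreezeBackbone(BaseFinetuning):
    """Illustrative callback: freeze whatever the task exposes via ``modules_to_freeze``."""

    def freeze_before_training(self, pl_module) -> None:
        modules = pl_module.modules_to_freeze()
        if modules is None:
            return  # the task has no ``backbone`` attribute, nothing to freeze
        if isinstance(modules, nn.Module):
            modules = [modules]
        self.freeze(modules, train_bn=False)

    def finetune_function(self, pl_module, current_epoch, optimizer, opt_idx) -> None:
        pass  # keep the backbone frozen for the whole run
```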
2 changes: 1 addition & 1 deletion flash/image/classification/model.py
@@ -152,6 +152,6 @@ def available_pretrained_weights(cls, backbone: str):
def _ci_benchmark_fn(self, history: List[Dict[str, Any]]):
"""This function is used only for debugging usage with CI."""
if self.hparams.multi_label:
assert history[-1]["val_f1"] > 0.40, history[-1]["val_f1"]
assert history[-1]["val_f1"] > 0.30, history[-1]["val_f1"]
else:
assert history[-1]["val_accuracy"] > 0.85, history[-1]["val_accuracy"]
2 changes: 1 addition & 1 deletion flash/text/classification/data.py
@@ -259,7 +259,7 @@ def __init__(
val_transform: Optional[Dict[str, Callable]] = None,
test_transform: Optional[Dict[str, Callable]] = None,
predict_transform: Optional[Dict[str, Callable]] = None,
backbone: str = "prajjwal1/bert-tiny",
backbone: str = "prajjwal1/bert-medium",
max_length: int = 128,
):
self.backbone = backbone
2 changes: 1 addition & 1 deletion flash/text/classification/model.py
@@ -57,7 +57,7 @@ class TextClassifier(ClassificationTask):
def __init__(
self,
num_classes: int,
backbone: str = "prajjwal1/bert-tiny",
backbone: str = "prajjwal1/bert-medium",
loss_fn: LOSS_FN_TYPE = None,
optimizer: OPTIMIZER_TYPE = "Adam",
lr_scheduler: LR_SCHEDULER_TYPE = None,
2 changes: 1 addition & 1 deletion flash/text/seq2seq/summarization/model.py
@@ -89,4 +89,4 @@ def compute_metrics(self, generated_tokens: torch.Tensor, batch: Dict, prefix: s
@staticmethod
def _ci_benchmark_fn(history: List[Dict[str, Any]]):
"""This function is used only for debugging usage with CI."""
assert history[-1]["rouge1_recall"] > 0.2
assert history[-1]["rouge1_recall"] > 0.18, history[-1]["rouge1_recall"]
12 changes: 8 additions & 4 deletions flash/text/seq2seq/translation/model.py
@@ -81,13 +81,17 @@ def task(self) -> str:
return "translation"

def compute_metrics(self, generated_tokens, batch, prefix):
tgt_lns = self.tokenize_labels(batch["labels"])
reference_corpus = self.tokenize_labels(batch["labels"])
# wrap targets in list as score expects a list of potential references
tgt_lns = [[reference] for reference in tgt_lns]
result = self.bleu(self._output_transform.uncollate(generated_tokens), tgt_lns)
reference_corpus = [[reference.split()] for reference in reference_corpus]

translate_corpus = self._output_transform.uncollate(generated_tokens)
translate_corpus = [line.split() for line in translate_corpus]

result = self.bleu(reference_corpus, translate_corpus)
self.log(f"{prefix}_bleu_score", result, on_step=False, on_epoch=True, prog_bar=True)

@staticmethod
def _ci_benchmark_fn(history: List[Dict[str, Any]]):
"""This function is used only for debugging usage with CI."""
assert history[-1]["val_bleu_score"] > 0.6
assert history[-1]["val_bleu_score"] > 0.6, history[-1]["val_bleu_score"]
@@ -90,7 +90,7 @@
"test_queries": 15,
},
optimizer=torch.optim.Adam,
optimizer_kwargs={"lr": 0.001},
learning_rate=0.001,
)

trainer = flash.Trainer(
2 changes: 1 addition & 1 deletion flash_examples/summarization.py
@@ -30,7 +30,7 @@

# 3. Create the trainer and finetune the model
trainer = Trainer(max_epochs=3)
trainer.finetune(model, datamodule=datamodule)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Summarize some text!
predictions = model.predict(
2 changes: 1 addition & 1 deletion flash_examples/text_classification.py
@@ -25,7 +25,7 @@
"sentiment",
train_file="data/imdb/train.csv",
val_file="data/imdb/valid.csv",
backbone="prajjwal1/bert-tiny",
backbone="prajjwal1/bert-medium",
)

# 2. Build the task
2 changes: 1 addition & 1 deletion flash_examples/translation.py
@@ -33,7 +33,7 @@

# 3. Create the trainer and finetune the model
trainer = flash.Trainer(max_epochs=3, gpus=torch.cuda.device_count())
trainer.finetune(model, datamodule=datamodule)
trainer.finetune(model, datamodule=datamodule, strategy="freeze")

# 4. Translate something!
predictions = model.predict(
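Both example scripts now pass `strategy="freeze"`, which keeps the module returned by `modules_to_freeze()` (the seq2seq backbone here) fixed for the whole finetuning run. If the backbone should thaw after a few epochs instead, a gradual strategy can be passed; a sketch, assuming the `FreezeUnfreeze` callback in `flash.core.finetuning` keeps its `unfreeze_epoch` argument:

```python
from flash.core.finetuning import FreezeUnfreeze

# Assumption: FreezeUnfreeze(unfreeze_epoch=...) is available in this Flash release.
trainer.finetune(model, datamodule=datamodule, strategy=FreezeUnfreeze(unfreeze_epoch=1))
```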
9 changes: 8 additions & 1 deletion tests/examples/test_integrations.py
@@ -17,7 +17,7 @@

import pytest

from flash.core.utilities.imports import _BAAL_AVAILABLE, _FIFTYONE_AVAILABLE, _IMAGE_AVAILABLE
from flash.core.utilities.imports import _BAAL_AVAILABLE, _FIFTYONE_AVAILABLE, _IMAGE_AVAILABLE, _LEARN2LEARN_AVAILABLE
from tests.examples.utils import run_test

root = Path(__file__).parent.parent.parent
@@ -39,6 +39,13 @@
"image_classification_active_learning.py",
marks=pytest.mark.skipif(not (_IMAGE_AVAILABLE and _BAAL_AVAILABLE), reason="baal library isn't installed"),
),
pytest.param(
"learn2learn",
"image_classification_imagenette_mini.py",
marks=pytest.mark.skipif(
not (_IMAGE_AVAILABLE and _LEARN2LEARN_AVAILABLE), reason="learn2learn isn't installed"
),
),
],
)
def test_integrations(tmpdir, folder, file):
8 changes: 1 addition & 7 deletions tests/examples/test_scripts.py
@@ -17,7 +17,7 @@

import pytest

from flash.core.utilities.imports import _LEARN2LEARN_AVAILABLE, _SKLEARN_AVAILABLE
from flash.core.utilities.imports import _SKLEARN_AVAILABLE
from tests.examples.utils import run_test
from tests.helpers.utils import (
_AUDIO_TESTING,
@@ -52,12 +52,6 @@
"image_classification_multi_label.py",
marks=pytest.mark.skipif(not _IMAGE_TESTING, reason="image libraries aren't installed"),
),
pytest.param(
"image_classification_meta_learning.py.py",
marks=pytest.mark.skipif(
not (_IMAGE_TESTING and _LEARN2LEARN_AVAILABLE), reason="image/learn2learn libraries aren't installed"
),
),
# pytest.param("finetuning", "object_detection.py"), # TODO: takes too long.
pytest.param(
"question_answering.py",