GH-563: ci sentiment (#564)
rain1024 authored Aug 24, 2022
1 parent e744188 commit aedc4ca
Showing 6 changed files with 52 additions and 12 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/ci-examples.yml
@@ -0,0 +1,29 @@
on:
  pull_request:
    branches: [ master ]
    types:
      - labeled

jobs:
  ci-sentiment:
    if: ${{ github.event.label.name == 'ci-sentiment' }}
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ ubuntu-latest, macos-latest, windows-latest ]
    steps:
      - name: Checkout
        uses: actions/checkout@v1
      - name: Set up Python 3.7
        uses: actions/setup-python@v1
        with:
          python-version: 3.7
      - name: Install dependencies
        working-directory: ./examples/sentiment
        run: |
          pip install -r requirements.txt
      - name: Train Models
        working-directory: ./examples/sentiment
        run: |
          python train_bert.py data.batch_size=2 data.samples=10 trainer.epoch=1
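As a stdlib-only sketch (not part of the commit), the matrix strategy above fans the `ci-sentiment` job out into one job per OS; `fail-fast: false` means a failure on one runner does not cancel the others. Note that the valid GitHub-hosted macOS runner label is `macos-latest`:

```python
# One CI job is spawned per entry in the os matrix; with fail-fast
# disabled, each job runs to completion independently.
oses = ["ubuntu-latest", "macos-latest", "windows-latest"]
jobs = [f"ci-sentiment ({name})" for name in oses]
print(jobs)
```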
1 change: 1 addition & 0 deletions .github/workflows/release-pypi.yml
@@ -4,6 +4,7 @@ on:

jobs:
  build_and_package_pypi:
+    if: "contains(github.event.head_commit.message, 'Release')"
    name: Build and package Pypi
    runs-on: ubuntu-latest
    steps:
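The added gate keeps PyPI packaging off unless the head commit message mentions a release. A minimal Python stand-in for that expression (GitHub's `contains()` is case-insensitive on strings, hence the lowercasing):

```python
def should_release(head_commit_message: str) -> bool:
    """Mirror the workflow gate contains(github.event.head_commit.message, 'Release')."""
    # GitHub Actions expression functions compare strings case-insensitively
    return "release" in head_commit_message.lower()

print(should_release("Release 1.3.3"))            # True
print(should_release("GH-563: ci sentiment"))     # False
```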
7 changes: 7 additions & 0 deletions examples/README.md
@@ -0,0 +1,7 @@
# Underthesea Examples

### CI

Please use the following pull request labels to trigger CI:

* `ci-sentiment`
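Labels can also be attached programmatically. A hedged sketch using GitHub's REST API (pull requests share the issues namespace for labels; the `undertheseanlp/underthesea` owner/repo pair is assumed here, and the request is only built, not sent):

```python
import json

API_ROOT = "https://api.github.com"

def build_add_label_request(owner: str, repo: str, pr_number: int, labels: list):
    """Build the URL and JSON body for POST /repos/{owner}/{repo}/issues/{number}/labels."""
    url = f"{API_ROOT}/repos/{owner}/{repo}/issues/{pr_number}/labels"
    body = json.dumps({"labels": labels})
    return url, body

# 564 is the pull request from this commit; repo coordinates are illustrative
url, body = build_add_label_request("undertheseanlp", "underthesea", 564, ["ci-sentiment"])
print(url)
```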
8 changes: 4 additions & 4 deletions examples/sentiment/README.md
@@ -9,10 +9,10 @@ python train_gpt2.py data.batch_size=2 ++data.num_workers=16 logger.project=loca
Run with 100 samples

```
-python train_gpt2.py data.batch_size=2 data.samples=100 logger.project=debug-sentiment-5
+python train_gpt2.py data.batch_size=2 data.samples=100 logger.project=debug-sentiment-5
```

-Run in clouod
+Run in cloud
```
python train_gpt2.py data.batch_size=16 ++data.num_workers=16 logger.project=cloud-sentiment-5
```
@@ -26,10 +26,10 @@ python train_bert.py data.batch_size=2 ++data.num_workers=16 logger.project=loca
Run with 100 samples

```
-python train_bert.py data.batch_size=2 data.samples=100 logger.project=debug-sentiment-5
+python train_bert.py data.batch_size=2 data.samples=100 logger.project=debug-sentiment-5
```

-Run in clouod
+Run in cloud
```
python train_bert.py data.batch_size=16 ++data.num_workers=16 logger.project=cloud-sentiment-5
```
1 change: 1 addition & 0 deletions examples/sentiment/configs/config.yaml
@@ -1,6 +1,7 @@
trainer:
  gpus: -1
  accelerator: 'ddp'
+  epoch: 5
  # fast_dev_run: 5
  # precision: 16
data:
18 changes: 10 additions & 8 deletions examples/sentiment/train_bert.py
@@ -1,7 +1,9 @@
import torch
+import hydra
import torch.nn as nn
import pytorch_lightning as pl
-from pytorch_lightning.loggers import WandbLogger
+# from pytorch_lightning.loggers import WandbLogger
+from omegaconf import DictConfig
from torchmetrics import F1Score as F1

from torch.utils.data import Dataset, DataLoader
@@ -69,7 +71,7 @@ def __init__(self, train_dataset, test_dataset, val_dataset, batch_size=24):

    def train_dataloader(self):
        output = DataLoader(
-            self.train_dataset, self.batch_size, shuffle=True, drop_last=True
+            self.train_dataset, self.batch_size, shuffle=True, drop_last=True,
        )
        return output

@@ -171,9 +173,9 @@ def configure_optimizers(self):
        return optimizer


-def main():
+@hydra.main(config_path="configs/", config_name="config.yaml")
+def main(config: DictConfig) -> None:
    corpus = UITABSAHotel(training="aspect")  # predicting aspect or polarity
-    epochs = 5
    batch_size = 24
    max_sequence_len = 100
    num_labels = corpus.num_labels
@@ -184,7 +186,7 @@ def main():
        data=corpus.train,
        max_sequence_len=max_sequence_len,
        num_labels=num_labels,
-        tokenizer=tokenizer
+        tokenizer=tokenizer,
    )
    val_dataset = UITABSADataset(
        data=corpus.dev,
@@ -213,12 +215,12 @@ def main():
        train_dataset, val_dataset, test_dataset, batch_size
    )

-    logger = WandbLogger(project="debug-phobert-sentiment")
+    # logger = WandbLogger(project="debug-phobert-sentiment")
    trainer = pl.Trainer(
-        max_epochs=epochs,
+        max_epochs=config.trainer.epoch,
        accelerator="cpu",
        enable_progress_bar=True,
-        logger=logger
+        # logger=logger
    )
    trainer.fit(model, data_module)
    trainer.test()
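The net effect of this file's changes is that the epoch count now flows from the Hydra config (`config.trainer.epoch`) into `pl.Trainer` instead of a hard-coded `epochs = 5`. A stdlib-only sketch of that wiring, with `SimpleNamespace` standing in for the `DictConfig` that `@hydra.main` would inject:

```python
from types import SimpleNamespace

def make_trainer_kwargs(config):
    """Map the config onto the pl.Trainer arguments used in main()."""
    return {
        "max_epochs": config.trainer.epoch,  # the only config-driven argument here
        "accelerator": "cpu",
        "enable_progress_bar": True,
    }

# trainer.epoch=1 mirrors the override the ci-sentiment workflow passes
config = SimpleNamespace(trainer=SimpleNamespace(epoch=1))
kwargs = make_trainer_kwargs(config)
print(kwargs)
```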
