Migrate AML SDK from v1 to v2 (#2134)

* Migrate AML SDK from v1 to v2 Signed-off-by: Simon Zhao <[email protected]>
* Correct MLClient constructor parameter names Signed-off-by: Simon Zhao <[email protected]>
* Remove unsupported operation begin_start() on AmlCompute Signed-off-by: Simon Zhao <[email protected]>
* Add label for environments.get() Signed-off-by: Simon Zhao <[email protected]>
* Remove environment get Signed-off-by: Simon Zhao <[email protected]>
* Update Signed-off-by: Simon Zhao <[email protected]>
* Correct experiment and environment names Signed-off-by: Simon Zhao <[email protected]>
* Correct compute Signed-off-by: Simon Zhao <[email protected]>
* Create Conda env inside Dockerfile Signed-off-by: Simon Zhao <[email protected]>
* Catch ResourceNotFoundError Signed-off-by: Simon Zhao <[email protected]>
* Correct experiment name Signed-off-by: Simon Zhao <[email protected]>
* Update env creation and job running Signed-off-by: Simon Zhao <[email protected]>
* Try waiting for completion by stream Signed-off-by: Simon Zhao <[email protected]>
* Try to fix conda activate Signed-off-by: Simon Zhao <[email protected]>
* Import sys Signed-off-by: Simon Zhao <[email protected]>
* Change logging level Signed-off-by: Simon Zhao <[email protected]>
* Exit directly once pytest fails Signed-off-by: Simon Zhao <[email protected]>
* Set numpy<2.0.0 due to issue of cornac Signed-off-by: Simon Zhao <[email protected]>
* Correct Dockerfile Signed-off-by: Simon Zhao <[email protected]>
* Change heredoc Signed-off-by: Simon Zhao <[email protected]>
* Set dockerfile version Signed-off-by: Simon Zhao <[email protected]>
* Copy environment.yml to container Signed-off-by: Simon Zhao <[email protected]>
* Update .github/actions/azureml-test/action.yml Co-authored-by: Miguel Fierro <[email protected]>
* Update as suggested by Andreas
* Print pytest logs Signed-off-by: Simon Zhao <[email protected]>
* Group outputs Signed-off-by: Simon Zhao <[email protected]>
* Install pip in conda env, and show pytest warnings Signed-off-by: Simon Zhao <[email protected]>
* Add command name Signed-off-by: Simon Zhao <[email protected]>
* Update Signed-off-by: Simon Zhao <[email protected]>
* Show warnings in pytest Signed-off-by: Simon Zhao <[email protected]>
* Show warnings in pytest Signed-off-by: Simon Zhao <[email protected]>
---------
Signed-off-by: Simon Zhao <[email protected]> Co-authored-by: Miguel Fierro <[email protected]>
recommenders-team · Jul 31, 2024 · f6d3e6b · f6d3e6b
1 parent da4b2db
commit f6d3e6b
Show file tree

Hide file tree

Showing 11 changed files with 472 additions and 555 deletions.
diff --git a/.github/actions/azureml-test/action.yml b/.github/actions/azureml-test/action.yml
@@ -6,108 +6,85 @@
 name: azureml-tests
 description: "Submit experiment to AzureML cluster"
 inputs:
-  # azureml experiment name
   EXP_NAME:
     required: true
-    type: string
-  # type of test - unit or nightly
+    description: AzureML experiment Name
+  ENV_NAME:
+    required: true
+    description: AzureML environment Name
   TEST_KIND:
     required: true
-    type: string
-  # test environment - cpu, gpu or spark
-  TEST_ENV:
-    required: false
-    type: string
-  # azureml compute credentials
+    description: Type of test - unit or nightly
   AZUREML_TEST_CREDENTIALS:
     required: true
-    type: string
-  # azureml compute subid
+    description: Credentials for AzureML login
   AZUREML_TEST_SUBID:
     required: true
-    type: string
-  # python version
+    description: AzureML subscription ID
   PYTHON_VERSION:
     required: true
-    type: string
-  # test group name
+    description: Python version used for the tests
   TEST_GROUP:
     required: true
-    type: string
-  # cpu cluster name
-  CPU_CLUSTER_NAME:
-    required: false
-    type: string
-    default: "cpu-cluster"
-  # gpu cluster name
-  GPU_CLUSTER_NAME:
-    required: false
-    type: string
-    default: "gpu-cluster"
-  # AzureML resource group name
+    description: Test group defined in test_group.py
   RG:
     required: false
-    type: string
+    description: AzureML resource group name
     default: "recommenders_project_resources"
-  # AzureML workspace name
   WS:
     required: false
-    type: string
+    description: AzureML workspace name
     default: "azureml-test-workspace"
-  # test logs path
-  TEST_LOGS_PATH:
-    required: false
-    type: string
-    default: '"test_logs.log"'
-  # pytest exit code
-  PYTEST_EXIT_CODE:
+  LOG_DIR:
     required: false
-    type: string
-    default: "pytest_exit_code.log"
+    description: Directory storing the test logs
+    default: "test_logs"
 
 runs:
   using: "composite"
   steps:
     - name: Setup python
       uses: actions/setup-python@v5
       with:
-        python-version: "3.8"
-    - name: Install azureml-core and azure-cli on a GitHub hosted server
+        python-version: "3.10"
+    - name: Install AzureML Python SDK
       shell: bash
-      run: pip install --quiet "azureml-core>1,<2" "azure-cli>2,<3"
+      run: pip install --quiet "azure-ai-ml>1,<2" mlflow "azureml-mlflow>1,<2" 
     - name: Log in to Azure
       uses: azure/login@v2
       with:
-        creds: ${{inputs.AZUREML_TEST_CREDENTIALS}}
-    - name: Install wheel package
-      shell: bash
-      run: pip install --quiet wheel
+        creds: ${{ inputs.AZUREML_TEST_CREDENTIALS }}
     - name: Submit tests to AzureML
       shell: bash
-      run: >-
+      run: |
+        echo "::group::Running tests ..."
         python tests/ci/azureml_tests/submit_groupwise_azureml_pytest.py \
-          --subid ${{inputs.AZUREML_TEST_SUBID}} \
-          --reponame "recommenders" \
-          --branch ${{ github.ref }} \
-          --rg ${{inputs.RG}} \
-          --wsname ${{inputs.WS}} \
-          --expname ${{inputs.EXP_NAME}}_${{inputs.TEST_GROUP}} \
-          --testlogs ${{inputs.TEST_LOGS_PATH}} \
-          --testkind ${{inputs.TEST_KIND}} \
-          --conda_pkg_python ${{inputs.PYTHON_VERSION}} \
-          --testgroup ${{inputs.TEST_GROUP}} \
-          --disable-warnings \
-          --sha "${GITHUB_SHA}" \
-          --clustername $(if [[ ${{inputs.TEST_GROUP}} =~ "gpu" ]]; then echo "${{inputs.GPU_CLUSTER_NAME}}"; else echo "${{inputs.CPU_CLUSTER_NAME}}"; fi) \
-          $(if [[ ${{inputs.TEST_GROUP}} =~ "gpu" ]]; then echo "--add_gpu_dependencies"; fi) \
-          $(if [[ ${{inputs.TEST_GROUP}} =~ "spark" ]]; then echo "--add_spark_dependencies"; fi)
-    - name: Get exit status
+          --subid ${{ inputs.AZUREML_TEST_SUBID }} \
+          --rg ${{ inputs.RG }} \
+          --ws ${{ inputs.WS }} \
+          --cluster ${{ contains(inputs.TEST_GROUP, 'gpu') && 'gpu-cluster' || 'cpu-cluster' }} \
+          --expname ${{ inputs.EXP_NAME }} \
+          --envname ${{ inputs.ENV_NAME }} \
+          --testkind ${{ inputs.TEST_KIND}} \
+          --python-version ${{ inputs.PYTHON_VERSION }} \
+          --testgroup ${{ inputs.TEST_GROUP }} \
+          --sha ${GITHUB_SHA}
+        echo "::endgroup::"
+    - name: Post tests
+      if: ${{ ! cancelled() }}
       shell: bash
-      id: exit_status
-      run: echo "code=$(cat ${{inputs.PYTEST_EXIT_CODE}})" >> $GITHUB_OUTPUT
-    - name: Check Success/Failure
-      if: ${{ steps.exit_status.outputs.code != 0 }}
-      uses: actions/github-script@v7
+      run: |
+        echo "::group::Pytest logs"
+        python tests/ci/azureml_tests/post_pytest.py \
+          --subid ${{ inputs.AZUREML_TEST_SUBID }} \
+          --rg ${{ inputs.RG }} \
+          --ws ${{ inputs.WS }} \
+          --expname ${{ inputs.EXP_NAME }} \
+          --log-dir ${{ inputs.LOG_DIR }}
+        echo "::endgroup::"
+    - name: Save logs
+      if: ${{ ! cancelled() }}
+      uses: actions/upload-artifact@v4
       with:
-        script: |
-            core.setFailed('All tests did not pass!')
+        name: logs-${{ inputs.TEST_GROUP }}-python${{ inputs.PYTHON_VERSION }}
+        path: ${{ inputs.LOG_DIR }}
diff --git a/.github/actions/get-test-groups/action.yml b/.github/actions/get-test-groups/action.yml
@@ -6,18 +6,17 @@
 name: get-test-groups
 description: "Get test group names from tests_groups.py"
 inputs:
-  # type of test - unit or nightly
   TEST_KIND:
     required: true
-    type: string
-  # test environment - cpu, gpu or spark
+    description: Type of test - unit or nightly
   TEST_ENV:
     required: false
-    type: string
+    description: Test environment - cpu, gpu or spark
     default: 'cpu'
 outputs:
   test_groups:
-    value: ${{steps.get_test_groups.outputs.test_groups}}
+    description: A list of test groups
+    value: ${{ steps.get_test_groups.outputs.test_groups }}
 
 runs:
   using: "composite"

diff --git a/.github/workflows/azureml-cpu-nightly.yml b/.github/workflows/azureml-cpu-nightly.yml
@@ -34,7 +34,7 @@ on:
 
   # Enable manual trigger
   workflow_dispatch:
-    input:
+    inputs:
       tags:
         description: 'Tags to label this manual run (optional)'
         default: 'Manual trigger'
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
@@ -76,9 +76,9 @@ jobs:
         uses: ./.github/actions/azureml-test
         id: execute_tests
         with:
-          EXP_NAME: 'nightly_tests'
+          EXP_NAME: recommenders-nightly-${{ matrix.test-group }}-python${{ matrix.python-version }}-${{ github.ref }}
+          ENV_NAME: recommenders-${{ github.sha }}-python${{ matrix.python-version }}${{ contains(matrix.test-group, 'gpu') && '-gpu' || '' }}${{ contains(matrix.test-group, 'spark') && '-spark' || '' }}
           TEST_KIND: 'nightly'
-          TEST_ENV: 'cpu'
           AZUREML_TEST_CREDENTIALS: ${{ secrets.AZUREML_TEST_CREDENTIALS }}
           AZUREML_TEST_SUBID: ${{ secrets.AZUREML_TEST_SUBID }}
           PYTHON_VERSION: ${{ matrix.python-version }}

diff --git a/.github/workflows/azureml-gpu-nightly.yml b/.github/workflows/azureml-gpu-nightly.yml
@@ -34,7 +34,7 @@ on:
 
   # Enable manual trigger
   workflow_dispatch:
-    input:
+    inputs:
       tags:
         description: 'Tags to label this manual run (optional)'
         default: 'Manual trigger'
@@ -67,7 +67,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
@@ -76,9 +76,9 @@ jobs:
         uses: ./.github/actions/azureml-test
         id: execute_tests
         with:
-          EXP_NAME: 'nightly_tests'
+          EXP_NAME: recommenders-nightly-${{ matrix.test-group }}-python${{ matrix.python-version }}-${{ github.ref }}
+          ENV_NAME: recommenders-${{ github.sha }}-python${{ matrix.python-version }}${{ contains(matrix.test-group, 'gpu') && '-gpu' || '' }}${{ contains(matrix.test-group, 'spark') && '-spark' || '' }}
           TEST_KIND: 'nightly'
-          TEST_ENV: 'gpu'
           AZUREML_TEST_CREDENTIALS: ${{ secrets.AZUREML_TEST_CREDENTIALS }}
           AZUREML_TEST_SUBID: ${{ secrets.AZUREML_TEST_SUBID }}
           PYTHON_VERSION: ${{ matrix.python-version }}

diff --git a/.github/workflows/azureml-spark-nightly.yml b/.github/workflows/azureml-spark-nightly.yml
@@ -33,7 +33,7 @@ on:
 
   # Enable manual trigger
   workflow_dispatch:
-    input:
+    inputs:
       tags:
         description: 'Tags to label this manual run (optional)'
         default: 'Manual trigger'
@@ -66,7 +66,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
@@ -75,9 +75,9 @@ jobs:
         uses: ./.github/actions/azureml-test
         id: execute_tests
         with:
-          EXP_NAME: 'nightly_tests'
+          EXP_NAME: recommenders-nightly-${{ matrix.test-group }}-python${{ matrix.python-version }}-${{ github.ref }}
+          ENV_NAME: recommenders-${{ github.sha }}-python${{ matrix.python-version }}${{ contains(matrix.test-group, 'gpu') && '-gpu' || '' }}${{ contains(matrix.test-group, 'spark') && '-spark' || '' }}
           TEST_KIND: 'nightly'
-          TEST_ENV: 'spark'
           AZUREML_TEST_CREDENTIALS: ${{ secrets.AZUREML_TEST_CREDENTIALS }}
           AZUREML_TEST_SUBID: ${{ secrets.AZUREML_TEST_SUBID }}
           PYTHON_VERSION: ${{ matrix.python-version }}

diff --git a/.github/workflows/azureml-unit-tests.yml b/.github/workflows/azureml-unit-tests.yml
@@ -23,7 +23,7 @@ on:
 
   # Enable manual trigger
   workflow_dispatch:
-    input:
+    inputs:
       tags:
         description: 'Tags to label this manual run (optional)'
         default: 'Manual trigger'
@@ -56,7 +56,7 @@ jobs:
     strategy:
       max-parallel: 50 # Usage limits: https://docs.github.com/en/actions/learn-github-actions/usage-limits-billing-and-administration
       matrix:
-        python-version: ['"python=3.8"', '"python=3.9"', '"python=3.10"', '"python=3.11"']
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
         test-group:  ${{ fromJSON(needs.get-test-groups.outputs.test_groups) }}
     steps:
       - name: Check out repository code
@@ -65,7 +65,8 @@ jobs:
         uses: ./.github/actions/azureml-test
         id: execute_tests
         with:
-          EXP_NAME: 'unit_tests'
+          EXP_NAME: recommenders-unit-${{ matrix.test-group }}-python${{ matrix.python-version }}-${{ github.sha }}
+          ENV_NAME: recommenders-${{ github.sha }}-python${{ matrix.python-version }}${{ contains(matrix.test-group, 'gpu') && '-gpu' || '' }}${{ contains(matrix.test-group, 'spark') && '-spark' || '' }}
           TEST_KIND: 'unit'
           AZUREML_TEST_CREDENTIALS: ${{ secrets.AZUREML_TEST_CREDENTIALS }}
           AZUREML_TEST_SUBID: ${{ secrets.AZUREML_TEST_SUBID }}

diff --git a/setup.py b/setup.py
@@ -36,6 +36,7 @@
     "nltk>=3.8.1,<4",  # requires tqdm
     "notebook>=6.5.5,<8",  # requires ipykernel, jinja2, jupyter, nbconvert, nbformat, packaging, requests
     "numba>=0.57.0,<1",
+    "numpy<2.0.0",  # FIXME: Remove numpy<2.0.0 once cornac release a version newer than 2.2.1 that resolve ImportError: numpy.core.multiarray failed to import.
     "pandas>2.0.0,<3.0.0",  # requires numpy
     "pandera[strategies]>=0.6.5,<0.18;python_version<='3.8'",  # For generating fake datasets
     "pandera[strategies]>=0.15.0;python_version>='3.9'",