Try individual github workflows per file
Signed-off-by: Ankita Katiyar <[email protected]>
ankatiyar committed Oct 26, 2023
1 parent 8924df3 commit d790caf
Showing 4 changed files with 58 additions and 41 deletions.
58 changes: 58 additions & 0 deletions .github/workflows/run-test.yml
@@ -0,0 +1,58 @@
name: Run test on Kedro Starters

on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main

jobs:
  spaceflights-pyspark:
    strategy:
      matrix:
        os: [ ubuntu-latest, windows-latest ]
        python-version: [ "3.8", "3.9", "3.10", "3.11" ]
    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install test requirements
        run: make install-test-requirements
      - name: Setup Java
        uses: actions/setup-java@v1
        with:
          java-version: '11'
      - name: Setup Spark
        uses: vemonet/setup-spark@v1
        with:
          spark-version: '3.4.1'
          hadoop-version: '3'
      - uses: jannekem/run-python-script-action@v1
        with:
          script: |
            import os
            from pathlib import Path

            import yaml  # required for yaml.dump below

            project_name = "project-dummy"
            config = {
                "project_name": project_name,
                "repo_name": project_name,
                "output_dir": os.getcwd(),
                "python_package": project_name.replace("-", "_"),
                "include_example": False,
            }
            with Path("config.yml").open("w") as config_file:
                yaml.dump(config, config_file, default_flow_style=False)
      - name: Create a new project with starter
        run: kedro new --starter=kedro-starters/pyspark-iris --config=config.yml
      - name: Install project requirements
        run: pip install -r requirements.txt
        working-directory: project-dummy
      - name: Kedro run
        run: kedro run
        working-directory: project-dummy
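
For reference, the project-creation logic in the workflow above can be sanity-checked locally. The sketch below is not part of the commit: it mirrors the config-writing script and the `kedro new` step, assuming `kedro`, `PyYAML`, and a `kedro-starters` checkout are available in the current directory.

import os
import subprocess
from pathlib import Path

import yaml

# Mirrors the run-python-script-action step: write config.yml for kedro new.
project_name = "project-dummy"
config = {
    "project_name": project_name,
    "repo_name": project_name,
    "output_dir": os.getcwd(),
    "python_package": project_name.replace("-", "_"),
    "include_example": False,
}
with Path("config.yml").open("w") as config_file:
    yaml.dump(config, config_file, default_flow_style=False)

# Mirrors the "Create a new project with starter" step.
subprocess.run(
    ["kedro", "new", "--starter=kedro-starters/pyspark-iris", "--config=config.yml"],
    check=True,  # raise if project creation fails
)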
3 changes: 0 additions & 3 deletions features/run.feature
@@ -28,7 +28,6 @@ Feature: Run all starters
    Given I have prepared a config file
    And I have run a non-interactive kedro new with the starter pyspark-iris
    And I have installed the Kedro project's dependencies
-   And I have setup hadoop binary
    When I run the Kedro pipeline
    Then I should get a successful exit code

@@ -51,7 +50,6 @@ Feature: Run all starters
    Given I have prepared a config file
    And I have run a non-interactive kedro new with the starter spaceflights-pyspark
    And I have installed the Kedro project's dependencies
-   And I have setup hadoop binary
    When I run the Kedro pipeline
    Then I should get a successful exit code

@@ -60,6 +58,5 @@ Feature: Run all starters
    Given I have prepared a config file
    And I have run a non-interactive kedro new with the starter spaceflights-pyspark-viz
    And I have installed the Kedro project's dependencies
-   And I have setup hadoop binary
    When I run the Kedro pipeline
    Then I should get a successful exit code
37 changes: 0 additions & 37 deletions features/steps/run_steps.py
@@ -1,7 +1,6 @@
import subprocess

import yaml
-import os, requests, platform
from behave import given, then, when

OK_EXIT_CODE = 0
@@ -58,42 +57,6 @@ def install_project_dependencies(context):
    )
    assert res.returncode == OK_EXIT_CODE

@given("I have setup hadoop binary")
def setup_hadoop(context):
if platform.system() != 'Windows':
return
# Define the URLs of the files to download
winutils_url = "https://github.com/steveloughran/winutils/raw/master/hadoop-2.7.1/bin/winutils.exe"
hadoop_dll_url = "https://github.com/steveloughran/winutils/raw/master/hadoop-2.7.1/bin/hadoop.dll"

# Specify the local file paths
winutils_local_path = "winutils.exe"
hadoop_dll_local_path = "hadoop.dll"
hadoop_bin_dir = "C:\\hadoop\\bin"

# Download winutils.exe and hadoop.dll
response1 = requests.get(winutils_url)
with open(winutils_local_path, "wb") as file1:
file1.write(response1.content)

response2 = requests.get(hadoop_dll_url)
with open(hadoop_dll_local_path, "wb") as file2:
file2.write(response2.content)

# Move hadoop.dll to C:\Windows\System32
os.rename(hadoop_dll_local_path, os.path.join("C:\\Windows\\System32", os.path.basename(hadoop_dll_local_path)))

# Create C:\hadoop\bin directory
if not os.path.exists(hadoop_bin_dir):
os.makedirs(hadoop_bin_dir)

# Move winutils.exe to C:\hadoop\bin
os.rename(winutils_local_path, os.path.join(hadoop_bin_dir, os.path.basename(winutils_local_path)))

# Set the HADOOP_HOME environment variable
os.system(f"setx /M HADOOP_HOME {hadoop_bin_dir}")


@when("I run the Kedro pipeline")
def run_kedro_pipeline(context):
"""Behave step to run the newly created Kedro pipeline."""
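
The body of run_kedro_pipeline is truncated in the diff above. For orientation only, a plausible implementation following the subprocess pattern of install_project_dependencies might look like the sketch below; the context.root_project_dir attribute is an assumption for illustration, not taken from this repository.

import subprocess

from behave import when

OK_EXIT_CODE = 0


@when("I run the Kedro pipeline")
def run_kedro_pipeline(context):
    """Behave step to run the newly created Kedro pipeline."""
    # Hypothetical: assumes an earlier step stored the project directory
    # on the behave context object.
    res = subprocess.run(["kedro", "run"], cwd=context.root_project_dir)
    assert res.returncode == OK_EXIT_CODE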
1 change: 0 additions & 1 deletion test_requirements.txt
@@ -4,4 +4,3 @@ black~=22.0
PyYAML>=4.2, <7.0
ruff~=0.0.290
git+https://github.com/kedro-org/kedro.git@develop#egg=kedro
-requests
