From d790cafa4d3c98204183e2dfa7af598f0ad582e8 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Thu, 26 Oct 2023 13:48:29 +0100 Subject: [PATCH] Try individual github workflows per file Signed-off-by: Ankita Katiyar --- .github/workflows/run-test.yml | 58 ++++++++++++++++++++++++++++++++++ features/run.feature | 3 -- features/steps/run_steps.py | 37 ---------------------- test_requirements.txt | 1 - 4 files changed, 58 insertions(+), 41 deletions(-) diff --git a/.github/workflows/run-test.yml b/.github/workflows/run-test.yml index e69de29b..23814177 100644 --- a/.github/workflows/run-test.yml +++ b/.github/workflows/run-test.yml @@ -0,0 +1,58 @@ +name: Run test on Kedro Starters + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + spaceflights-pyspark: + strategy: + matrix: + os: [ ubuntu-latest, windows-latest ] + python-version: [ "3.8", "3.9", "3.10", "3.11" ] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install test requirements + run: make install-test-requirements + - name: Setup Java + uses: actions/setup-java@v1 + with: + java-version: '11' + - name: Setup spark + uses: vemonet/setup-spark@v1 + with: + spark-version: '3.4.1' + hadoop-version: '3' + - uses: jannekem/run-python-script-action@v1 + with: + script: | + import os, yaml + from pathlib import Path + project_name = "project-dummy" + config = { + "project_name": project_name, + "repo_name": project_name, + "output_dir": os.getcwd(), + "python_package": project_name.replace("-", "_"), + "include_example": False, + } + with Path("config.yml").open("w") as config_file: + yaml.dump(config, config_file, default_flow_style=False) + - name: Create a new project with starter + run: kedro new --starter=kedro-starters/pyspark-iris --config=config.yml + - name: Install project requirements + run: | + cd 
project-dummy + pip install -r requirements.txt + - name: Kedro run + run: kedro run diff --git a/features/run.feature b/features/run.feature index 83fcb266..cfd44e88 100644 --- a/features/run.feature +++ b/features/run.feature @@ -28,7 +28,6 @@ Feature: Run all starters Given I have prepared a config file And I have run a non-interactive kedro new with the starter pyspark-iris And I have installed the Kedro project's dependencies - And I have setup hadoop binary When I run the Kedro pipeline Then I should get a successful exit code @@ -51,7 +50,6 @@ Feature: Run all starters Given I have prepared a config file And I have run a non-interactive kedro new with the starter spaceflights-pyspark And I have installed the Kedro project's dependencies - And I have setup hadoop binary When I run the Kedro pipeline Then I should get a successful exit code @@ -60,6 +58,5 @@ Feature: Run all starters Given I have prepared a config file And I have run a non-interactive kedro new with the starter spaceflights-pyspark-viz And I have installed the Kedro project's dependencies - And I have setup hadoop binary When I run the Kedro pipeline Then I should get a successful exit code diff --git a/features/steps/run_steps.py b/features/steps/run_steps.py index 90167aab..9e0453fc 100644 --- a/features/steps/run_steps.py +++ b/features/steps/run_steps.py @@ -1,7 +1,6 @@ import subprocess import yaml -import os, requests, platform from behave import given, then, when OK_EXIT_CODE = 0 @@ -58,42 +57,6 @@ def install_project_dependencies(context): ) assert res.returncode == OK_EXIT_CODE -@given("I have setup hadoop binary") -def setup_hadoop(context): - if platform.system() != 'Windows': - return - # Define the URLs of the files to download - winutils_url = "https://github.com/steveloughran/winutils/raw/master/hadoop-2.7.1/bin/winutils.exe" - hadoop_dll_url = "https://github.com/steveloughran/winutils/raw/master/hadoop-2.7.1/bin/hadoop.dll" - - # Specify the local file paths - 
winutils_local_path = "winutils.exe" - hadoop_dll_local_path = "hadoop.dll" - hadoop_bin_dir = "C:\\hadoop\\bin" - - # Download winutils.exe and hadoop.dll - response1 = requests.get(winutils_url) - with open(winutils_local_path, "wb") as file1: - file1.write(response1.content) - - response2 = requests.get(hadoop_dll_url) - with open(hadoop_dll_local_path, "wb") as file2: - file2.write(response2.content) - - # Move hadoop.dll to C:\Windows\System32 - os.rename(hadoop_dll_local_path, os.path.join("C:\\Windows\\System32", os.path.basename(hadoop_dll_local_path))) - - # Create C:\hadoop\bin directory - if not os.path.exists(hadoop_bin_dir): - os.makedirs(hadoop_bin_dir) - - # Move winutils.exe to C:\hadoop\bin - os.rename(winutils_local_path, os.path.join(hadoop_bin_dir, os.path.basename(winutils_local_path))) - - # Set the HADOOP_HOME environment variable - os.system(f"setx /M HADOOP_HOME {hadoop_bin_dir}") - - @when("I run the Kedro pipeline") def run_kedro_pipeline(context): """Behave step to run the newly created Kedro pipeline.""" diff --git a/test_requirements.txt b/test_requirements.txt index abc1507f..317b156c 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -4,4 +4,3 @@ black~=22.0 PyYAML>=4.2, <7.0 ruff~=0.0.290 git+https://github.com/kedro-org/kedro.git@develop#egg=kedro -requests \ No newline at end of file