From d790cafa4d3c98204183e2dfa7af598f0ad582e8 Mon Sep 17 00:00:00 2001 From: Ankita Katiyar Date: Thu, 26 Oct 2023 13:48:29 +0100 Subject: [PATCH] Try individual github workflows per file Signed-off-by: Ankita Katiyar --- .github/workflows/run-test.yml | 58 ++++++++++++++++++++++++++++++++++ features/run.feature | 3 -- features/steps/run_steps.py | 37 ---------------------- test_requirements.txt | 1 - 4 files changed, 58 insertions(+), 41 deletions(-) diff --git a/.github/workflows/run-test.yml b/.github/workflows/run-test.yml index e69de29b..23814177 100644 --- a/.github/workflows/run-test.yml +++ b/.github/workflows/run-test.yml @@ -0,0 +1,58 @@ +name: Run test on Kedro Starters + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + spaceflights-pyspark: + strategy: + matrix: + os: [ ubuntu-latest, windows-latest ] + python-version: [ "3.8", "3.9", "3.10", "3.11" ] + runs-on: ${{ matrix.os }} + steps: + - name: Checkout code + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install test requirements + run: make install-test-requirements + - name: Setup Java + uses: actions/setup-java@v1 + with: + java-version: '11' + - name: Setup spark + uses: vemonet/setup-spark@v1 + with: + spark-version: '3.4.1' + hadoop-version: '3' + - uses: jannekem/run-python-script-action@v1 + with: + script: | + import os, yaml + from pathlib import Path + project_name = "project-dummy" + config = { + "project_name": project_name, + "repo_name": project_name, + "output_dir": os.getcwd(), + "python_package": project_name.replace("-", "_"), + "include_example": False, + } + with Path("config.yml").open("w") as config_file: + yaml.dump(config, config_file, default_flow_style=False) + - name: Create a new project with starter + run: kedro new --starter=kedro-starters/pyspark-iris --config=config.yml + - name: Install project requirements + run: | + cd 
project-dummy + pip install -r requirements.txt + - name: Kedro run + run: kedro run diff --git a/features/run.feature b/features/run.feature index 83fcb266..cfd44e88 100644 --- a/features/run.feature +++ b/features/run.feature @@ -28,7 +28,6 @@ Feature: Run all starters Given I have prepared a config file And I have run a non-interactive kedro new with the starter pyspark-iris And I have installed the Kedro project's dependencies - And I have setup hadoop binary When I run the Kedro pipeline Then I should get a successful exit code @@ -51,7 +50,6 @@ Feature: Run all starters Given I have prepared a config file And I have run a non-interactive kedro new with the starter spaceflights-pyspark And I have installed the Kedro project's dependencies - And I have setup hadoop binary When I run the Kedro pipeline Then I should get a successful exit code @@ -60,6 +58,5 @@ Feature: Run all starters Given I have prepared a config file And I have run a non-interactive kedro new with the starter spaceflights-pyspark-viz And I have installed the Kedro project's dependencies - And I have setup hadoop binary When I run the Kedro pipeline Then I should get a successful exit code diff --git a/features/steps/run_steps.py b/features/steps/run_steps.py index 90167aab..9e0453fc 100644 --- a/features/steps/run_steps.py +++ b/features/steps/run_steps.py @@ -1,7 +1,6 @@ import subprocess import yaml -import os, requests, platform from behave import given, then, when OK_EXIT_CODE = 0 @@ -58,42 +57,6 @@ def install_project_dependencies(context): ) assert res.returncode == OK_EXIT_CODE -@given("I have setup hadoop binary") -def setup_hadoop(context): - if platform.system() != 'Windows': - return - # Define the URLs of the files to download - winutils_url = "https://github.com/steveloughran/winutils/raw/master/hadoop-2.7.1/bin/winutils.exe" - hadoop_dll_url = "https://github.com/steveloughran/winutils/raw/master/hadoop-2.7.1/bin/hadoop.dll" - - # Specify the local file paths - 
winutils_local_path = "winutils.exe" - hadoop_dll_local_path = "hadoop.dll" - hadoop_bin_dir = "C:\\hadoop\\bin" - - # Download winutils.exe and hadoop.dll - response1 = requests.get(winutils_url) - with open(winutils_local_path, "wb") as file1: - file1.write(response1.content) - - response2 = requests.get(hadoop_dll_url) - with open(hadoop_dll_local_path, "wb") as file2: - file2.write(response2.content) - - # Move hadoop.dll to C:\Windows\System32 - os.rename(hadoop_dll_local_path, os.path.join("C:\\Windows\\System32", os.path.basename(hadoop_dll_local_path))) - - # Create C:\hadoop\bin directory - if not os.path.exists(hadoop_bin_dir): - os.makedirs(hadoop_bin_dir) - - # Move winutils.exe to C:\hadoop\bin - os.rename(winutils_local_path, os.path.join(hadoop_bin_dir, os.path.basename(winutils_local_path))) - - # Set the HADOOP_HOME environment variable - os.system(f"setx /M HADOOP_HOME {hadoop_bin_dir}") - - @when("I run the Kedro pipeline") def run_kedro_pipeline(context): """Behave step to run the newly created Kedro pipeline.""" diff --git a/test_requirements.txt b/test_requirements.txt index abc1507f..317b156c 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -4,4 +4,3 @@ black~=22.0 PyYAML>=4.2, <7.0 ruff~=0.0.290 git+https://github.com/kedro-org/kedro.git@develop#egg=kedro -requests \ No newline at end of file