diff --git a/.github/README.md b/.github/README.md new file mode 100644 index 0000000..31b46be --- /dev/null +++ b/.github/README.md @@ -0,0 +1,127 @@ +# AsyncFlow Examples CI System + +This directory contains the CI/CD configuration for automatically testing AsyncFlow examples across different Python versions and backend configurations. + +## Overview + +The CI system provides: + +- **Automated Testing**: Examples are tested on every push/PR with manual dispatch option +- **Multi-Backend Support**: Tests examples with different execution backends (concurrent, dask, radical_pilot) +- **Configuration-Driven**: Uses `examples-config.yml` to specify per-example settings +- **Comprehensive Validation**: Includes syntax checking, dependency management, and output validation + +## Configuration Format + +The `examples-config.yml` file defines how each example should be tested: + +```yaml +examples: + example-name: + script: "path/to/example.py" # Path to example script + backend: "concurrent" # Backend to use: noop, concurrent, dask, radical.pilot + timeout_sec: 120 # Timeout in seconds + dependencies: # Additional pip packages + - "numpy>=1.20" + - "matplotlib" + min_output_lines: 5 # Minimum expected output lines + skip_python: # Python versions to skip + - "3.8" +``` + +### Configuration Options + +- **`script`**: Path to the example Python file (defaults to `examples/{example-name}.py`) +- **`backend`**: Execution backend to use (default: `concurrent`) +- **`timeout_sec`**: Maximum execution time in seconds (default: 120) +- **`dependencies`**: List of additional pip packages to install +- **`min_output_lines`**: Minimum number of output lines expected (default: 1) +- **`skip_python`**: List of Python versions to skip for this example + +## Workflows + +### Examples CI (`examples.yml`) + +Triggered on: + +- Push to main/master/develop branches (when relevant files change) +- Pull requests (when relevant files change) +- Manual workflow dispatch + +**What it tests:** + +- Only examples affected by code changes +- Multiple Python versions (3.9, 3.11, 3.12) +- Fast feedback for development +- When manually dispatched, can test all examples with custom Python versions + +## GitHub Action (`run-example`) + +The reusable action handles: + +1. **Environment Setup**: Python installation and caching +2. **Dependency Management**: Core package + example-specific + backend dependencies +3. **Configuration Loading**: Parsing YAML config and applying example settings +4. **Execution**: Running examples with proper timeout and error handling +5. **Validation**: Checking output for errors and minimum content requirements +6. **Artifact Collection**: Saving outputs for debugging failures + +## Local Testing + +You can validate the configuration locally: + +```bash +# Install dependencies +pip install pyyaml + +# Run validation script +python .github/bin/validate_examples.py +``` + +This script checks: + +- Configuration file syntax and structure +- Example script existence and syntax +- Basic configuration validation + +## Adding New Examples + +1. **Create the example file** in the `examples/` directory +2. **Add configuration** to `examples-config.yml`: + ```yaml + examples: + my-new-example: + script: "examples/my-new-example.py" + backend: "concurrent" + timeout_sec: 60 + dependencies: + - "requests" + ``` +3. **Test locally** using the validation script +4. 
**Commit changes** - CI will automatically test the new example + +## Backend-Specific Configuration + +### Concurrent Backend + +- **Description**: Built-in Python `concurrent.futures` backend +- **Dependencies**: None (included with Python) +- **Use Case**: General examples, I/O-bound tasks + +### Dask Backend + +- **Description**: Distributed computing backend +- **Dependencies**: `dask[complete]` +- **Use Case**: CPU-intensive parallel tasks + +### RADICAL-Pilot Backend + +- **Description**: HPC-focused execution backend +- **Dependencies**: `radical.pilot` +- **Use Case**: HPC environments, large-scale computing + +### NoOp Backend + +- **Description**: No-operation backend for testing +- **Dependencies**: None +- **Use Case**: Testing workflow logic without execution diff --git a/.github/actionlint.yaml b/.github/actionlint.yaml new file mode 100644 index 0000000..f9cb635 --- /dev/null +++ b/.github/actionlint.yaml @@ -0,0 +1,21 @@ +# Configuration for actionlint +# https://github.com/rhysd/actionlint/blob/main/docs/config.md + +# Disable specific checks if needed +# self-hosted-runner: +# # Labels of self-hosted runner in array of strings +# labels: +# - linux.2xlarge +# - windows-latest-xl +# # Repository path of self-hosted runner configurations +# config-file: path/to/runner-config.json + +# Schema validation config +# config-variables: +# # Array of known custom configuration variable names +# vars: [] + +# Format configuration +# format: +# # Available formats: sarif, json +# output-format: "" diff --git a/.github/actions/run-example/action.yml b/.github/actions/run-example/action.yml new file mode 100644 index 0000000..9b71136 --- /dev/null +++ b/.github/actions/run-example/action.yml @@ -0,0 +1,202 @@ +name: "Run single AsyncFlow example" +description: "Runs a single AsyncFlow example with a specific Python version and backend" + +inputs: + example: + description: "Example key in .github/examples-config.yml" + required: true + python-version: + description: "Python version, e.g., 3.11" + required: true + +outputs: + success: + description: "Whether the example ran successfully" + value: ${{ steps.run.outputs.success }} + +runs: + using: "composite" + steps: + - name: Setup Python + uses: actions/setup-python@v5 + id: setup_py + with: + python-version: ${{ inputs.python-version }} + + - name: Show Python details + shell: bash + run: | + PY="${{ steps.setup_py.outputs.python-path }}" + echo "python path: $PY" + "$PY" -V + "$PY" -c "import sys, sysconfig; print('include:', sysconfig.get_paths()['include'])" + + - name: Cache pip + uses: actions/cache@v4 + with: + path: ~/.cache/pip + key: pip-asyncflow-${{ runner.os }}-${{ inputs.python-version }}-${{ hashFiles('**/pyproject.toml') }} + restore-keys: | + pip-asyncflow-${{ runner.os }}-${{ inputs.python-version }}- + + - name: Ensure yq present + shell: bash + run: | + if ! command -v yq >/dev/null 2>&1; then + sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 + sudo chmod +x /usr/local/bin/yq + fi + + - name: Load example configuration + shell: bash + id: cfg + run: | + set -euo pipefail + CFG=".github/examples-config.yml" + EXAMPLE="${{ inputs.example }}" + + if [ ! -f "$CFG" ]; then + echo "❌ Missing $CFG" + exit 2 + fi + + if ! 
yq -e ".examples.$EXAMPLE" "$CFG" >/dev/null; then + echo "❌ Example '$EXAMPLE' not found in $CFG" + yq '.examples | keys' "$CFG" || echo "No examples found" + exit 2 + fi + + script=$(yq -r ".examples.$EXAMPLE.script" "$CFG") + timeout_sec=$(yq -r ".examples.$EXAMPLE.timeout_sec" "$CFG") + backend=$(yq -r ".examples.$EXAMPLE.backend // \"concurrent\"" "$CFG") + min_output_lines=$(yq -r ".examples.$EXAMPLE.min_output_lines // 3" "$CFG") + + # Handle arrays properly - return empty array if null + skip_python_raw=$(yq -r ".examples.$EXAMPLE.skip_python" "$CFG") + if [ "$skip_python_raw" = "null" ]; then + skip_python="[]" + else + skip_python=$(yq -o json ".examples.$EXAMPLE.skip_python" "$CFG" | jq -c '.') + fi + + dependencies_raw=$(yq -r ".examples.$EXAMPLE.dependencies" "$CFG") + if [ "$dependencies_raw" = "null" ]; then + dependencies="[]" + else + dependencies=$(yq -o json ".examples.$EXAMPLE.dependencies" "$CFG" | jq -c '.') + fi + + echo "script=$script" >> "$GITHUB_OUTPUT" + echo "timeout_sec=$timeout_sec" >> "$GITHUB_OUTPUT" + echo "backend=$backend" >> "$GITHUB_OUTPUT" + echo "min_output_lines=$min_output_lines" >> "$GITHUB_OUTPUT" + echo "skip_python=$skip_python" >> "$GITHUB_OUTPUT" + echo "dependencies=$dependencies" >> "$GITHUB_OUTPUT" + + # Check if this Python version should be skipped + if echo "$skip_python" | jq -e "index(\"${{ inputs.python-version }}\")" > /dev/null; then + echo "skip=true" >> "$GITHUB_OUTPUT" + echo "⏭️ Skipping example on Python ${{ inputs.python-version }}" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + + - name: Skip if configured + if: steps.cfg.outputs.skip == 'true' + shell: bash + run: | + echo "Example ${{ inputs.example }} configured to skip Python ${{ inputs.python-version }}" + exit 0 + + - name: Install AsyncFlow and dependencies + if: steps.cfg.outputs.skip != 'true' + shell: bash + run: | + PY="${{ steps.setup_py.outputs.python-path }}" + "$PY" -m pip install --upgrade pip + "$PY" -m pip install -e . 
+ + # Install example-specific dependencies + DEPS='${{ steps.cfg.outputs.dependencies }}' + if [ "$DEPS" != "[]" ]; then + echo "Installing dependencies: $DEPS" + echo "$DEPS" | jq -r '.[]' | while read dep; do + echo "Installing: $dep" + "$PY" -m pip install "$dep" + done + fi + + # Install backend-specific dependencies + BACKEND='${{ steps.cfg.outputs.backend }}' + case "$BACKEND" in + "dask") + echo "Installing Dask backend dependencies via AsyncFlow optional dependencies" + "$PY" -m pip install -e '.[dask]' || echo "Dask backend installation failed" + ;; + "radical_pilot") + echo "Installing RADICAL-Pilot backend dependencies via AsyncFlow optional dependencies" + "$PY" -m pip install -e '.[radicalpilot]' || echo "RADICAL-Pilot backend installation failed" + ;; + "concurrent"|"noop") + echo "Using built-in backend: $BACKEND" + ;; + esac + + - name: Run example script + if: steps.cfg.outputs.skip != 'true' + id: run + shell: bash + run: | + set -euo pipefail + PY="${{ steps.setup_py.outputs.python-path }}" + SCRIPT="${{ steps.cfg.outputs.script }}" + TIMEOUT="${{ steps.cfg.outputs.timeout_sec }}" + BACKEND="${{ steps.cfg.outputs.backend }}" + + # Set backend environment variable + export RADICAL_ASYNCFLOW_BACKEND="$BACKEND" + + echo "πŸš€ Running: $SCRIPT (timeout: ${TIMEOUT}s, backend: $BACKEND)" + + # Create output directory and run with output capture + mkdir -p "example-outputs" + OUTPUT_FILE="example-outputs/${{ inputs.example }}-py${{ inputs.python-version }}.log" + + if timeout "${TIMEOUT}s" "$PY" "$SCRIPT" > "$OUTPUT_FILE" 2>&1; then + echo "success=true" >> "$GITHUB_OUTPUT" + echo "βœ… Example completed successfully" + + # Show output summary + LINES=$(wc -l < "$OUTPUT_FILE") + echo "Output: $LINES lines" + + # Check minimum output requirement + MIN_LINES=${{ steps.cfg.outputs.min_output_lines }} + if [ "$LINES" -lt "$MIN_LINES" ]; then + echo "⚠️ Output below minimum ($LINES < $MIN_LINES lines) - possible crash" + echo "Full output:" + cat "$OUTPUT_FILE" + else + echo "First 10 lines of output:" + head -10 "$OUTPUT_FILE" + fi + else + echo "success=false" >> "$GITHUB_OUTPUT" + echo "❌ Example failed" + echo "Last 20 lines of output:" + tail -20 "$OUTPUT_FILE" + exit 1 + fi + + - name: Validate output patterns + if: steps.cfg.outputs.skip != 'true' && steps.run.outputs.success == 'true' + shell: bash + run: | + OUTPUT_FILE="example-outputs/${{ inputs.example }}-py${{ inputs.python-version }}.log" + + # Check for error patterns (but allow expected ones) + if grep -i "traceback\|exception\|error" "$OUTPUT_FILE" | grep -v "expected\|handled\|graceful"; then + echo "⚠️ Found potential errors in output (but example still succeeded)" + fi + + echo "βœ… Output validation completed" diff --git a/.github/bin/validate_examples.py b/.github/bin/validate_examples.py new file mode 100644 index 0000000..e626356 --- /dev/null +++ b/.github/bin/validate_examples.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +"""Test script to validate AsyncFlow examples configuration and basic functionality.""" + +import subprocess +import sys +from pathlib import Path + +import yaml + + +def main(): + """Test the examples configuration.""" + print("πŸš€ Testing AsyncFlow examples configuration...") + + # Load configuration + config_path = Path(".github/examples-config.yml") + if not config_path.exists(): + print("❌ Missing .github/examples-config.yml") + return False + + with open(config_path) as f: + config = yaml.safe_load(f) + + if "examples" not in config: + print("❌ Missing 'examples' key in 
configuration") + return False + + examples = config["examples"] + print(f"πŸ“ Found {len(examples)} configured examples:") + + success_count = 0 + total_count = len(examples) + + for example_key, example_config in examples.items(): + print(f"\nπŸ” Testing {example_key}...") + + # Validate configuration + script = example_config.get("script", f"examples/{example_key}.py") + backend = example_config.get("backend", "concurrent") + timeout = example_config.get("timeout_sec", 120) + + print(f" Script: {script}") + print(f" Backend: {backend}") + print(f" Timeout: {timeout}s") + + # Check if script exists + script_path = Path(script) + if not script_path.exists(): + print(f" ❌ Script not found: {script}") + continue + + # Basic syntax check + try: + result = subprocess.run( + [sys.executable, "-m", "py_compile", str(script_path)], + capture_output=True, + text=True, + ) + + if result.returncode == 0: + print(" βœ… Syntax check passed") + success_count += 1 + else: + print(f" ❌ Syntax error: {result.stderr.strip()}") + + except Exception as e: + print(f" ❌ Failed to check syntax: {e}") + + print(f"\nπŸ“Š Results: {success_count}/{total_count} examples passed syntax check") + + if success_count == total_count: + print("βœ… All examples configuration validated successfully!") + return True + else: + print("❌ Some examples have issues") + return False + + +if __name__ == "__main__": + success = main() + sys.exit(0 if success else 1) diff --git a/.github/examples-config.yml b/.github/examples-config.yml new file mode 100644 index 0000000..dab428f --- /dev/null +++ b/.github/examples-config.yml @@ -0,0 +1,52 @@ +# AsyncFlow Examples Configuration +# This file defines how each example should be tested in CI +# Format matches stride-research/observe for consistency + +examples: + 01-workflows: + script: examples/01-workflows.py + timeout_sec: 60 + backend: "concurrent" + dependencies: [] + min_output_lines: 5 # Minimum lines in output (crash detection) + + 02-blocks: + script: examples/02-blocks.py + timeout_sec: 60 + backend: "concurrent" + dependencies: [] + min_output_lines: 5 + + 03-nested_blocks: + script: examples/03-nested_blocks.py + timeout_sec: 120 + backend: "concurrent" + dependencies: [] + min_output_lines: 5 + + 04-dask_execution_backend: + script: examples/04-dask_execution_backend.py + timeout_sec: 180 + backend: "dask" + dependencies: + - "numpy" # Required by the example + min_output_lines: 10 + skip_python: [] + + 05-radical_execution_backend: + script: examples/05-radical_execution_backend.py + timeout_sec: 300 + backend: "radical_pilot" + dependencies: [] # Installed via AsyncFlow optional dependencies + min_output_lines: 10 + # Skip if RP doesn't support Python 3.12+ yet (3.13 has multiprocessing issues) + skip_python: ["3.12", "3.13"] + +# Tutorial notebooks (if they exist) +notebooks: + build_async_workflows: + script: examples/tutorials/build_async_workflows.ipynb + timeout_sec: 180 + backend: "concurrent" + dependencies: [] + notebook: true diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 40d402e..01a9879 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -24,7 +24,7 @@ jobs: with: python-version: 3.x - - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV - uses: actions/cache@v4 with: diff --git a/.github/workflows/examples.yml b/.github/workflows/examples.yml new file mode 100644 index 0000000..8082ddc --- /dev/null +++ b/.github/workflows/examples.yml @@ 
-0,0 +1,96 @@ +name: Examples + +on: + pull_request: + branches: [ main, master, develop ] + paths: + - 'src/**' + - 'examples/**' + - '.github/workflows/examples.yml' + - '.github/actions/run-example/**' + - '.github/examples-config.yml' + - 'pyproject.toml' + push: + branches: [ main, master, develop ] + paths: + - 'src/**' + - 'examples/**' + - '.github/workflows/examples.yml' + - '.github/actions/run-example/**' + - '.github/examples-config.yml' + - 'pyproject.toml' + workflow_dispatch: + inputs: + python-versions: + description: 'Python versions to test (JSON array)' + required: false + default: '["3.9", "3.11", "3.12"]' + type: string + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + plan: + runs-on: ubuntu-22.04 + outputs: + examples: ${{ steps.collect.outputs.examples }} + steps: + - uses: actions/checkout@v4 + + - name: Ensure yq present + run: | + if ! command -v yq >/dev/null 2>&1; then + sudo wget -qO /usr/local/bin/yq https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 + sudo chmod +x /usr/local/bin/yq + fi + + - name: Validate examples-config schema + run: | + set -euo pipefail + CFG=".github/examples-config.yml" + if [ ! -f "$CFG" ]; then + echo "❌ Missing $CFG. Please add it." + exit 2 + fi + yq -e '.examples | type=="!!map"' "$CFG" >/dev/null \ + || { echo "❌ $CFG: 'examples' must be a mapping"; exit 2; } + + - name: Collect example keys + id: collect + run: | + set -euo pipefail + CFG=".github/examples-config.yml" + # produce JSON array of keys: ["ex1","ex2",...] + keys_json=$(yq -o=json -I=0 '.examples | keys' "$CFG") + echo "examples=$keys_json" >> "$GITHUB_OUTPUT" + echo "Examples: $keys_json" + + run: + needs: plan + if: ${{ needs.plan.outputs.examples != '[]' && needs.plan.outputs.examples != '' }} + runs-on: ubuntu-22.04 + strategy: + fail-fast: false + matrix: + example: ${{ fromJson(needs.plan.outputs.examples) }} + python-version: ${{ fromJson(inputs.python-versions || '["3.9", "3.11", "3.12"]') }} + max-parallel: 6 + steps: + - uses: actions/checkout@v4 + + - name: Run example + uses: ./.github/actions/run-example + with: + example: ${{ matrix.example }} + python-version: ${{ matrix.python-version }} + + - name: Upload outputs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: failed-example-${{ matrix.example }}-py${{ matrix.python-version }} + path: example-outputs/ + retention-days: 7 + if-no-files-found: warn diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f9e6856..d4920f4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -3,7 +3,6 @@ name: tests on: push: branches: [main, test-me-*] - tags: pull_request: workflow_dispatch: @@ -39,12 +38,12 @@ jobs: python-version: ${{ matrix.python }} - name: Get pip cache dir id: pip-cache-dir - run: echo "PIP_CACHE_DIR=$(pip cache dir)" >> $GITHUB_ENV + run: echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT - name: Use pip cache id: pip-cache uses: actions/cache@v4 with: - path: ${{ env.PIP_CACHE_DIR }} + path: ${{ steps.pip-cache-dir.outputs.dir }} key: tests-unit-${{ matrix.os }}-pip-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} restore-keys: | tests-unit-${{ matrix.os }}-pip-${{ matrix.python }}- @@ -84,12 +83,12 @@ jobs: python-version: ${{ matrix.python }} - name: Get pip cache dir id: pip-cache-dir - run: echo "PIP_CACHE_DIR=$(pip cache dir)" >> $GITHUB_ENV + run: echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT - name: Use pip cache id: pip-cache uses: 
actions/cache@v4 with: - path: ${{ env.PIP_CACHE_DIR }} + path: ${{ steps.pip-cache-dir.outputs.dir }} key: tests-integration-${{ matrix.os }}-pip-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} restore-keys: | tests-integration-${{ matrix.os }}-pip-${{ matrix.python }}- diff --git a/.gitignore b/.gitignore index ac2f92f..ddd0d56 100644 --- a/.gitignore +++ b/.gitignore @@ -170,3 +170,21 @@ cython_debug/ # PyPI configuration file .pypirc asyncflow.session.* + +# Ignore vscode settings +.vscode/ + +# Ignore GH instructions +.github/instructions + +# Ignore codacy settings +.codacy/ + +# Ignore devcontainer settings +.devcontainer/ + +# Ignore direnv settings +.direnv/ + +# Ignore dotenv settings +.envrc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..efd6512 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,70 @@ +# .pre-commit-config.yaml + +default_language_version: + python: python3 + +repos: + - repo: https://github.com/PyCQA/docformatter + rev: v1.7.7 + hooks: + - id: docformatter + args: ["--in-place", "--wrap-summaries=88", "--wrap-descriptions=88"] + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.13.1 + hooks: + - id: ruff + args: ["--fix"] + exclude: '\.ipynb$' + - id: ruff-format + exclude: '\.ipynb$' + + # - repo: https://github.com/pre-commit/mirrors-mypy + # rev: v1.14.0 + # hooks: + # - id: mypy + # additional_dependencies: [types-requests] + # args: ["--config-file=pyproject.toml"] + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + exclude: ^mkdocs\.yml$ + - id: check-toml + - id: debug-statements + - id: check-merge-conflict + - id: check-added-large-files + args: ["--maxkb=500"] + + - repo: https://github.com/rhysd/actionlint + rev: v1.7.7 + hooks: + - id: actionlint + args: ["-config-file", ".github/actionlint.yaml"] + + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.34.0 + hooks: + - id: check-github-workflows + files: ^\.github/workflows/.*\.ya?ml$ + + - repo: https://github.com/crate-ci/typos + rev: v1.36.2 + hooks: + - id: typos + args: ["--config", "_typos.toml"] + + - repo: https://github.com/Yelp/detect-secrets + rev: v1.5.0 + hooks: + - id: detect-secrets + args: + - --baseline + - .secrets.baseline + - --exclude-files + - (?i)(^\.?venv/|^\.direnv/|^\.mypy_cache/|^\.ruff_cache/|^\.pytest_cache/|^\.tox/|^\.cache/|^dist/|^build/|^site/|^htmlcov/|^node_modules/|^docs/_build/|^\.ipynb_checkpoints/|\.(png|jpe?g|gif|svg|pdf|zip|tar|gz|tgz|xz|bz2|zst|woff2?)$) + - --exclude-lines + - (?i)(^\s*POSTGRES_PASSWORD:\s*postgres$|postgresql://postgres:postgres@localhost(:\d+)?/|(user|username|password)\s*=\s*(test|example|dummy)) diff --git a/.secrets.baseline b/.secrets.baseline new file mode 100644 index 0000000..a46edf8 --- /dev/null +++ b/.secrets.baseline @@ -0,0 +1,153 @@ +{ + "version": "1.5.0", + "plugins_used": [ + { + "name": "ArtifactoryDetector" + }, + { + "name": "AWSKeyDetector" + }, + { + "name": "AzureStorageKeyDetector" + }, + { + "name": "Base64HighEntropyString", + "limit": 4.5 + }, + { + "name": "BasicAuthDetector" + }, + { + "name": "CloudantDetector" + }, + { + "name": "DiscordBotTokenDetector" + }, + { + "name": "GitHubTokenDetector" + }, + { + "name": "GitLabTokenDetector" + }, + { + "name": "HexHighEntropyString", + "limit": 3.0 + }, + { + "name": "IbmCloudIamDetector" + }, + { + "name": "IbmCosHmacDetector" + }, + { + "name": "IPPublicDetector" 
+ }, + { + "name": "JwtTokenDetector" + }, + { + "name": "KeywordDetector", + "keyword_exclude": "" + }, + { + "name": "MailchimpDetector" + }, + { + "name": "NpmDetector" + }, + { + "name": "OpenAIDetector" + }, + { + "name": "PrivateKeyDetector" + }, + { + "name": "PypiTokenDetector" + }, + { + "name": "SendGridDetector" + }, + { + "name": "SlackDetector" + }, + { + "name": "SoftlayerDetector" + }, + { + "name": "SquareOAuthDetector" + }, + { + "name": "StripeDetector" + }, + { + "name": "TelegramBotTokenDetector" + }, + { + "name": "TwilioKeyDetector" + } + ], + "filters_used": [ + { + "path": "detect_secrets.filters.allowlist.is_line_allowlisted" + }, + { + "path": "detect_secrets.filters.common.is_baseline_file", + "filename": ".secrets.baseline" + }, + { + "path": "detect_secrets.filters.common.is_ignored_due_to_verification_policies", + "min_level": 2 + }, + { + "path": "detect_secrets.filters.heuristic.is_indirect_reference" + }, + { + "path": "detect_secrets.filters.heuristic.is_likely_id_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_lock_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_not_alphanumeric_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_potential_uuid" + }, + { + "path": "detect_secrets.filters.heuristic.is_prefixed_with_dollar_sign" + }, + { + "path": "detect_secrets.filters.heuristic.is_sequential_string" + }, + { + "path": "detect_secrets.filters.heuristic.is_swagger_file" + }, + { + "path": "detect_secrets.filters.heuristic.is_templated_secret" + }, + { + "path": "detect_secrets.filters.regex.should_exclude_file", + "pattern": [ + "(?i)(^\\.?venv/|^\\.direnv/|^\\.mypy_cache/|^\\.ruff_cache/|^\\.pytest_cache/|^\\.tox/|^\\.cache/|^dist/|^build/|^site/|^htmlcov/|^node_modules/|^docs/_build/|^\\.ipynb_checkpoints/|\\.(png|jpe?g|gif|svg|pdf|zip|tar|gz|tgz|xz|bz2|zst|woff2?)$)" + ] + }, + { + "path": "detect_secrets.filters.regex.should_exclude_line", + "pattern": [ + "(?i)(^\\s*POSTGRES_PASSWORD:\\s*postgres$|postgresql://postgres:postgres@localhost(:\\d+)?/|(user|username|password)\\s*=\\s*(test|example|dummy))" + ] + } + ], + "results": { + "examples/tutorials/build_async_workflows.ipynb": [ + { + "type": "Base64 High Entropy String", + "filename": "examples/tutorials/build_async_workflows.ipynb", + "hashed_secret": "627076ecb044ca925ea922ff9189c83fd6178cf6", + "is_verified": false, + "line_number": 6 + } + ] + }, + "generated_at": "2025-09-27T17:50:40Z" +} diff --git a/.wci.yml b/.wci.yml index d5b4bc4..b2e0d6e 100644 --- a/.wci.yml +++ b/.wci.yml @@ -2,11 +2,11 @@ name: RADICAL-AsyncFlow icon: https://radical-cybertools.github.io/assets/radical-logo-transparent.png headline: High Performance Asynchronous Workflow Scripting Library description: | - RADICAL-AsyncFlow (RAF) is an asynchronous scripting library for building - high-performance, scalable workflows that run on HPC systems, clusters, - and local machines. Designed for flexibility and speed, it allows users - to compose complex workflows from async and sync tasks with clear - dependencies, while ensuring efficient execution at any scale with + RADICAL-AsyncFlow (RAF) is an asynchronous scripting library for building + high-performance, scalable workflows that run on HPC systems, clusters, + and local machines. Designed for flexibility and speed, it allows users + to compose complex workflows from async and sync tasks with clear + dependencies, while ensuring efficient execution at any scale with different execution backends. 
language: Python @@ -20,4 +20,3 @@ documentation: general: https://radical-cybertools.github.io/radical.asyncflow/ installation: https://radical-cybertools.github.io/radical.asyncflow/install/ tutorial: https://radical-cybertools.github.io/radical.asyncflow/basic/index.html - diff --git a/README.md b/README.md index e2d6240..9cb3686 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@

-RADICAL AsyncFlow (RAF) is a fast asynchronous scripting library built on top of [asyncio](https://docs.python.org/3/library/asyncio.html) for building powerful async/sync workflows on HPC, clusters, and local machines. It supports pluggable execution backends with intuitive task dependencies and workflow composition. +RADICAL AsyncFlow (RAF) is a fast asynchronous scripting library built on top of [asyncio](https://docs.python.org/3/library/asyncio.html) for building powerful async/sync workflows on HPC, clusters, and local machines. It supports pluggable execution backends with intuitive task dependencies and workflow composition. - ⚑ Powerful asynchronous workflows β€” Compose complex async and sync workflows easily, with intuitive task dependencies and campaign orchestration. @@ -28,21 +28,38 @@ RADICAL AsyncFlow (RAF) is a fast asynchronous scripting library built on top of Currently, RAF supports the following execution backends: +* **Local Development** + * [Concurrent.Executor](https://docs.python.org/3/library/concurrent.futures.html#executor-objects) - Multi-threading/processing on single machine + * Noop - Dry-run backend for testing and validation -- [Radical.Pilot](https://radicalpilot.readthedocs.io/en/stable/#) -- [Dask.Parallel](https://docs.dask.org/en/stable/) -- [Concurrent.Executor](https://docs.python.org/3/library/concurrent.futures.html#executor-objects) -- Noop with `dry_run` -- Custom implementations +* **HPC and Distributed Computing** *(optional dependencies)* + * [RADICAL-Pilot](https://radicalpilot.readthedocs.io/en/stable/#) - Large-scale HPC and supercomputing + * [Dask.Distributed](https://docs.dask.org/en/stable/) - Distributed computing clusters + +* **Custom Implementations** + * Extensible backend system for custom execution environments ## βš™οΈ Installation -Radical Asyncflow package is available on [PyPI](https://pypi.org/project/radical-asyncflow/). + +### Core Installation +Radical AsyncFlow package is available on [PyPI](https://pypi.org/project/radical-asyncflow/). 
+```bash +pip install radical.asyncflow ``` -pip install radical-asyncflow + +### Optional HPC Backends +For HPC and distributed computing capabilities: +```bash +# All HPC backends +pip install 'radical.asyncflow[hpc]' + +# Specific backends +pip install 'radical.asyncflow[dask]' # Dask distributed computing +pip install 'radical.asyncflow[radicalpilot]' # RADICAL-Pilot for HPC ``` +### Development Installation For developers: - ```shell git clone https://github.com/radical-cybertools/radical.asyncflow cd radical.asyncflow @@ -55,17 +72,17 @@ pip install -e .[dev,lint,doc] ## Basic Usage + ```python import asyncio - -from radical.asyncflow import WorkflowEngine -from radical.asyncflow import ConcurrentExecutionBackend - -from concurrent.futures import ThreadPoolExecutor +from radical.asyncflow import WorkflowEngine, factory async def main(): - # Create backend and workflow - backend = await ConcurrentExecutionBackend(ThreadPoolExecutor()) + # Create backend and workflow using factory + backend = await factory.create_backend("concurrent", config={ + "max_workers": 4, + "executor_type": "thread" + }) flow = await WorkflowEngine.create(backend=backend) @flow.executable_task @@ -79,7 +96,7 @@ async def main(): # create the workflow t1_fut = task1() t2_result = await task2(t1_fut) # t2 depends on t1 (waits for it) - + print(t2_result) # shutdown the execution backend await flow.shutdown() diff --git a/_typos.toml b/_typos.toml new file mode 100644 index 0000000..89e721b --- /dev/null +++ b/_typos.toml @@ -0,0 +1,45 @@ +[default] +extend-ignore-identifiers-re = [ + # Allow common variable names that might be flagged as typos + "asyncflow", + "rhapsody", + "mturilli", + "alsaadi", + "cybertools", + "devcontainer", + "pytest", + "pydantic", + "typeguard", + "mkdocs", + "mkdocstrings", + "mkdocs-.*", + "PnP", # Plug-and-Play abbreviation + "Pn", # Part of PnP that gets flagged +] + +[default.extend-words] +# Add any project-specific words that should not be flagged as typos +asyncflow = "asyncflow" +rhapsody = "rhapsody" +PnP = "PnP" # Plug-and-Play abbreviation +Pn = "Pn" # Part of PnP that gets flagged + +[files] +extend-exclude = [ + "*.pyc", + "*.pyo", + "*.pyd", + ".git/", + ".mypy_cache/", + ".pytest_cache/", + ".ruff_cache/", + "__pycache__/", + "build/", + "dist/", + "*.egg-info/", + ".direnv/", + ".venv/", + "venv/", + ".ipynb_checkpoints/", + "node_modules/", +] diff --git a/docs/basic.md b/docs/basic.md index 0ca4809..e7bccd1 100644 --- a/docs/basic.md +++ b/docs/basic.md @@ -3,38 +3,39 @@ This guide walks you through running a **single workflow with task dependencies** using `radical.asyncflow`. -You’ll learn how to define tasks, set dependencies, execute the workflow, and shut down the engine gracefully. +You'll learn how to define tasks, set dependencies, execute the workflow, and shut down the engine gracefully. --- ## Prerequisites -- Make sure you have installed `radical.asyncflow` in your Python environment. -- You also need a working Jupyter Notebook or Python >=3.8. +- Make sure you have installed `radical.asyncflow` in your Python environment. +- You also need a working Jupyter Notebook or Python >=3.9. --- ## Import the necessary modules -You’ll need `time`, `asyncio`, and the key classes from `radical.asyncflow`. +You'll need `time`, `asyncio`, and the key classes from `radical.asyncflow`. 
```python import time import asyncio -from concurrent.futures import ThreadPoolExecutor - -from radical.asyncflow import WorkflowEngine, ConcurrentExecutionBackend +from radical.asyncflow import WorkflowEngine, factory ``` --- ## Set up the workflow engine -We initialize the workflow engine with a `ConcurrentExecutionBackend` using Python’s `ThreadPoolExecutor` or `ProcessPoolExecutor`. +We initialize the workflow engine using the backend factory to create a concurrent execution backend. ```python -backend = await ConcurrentExecutionBackend(ThreadPoolExecutor()) +backend = await factory.create_backend("concurrent", config={ + "max_workers": 4, + "executor_type": "thread" # or "process" +}) flow = await WorkflowEngine.create(backend=backend) ``` @@ -64,7 +65,7 @@ async def task3(t1_result, t2_result): return time.time() ``` -!!! note +!!! note - `task3` depends on the outputs of `task1` and `task2`. - You express this dependency by calling `task3(task1(), task2())`. - `task1` and `task2` will be automatically resolved during runtime and their values will be assigned to `task3` accordingly. @@ -113,7 +114,7 @@ Here’s an example of the output you might see: --- -!!! warning +!!! warning Make sure to **await the shutdown** of the `WorkflowEngine` before your script exits. Otherwise, resources may leak. @@ -122,9 +123,9 @@ Make sure to **await the shutdown** of the `WorkflowEngine` before your script e ## Summary You now know how to: -- Define a set of tasks with dependencies. -- Submit them to the workflow engine. -- Run the workflow asynchronously. +- Define a set of tasks with dependencies. +- Submit them to the workflow engine. +- Run the workflow asynchronously. - Shut down the engine properly. --- diff --git a/docs/best_practice.md b/docs/best_practice.md index a8bd2ae..cd6ff41 100644 --- a/docs/best_practice.md +++ b/docs/best_practice.md @@ -1,7 +1,7 @@ # Best Practices for AsyncFlow -AsyncFlow is built on top of Python’s `asyncio`, combining asynchronous execution and task dependency management with a simple API. +AsyncFlow is built on top of Python’s `asyncio`, combining asynchronous execution and task dependency management with a simple API. This page outlines **recommended practices** when using AsyncFlow effectively in your projects. --- @@ -28,7 +28,7 @@ By following these best practices, you can: - Prefer pure functions or side-effect-free coroutines as tasks. - Use `@flow.function_task` or `@flow.executable_task` decorators consistently. -!!! tip +!!! tip Name your tasks clearly to improve logs and debugging ```python @@ -124,8 +124,8 @@ Logs show task dependencies, execution order, errors. !!! success -- Define tasks clearly and concisely. -- Pass tasks as arguments to express dependencies. -- Only await at the top level. -- Shut down cleanly. -- Log at `DEBUG` level when needed. +- Define tasks clearly and concisely. +- Pass tasks as arguments to express dependencies. +- Only await at the top level. +- Shut down cleanly. +- Log at `DEBUG` level when needed. 
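
Taken together, the best-practice checklist above looks roughly like the following minimal sketch. It assumes the factory-based backend API introduced in this PR (`factory.create_backend`, `WorkflowEngine.create`, `@flow.function_task`, `await flow.shutdown()`); the task names are purely illustrative.

```python
import asyncio
import logging

from radical.asyncflow import WorkflowEngine, factory
from radical.asyncflow.logging import init_default_logger


async def main():
    # Raise to logging.DEBUG only when tracing task dependency resolution
    init_default_logger(logging.INFO)

    # One-line backend selection via the factory ("concurrent" = local threads)
    backend = await factory.create_backend(
        "concurrent", config={"max_workers": 4, "executor_type": "thread"}
    )
    flow = await WorkflowEngine.create(backend=backend)

    @flow.function_task
    async def fetch_data(*args):
        # Illustrative task: keep tasks small and side-effect free
        return [1, 2, 3]

    @flow.function_task
    async def summarize(data):
        return sum(data)

    # Express the dependency by passing the upstream task call as an argument;
    # await only at the top level of the workflow
    result = await summarize(fetch_data())
    print(result)

    # Shut down cleanly so backend resources are released
    await flow.shutdown()


if __name__ == "__main__":
    asyncio.run(main())
```
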
diff --git a/docs/composite_workflow.md b/docs/composite_workflow.md index 91b4450..65ce9dd 100644 --- a/docs/composite_workflow.md +++ b/docs/composite_workflow.md @@ -8,21 +8,21 @@ This page walks you step by step through defining and running composite workflow graph TD subgraph Block A - A_WF1[task1 --> task2 --> task3] --> A_WF2[task1 --> task2 --> task3] --> A_WF3[task1 --> task2 --> task3] + A_WF1[task1 --> task2 --> task3] --> A_WF2[task1 --> task2 --> task3] --> A_WF3[task1 --> task2 --> task3] end subgraph Block B - B_WF1[task1 --> task2 --> task3] --> B_WF2[task1 --> task2 --> task3] --> B_WF3[task1 --> task2 --> task3] + B_WF1[task1 --> task2 --> task3] --> B_WF2[task1 --> task2 --> task3] --> B_WF3[task1 --> task2 --> task3] end subgraph Block C - C_WF1[task1 --> task2 --> task3] --> C_WF2[task1 --> task2 --> task3] --> C_WF3[task1 --> task2 --> task3] + C_WF1[task1 --> task2 --> task3] --> C_WF2[task1 --> task2 --> task3] --> C_WF3[task1 --> task2 --> task3] end ``` !!! note -`Block` entity can have DAG shaped workflows where some workflows depends on others. +`Block` entity can have DAG shaped workflows where some workflows depends on others. ## Example: Independent Blocks diff --git a/docs/exec_backends.md b/docs/exec_backends.md index dc1b537..1af7e17 100644 --- a/docs/exec_backends.md +++ b/docs/exec_backends.md @@ -2,6 +2,32 @@ AsyncFlow's architecture follows a **separation of concerns** principle, completely isolating the execution backend from the asynchronous programming layer. This **plug-and-play (PnP)** design allows you to switch between different execution environments with minimal code changes β€” from local development to massive HPC clusters. +## Backend Registry and Factory System + +AsyncFlow uses a modern **registry and factory pattern** for backend management: + +- **Registry**: Discovers and lazy-loads available backends on demand +- **Factory**: Creates and initializes backend instances with proper configuration +- **Lazy Loading**: Backends are only loaded when requested, avoiding unnecessary dependencies + +This architecture provides: +- **Centralized backend management** with automatic discovery +- **Better error messages** with installation hints for missing backends +- **Proper caching** to avoid repeated failed imports +- **Type safety** with automatic validation of backend interfaces + +## Available Backends + +AsyncFlow automatically discovers and manages the following backends: + +- **`noop`** - No-operation backend for dry runs and testing +- **`concurrent`** - Local execution using Python's concurrent.futures +- **`dask`** - Distributed computing with Dask (requires `radical.asyncflow[dask]`) +- **`radical_pilot`** - HPC execution with RADICAL-Pilot (requires `radical.asyncflow[radicalpilot]`) + +!!! tip "Backend Discovery" + Use `factory.list_available_backends()` to see which backends are available in your environment and get installation hints for missing ones. + ## The Power of Backend Abstraction By design, AsyncFlow enforces that the execution backend should be entirely isolated from the asynchronous programming layer. 
This means you can seamlessly transition your workflows from: @@ -14,44 +40,71 @@ By design, AsyncFlow enforces that the execution backend should be entirely isol ## Local vs HPC Execution: A Side-by-Side Comparison -### Local Execution with ConcurrentExecutionBackend +### Local Execution with Factory Pattern ```python -# Local execution with threads - -from concurrent.futures import ThreadPoolExecutor -from radical.asyncflow import ConcurrentExecutionBackend +# Local execution with concurrent backend +from radical.asyncflow import factory -backend = ConcurrentExecutionBackend(ThreadPoolExecutor()) +# Create backend using factory +backend = await factory.create_backend("concurrent", config={ + "max_workers": 4, + "executor_type": "thread" # or "process" +}) ``` -### HPC Execution with RadicalExecutionBackend +### HPC Execution with Factory Pattern ```python -# HPC execution with Radical.Pilot -from radical.asyncflow import RadicalExecutionBackend +# HPC execution with RADICAL-Pilot +from radical.asyncflow import factory -backend = RadicalExecutionBackend({'resource': 'local.localhost'}) +# Create backend using factory +backend = await factory.create_backend("radical_pilot", config={ + "resource": "local.localhost" +}) ``` !!! success **One line change** transforms your workflow from local thread execution to distributed HPC execution across thousands of nodes. +### Backend Discovery and Error Handling + +```python +from radical.asyncflow import factory + +# List available backends +backends = factory.list_available_backends() +for name, info in backends.items(): + print(f"Backend '{name}': {'βœ…' if info['available'] else '❌'}") + if not info['available']: + print(f" Installation hint: {info['installation_hint']}") +``` + +!!! tip "Helpful Error Messages" + When a backend is not available, AsyncFlow provides clear error messages with installation instructions: + ``` + Backend 'dask' is not available. + Available backends: noop, concurrent + Installation hint: Try: pip install 'radical.asyncflow[dask]' + ``` + ## Complete HPC Workflow Example -Below is a complete example demonstrating how to execute workflows on HPC infrastructure using `RadicalExecutionBackend`. +Below is a complete example demonstrating how to execute workflows on HPC infrastructure using the factory pattern. ### Setup for HPC Execution ```python import time import asyncio -from radical.asyncflow import RadicalExecutionBackend -from radical.asyncflow import WorkflowEngine +from radical.asyncflow import factory, WorkflowEngine -# HPC backend configuration -backend = RadicalExecutionBackend({'resource': 'local.localhost'}) # (1)! -flow = WorkflowEngine(backend=backend) +# HPC backend configuration using factory +backend = await factory.create_backend("radical_pilot", config={ + "resource": "local.localhost" # (1)! +}) +flow = await WorkflowEngine.create(backend=backend) ``` 1. Configure for HPC execution - can target supercomputers, GPU clusters, local resources @@ -94,11 +147,11 @@ async def task3(*args): ```python async def run_wf(wf_id): print(f'Starting workflow {wf_id} at {time.time()}') - + # Create dependent task execution t3 = task3(task1(), task2()) # (1)! 
await t3 # Wait for distributed execution to complete - + print(f'Workflow {wf_id} completed at {time.time()}') ``` @@ -158,12 +211,12 @@ await flow.shutdown() ```python # Configure for GPU-accelerated computing -backend = RadicalExecutionBackend({ - 'resource': 'ornl.summit', - 'queue': 'gpu', - 'nodes': 100, - 'gpus_per_node': 6, - 'walltime': 120 # minutes +backend = await factory.create_backend("radical_pilot", config={ + "resource": "ornl.summit", + "queue": "gpu", + "nodes": 100, + "gpus_per_node": 6, + "walltime": 120 # minutes }) ``` @@ -171,15 +224,28 @@ backend = RadicalExecutionBackend({ ```python # Configure for massive CPU parallelism -backend = RadicalExecutionBackend({ - 'resource': 'tacc.frontera', - 'queue': 'normal', - 'nodes': 1000, - 'cores_per_node': 56, - 'walltime': 240 # minutes +backend = await factory.create_backend("radical_pilot", config={ + "resource": "tacc.frontera", + "queue": "normal", + "nodes": 1000, + "cores_per_node": 56, + "walltime": 240 # minutes }) ``` +### Backend Availability Check + +```python +# Check if HPC backend is available before creating +backends = factory.list_available_backends() +if backends["radical_pilot"]["available"]: + backend = await factory.create_backend("radical_pilot", config={...}) +else: + print(f"RADICAL-Pilot not available: {backends['radical_pilot']['installation_hint']}") + # Fallback to local execution + backend = await factory.create_backend("concurrent") +``` + !!! warning **Resource Management**: Always call `await flow.shutdown()` to properly release HPC resources and prevent job queue issues. @@ -200,8 +266,8 @@ backend = RadicalExecutionBackend({ AsyncFlow's backend abstraction means your workflow logic remains **identical** whether running on: - Your laptop with 8 cores -- A university cluster with 1,000 nodes +- A university cluster with 1,000 nodes - A national supercomputer with 100,000+ cores - GPU clusters with thousands of accelerators -This **write-once, run-anywhere** approach dramatically reduces the complexity of scaling computational workflows from development to production HPC environments. \ No newline at end of file +This **write-once, run-anywhere** approach dramatically reduces the complexity of scaling computational workflows from development to production HPC environments. diff --git a/docs/generate_api_references.py b/docs/generate_api_references.py index 4e6ac89..c186e10 100644 --- a/docs/generate_api_references.py +++ b/docs/generate_api_references.py @@ -1,6 +1,7 @@ """Generate the code reference pages and navigation.""" from pathlib import Path + import mkdocs_gen_files nav = mkdocs_gen_files.Nav() diff --git a/docs/index.md b/docs/index.md index 1346d48..102735b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,6 +1,6 @@ # RADICAL AsyncFlow (RAF) -**RADICAL AsyncFlow (RAF)** is a fast asynchronous scripting library built on top of [asyncio](https://docs.python.org/3/library/asyncio.html) for building powerful asynchronous workflows on HPC, clusters, and local machines. It supports pluggable execution backends with intuitive task dependencies and workflow composition. +**RADICAL AsyncFlow (RAF)** is a fast asynchronous scripting library built on top of [asyncio](https://docs.python.org/3/library/asyncio.html) for building powerful asynchronous workflows on HPC, clusters, and local machines. It supports pluggable execution backends with intuitive task dependencies and workflow composition. 
- ⚑ **Powerful asynchronous workflows** β€” Compose complex async and sync workflows easily, with intuitive task dependencies. diff --git a/docs/install.md b/docs/install.md index 35859d8..5cc6cf1 100644 --- a/docs/install.md +++ b/docs/install.md @@ -7,7 +7,7 @@ This guide will help you install AsyncFlow in a clean Python environment. ## Prerequisites -* Python β‰₯ **3.8** (recommended: 3.11 or newer) +* Python β‰₯ **3.9** (recommended: 3.11 or newer) * `pip` β‰₯ 22.0 * Optional: `conda` β‰₯ 4.10 for Conda environments @@ -20,6 +20,39 @@ pip --version --- +## Installation Options + +AsyncFlow supports different installation modes depending on your execution backend needs: + +### Core Installation (Local Development) +```bash +pip install radical.asyncflow +``` + +This provides: +- **`noop`** backend for testing and dry runs +- **`concurrent`** backend for local parallel execution + +### HPC Execution Backends +```bash +# All HPC backends +pip install 'radical.asyncflow[hpc]' + +# Specific backends +pip install 'radical.asyncflow[dask]' # Dask distributed computing +pip install 'radical.asyncflow[radicalpilot]' # RADICAL-Pilot for HPC +``` + +### Development Installation +```bash +pip install 'radical.asyncflow[dev]' # Testing and development tools +``` + +!!! tip "Backend Discovery" + AsyncFlow automatically discovers available backends at runtime. Missing backends show helpful installation hints when requested. + +--- + ## Recommended: Create a Clean Environment It is **strongly recommended** to install AsyncFlow in an isolated environment (Conda or `venv`) to avoid conflicts with system packages. diff --git a/examples/01-workflows.py b/examples/01-workflows.py index 0267396..3c97842 100644 --- a/examples/01-workflows.py +++ b/examples/01-workflows.py @@ -1,18 +1,19 @@ import asyncio import logging import time -from concurrent.futures import ThreadPoolExecutor -from radical.asyncflow import ConcurrentExecutionBackend, WorkflowEngine +from radical.asyncflow import WorkflowEngine, factory from radical.asyncflow.logging import init_default_logger logger = logging.getLogger(__name__) -async def main(): +async def main(): init_default_logger(logging.INFO) - backend = await ConcurrentExecutionBackend(ThreadPoolExecutor()) + backend = await factory.create_backend( + "concurrent", config={"max_workers": 4, "executor_type": "thread"} + ) flow = await WorkflowEngine.create(backend=backend) @flow.function_task @@ -35,24 +36,26 @@ async def task2(*args): async def task3(*args): # Simulate aggregating results from task1 and task2 sum_data, even_numbers = args - logger.info(f"Task 3: Aggregating, sum: {sum_data}," - f"even count: {len(even_numbers)}") + logger.info( + f"Task 3: Aggregating, sum: {sum_data},even count: {len(even_numbers)}" + ) await asyncio.sleep(1) # Aggregate results return {"total_sum": sum_data, "even_count": len(even_numbers)} async def run_wf(wf_id): - logger.info(f'Starting workflow {wf_id} at {time.time()}') + logger.info(f"Starting workflow {wf_id} at {time.time()}") t1 = task1() t2 = task2(t1) t3 = task3(t1, t2) result = await t3 # Await the final task - logger.info(f'Workflow {wf_id} completed at {time.time()}, result: {result}') + logger.info(f"Workflow {wf_id} completed at {time.time()}, result: {result}") # Run workflows concurrently await asyncio.gather(*[run_wf(i) for i in range(1024)]) await flow.shutdown() -if __name__ == '__main__': + +if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/02-blocks.py b/examples/02-blocks.py index eabcb59..f7a53bb 
100644 --- a/examples/02-blocks.py +++ b/examples/02-blocks.py @@ -1,43 +1,44 @@ import asyncio import logging -from concurrent.futures import ThreadPoolExecutor -from radical.asyncflow import ConcurrentExecutionBackend, WorkflowEngine +from radical.asyncflow import WorkflowEngine, factory from radical.asyncflow.logging import init_default_logger logger = logging.getLogger(__name__) -async def main(): +async def main(): init_default_logger(logging.INFO) - # Create backend and workflow - backend = await ConcurrentExecutionBackend(ThreadPoolExecutor()) + # Create backend and workflow using factory + backend = await factory.create_backend( + "concurrent", config={"max_workers": 4, "executor_type": "thread"} + ) flow = await WorkflowEngine.create(backend=backend) @flow.function_task async def task1(*args): - logger.info('TASK: task1 executing') - return 'task1 result' + logger.info("TASK: task1 executing") + return "task1 result" @flow.function_task async def task2(*args): - logger.info('TASK: task2 executing') - return 'task2 result' + logger.info("TASK: task2 executing") + return "task2 result" @flow.function_task async def task3(*args): - logger.info('TASK: task3 executing') - return 'task3 result' + logger.info("TASK: task3 executing") + return "task3 result" @flow.block async def block(block_id, wf_id, *args): - logger.info(f'BLOCK-{block_id}: Starting workflow {wf_id}') + logger.info(f"BLOCK-{block_id}: Starting workflow {wf_id}") t1 = task1() t2 = task2(t1) t3 = task3(t1, t2) await t3 - logger.info(f'BLOCK-{block_id}: Workflow {wf_id} completed') + logger.info(f"BLOCK-{block_id}: Workflow {wf_id} completed") async def run_blocks(wf_id): b1 = block(1, wf_id) @@ -49,5 +50,6 @@ async def run_blocks(wf_id): await flow.shutdown() -if __name__ == '__main__': + +if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/03-nested_blocks.py b/examples/03-nested_blocks.py index c7c92e3..e944e13 100644 --- a/examples/03-nested_blocks.py +++ b/examples/03-nested_blocks.py @@ -1,73 +1,75 @@ import asyncio import logging import time -from concurrent.futures import ThreadPoolExecutor -from radical.asyncflow import ConcurrentExecutionBackend, WorkflowEngine +from radical.asyncflow import WorkflowEngine, factory from radical.asyncflow.logging import init_default_logger logger = logging.getLogger(__name__) -async def main(): +async def main(): init_default_logger(logging.INFO) - # Create backend and workflow - backend = await ConcurrentExecutionBackend(ThreadPoolExecutor()) + # Create backend and workflow using factory + backend = await factory.create_backend( + "concurrent", config={"max_workers": 4, "executor_type": "thread"} + ) flow = await WorkflowEngine.create(backend=backend) @flow.function_task async def task1(*args): - logger.info(f'TASK: task1 executing at {time.time()}') - return 'task1 result' + logger.info(f"TASK: task1 executing at {time.time()}") + return "task1 result" @flow.function_task async def task2(*args): - logger.info(f'TASK: task2 executing at {time.time()}') - return 'task2 result' + logger.info(f"TASK: task2 executing at {time.time()}") + return "task2 result" @flow.block async def block1(*args): - logger.info(f'BLOCK: block1 started at {time.time()}') + logger.info(f"BLOCK: block1 started at {time.time()}") t1 = task1() t2 = task2(t1) await t2 - logger.info(f'BLOCK: block1 completed at {time.time()}') + logger.info(f"BLOCK: block1 completed at {time.time()}") @flow.block async def block2(*args): - logger.info(f'BLOCK: block2 started at {time.time()}') + 
logger.info(f"BLOCK: block2 started at {time.time()}") t3 = task1() t4 = task2(t3) await t4 - logger.info(f'BLOCK: block2 completed at {time.time()}') + logger.info(f"BLOCK: block2 completed at {time.time()}") @flow.block async def block1_of_blocks(*args): - logger.info(f'NESTED-BLOCK: block of blocks-1 started at {time.time()}') + logger.info(f"NESTED-BLOCK: block of blocks-1 started at {time.time()}") b1 = block1() b2 = block2(b1) await b2 - logger.info(f'NESTED-BLOCK: block of blocks-1 completed at {time.time()}') + logger.info(f"NESTED-BLOCK: block of blocks-1 completed at {time.time()}") @flow.block async def block2_of_blocks(*args): - logger.info(f'NESTED-BLOCK: block of blocks-2 started at {time.time()}') + logger.info(f"NESTED-BLOCK: block of blocks-2 started at {time.time()}") b1 = block1() b2 = block2(b1) await b2 - logger.info(f'NESTED-BLOCK: block of blocks-2 completed at {time.time()}') + logger.info(f"NESTED-BLOCK: block of blocks-2 completed at {time.time()}") async def run_block_of_blocks(i): - logger.info(f'WORKFLOW: run_block_of_blocks {i} starting at {time.time()}') + logger.info(f"WORKFLOW: run_block_of_blocks {i} starting at {time.time()}") bob1 = block1_of_blocks() bob2 = block2_of_blocks(bob1) await bob2 - logger.info(f'WORKFLOW: Block of blocks-{i} is finished at {time.time()}') + logger.info(f"WORKFLOW: Block of blocks-{i} is finished at {time.time()}") await asyncio.gather(*[run_block_of_blocks(i) for i in range(10)]) await flow.shutdown() -if __name__ == '__main__': + +if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/04-dask_execution_backend.py b/examples/04-dask_execution_backend.py index e366c5c..6c92f01 100644 --- a/examples/04-dask_execution_backend.py +++ b/examples/04-dask_execution_backend.py @@ -3,15 +3,17 @@ import numpy as np -from radical.asyncflow import DaskExecutionBackend, WorkflowEngine +from radical.asyncflow import WorkflowEngine, factory from radical.asyncflow.logging import init_default_logger logger = logging.getLogger(__name__) init_default_logger(logging.INFO) -async def main(): - backend = await DaskExecutionBackend({'n_workers': 2, 'threads_per_worker': 4}) +async def main(): + backend = await factory.create_backend( + "dask", config={"n_workers": 2, "threads_per_worker": 4} + ) flow = await WorkflowEngine.create(backend=backend) @flow.function_task @@ -50,5 +52,6 @@ async def compute_final_stats(*args): await flow.shutdown() -if __name__ == '__main__': + +if __name__ == "__main__": asyncio.run(main()) diff --git a/examples/05-radical_execution_backend.py b/examples/05-radical_execution_backend.py index 707d909..fee4f7b 100644 --- a/examples/05-radical_execution_backend.py +++ b/examples/05-radical_execution_backend.py @@ -2,7 +2,7 @@ import logging import time -from radical.asyncflow import RadicalExecutionBackend, WorkflowEngine +from radical.asyncflow import WorkflowEngine, factory from radical.asyncflow.logging import init_default_logger logger = logging.getLogger(__name__) @@ -11,8 +11,10 @@ async def main(): init_default_logger(logging.INFO) - # Create backend and workflow - backend = await RadicalExecutionBackend({"resource": "local.localhost"}) + # Create backend and workflow using factory + backend = await factory.create_backend( + "radical_pilot", config={"resource": "local.localhost"} + ) flow = await WorkflowEngine.create(backend=backend) task1_resources = {"ranks": 1, "gpus_per_rank": 1} diff --git a/examples/tutorials/build_async_workflows.ipynb b/examples/tutorials/build_async_workflows.ipynb index 
7711d8a..07008fa 100644 --- a/examples/tutorials/build_async_workflows.ipynb +++ b/examples/tutorials/build_async_workflows.ipynb @@ -11,15 +11,16 @@ "metadata": {}, "source": [ "# Express, Manage, and Execute async/sync workflows with RADICAL-AsyncFlow.\n", - "![image.png](attachment:ee8dc055-f6e0-4487-9b12-b76f60b16b8e.png)\n", - "RADICAL AsyncFlow is a fast asynchronous scripting library built on top of asyncio for complex asynchronous workflows on HPC, clusters, and local machines. It supports pluggable execution backends with intuitive task dependencies and workflow composition. Currently, AsyncFlow supports the following execution backends:\n", + "![image.png](attachment:ee8dc055-f6e0-4487-9b12-b76f60b16b8e.png) # pragma: allowlist secret\n", + "RADICAL AsyncFlow is a fast asynchronous scripting library built on top of asyncio for complex asynchronous workflows on HPC, clusters, and local machines. It supports pluggable execution backends with lazy loading and automatic discovery. AsyncFlow uses a modern registry and factory pattern for backend management:\n", "\n", + "* **Local Development**: `concurrent` (multi-threading/processing), `noop` (testing)\n", + "* **HPC/Distributed** *(optional)*: `dask` (distributed computing), `radical_pilot` (HPC/supercomputing)\n", "\n", "**The goal of this notebook is to:**\n", "\n", "1. Showcase the ability to use asynchronous execution within Jupyter.\n", - "2. Show the performance when submitting 5 workflows asynchronously.\n", - "3. Highlight the support for different execution backends. " + "2. Show the performance when submitting 5 workflows asynchronously." ] }, { @@ -44,7 +45,7 @@ "metadata": {}, "source": [ "## Asynchronous programming with `AsyncFlow`\n", - "The goal here is to show that the power of asynchronous programming that `asyncflow` enables 4 blocking `workflows` to run asynchronously. We will demonstrate the case where we have N independent blocking workflows, and we will program them with Radical.AsyncFlow in `async` appraoch and deliver the results based on their completion order." + "The goal here is to show that the power of asynchronous programming that `asyncflow` enables 4 blocking `workflows` to run asynchronously. We will demonstrate the case where we have N independent blocking workflows, and we will program them with Radical.AsyncFlow in `async` approach and deliver the results based on their completion order." ] }, { @@ -87,7 +88,9 @@ " β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”\n", " β”‚ Shutdown WorkflowEngine β”‚\n", " β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜\n", - "```" + "```\n", + "\n", + "The goal here is to show that the power of asynchronous programming that `asyncflow` enables 4 blocking `workflows` to run asynchronously. We will demonstrate the case where we have N independent blocking workflows, and we will program them with Radical.AsyncFlow in `async` approach and deliver the results based on their completion order." 
] }, { @@ -99,38 +102,44 @@ "source": [ "import asyncio\n", "import time\n", - "from concurrent.futures import ThreadPoolExecutor\n", "\n", - "from radical.asyncflow import ConcurrentExecutionBackend, WorkflowEngine\n", + "from radical.asyncflow import WorkflowEngine, factory\n", "\n", - "backend = await ConcurrentExecutionBackend(ThreadPoolExecutor())\n", + "backend = await factory.create_backend(\"concurrent\", config={\n", + "    \"max_workers\": 4,\n", + "    \"executor_type\": \"thread\"\n", + "})\n", "\n", "flow = await WorkflowEngine.create(backend=backend)\n", "\n", + "\n", "@flow.function_task\n", "async def task1(*args):\n", "    return time.time()\n", "\n", + "\n", "@flow.function_task\n", "async def task2(*args):\n", "    return time.time()\n", "\n", + "\n", "@flow.function_task\n", "async def task3(*args):\n", "    return time.time()\n", "\n", - "async def run_wf(wf_id):\n", "\n", - "    print(f'Starting workflow {wf_id} at {time.time()}')\n", + "async def run_wf(wf_id):\n", + "    print(f\"Starting workflow {wf_id} at {time.time()}\")\n", "    t3 = task3(task1(), task2())\n", - "    await t3 # Blocking operation so the entire workflow will block\n", - "    print(f'Workflow {wf_id} completed at {time.time()}')\n", + "    await t3  # Blocking operation so the entire workflow will block\n", + "    print(f\"Workflow {wf_id} completed at {time.time()}\")\n", + "\n", "\n", "start_time = time.time()\n", "await asyncio.gather(*[run_wf(i) for i in range(5)])\n", "end_time = time.time()\n", "\n", - "print(f'\\nTotal time running asynchronously is: {end_time - start_time}')\n", + "print(f\"\\nTotal time running asynchronously is: {end_time - start_time}\")\n", "\n", "# We are in an async context, so we have to use **await**\n", "await flow.shutdown()" @@ -142,7 +151,7 @@ "metadata": {}, "source": [ "## Different Execution Backend? Couldn't be any easier. Just change 1 line of code\n", - "By design, `AsyncFlow` focuses on the concept of `separation of concern`, which enforces that the execution backend should be entirely isolated from the `Asynchronous` programming layer and follows the `PnP` design choice. In this way, the user can plug and unplug any execution backend they prefer by just replacing a few lines of code." + "By design, `AsyncFlow` focuses on the concept of `separation of concerns`: the execution backend is kept entirely isolated from the `Asynchronous` programming layer and follows a plug-and-play (`PnP`) design. With the new factory pattern, you can switch backends by simply changing the backend identifier string; AsyncFlow automatically discovers available backends and provides helpful installation hints for missing ones." ] }, { @@ -150,9 +159,9 @@ "id": "856d3a4a-4c4d-4bee-bb6f-f4925f9dff2b", "metadata": {}, "source": [ - "### Using `RadicalExecutionBackend` instead of `ConcurrentExecutionBackend` to execute `Async` workflows\n", + "### Using the `radical_pilot` backend instead of the `concurrent` backend to execute `Async` workflows\n", "\n", - "We will show how we can use `Radical.Pilot` to execute `blocking` workflows like the previus example." + "We will show how to use the `RADICAL-Pilot` backend for HPC execution of `blocking` workflows, as in the previous example."
] }, { @@ -165,43 +174,48 @@ "import asyncio\n", "import time\n", "\n", - "from radical.asyncflow import RadicalExecutionBackend, WorkflowEngine\n", + "from radical.asyncflow import WorkflowEngine, factory\n", "\n", - "backend = await RadicalExecutionBackend({'resource': 'local.localhost'})\n", + "backend = await factory.create_backend(\"radical_pilot\", config={\n", + " \"resource\": \"local.localhost\"\n", + "})\n", "\n", "# vs\n", "\n", - "# backend = await ConcurrentExecutionBackend(ThreadPoolExecutor())\n", + "# backend = await factory.create_backend(\"concurrent\", config={\n", + "# \"max_workers\": 4, \"executor_type\": \"thread\"\n", + "# })\n", "\n", "flow = await WorkflowEngine.create(backend=backend)\n", "\n", + "\n", "@flow.executable_task\n", "async def task1(*args):\n", " return \"/bin/date\"\n", "\n", + "\n", "@flow.executable_task\n", "async def task2(*args):\n", " return \"/bin/date\"\n", "\n", + "\n", "@flow.executable_task\n", "async def task3(*args):\n", " return \"/bin/date\"\n", "\n", - "async def run_wf(wf_id):\n", "\n", - " print(f'Starting workflow {wf_id} at {time.time()}')\n", + "async def run_wf(wf_id):\n", + " print(f\"Starting workflow {wf_id} at {time.time()}\")\n", " t3 = task3(task1(), task2())\n", - " await t3 # Blocking operation so the entire workflow will block\n", - " print(f'Workflow {wf_id} completed at {time.time()}')\n", + " await t3 # Blocking operation so the entire workflow will block\n", + " print(f\"Workflow {wf_id} completed at {time.time()}\")\n", + "\n", "\n", "start_time = time.time()\n", "await asyncio.gather(*[run_wf(i) for i in range(5)])\n", "end_time = time.time()\n", "\n", - "print(f'\\nTotal time running asynchronously is: {end_time - start_time}')\n", - "\n", - "# We are in an async context, so we have to use **await**\n", - "await flow.shutdown()" + "print(f\"\\nTotal time running asynchronously is: {end_time - start_time}\")" ] } ], diff --git a/mkdocs.yml b/mkdocs.yml index 2b4e379..2f74c9b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -100,12 +100,12 @@ markdown_extensions: - md_in_html nav: - - Home: + - Home: - Introduction: index.md - Installation: install.md - Getting Started: - Basic Usage: basic.md - - Best Practices: + - Best Practices: - Best Practices: best_practice.md - Workflows: - Asynchronous Workflows: async_workflows.md @@ -121,4 +121,4 @@ extra_css: - css/extra.css extra_javascript: - - https://unpkg.com/mermaid@10/dist/mermaid.min.js \ No newline at end of file + - https://unpkg.com/mermaid@10/dist/mermaid.min.js diff --git a/pyproject.toml b/pyproject.toml index a015278..2bbd075 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ requires-python = ">=3.9" dependencies = [ "pydantic", "typeguard", - "requests" + "requests", ] [project.urls] @@ -29,17 +29,30 @@ Issues = "https://github.com/radical-cybertools/radical.asyncflow/issues" Documentation = "https://radical-cybertools.github.io/radical.asyncflow" [project.optional-dependencies] -dask = ["dask[distributed]"] -radicalpilot = ["radical.pilot"] - +# HPC execution backends (requires rhapsody) +hpc = ["rhapsody @ git+https://github.com/radical-cybertools/rhapsody.git@dev"] +dask = [ + "rhapsody @ git+https://github.com/radical-cybertools/rhapsody.git@dev", + "dask[distributed]" +] +radicalpilot = [ + "rhapsody @ git+https://github.com/radical-cybertools/rhapsody.git@dev", + "radical.pilot" +] -lint = ["ruff"] +lint = [ + "ruff", + "docformatter", +] # All test deps dev = [ "pytest", "pytest-asyncio", - "pytest-cov" + "pytest-cov", + "pre-commit", 
+ "tox", + "detect-secrets" ] doc = [ @@ -58,6 +71,7 @@ doc = [ line-length = 88 target-version = "py39" fix = true +exclude = ["*.ipynb"] [tool.ruff.lint] select = ["E", "F", "W", "B", "I", "N", "UP"] @@ -72,6 +86,7 @@ minversion = "6.0" testpaths = ["tests"] asyncio_mode = "auto" +asyncio_default_fixture_loop_scope = "function" markers = [ "slow: marks tests as slow (deselect with '-m \"not slow\"')", "integration: marks tests as integration tests", diff --git a/src/radical/asyncflow/__init__.py b/src/radical/asyncflow/__init__.py index 5b9758e..dacbdb8 100644 --- a/src/radical/asyncflow/__init__.py +++ b/src/radical/asyncflow/__init__.py @@ -2,21 +2,20 @@ import importlib.metadata as importlib_metadata -from .backends.execution.concurrent import ConcurrentExecutionBackend -from .backends.execution.dask_parallel import DaskExecutionBackend -from .backends.execution.noop import NoopExecutionBackend -from .backends.execution.radical_pilot import RadicalExecutionBackend +# Import core components with new plugin architecture +from .backends import factory, registry +from .backends.execution import ConcurrentExecutionBackend, NoopExecutionBackend from .data import InputFile, OutputFile from .workflow_manager import WorkflowEngine __version__ = importlib_metadata.version("radical.asyncflow") __all__ = [ - "ConcurrentExecutionBackend", - "DaskExecutionBackend", - "NoopExecutionBackend", - "RadicalExecutionBackend", + "WorkflowEngine", "InputFile", "OutputFile", - "WorkflowEngine", + "ConcurrentExecutionBackend", + "NoopExecutionBackend", + "factory", + "registry", ] diff --git a/src/radical/asyncflow/backends/__init__.py b/src/radical/asyncflow/backends/__init__.py index e69de29..f0eba5e 100644 --- a/src/radical/asyncflow/backends/__init__.py +++ b/src/radical/asyncflow/backends/__init__.py @@ -0,0 +1,24 @@ +"""Backend subsystem for AsyncFlow with plugin-based architecture. + +This module provides a plugin-based backend system that supports both local backends +(for development and testing) and optional external backends (for HPC and scale-out +execution) without requiring hard dependencies. +""" + +from __future__ import annotations + +from .execution import ConcurrentExecutionBackend, NoopExecutionBackend + +# Import core components +from .execution.base import BaseExecutionBackend, Session +from .factory import factory +from .registry import registry + +__all__ = [ + "BaseExecutionBackend", + "Session", + "NoopExecutionBackend", + "ConcurrentExecutionBackend", + "factory", + "registry", +] diff --git a/src/radical/asyncflow/backends/execution/__init__.py b/src/radical/asyncflow/backends/execution/__init__.py index e69de29..38f9660 100644 --- a/src/radical/asyncflow/backends/execution/__init__.py +++ b/src/radical/asyncflow/backends/execution/__init__.py @@ -0,0 +1,16 @@ +"""Execution backends for AsyncFlow with local and optional external backends. + +This module provides core local execution backends and optional external backends +through the registry system. 
+""" + +from __future__ import annotations + +# Import base class +from .base import BaseExecutionBackend + +# Import local core backends (always available) +from .concurrent import ConcurrentExecutionBackend +from .noop import NoopExecutionBackend + +__all__ = ["BaseExecutionBackend", "NoopExecutionBackend", "ConcurrentExecutionBackend"] diff --git a/src/radical/asyncflow/backends/execution/base.py b/src/radical/asyncflow/backends/execution/base.py index 6ed6dfa..e8fbf6c 100644 --- a/src/radical/asyncflow/backends/execution/base.py +++ b/src/radical/asyncflow/backends/execution/base.py @@ -1,5 +1,63 @@ +"""Base execution backend compatibility layer. + +This module provides a compatibility layer for execution backends to work with +AsyncFlow's type system. +""" + +from __future__ import annotations + import os from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, Any, Protocol, runtime_checkable + +if TYPE_CHECKING: + from ...constants import StateMapper + + +@runtime_checkable +class ExecutionBackendProtocol(Protocol): + """Protocol defining the interface that execution backends must implement. + + This protocol allows both internal AsyncFlow backends and external backends (like + Rhapsody) to be used with WorkflowEngine type checking. + """ + + async def submit_tasks(self, tasks: list[dict]) -> None: + """Submit a list of tasks for execution.""" + ... + + async def shutdown(self) -> None: + """Gracefully shutdown the execution backend.""" + ... + + def state(self) -> str: + """Get the current state of the execution backend.""" + ... + + def register_callback(self, func) -> None: + """Register a callback function for task state changes.""" + ... + + def get_task_states_map(self) -> Any: + """Get the task states mapping.""" + ... + + async def cancel_task(self, uid: str) -> bool: + """Cancel a task by its UID.""" + ... + + def link_implicit_data_deps(self, src_task, dst_task): + """Link implicit data dependencies between tasks.""" + ... + + def link_explicit_data_deps( + self, src_task=None, dst_task=None, file_name=None, file_path=None + ): + """Link explicit data dependencies between tasks.""" + ... + + # Required for WorkflowEngine initialization - make it flexible + session: Any # Backend must have a session with a path attribute class BaseExecutionBackend(ABC): @@ -19,16 +77,14 @@ async def submit_tasks(self, tasks: list[dict]) -> None: Each task dictionary should contain the necessary information for task execution. """ - pass @abstractmethod async def shutdown(self) -> None: """Gracefully shutdown the execution backend. - This method should clean up resources, terminate running tasks if necessary, - and prepare the backend for termination. + This method should clean up resources, terminate running tasks if necessary, and + prepare the backend for termination. """ - pass @abstractmethod def state(self) -> str: @@ -38,7 +94,6 @@ def state(self) -> str: A string representing the current state of the backend (e.g., 'running', 'idle', 'shutting_down', 'error'). """ - pass @abstractmethod def task_state_cb(self, task: dict, state: str) -> None: @@ -46,10 +101,9 @@ def task_state_cb(self, task: dict, state: str) -> None: Args: task: Dictionary containing task information and metadata. - state: The new state of the task (e.g., 'pending', 'running', 'completed', - 'failed'). + state: The new state of the task (e.g., 'pending', 'running', + 'completed', 'failed'). 
""" - pass @abstractmethod def register_callback(self, func) -> None: @@ -59,26 +113,24 @@ def register_callback(self, func) -> None: func: A callable that will be invoked when task states change. The function should accept task and state parameters. """ - pass @abstractmethod - def get_task_states_map(self) -> None: + def get_task_states_map(self) -> StateMapper: """Retrieve a mapping of task IDs to their current states. Returns: - A dictionary mapping task identifiers to their current execution states. + A StateMapper object containing task state mappings. """ - pass @abstractmethod - def build_task(self, task: dict) -> None: + def build_task(self, uid, task_desc, task_specific_kwargs) -> None: """Build or prepare a task for execution. Args: - task: Dictionary containing task definition, parameters, and metadata - required for task construction. + uid: Unique identifier for the task. + task_desc: Dictionary containing task description and metadata. + task_specific_kwargs: Backend-specific keyword arguments. """ - pass @abstractmethod def link_implicit_data_deps(self, src_task, dst_task): @@ -92,7 +144,6 @@ def link_implicit_data_deps(self, src_task, dst_task): src_task: The source task that produces data. dst_task: The destination task that depends on the source task's output. """ - pass @abstractmethod def link_explicit_data_deps( @@ -109,33 +160,33 @@ def link_explicit_data_deps( file_name: Name of the file that represents the dependency. file_path: Full path to the file that represents the dependency. """ - pass @abstractmethod async def cancel_task(self, uid: str) -> bool: - """ - Cancel a task in the execution backend. + """Cancel a task in the execution backend. Args: uid: Task identifier - Raises: - NotImplementedError: If the backend doesn't support cancellation + Returns: + bool: True if cancellation was successful, False otherwise. """ - raise NotImplementedError("Not implemented in the base backend") class Session: """Manages execution session state and working directory. - This class maintains session-specific information including the current - working directory path for task execution. + This class maintains session-specific information including the current working + directory path for task execution. """ def __init__(self): """Initialize a new session with the current working directory. - Sets the session path to the current working directory at the time - of initialization. + Sets the session path to the current working directory at the time of + initialization. """ self.path = os.getcwd() + + +__all__ = ["BaseExecutionBackend", "Session"] diff --git a/src/radical/asyncflow/backends/execution/concurrent.py b/src/radical/asyncflow/backends/execution/concurrent.py index e929077..7a60ddb 100644 --- a/src/radical/asyncflow/backends/execution/concurrent.py +++ b/src/radical/asyncflow/backends/execution/concurrent.py @@ -1,4 +1,13 @@ +"""Concurrent execution backend using Python's concurrent.futures. + +This module provides a backend that executes tasks using ThreadPoolExecutor or +ProcessPoolExecutor from the concurrent.futures module. 
+""" + +from __future__ import annotations + import asyncio +import gc import logging import subprocess from concurrent.futures import Executor @@ -19,7 +28,7 @@ def __init__(self, executor: Executor): raise TypeError(err) self.executor = executor - self.tasks: dict[str, asyncio.Task] = {} + self.tasks: dict[str, dict] = {} self.session = Session() self._callback_func: Optional[Callable] = None self._initialized = False @@ -80,27 +89,50 @@ async def _execute_function(self, task: dict) -> tuple[dict, str]: async def _execute_command(self, task: dict) -> tuple[dict, str]: """Execute command task.""" cmd = " ".join([task["executable"]] + task.get("arguments", [])) - + process = None try: process = await asyncio.create_subprocess_shell( - cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE + cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + close_fds=True, ) + + # Communicate and get results stdout, stderr = await process.communicate() + exit_code = process.returncode + + # Force cleanup of the process to prevent ResourceWarnings + if process.returncode is None: + process.terminate() + try: + await asyncio.wait_for(process.wait(), timeout=1.0) + except asyncio.TimeoutError: + process.kill() + await process.wait() task.update( { "stdout": stdout.decode(), "stderr": stderr.decode(), - "exit_code": process.returncode, + "exit_code": exit_code, } ) except Exception: - # Fallback to thread executor + # Fallback to thread executor with proper subprocess configuration + def run_subprocess(): + result = subprocess.run( # noqa: S602 + cmd, + shell=True, + capture_output=True, + text=True, + close_fds=True, + ) + return result + loop = asyncio.get_running_loop() - result = await loop.run_in_executor( - self.executor, subprocess.run, cmd, True, True - ) + result = await loop.run_in_executor(self.executor, run_subprocess) task.update( { @@ -110,28 +142,36 @@ async def _execute_command(self, task: dict) -> tuple[dict, str]: } ) + finally: + # Ensure process cleanup to prevent ResourceWarnings + if process is not None and process.returncode is None: + try: + process.terminate() + await asyncio.wait_for(process.wait(), timeout=1.0) + except asyncio.TimeoutError: + process.kill() + await process.wait() + except Exception as e: + # Log cleanup errors but don't fail the task + logger.debug(f"Process cleanup error: {e}") + state = "DONE" if task["exit_code"] == 0 else "FAILED" return task, state async def _handle_task(self, task: dict) -> None: """Handle task execution with callback.""" result_task, state = await self._execute_task(task) + if self._callback_func: + self._callback_func(result_task, state) - self._callback_func(result_task, state) - - async def submit_tasks(self, tasks: list[dict[str, Any]]) -> list[asyncio.Task]: + async def submit_tasks(self, tasks: list[dict[str, Any]]) -> None: """Submit tasks for execution.""" - submitted_tasks = [] - for task in tasks: future = asyncio.create_task(self._handle_task(task)) - submitted_tasks.append(future) self.tasks[task["uid"]] = task self.tasks[task["uid"]]["future"] = future - return submitted_tasks - async def cancel_task(self, uid: str) -> bool: """Cancel a task by its UID. 
@@ -146,7 +186,8 @@ async def cancel_task(self, uid: str) -> bool: task = self.tasks[uid] future = task["future"] if future and future.cancel(): - self._callback_func(task, "CANCELED") + if self._callback_func: + self._callback_func(task, "CANCELED") return True return False @@ -161,27 +202,57 @@ async def cancel_all_tasks(self) -> int: return cancelled_count async def shutdown(self) -> None: - """Shutdown the executor.""" + """Shutdown the executor with proper resource cleanup.""" await self.cancel_all_tasks() + + # Give time for tasks to complete cleanup + await asyncio.sleep(0.1) + + # Shutdown executor self.executor.shutdown(wait=True) + + # Force garbage collection to clean up any remaining resources + gc.collect() + logger.info("Concurrent execution backend shutdown complete") def build_task(self, uid, task_desc, task_specific_kwargs): - pass + """Build or prepare a task for execution. + + Note: + This is a no-op implementation for the concurrent backend. + """ def link_explicit_data_deps( self, src_task=None, dst_task=None, file_name=None, file_path=None ): - pass + """Link explicit data dependencies between tasks. + + Note: + This is a no-op implementation for the concurrent backend. + """ def link_implicit_data_deps(self, src_task, dst_task): - pass + """Link implicit data dependencies between tasks. + + Note: + This is a no-op implementation for the concurrent backend. + """ def state(self): - pass + """Get the current state of the execution backend. + + Returns: + str: Always returns 'RUNNING' for active backend. + """ + return "RUNNING" + + def task_state_cb(self, task: dict, state: str) -> None: + """Callback function invoked when a task's state changes. - def task_state_cb(self): - pass + Note: + This is a no-op implementation for the concurrent backend. + """ async def __aenter__(self): """Async context manager entry.""" diff --git a/src/radical/asyncflow/backends/execution/dask_parallel.py b/src/radical/asyncflow/backends/execution/dask_parallel.py deleted file mode 100644 index 8d33336..0000000 --- a/src/radical/asyncflow/backends/execution/dask_parallel.py +++ /dev/null @@ -1,346 +0,0 @@ -import asyncio -import logging -from functools import wraps -from typing import Any, Callable, Optional - -import typeguard - -from ...constants import StateMapper -from .base import BaseExecutionBackend, Session - -try: - import dask.distributed as dask -except ImportError: - dask = None - - -logger = logging.getLogger(__name__) - - -class DaskExecutionBackend(BaseExecutionBackend): - """An async-only Dask execution backend for distributed task execution. - - Handles task submission, cancellation, and proper async event loop handling - for distributed task execution using Dask. All functions must be async. - - Usage: - backend = await DaskExecutionBackend(resources) - # or - async with DaskExecutionBackend(resources) as backend: - await backend.submit_tasks(tasks) - """ - - @typeguard.typechecked - def __init__(self, resources: Optional[dict] = None): - """Initialize the Dask execution backend (non-async setup only). - - Args: - resources: Dictionary of resource requirements for tasks. Contains - configuration parameters for the Dask client initialization. 
- """ - - if dask is None: - raise ImportError("Dask is required for DaskExecutionBackend.") - - self.tasks = {} - self._client = None - self.session = Session() - self._callback_func = None - self._resources = resources or {} - self._initialized = False - - def __await__(self): - """Make DaskExecutionBackend awaitable like Dask Client.""" - return self._async_init().__await__() - - async def _async_init(self): - """Async initialization that happens when awaited.""" - if not self._initialized: - await self._initialize() - self._initialized = True - StateMapper.register_backend_states_with_defaults(backend=self) - return self - - async def _initialize(self) -> None: - """Initialize the Dask client and set up worker environments. - - Raises: - Exception: If Dask client initialization fails. - """ - try: - self._client = await dask.Client(asynchronous=True, **self._resources) - dashboard_link = self._client.dashboard_link - logger.info(f"Dask backend initialized with dashboard at {dashboard_link}") - except Exception as e: - logger.exception(f"Failed to initialize Dask client: {str(e)}") - raise - - def register_callback(self, callback: Callable) -> None: - """Register a callback for task state changes. - - Args: - callback: Function to be called when task states change. Should accept - task and state parameters. - """ - self._callback_func = callback - - def get_task_states_map(self): - """Retrieve a mapping of task IDs to their current states. - - Returns: - StateMapper: Object containing the mapping of task states for this backend. - """ - return StateMapper(backend=self) - - async def cancel_task(self, uid: str) -> bool: - """Cancel a task by its UID. - - Args: - uid (str): The UID of the task to cancel. - - Returns: - bool: True if the task was found and cancellation was attempted, - False otherwise. - """ - self._ensure_initialized() - if uid in self.tasks: - task = self.tasks[uid] - future = task.get("future") - if future: - return await future.cancel() - return False - - async def submit_tasks(self, tasks: list[dict[str, Any]]) -> None: - """Submit async tasks to Dask cluster. - - Processes a list of tasks and submits them to the Dask cluster for execution. - Filters out future objects from arguments and validates that all functions - are async coroutine functions. - - Args: - tasks: List of task dictionaries containing: - - uid: Unique task identifier - - function: Async callable to execute - - args: Positional arguments - - kwargs: Keyword arguments - - executable: Optional executable path (not supported) - - task_backend_specific_kwargs: Backend-specific parameters - - Note: - Executable tasks are not supported and will result in FAILED state. - Only async functions are supported - sync functions will result in - FAILED state. - Future objects are filtered out from arguments as they are not picklable. 
- """ - self._ensure_initialized() - - for task in tasks: - is_func_task = bool(task.get("function")) - is_exec_task = bool(task.get("executable")) - - if is_exec_task: - error_msg = "DaskExecutionBackend does not support executable tasks" - task["stderr"] = ValueError(error_msg) - self._callback_func(task, "FAILED") - continue - - # Validate that function is async - if is_func_task and not asyncio.iscoroutinefunction(task["function"]): - error_msg = "DaskExecutionBackend only supports async functions" - task["exception"] = ValueError(error_msg) - self._callback_func(task, "FAILED") - continue - - self.tasks[task["uid"]] = task - - # Filter out future objects as they are not picklable - filtered_args = [ - arg for arg in task["args"] if not isinstance(arg, asyncio.Future) - ] - task["args"] = tuple(filtered_args) - try: - await self._submit_async_function(task) - except Exception as e: - task["exception"] = e - self._callback_func(task, "FAILED") - - async def _submit_to_dask(self, task: dict[str, Any], fn: Callable, *args) -> None: - """Submit function to Dask and register completion callback. - - Submits the wrapped function to Dask client and registers a callback - to handle task completion or failure. - - Args: - task: Task dictionary containing task metadata and configuration. - fn: The async function to submit to Dask. - *args: Arguments to pass to the function. - """ - - async def on_done(f: dask.Future): - task_uid = task["uid"] - try: - result = await f - task["return_value"] = result - self._callback_func(task, "DONE") - except dask.client.FutureCancelledError: - self._callback_func(task, "CANCELED") - except Exception as e: - task["exception"] = e - self._callback_func(task, "FAILED") - finally: - # Clean up the future reference once task is complete - if task_uid in self.tasks: - del self.tasks[task_uid] - - dask_future = self._client.submit( - fn, *args, **task["task_backend_specific_kwargs"] - ) - - # Store the future for potential cancellation - self.tasks[task["uid"]]["future"] = dask_future - - # Schedule the callback to run when future completes - asyncio.create_task(on_done(dask_future)) - - async def _submit_async_function(self, task: dict[str, Any]) -> None: - """Submit async function to Dask. - - Creates an async wrapper that preserves the original function name - for better visibility in the Dask dashboard. - - Args: - task: Task dictionary containing the async function and its parameters. - """ - - # Preserve the real task name in dask dashboard - @wraps(task["function"]) - async def async_wrapper(): - return await task["function"](*task["args"], **task["kwargs"]) - - await self._submit_to_dask(task, async_wrapper) - - async def cancel_all_tasks(self) -> int: - """Cancel all currently running/pending tasks. - - Returns: - Number of tasks that were successfully cancelled - """ - self._ensure_initialized() - cancelled_count = 0 - task_uids = list(self.tasks.keys()) - - for task_uid in task_uids: - if await self.cancel_task(task_uid): - cancelled_count += 1 - - return cancelled_count - - def link_explicit_data_deps( - self, src_task=None, dst_task=None, file_name=None, file_path=None - ): - """Handle explicit data dependencies between tasks. - - Args: - src_task: The source task that produces the dependency. - dst_task: The destination task that depends on the source. - file_name: Name of the file that represents the dependency. - file_path: Full path to the file that represents the dependency. 
- """ - pass - - def link_implicit_data_deps(self, src_task, dst_task): - """Handle implicit data dependencies for a task. - - Args: - src_task: The source task that produces data. - dst_task: The destination task that depends on the source task's output. - """ - pass - - async def state(self) -> str: - """Get the current state of the Dask execution backend. - - Returns: - Current state of the backend as a string. - """ - if not self._initialized or self._client is None: - return "DISCONNECTED" - - try: - # Check if client is still connected - await self._client.scheduler_info() - return "CONNECTED" - except Exception: - return "DISCONNECTED" - - async def task_state_cb(self, task: dict, state: str) -> None: - """Callback function invoked when a task's state changes. - - Args: - task: Dictionary containing task information and metadata. - state: The new state of the task. - """ - pass - - async def build_task(self, task: dict) -> None: - """Build or prepare a task for execution. - - Args: - task: Dictionary containing task definition, parameters, and metadata - required for task construction. - """ - pass - - async def shutdown(self) -> None: - """Shutdown the Dask client and clean up resources. - - Closes the Dask client connection, clears task storage, and handles - any cleanup exceptions gracefully. - """ - if self._client is not None: - try: - # Cancel all running tasks first - await self.cancel_all_tasks() - - # Close the client - await self._client.close() - logger.info("Dask client shutdown complete") - except Exception as e: - logger.exception(f"Error during shutdown: {str(e)}") - finally: - self._client = None - self.tasks.clear() - self._initialized = False - logger.info("Dask execution backend shutdown complete") - - def _ensure_initialized(self): - """Ensure the backend has been properly initialized.""" - if not self._initialized: - raise RuntimeError( - "DaskExecutionBackend must be awaited before use. " - "Use: backend = await DaskExecutionBackend(resources)" - ) - - async def __aenter__(self): - """Async context manager entry.""" - if not self._initialized: - await self._async_init() - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb): - """Async context manager exit.""" - await self.shutdown() - - # Class method for cleaner instantiation (optional alternative pattern) - @classmethod - async def create(cls, resources: Optional[dict] = None): - """Alternative factory method for creating initialized backend. - - Args: - resources: Configuration parameters for Dask client initialization. - - Returns: - Fully initialized DaskExecutionBackend instance. - """ - backend = cls(resources) - return await backend diff --git a/src/radical/asyncflow/backends/execution/noop.py b/src/radical/asyncflow/backends/execution/noop.py index 7194c98..5ff8a96 100644 --- a/src/radical/asyncflow/backends/execution/noop.py +++ b/src/radical/asyncflow/backends/execution/noop.py @@ -1,3 +1,11 @@ +"""No-operation execution backend for testing and development. + +This module provides a no-op backend that simulates task execution without actually +running any tasks. +""" + +from __future__ import annotations + from typing import Callable from ...constants import StateMapper @@ -7,16 +15,16 @@ class NoopExecutionBackend(BaseExecutionBackend): """A no-operation execution backend for testing and development purposes. - This backend simulates task execution without actually running any tasks. - All submitted tasks immediately return dummy output and transition to DONE state. 
- Useful for testing workflow logic without computational overhead. + This backend simulates task execution without actually running any tasks. All + submitted tasks immediately return dummy output and transition to DONE state. Useful + for testing workflow logic without computational overhead. """ def __init__(self): """Initialize the no-op execution backend. - Sets up dummy task storage, session, and default callback function. - Registers backend states and confirms successful initialization. + Sets up dummy task storage, session, and default callback function. Registers + backend states and confirms successful initialization. """ self.tasks = {} self.session = Session() @@ -41,7 +49,6 @@ def task_state_cb(self, task: dict, state: str) -> None: Note: This is a no-op implementation that performs no actions. """ - pass def get_task_states_map(self): """Retrieve a mapping of task IDs to their current states. @@ -71,10 +78,18 @@ def build_task(self, uid, task_desc, task_specific_kwargs): Note: This is a no-op implementation that performs no actual task building. """ - pass - async def cancel_task(self, uid: str) -> None: - pass + async def cancel_task(self, uid: str) -> bool: + """Cancel a task by its UID. + + Args: + uid: The UID of the task to cancel. + + Returns: + bool: Always returns False since noop backend doesn't track running tasks + that can be cancelled. + """ + return False async def submit_tasks(self, tasks): """Submit tasks for mock execution. @@ -105,7 +120,6 @@ def link_explicit_data_deps( Note: This is a no-op implementation as this backend doesn't handle dependencies. """ - pass def link_implicit_data_deps(self, src_task, dst_task): """Handle implicit data dependencies for a task. @@ -117,12 +131,10 @@ def link_implicit_data_deps(self, src_task, dst_task): Note: This is a no-op implementation as this backend doesn't handle dependencies. """ - pass async def shutdown(self) -> None: """Shutdown the no-op execution backend. - Performs cleanup operations. Since this is a no-op backend, no actual - resources need to be cleaned up. + Performs cleanup operations. Since this is a no-op backend, no actual resources + need to be cleaned up. """ - pass diff --git a/src/radical/asyncflow/backends/execution/radical_pilot.py b/src/radical/asyncflow/backends/execution/radical_pilot.py deleted file mode 100644 index f050501..0000000 --- a/src/radical/asyncflow/backends/execution/radical_pilot.py +++ /dev/null @@ -1,678 +0,0 @@ -import asyncio -import copy -import logging -import threading -from typing import Callable, Optional - -import typeguard - -from ...constants import StateMapper -from .base import BaseExecutionBackend - -try: - import radical.pilot as rp -except ImportError: - rp = None - -try: - import radical.utils as ru -except ImportError: - ru = None - - -logger = logging.getLogger(__name__) - - -def service_ready_callback(future: asyncio.Future, task, state) -> None: - """Callback for handling service task readiness. - - Runs wait_info() in a daemon thread to avoid blocking execution flow. - - Args: - future: Future object to set result or exception. - task: Task with wait_info() method. - state: Current task state (unused). 
- """ - - def wait_and_set() -> None: - try: - info = task.wait_info() # synchronous call - future.set_result(info) - except Exception as e: - future.set_exception(e) - - threading.Thread(target=wait_and_set, daemon=True).start() - - -class RadicalExecutionBackend(BaseExecutionBackend): - """Radical Pilot-based execution backend for large-scale HPC task execution. - - The RadicalExecutionBackend manages computing resources and task execution - using the Radical Pilot framework. It interfaces with various resource - management systems (SLURM, FLUX, etc.) on diverse HPC machines, providing - capabilities for session management, task lifecycle control, and resource - allocation. - - This backend supports both traditional task execution and advanced features - like Raptor mode for high-throughput computing scenarios. It handles pilot - submission, task management, and provides data dependency linking mechanisms. - - Attributes: - session (rp.Session): Primary session for managing task execution context, - uniquely identified by a generated ID. - task_manager (rp.TaskManager): Manages task lifecycle including submission, - tracking, and completion within the session. - pilot_manager (rp.PilotManager): Coordinates computing resources (pilots) - that are dynamically allocated based on resource requirements. - resource_pilot (rp.Pilot): Submitted computing resources configured - according to the provided resource specifications. - tasks (dict): Dictionary storing task descriptions indexed by UID. - raptor_mode (bool): Flag indicating whether Raptor mode is enabled. - masters (list): List of master tasks when Raptor mode is enabled. - workers (list): List of worker tasks when Raptor mode is enabled. - master_selector (callable): Generator for load balancing across masters. - _callback_func (Callable): Registered callback function for task events. - - Args: - resources (dict): Resource requirements for the pilot including CPU, GPU, - and memory specifications. - raptor_config (Optional[dict]): Configuration for enabling Raptor mode. - Contains master and worker task specifications. - - Raises: - Exception: If session creation, pilot submission, or task manager setup fails. - SystemExit: If KeyboardInterrupt or SystemExit occurs during initialization. - - Example: - :: - resources = { - "resource": "local.localhost", - "runtime": 30, - "exit_on_error": True, - "cores": 4 - } - backend = await RadicalExecutionBackend(resources) - - # With Raptor mode - raptor_config = { - "masters": [{ - "executable": "/path/to/master", - "arguments": ["--config", "master.conf"], - "ranks": 1, - "workers": [{ - "executable": "/path/to/worker", - "arguments": ["--mode", "compute"], - "ranks": 4 - }] - }] - } - backend = await RadicalExecutionBackend(resources, raptor_config) - """ - - @typeguard.typechecked - def __init__(self, resources: dict, raptor_config: Optional[dict] = None) -> None: - """Initialize the RadicalExecutionBackend with resources. - - Creates a new Radical Pilot session, initializes task and pilot managers, - submits pilots based on resource configuration, and optionally enables - Raptor mode for high-throughput computing. - - Args: - resources (Dict): Resource configuration for the Radical Pilot session. 
- Must contain valid pilot description parameters such as: - - resource: Target resource (e.g., "local.localhost") - - runtime: Maximum runtime in minutes - - cores: Number of CPU cores - - gpus: Number of GPUs (optional) - raptor_config (Optional[Dict]): Configuration for Raptor mode containing: - - masters: List of master task configurations - - Each master can have associated workers - Defaults to None (Raptor mode disabled). - - Raises: - Exception: If RadicalPilot backend fails to initialize properly. - SystemExit: If keyboard interrupt or system exit occurs during setup, - with session path information for debugging. - - Note: - - Automatically registers backend states with the global StateMapper - - logs status messages for successful initialization or failures - - Session UID is generated using radical.utils for uniqueness - """ - - if rp is None or ru is None: - raise ImportError( - "Radical.Pilot and Radical.utils are required for " - "RadicalExecutionBackend." - ) - - self.resources = resources - self.raptor_config = raptor_config or {} - self._initialized = False - - def __await__(self): - """Make RadicalExecutionBackend awaitable.""" - return self._async_init().__await__() - - async def _async_init(self): - """Async initialization on await.""" - if not self._initialized: - await self._initialize() - self._initialized = True - return self - - async def _initialize(self) -> None: - """Initialize Radical Pilot components.""" - try: - self.tasks = {} - self.raptor_mode = False - self.session = rp.Session( - uid=ru.generate_id("asyncflow.session", mode=ru.ID_PRIVATE) - ) - self.task_manager = rp.TaskManager(self.session) - self.pilot_manager = rp.PilotManager(self.session) - self.resource_pilot = self.pilot_manager.submit_pilots( - rp.PilotDescription(self.resources) - ) - self.task_manager.add_pilots(self.resource_pilot) - self._callback_func: Callable[[asyncio.Future], None] = lambda f: None - - if self.raptor_config: - self.raptor_mode = True - logger.info("Enabling Raptor mode for RadicalExecutionBackend") - self.setup_raptor_mode(self.raptor_config) - - StateMapper.register_backend_states( - backend=self, - done_state=rp.DONE, - failed_state=rp.FAILED, - canceled_state=rp.CANCELED, - running_state=rp.AGENT_EXECUTING, - ) - - logger.info("RadicalPilot execution backend started successfully\n") - - except Exception: - logger.exception("RadicalPilot backend failed to start, terminating\n") - raise - - except (KeyboardInterrupt, SystemExit) as e: - msg = f"Radical backend failed, check {self.session.path}" - raise SystemExit(msg) from e - - def get_task_states_map(self) -> StateMapper: - """Get the state mapper for this backend. - - Returns: - StateMapper: StateMapper instance configured for RadicalPilot backend - with appropriate state mappings (DONE, FAILED, - CANCELED, AGENT_EXECUTING). - """ - return StateMapper(backend=self) - - def setup_raptor_mode(self, raptor_config: dict) -> None: - """Set up Raptor mode by configuring and submitting master and worker tasks. - - Initializes Raptor mode by creating master tasks and their associated - worker tasks based on the provided configuration. Masters coordinate - work distribution while workers execute the actual computations. 
- - Args: - raptor_config (Dict): Configuration dictionary with the following structure: - { - 'masters': [ - { - 'executable': str, # Path to master executable - 'arguments': list, # Arguments for master - 'ranks': int, # Number of CPU processes - 'workers': [ # Worker configurations - { - 'executable': str, # Worker executable path - 'arguments': list, # Worker arguments - 'ranks': int, # Worker CPU processes - 'worker_type': str # Optional worker class - }, - ... - ] - }, - ... - ] - } - - Raises: - Exception: If task description creation or submission fails. - - Note: - - Creates unique UIDs for masters and workers using session namespace - - Sets up master selector for load balancing across masters - - Workers default to 'DefaultWorker' class if not specified - - All master and worker tasks are stored in respective class attributes - """ - - self.masters = [] - self.workers = [] - self.master_selector = self.select_master() - - cfg = copy.deepcopy(raptor_config) - masters = cfg["masters"] - - for master_description in masters: - workers = master_description.pop("workers") - md = rp.TaskDescription(master_description) - md.uid = ru.generate_id( - "flow.master.%(item_counter)06d", ru.ID_CUSTOM, ns=self.session.uid - ) - md.mode = rp.RAPTOR_MASTER - master = self.resource_pilot.submit_raptors(md)[0] - self.masters.append(master) - - for worker_description in workers: - raptor_class = worker_description.pop("worker_type", "DefaultWorker") - worker = master.submit_workers( - rp.TaskDescription( - { - **worker_description, - "raptor_id": md.uid, - "mode": rp.RAPTOR_WORKER, - "raptor_class": raptor_class, - "uid": ru.generate_id( - "flow.worker.%(item_counter)06d", - ru.ID_CUSTOM, - ns=self.session.uid, - ), - } - ) - ) - self.workers.append(worker) - - def select_master(self): - """Create a generator for load balancing task submission across masters. - - Provides a round-robin generator that cycles through available master - UIDs to distribute tasks evenly across all masters in Raptor mode. - - Returns: - Generator[str]: Generator yielding master UIDs in round-robin fashion. - - Raises: - RuntimeError: If Raptor mode is not enabled or no masters are available. - - Example: - :: - selector = backend.select_master() - master_uid = next(selector) # Get next master for task assignment - """ - if not self.raptor_mode or not self.masters: - raise RuntimeError("Raptor mode disabled or no masters available") - - current_master = 0 - masters_uids = [m.uid for m in self.masters] - - while True: - yield masters_uids[current_master] - current_master = (current_master + 1) % len(self.masters) - - def register_callback(self, func: Callable) -> None: - """Register a callback function for task state changes. - - Sets up a callback mechanism that handles task state transitions, - with special handling for service tasks that require additional - readiness confirmation. - - Args: - func (Callable): Callback function that will be invoked on task - state changes. Should accept parameters: - (task, state, service_callback=None). 
- - Note: - - Service tasks in AGENT_EXECUTING state get special service_ready_callback - - All other tasks use the standard callback mechanism - - The callback is registered with the underlying task manager - """ - self._callback_func = func - - def backend_callback(task, state) -> None: - service_callback = None - - if task.mode == rp.TASK_SERVICE and state == rp.AGENT_EXECUTING: - service_callback = service_ready_callback - - elif task.mode == rp.TASK_EXECUTABLE and state == rp.FAILED: - task = task.as_dict() - stderr = task.get("stderr") - exception = task.get("exception") - if stderr or exception: - task["stderr"] = ", ".join(filter(None, [stderr, exception])) - task["exception"] = "" - - func(task, state, service_callback=service_callback) - - self.task_manager.register_callback(backend_callback) - - def build_task( - self, uid: str, task_desc: dict, task_backend_specific_kwargs: dict - ) -> "Optional[rp.TaskDescription]": - """Build a RadicalPilot task description from workflow task parameters. - - Converts a workflow task description into a RadicalPilot TaskDescription, - handling different task modes (executable, function, service) and applying - appropriate configurations. - - Args: - uid (str): Unique identifier for the task. - task_desc (Dict): Task description containing: - - executable: Path to executable (for executable tasks) - - function: Python function (for function tasks) - - args: Function arguments - - kwargs: Function keyword arguments - - is_service: Boolean indicating service task - task_backend_specific_kwargs (Dict): RadicalPilot-specific parameters - for the task description. - - Returns: - rp.TaskDescription: Configured RadicalPilot task description, or None - if task creation failed. - - Note: - - Function tasks require Raptor mode to be enabled - - Service tasks cannot be Python functions - - Failed tasks trigger callback with FAILED state - - Raptor tasks are assigned to masters via load balancing - - Example: - :: - task_desc = { - 'executable': '/bin/echo', - 'args': ['Hello World'], - 'is_service': False - } - rp_task = backend.build_task('task_001', task_desc, {}) - """ - - is_service = task_desc.get("is_service", False) - rp_task = rp.TaskDescription(from_dict=task_backend_specific_kwargs) - rp_task.uid = uid - - if task_desc["executable"]: - rp_task.mode = rp.TASK_SERVICE if is_service else rp.TASK_EXECUTABLE - rp_task.executable = task_desc["executable"] - elif task_desc["function"]: - if is_service: - error_msg = ( - "RadicalExecutionBackend does not support function service tasks" - ) - rp_task["exception"] = ValueError(error_msg) - self._callback_func(rp_task, rp.FAILED) - return None - - rp_task.mode = rp.TASK_FUNCTION - rp_task.function = rp.PythonTask( - task_desc["function"], task_desc["args"], task_desc["kwargs"] - ) - - if rp_task.mode in [ - rp.TASK_FUNCTION, - rp.TASK_EVAL, - rp.TASK_PROC, - rp.TASK_METHOD, - ]: - if not self.raptor_mode: - error_msg = f"Raptor mode not enabled, cannot register {rp_task.mode}" - rp_task["exception"] = RuntimeError(error_msg) - self._callback_func(rp_task, rp.FAILED) - return None - - rp_task.raptor_id = next(self.master_selector) - - self.tasks[uid] = rp_task - return rp_task - - def link_explicit_data_deps( - self, - src_task: Optional[dict] = None, - dst_task: Optional[dict] = None, - file_name: Optional[str] = None, - file_path: Optional[str] = None, - ) -> dict: - """Link explicit data dependencies between tasks or from external sources. 
- - Creates data staging entries to establish explicit dependencies where - files are transferred or linked from source to destination tasks. - Supports both task-to-task dependencies and external file staging. - - Args: - src_task (Optional[Dict]): Source task dictionary containing the file. - None when staging from external path. - dst_task (Dict): Destination task dictionary that will receive the file. - Must contain 'task_backend_specific_kwargs' key. - file_name (Optional[str]): Name of the file to stage. Defaults to: - - src_task UID if staging from task - - basename of file_path if staging from external path - file_path (Optional[str]): External file path to stage (alternative - to task-sourced files). - - Returns: - Dict: The data dependency dictionary that was added to input staging. - - Raises: - ValueError: If neither file_name nor file_path is provided, or if - src_task is missing when file_path is not specified. - - Note: - - External files use TRANSFER action - - Task-to-task dependencies use LINK action - - Files are staged to task:/// namespace in destination - - Input staging list is created if it doesn't exist - - Example: - :: - # Link output from task1 to task2 - backend.link_explicit_data_deps( - src_task={'uid': 'task1'}, - dst_task={'task_backend_specific_kwargs': {}}, - file_name='output.dat' - ) - - # Stage external file - backend.link_explicit_data_deps( - dst_task={'task_backend_specific_kwargs': {}}, - file_path='/path/to/input.txt' - ) - """ - if not file_name and not file_path: - raise ValueError("Either file_name or file_path must be provided") - - dst_kwargs = dst_task["task_backend_specific_kwargs"] - - if not file_name: - if file_path: - file_name = file_path.split("/")[-1] - elif src_task: - file_name = src_task["uid"] - else: - raise ValueError("Must provide file_name, file_path, or src_task") - - if file_path: - data_dep = { - "source": file_path, - "target": f"task:///{file_name}", - "action": rp.TRANSFER, - } - else: - if not src_task: - raise ValueError("src_task required when file_path not specified") - data_dep = { - "source": f"pilot:///{src_task['uid']}/{file_name}", - "target": f"task:///{file_name}", - "action": rp.LINK, - } - - if "input_staging" not in dst_kwargs: - dst_kwargs["input_staging"] = [data_dep] - else: - dst_kwargs["input_staging"].append(data_dep) - - return data_dep - - def link_implicit_data_deps(self, src_task: dict, dst_task: dict) -> None: - """Add implicit data dependencies through symbolic links in task sandboxes. - - Creates pre-execution commands that establish symbolic links from the - source task's sandbox to the destination task's sandbox, simulating - implicit data dependencies without explicit file specifications. - - Args: - src_task (Dict): Source task dictionary containing 'uid' key. - dst_task (Dict): Destination task dictionary with - 'task_backend_specific_kwargs'. - - Note: - - Links all files from source sandbox except the task UID file itself - - Uses environment variables for source task identification - - Commands are added to the destination task's pre_exec list - - Symbolic links are created in the destination task's sandbox - - Implementation Details: - 1. Sets SRC_TASK_ID environment variable - 2. Sets SRC_TASK_SANDBOX path variable - 3. 
Creates symbolic links for all files except the task ID file - - Example: - :: - src_task = {'uid': 'producer_task'} - dst_task = {'task_backend_specific_kwargs': {}} - backend.link_implicit_data_deps(src_task, dst_task) - """ - - dst_kwargs = dst_task["task_backend_specific_kwargs"] - src_uid = src_task["uid"] - - cmd1 = f"export SRC_TASK_ID={src_uid}" - cmd2 = 'export SRC_TASK_SANDBOX="$RP_PILOT_SANDBOX/$SRC_TASK_ID"' - cmd3 = """files=$(cd "$SRC_TASK_SANDBOX" && ls | grep -ve "^$SRC_TASK_ID") - for f in $files - do - ln -sf "$SRC_TASK_SANDBOX/$f" "$RP_TASK_SANDBOX" - done""" - - commands = [cmd1, cmd2, cmd3] - - if dst_kwargs.get("pre_exec"): - dst_kwargs["pre_exec"].extend(commands) - else: - dst_kwargs["pre_exec"] = commands - - async def submit_tasks(self, tasks: list) -> None: - """Submit a list of tasks for execution. - - Processes a list of workflow tasks, builds RadicalPilot task descriptions, - and submits them to the task manager for execution. Handles task building - failures gracefully by skipping invalid tasks. - - Args: - tasks (list): List of task dictionaries, each containing: - - uid: Unique task identifier - - task_backend_specific_kwargs: RadicalPilot-specific parameters - - Other task description fields - - Returns: - The result of task_manager.submit_tasks() with successfully built tasks. - - Note: - - Failed task builds are skipped (build_task returns None) - - Only successfully built tasks are submitted to the task manager - - Task building includes validation and error handling - """ - - _tasks = [] - for task in tasks: - task_to_submit = self.build_task( - task["uid"], task, task["task_backend_specific_kwargs"] - ) - if not task_to_submit: - continue - _tasks.append(task_to_submit) - - return self.task_manager.submit_tasks(_tasks) - - async def cancel_task(self, uid: str) -> bool: - """Cancel a task. - - Args: - uid: Task UID to cancel. - - Returns: - True if task found and cancellation attempted, False otherwise. - """ - if uid in self.tasks: - self.task_manager.cancel_tasks(uid) - return True - return False - - def get_nodelist(self) -> "Optional[rp.NodeList]": - """Get information about allocated compute nodes. - - Retrieves the nodelist from the active resource pilot, providing - details about the compute nodes allocated for task execution. - - Returns: - rp.NodeList: NodeList object containing information about allocated - nodes. Each node in nodelist.nodes is of type rp.NodeResource. - Returns None if the pilot is not in PMGR_ACTIVE state. - - Note: - - Only returns nodelist when pilot is in active state - - Nodelist provides detailed resource information for each node - - Useful for resource-aware task scheduling and monitoring - """ - - nodelist = None - if self.resource_pilot.state == rp.PMGR_ACTIVE: - nodelist = self.resource_pilot.nodelist - return nodelist - - def state(self): - """Retrieve resource pilot state.""" - raise NotImplementedError - - def task_state_cb(self, task, state) -> None: - """Handle task state changes.""" - raise NotImplementedError - - async def shutdown(self) -> None: - """Gracefully shutdown the backend and clean up resources. - - Closes the RadicalPilot session with data download, ensuring proper - cleanup of all resources including pilots, tasks, and session data. 
- - Note: - - Downloads session data before closing - - Ensures graceful termination of all backend resources - - Prints confirmation message when shutdown is triggered - """ - self.session.close(download=True) - logger.info("Radical Pilot backend shutdown complete") - - async def __aenter__(self): - """Async context manager entry.""" - if not self._initialized: - await self._async_init() - return self - - async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: - """Async context manager exit.""" - await self.shutdown() - - @classmethod - async def create(cls, resources: dict, raptor_config: Optional[dict] = None): - """Create initialized backend. - - Args: - resources: Radical Pilot configuration. - raptor_config: Optional Raptor mode configuration. - - Returns: - Initialized RadicalExecutionBackend instance. - """ - backend = cls(resources, raptor_config) - return await backend diff --git a/src/radical/asyncflow/backends/factory.py b/src/radical/asyncflow/backends/factory.py new file mode 100644 index 0000000..43edc5b --- /dev/null +++ b/src/radical/asyncflow/backends/factory.py @@ -0,0 +1,228 @@ +"""Factory for creating execution backends with proper configuration. + +This module provides a factory pattern for creating and initializing execution backends, +handling both synchronous and asynchronous backend initialization. +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING, Any, Optional + +from .registry import registry + +if TYPE_CHECKING: + from .execution.base import BaseExecutionBackend + +logger = logging.getLogger(__name__) + + +class BackendFactory: + """Factory for creating and initializing execution backends. + + This factory handles the creation of backend instances, configuration management, + and proper initialization of both synchronous and asynchronous backends. + """ + + @staticmethod + def _suggest_installation(backend_type: str) -> str: + """Provide helpful installation suggestions for missing backends. + + Args: + backend_type: Backend identifier + + Returns: + Installation suggestion string + """ + suggestions = { + "dask": "pip install 'radical.asyncflow[dask]'", + "radical_pilot": "pip install 'radical.asyncflow[radicalpilot]'", + } + + if backend_type in suggestions: + return f"Try: {suggestions[backend_type]}" + return "pip install 'radical.asyncflow[hpc]' for HPC backends" + + @staticmethod + async def create_backend( + backend_type: str, + config: Optional[dict[str, Any]] = None, + **kwargs, + ) -> BaseExecutionBackend: + """Create and initialize a backend. + + Args: + backend_type: Backend identifier ('concurrent', 'dask', + 'radical_pilot', etc.) 
+ config: Backend-specific configuration dictionary + **kwargs: Additional arguments passed to backend constructor + + Returns: + Initialized backend instance + + Raises: + ValueError: If backend is not available + RuntimeError: If backend initialization fails + """ + backend_class = registry.get_backend(backend_type) + if backend_class is None: + available = list(registry.list_available().keys()) + available_str = ", ".join(sorted(available)) + suggestion = BackendFactory._suggest_installation(backend_type) + + # Get failure reason if available + failure_reason = registry.get_failure_reason(backend_type) + failure_info = f"\nReason: {failure_reason}" if failure_reason else "" + + raise ValueError( + f"Backend '{backend_type}' is not available.\n" + f"Available backends: {available_str}\n" + f"Installation hint: {suggestion}{failure_info}" + ) + + try: + # Handle different backend initialization patterns + if backend_type == "noop": + # Noop backend takes no arguments + logger.debug("Creating noop backend") + backend = backend_class() + + elif backend_type == "concurrent": + # Concurrent backend requires an executor + import concurrent.futures + + # Use config or kwargs to get executor parameters + max_workers = (config or {}).get("max_workers") or kwargs.get( + "max_workers", 4 + ) + executor_type = (config or {}).get("executor_type") or kwargs.get( + "executor_type", "thread" + ) + + if executor_type == "process": + executor = concurrent.futures.ProcessPoolExecutor( + max_workers=max_workers + ) + else: + executor = concurrent.futures.ThreadPoolExecutor( + max_workers=max_workers + ) + + logger.debug( + f"Creating concurrent backend with {executor_type} executor " + f"(max_workers={max_workers})" + ) + backend = backend_class(executor) + + # Concurrent backend is awaitable for async initialization + if hasattr(backend, "__await__"): + backend = await backend + + else: + # For other backends, try the generic approach + backend_config = config or {} + logger.debug( + f"Creating backend '{backend_type}' with config: {backend_config}" + ) + backend = backend_class(backend_config, **kwargs) + + # Handle async initialization if needed + if hasattr(backend, "__aenter__"): + # Backend is an async context manager + backend = await backend.__aenter__() + elif hasattr(backend, "initialize") and callable(backend.initialize): + # Backend has an explicit initialize method + await backend.initialize() + elif hasattr(backend, "__await__"): + # Backend is awaitable + backend = await backend + + logger.info( + f"Successfully created and initialized '{backend_type}' backend" + ) + return backend + + except Exception as e: + logger.error(f"Failed to initialize backend '{backend_type}': {e}") + raise RuntimeError( + f"Failed to initialize backend '{backend_type}': {e}" + ) from e + + @staticmethod + def list_available_backends() -> dict[str, dict[str, Any]]: + """List all available backends with detailed information. 
+ + Returns: + Dictionary with backend info including availability and failure reasons + """ + info = {} + for name in registry._backend_specs: + backend_class = registry.get_backend(name) + failure_reason = registry.get_failure_reason(name) + + info[name] = { + "available": backend_class is not None, + "class": backend_class.__name__ if backend_class else None, + "failure_reason": failure_reason, + "installation_hint": ( + BackendFactory._suggest_installation(name) + if backend_class is None + else None + ), + } + + return info + + @staticmethod + def create_backend_sync( + backend_type: str, + config: Optional[dict[str, Any]] = None, + **kwargs, + ) -> BaseExecutionBackend: + """Create a backend synchronously (for backends that don't require async init). + + Args: + backend_type: Backend identifier + config: Backend-specific configuration dictionary + **kwargs: Additional arguments passed to backend constructor + + Returns: + Backend instance (not initialized if async initialization is required) + + Raises: + ValueError: If backend is not available + RuntimeError: If backend creation fails + + Warning: + This method should only be used for backends that don't require + asynchronous initialization. Use create_backend() for proper async init. + """ + backend_class = registry.get_backend(backend_type) + if backend_class is None: + available = list(registry.list_available().keys()) + available_str = ", ".join(sorted(available)) + suggestion = BackendFactory._suggest_installation(backend_type) + + raise ValueError( + f"Backend '{backend_type}' is not available.\n" + f"Available backends: {available_str}\n" + f"Installation hint: {suggestion}" + ) + + try: + backend_config = config or {} + logger.debug( + f"Creating backend '{backend_type}' (sync) with config: " + f"{backend_config}" + ) + backend = backend_class(backend_config, **kwargs) + logger.info(f"Successfully created '{backend_type}' backend (sync)") + return backend + + except Exception as e: + logger.error(f"Failed to create backend '{backend_type}' (sync): {e}") + raise RuntimeError(f"Failed to create backend '{backend_type}': {e}") from e + + +# Convenience factory instance +factory = BackendFactory() diff --git a/src/radical/asyncflow/backends/registry.py b/src/radical/asyncflow/backends/registry.py new file mode 100644 index 0000000..e87286d --- /dev/null +++ b/src/radical/asyncflow/backends/registry.py @@ -0,0 +1,192 @@ +"""Backend registry with discovery and lazy loading. + +This module implements a plugin-style registry for execution backends, allowing +AsyncFlow to discover and load backends on demand without requiring hard dependencies. +""" + +from __future__ import annotations + +import importlib +import logging +from typing import Optional + +logger = logging.getLogger(__name__) + + +class BackendRegistry: + """Registry for execution backends with lazy loading and discovery. + + This registry manages both core backends (always available) and optional backends + (loaded on demand from external packages like rhapsody). It provides a clean + separation between AsyncFlow core functionality and optional HPC/scale-out backends. 
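As a usage sketch (separate from the patch itself), the factory shown above can be driven roughly as follows. The snippet assumes the core `concurrent` backend is importable from the base install and that `state()` and `shutdown()` behave as exercised by the tests later in this diff; the `main` wrapper is illustrative only.

```python
import asyncio

from radical.asyncflow import factory


async def main():
    # Report which backends can be imported in this environment
    for name, info in factory.list_available_backends().items():
        status = "available" if info["available"] else info["failure_reason"]
        print(f"{name}: {status}")

    # Create and initialize a thread-based concurrent backend
    backend = await factory.create_backend(
        "concurrent", config={"max_workers": 2, "executor_type": "thread"}
    )
    try:
        print("backend state:", backend.state())
    finally:
        await backend.shutdown()


if __name__ == "__main__":
    asyncio.run(main())
```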
+ """ + + def __init__(self): + """Initialize the backend registry.""" + self._backends: dict[str, type] = {} + self._failed_backends: dict[str, str] = {} # Track failed import reasons + + # Backend specifications: name -> module_path:class_name + self._backend_specs = { + # Core backends (should always be available) + "noop": "radical.asyncflow.backends.execution.noop:NoopExecutionBackend", + "concurrent": ( + "radical.asyncflow.backends.execution.concurrent:" + "ConcurrentExecutionBackend" + ), + # Optional rhapsody backends (loaded on demand) + "dask": "rhapsody.backends.execution.dask_parallel:DaskExecutionBackend", + "radical_pilot": ( + "rhapsody.backends.execution.radical_pilot:RadicalExecutionBackend" + ), + } + + def get_backend(self, name: str) -> Optional[type]: + """Get backend class by name, loading it if necessary. + + Args: + name: Backend identifier (e.g., 'dask', 'radical_pilot', 'noop') + + Returns: + Backend class if available and successfully loaded, None otherwise + """ + # Return cached backend if already loaded + if name in self._backends: + return self._backends[name] + + # Return None if previously failed to load + if name in self._failed_backends: + error_msg = self._failed_backends[name] + logger.debug(f"Backend '{name}' previously failed to load: {error_msg}") + return None + + # Check if backend specification exists + if name not in self._backend_specs: + available = list(self._backend_specs.keys()) + logger.debug(f"Unknown backend '{name}'. Available: {available}") + return None + + # Attempt to load the backend + try: + module_path, class_name = self._backend_specs[name].split(":") + module = importlib.import_module(module_path) + backend_class = getattr(module, class_name) + + # Validate that it's a proper backend + from radical.asyncflow.backends.execution.base import BaseExecutionBackend + + # Check if it's a BaseExecutionBackend subclass (for internal backends) + # or implements the required interface (for external backends like Rhapsody) + if issubclass(backend_class, BaseExecutionBackend): + # Internal backend - passes strict inheritance check + pass + else: + # External backend - validate using duck-typing + required_methods = ["submit_tasks", "shutdown", "state"] + missing_methods = [] + + for method in required_methods: + if not hasattr(backend_class, method): + missing_methods.append(method) + elif not callable(getattr(backend_class, method)): + missing_methods.append(f"{method} (not callable)") + + if missing_methods: + error_msg = ( + f"Backend '{class_name}' missing required methods: " + f"{', '.join(missing_methods)}" + ) + self._failed_backends[name] = error_msg + logger.error(f"Backend '{name}' validation failed: {error_msg}") + return None + + logger.debug(f"External backend '{name}' passed duck-typing validation") + + # Cache successful load + self._backends[name] = backend_class + logger.debug(f"Successfully loaded backend '{name}' from {module_path}") + return backend_class + + except ImportError as e: + error_msg = f"Import error: {e}" + self._failed_backends[name] = error_msg + logger.debug(f"Backend '{name}' not available: {error_msg}") + return None + except AttributeError as e: + error_msg = f"Class not found: {e}" + self._failed_backends[name] = error_msg + logger.error(f"Backend '{name}' load failed: {error_msg}") + return None + except Exception as e: + error_msg = f"Unexpected error: {e}" + self._failed_backends[name] = error_msg + logger.error(f"Backend '{name}' load failed: {error_msg}") + return None + + def 
list_available(self) -> dict[str, bool]: + """List all backends and their availability status. + + Returns: + Dictionary mapping backend name to availability (True/False) + """ + available = {} + for name in self._backend_specs: + available[name] = self.get_backend(name) is not None + return available + + def list_loaded(self) -> dict[str, type]: + """Get all currently loaded backends. + + Returns: + Dictionary mapping backend name to backend class + """ + return self._backends.copy() + + def get_failure_reason(self, name: str) -> Optional[str]: + """Get the reason why a backend failed to load. + + Args: + name: Backend identifier + + Returns: + Failure reason string if backend failed to load, None otherwise + """ + return self._failed_backends.get(name) + + def register_backend(self, name: str, backend_class: type) -> None: + """Register a backend class directly (for testing or custom backends). + + Args: + name: Backend identifier + backend_class: Backend class to register + + Raises: + TypeError: If backend_class is not a BaseExecutionBackend subclass + """ + from radical.asyncflow.backends.execution.base import BaseExecutionBackend + + if not issubclass(backend_class, BaseExecutionBackend): + raise TypeError("Backend class must be a BaseExecutionBackend subclass") + + self._backends[name] = backend_class + # Remove from failed backends if it was there + self._failed_backends.pop(name, None) + logger.debug(f"Registered backend '{name}': {backend_class}") + + def add_backend_spec(self, name: str, module_class_spec: str) -> None: + """Add a new backend specification for lazy loading. + + Args: + name: Backend identifier + module_class_spec: Module and class specification in format + "module.path:ClassName" + """ + self._backend_specs[name] = module_class_spec + # Clear any cached results for this backend + self._backends.pop(name, None) + self._failed_backends.pop(name, None) + logger.debug(f"Added backend spec '{name}': {module_class_spec}") + + +# Global registry instance +registry = BackendRegistry() diff --git a/src/radical/asyncflow/constants.py b/src/radical/asyncflow/constants.py index db6fcc5..f017d4b 100644 --- a/src/radical/asyncflow/constants.py +++ b/src/radical/asyncflow/constants.py @@ -22,8 +22,8 @@ class TasksMainStates(Enum): class StateMapper: - """Unified interface for mapping task states between main workflow and - backend systems. + """Unified interface for mapping task states between main workflow and backend + systems. StateMapper provides a centralized mechanism for translating task states between the main workflow system and various backend execution systems diff --git a/src/radical/asyncflow/data.py b/src/radical/asyncflow/data.py index 907f458..d97a1de 100644 --- a/src/radical/asyncflow/data.py +++ b/src/radical/asyncflow/data.py @@ -22,15 +22,15 @@ class File: """Base class for file handling in task execution systems. - Provides common attributes and functionality for managing files with - filename and filepath properties. + Provides common attributes and functionality for managing files with filename and + filepath properties. """ def __init__(self) -> None: """Initialize a File object with default None values. - Sets filename and filepath attributes to None, to be populated - by subclasses during file resolution. + Sets filename and filepath attributes to None, to be populated by subclasses + during file resolution. 
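A short sketch of how the registry defined above might be extended, again separate from the patch. `MyBackend`, `my_pkg.backends`, and `MyLazyBackend` are hypothetical names used only for illustration; `register_backend`, `add_backend_spec`, `get_backend`, and `get_failure_reason` are the APIs introduced above.

```python
from radical.asyncflow.backends.execution.base import BaseExecutionBackend
from radical.asyncflow.backends.registry import registry


class MyBackend(BaseExecutionBackend):
    """Stand-in subclass; a real backend implements the full execution interface."""


# Direct registration (useful in tests); requires a BaseExecutionBackend subclass
registry.register_backend("my_backend", MyBackend)

# Lazy registration: the import is attempted on the first get_backend() call,
# and a failure is cached together with its reason
registry.add_backend_spec("my_lazy_backend", "my_pkg.backends:MyLazyBackend")
if registry.get_backend("my_lazy_backend") is None:
    print(registry.get_failure_reason("my_lazy_backend"))
```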
""" self.filename = None self.filepath = None @@ -74,12 +74,12 @@ def download_remote_url(url: str) -> Path: class InputFile(File): - """Represents an input file that can be sourced from remote URLs, local paths, - or task outputs. + """Represents an input file that can be sourced from remote URLs, local paths, or + task outputs. Automatically detects the file source type and handles appropriate resolution. - Supports remote file downloading, local file path resolution, and task output - file references. + Supports remote file downloading, local file path resolution, and task output file + references. """ def __init__(self, file): @@ -147,8 +147,8 @@ def __init__(self, file): class OutputFile(File): """Represents an output file that will be produced by a task. - Handles filename validation and extraction from file paths, ensuring - proper output file naming for task execution. + Handles filename validation and extraction from file paths, ensuring proper output + file naming for task execution. """ def __init__(self, filename): diff --git a/src/radical/asyncflow/errors.py b/src/radical/asyncflow/errors.py index 4298a32..e0f46e8 100644 --- a/src/radical/asyncflow/errors.py +++ b/src/radical/asyncflow/errors.py @@ -1,9 +1,9 @@ class DependencyFailureError(Exception): """Exception raised when a workflow component cannot execute due to dependency - failures. + failures. - This exception provides detailed information about the failed dependencies - and maintains the chain of causation for debugging purposes. + This exception provides detailed information about the failed dependencies and + maintains the chain of causation for debugging purposes. """ def __init__(self, message, failed_dependencies=None, root_cause=None): diff --git a/src/radical/asyncflow/utils.py b/src/radical/asyncflow/utils.py index c5a46b5..8415d35 100644 --- a/src/radical/asyncflow/utils.py +++ b/src/radical/asyncflow/utils.py @@ -19,8 +19,7 @@ def reset_uid_counter(): def get_event_loop_or_raise( context_name: str = "AsyncWorkflowEngine", ) -> asyncio.AbstractEventLoop: - """ - Get the current running event loop or raise a helpful error. + """Get the current running event loop or raise a helpful error. Args: context_name: Name of the class/context for error messages diff --git a/src/radical/asyncflow/workflow_manager.py b/src/radical/asyncflow/workflow_manager.py index 630c317..6f71cc6 100644 --- a/src/radical/asyncflow/workflow_manager.py +++ b/src/radical/asyncflow/workflow_manager.py @@ -8,19 +8,17 @@ import signal from collections import defaultdict, deque from functools import wraps -from itertools import count +from itertools import chain from pathlib import Path from typing import Any, Callable, Optional, Union import typeguard -from .backends.execution.base import BaseExecutionBackend -from .backends.execution.noop import NoopExecutionBackend +from .backends.execution import NoopExecutionBackend +from .backends.execution.base import ExecutionBackendProtocol from .data import InputFile, OutputFile from .errors import DependencyFailureError -from .utils import get_next_uid -from .utils import reset_uid_counter -from .utils import get_event_loop_or_raise +from .utils import get_event_loop_or_raise, get_next_uid, reset_uid_counter TASK = "task" BLOCK = "block" @@ -31,10 +29,9 @@ class WorkflowEngine: - """ - An asynchronous workflow manager that uses asyncio event loops and coroutines - to manage and execute workflow components (blocks and/or tasks) within - Directed Acyclic Graph (DAG) or Chain Graph (CG) structures. 
+ """An asynchronous workflow manager that uses asyncio event loops and coroutines to + manage and execute workflow components (blocks and/or tasks) within Directed Acyclic + Graph (DAG) or Chain Graph (CG) structures. This class provides async/await operations and handles task dependencies, input/output data staging, and execution. @@ -51,12 +48,11 @@ class WorkflowEngine: @typeguard.typechecked def __init__( self, - backend: BaseExecutionBackend, + backend: ExecutionBackendProtocol, dry_run: bool = False, implicit_data: bool = True, ) -> None: - """ - Initialize the WorkflowEngine (sync part only). + """Initialize the WorkflowEngine (sync part only). Note: This is a private constructor. Use WorkflowEngine.create() instead. @@ -110,9 +106,8 @@ def __init__( self._setup_signal_handlers() def _setup_signal_handlers(self): - """ - Register signal handlers for graceful shutdown on SIGHUP, SIGTERM, and SIGINT. - """ + """Register signal handlers for graceful shutdown on SIGHUP, SIGTERM, and + SIGINT.""" signals = (signal.SIGHUP, signal.SIGTERM, signal.SIGINT) for sig in signals: try: @@ -129,8 +124,7 @@ def _setup_signal_handlers(self): async def _handle_shutdown_signal( self, sig: signal.Signals, source: str = "external" ): - """ - Handle received signals by initiating a graceful shutdown. + """Handle received signals by initiating a graceful shutdown. Args: sig: The signal received (e.g., SIGHUP, SIGTERM, SIGINT) @@ -149,12 +143,11 @@ async def _handle_shutdown_signal( @classmethod async def create( cls, - backend: Optional[BaseExecutionBackend] = None, + backend: Optional[ExecutionBackendProtocol] = None, dry_run: bool = False, implicit_data: bool = True, ) -> "WorkflowEngine": - """ - Factory method to create and initialize a WorkflowEngine. + """Factory method to create and initialize a WorkflowEngine. Args: backend: Execution backend. If None and dry_run=True, @@ -183,8 +176,8 @@ async def create( @staticmethod def _setup_execution_backend( - backend: Optional[BaseExecutionBackend], dry_run: bool - ) -> BaseExecutionBackend: + backend: Optional[ExecutionBackendProtocol], dry_run: bool + ) -> ExecutionBackendProtocol: """Setup and validate the execution backend.""" if backend is None: if dry_run: @@ -565,8 +558,7 @@ def patched_cancel(*args, **kwargs): return patched_cancel def _assign_uid(self, prefix: str) -> str: - """ - Generates a unique identifier (UID) for a flow component using a counter. + """Generates a unique identifier (UID) for a flow component using a counter. Args: prefix (str): The prefix to use for the UID. @@ -578,8 +570,7 @@ def _assign_uid(self, prefix: str) -> str: return f"{prefix}.{uid}" def _detect_dependencies(self, possible_dependencies): - """ - Detects and categorizes possible dependencies into blocks/tasks, input files, + """Detects and categorizes possible dependencies into blocks/tasks, input files, and output files. This method iterates over a list of possible dependencies and classifies @@ -624,10 +615,8 @@ def _detect_dependencies(self, possible_dependencies): return dependencies, input_files, output_files async def _extract_dependency_values(self, comp_desc: dict): - """ - Resolve Future objects in args and kwargs to their actual values. - This is called right before submission when all dependencies are - guaranteed to be done. + """Resolve Future objects in args and kwargs to their actual values. This is + called right before submission when all dependencies are guaranteed to be done. 
Args: comp_desc: Component description dict @@ -680,9 +669,8 @@ def _notify_dependents(self, comp_uid: str): del self._dependency_count[comp_uid] def _create_dependency_failure_exception(self, comp_desc: dict, failed_deps: list): - """ - Create a DependencyFailureError exception that shows both the immediate failure - and the root cause from failed dependencies. + """Create a DependencyFailureError exception that shows both the immediate + failure and the root cause from failed dependencies. Args: comp_desc (dict): Description of the component that cannot execute @@ -714,8 +702,7 @@ def _create_dependency_failure_exception(self, comp_desc: dict, failed_deps: lis ) def _get_dependency_output_files(self, dependencies): - """ - Helper method to get all output files from dependencies. + """Helper method to get all output files from dependencies. Args: dependencies: List of dependency descriptions @@ -1278,8 +1265,7 @@ def wait_and_set(): self.handle_task_failure(task_dct, task_fut) async def shutdown(self, skip_execution_backend: bool = False): - """ - Internal implementation of asynchronous shutdown for the workflow manager. + """Internal implementation of asynchronous shutdown for the workflow manager. This method performs the following steps: 1. Sets the shutdown event to signal components to exit diff --git a/tests/integration/backends/test_dask_parallel.py b/tests/integration/backends/test_dask_parallel.py index b97c99e..2620ae9 100644 --- a/tests/integration/backends/test_dask_parallel.py +++ b/tests/integration/backends/test_dask_parallel.py @@ -5,14 +5,15 @@ import pytest import pytest_asyncio -from radical.asyncflow import DaskExecutionBackend, WorkflowEngine +from radical.asyncflow import WorkflowEngine, factory @pytest_asyncio.fixture(scope="function") async def backend(): # Setup: create backend and flow - backend = await DaskExecutionBackend( - {"n_workers": 2, "threads_per_worker": 1, "dashboard_address": None} + backend = await factory.create_backend( + "dask", + config={"n_workers": 2, "threads_per_worker": 1, "dashboard_address": None}, ) # provide the flow to the test yield backend diff --git a/tests/integration/backends/test_radical_pilot.py b/tests/integration/backends/test_radical_pilot.py index c3b4fcc..72e7a19 100644 --- a/tests/integration/backends/test_radical_pilot.py +++ b/tests/integration/backends/test_radical_pilot.py @@ -3,23 +3,22 @@ import pytest import pytest_asyncio -from radical.asyncflow import ( - InputFile, - OutputFile, - RadicalExecutionBackend, - WorkflowEngine, -) +from radical.asyncflow import InputFile, OutputFile, WorkflowEngine, factory + +# Configure all tests in this module to use module-scoped event loop for performance +pytestmark = pytest.mark.asyncio(loop_scope="module") @pytest_asyncio.fixture(scope="module") async def backend(): - """Initialize RadicalExecutionBackend once for all tests.""" - be = await RadicalExecutionBackend({"resource": "local.localhost"}) + """Initialize RADICAL-Pilot backend once for all tests.""" + be = await factory.create_backend( + "radical_pilot", config={"resource": "local.localhost"} + ) yield be await be.shutdown() -@pytest.mark.asyncio async def test_async_bag_of_tasks(backend): flow = await WorkflowEngine.create(backend=backend) @@ -39,7 +38,6 @@ async def echo_task(i): await flow.shutdown(skip_execution_backend=True) -@pytest.mark.asyncio async def test_radical_backend_reject_service_task_function(backend): flow = await WorkflowEngine.create(backend=backend) @@ -57,7 +55,6 @@ async def bad_task2(): await 
flow.shutdown(skip_execution_backend=True) -@pytest.mark.asyncio async def test_radical_backend_reject_function_task_with_raptor_off(backend): flow = await WorkflowEngine.create(backend=backend) @@ -72,7 +69,6 @@ async def bad_task3(): await flow.shutdown(skip_execution_backend=True) -@pytest.mark.asyncio async def test_radical_backend_implicit_data(backend): flow = await WorkflowEngine.create(backend=backend) @@ -91,7 +87,6 @@ async def task2(*args): await flow.shutdown(skip_execution_backend=True) -@pytest.mark.asyncio async def test_radical_backend_explicit_data(backend): flow = await WorkflowEngine.create(backend=backend) @@ -110,7 +105,6 @@ async def task2(*args): await flow.shutdown(skip_execution_backend=True) -@pytest.mark.asyncio async def test_radical_backend_input_data_staging(backend): flow = await WorkflowEngine.create(backend=backend) @@ -127,7 +121,6 @@ async def task1(*args): await flow.shutdown(skip_execution_backend=True) -@pytest.mark.asyncio async def test_async_cancel_tasks(backend): flow = await WorkflowEngine.create(backend=backend) @@ -151,7 +144,6 @@ async def task(): await flow.shutdown(skip_execution_backend=True) -@pytest.mark.asyncio async def test_async_cancel_before_start(backend): flow = await WorkflowEngine.create(backend=backend) @@ -176,7 +168,6 @@ async def fast_task(): await flow.shutdown(skip_execution_backend=True) -@pytest.mark.asyncio async def test_async_cancel_after_completion(backend): flow = await WorkflowEngine.create(backend=backend) @@ -195,7 +186,6 @@ async def quick_task(): await flow.shutdown(skip_execution_backend=True) -@pytest.mark.asyncio async def test_async_cancel_one_of_many(backend): flow = await WorkflowEngine.create(backend=backend) diff --git a/tests/integration/test_backend_plugin_system.py b/tests/integration/test_backend_plugin_system.py new file mode 100644 index 0000000..6868f14 --- /dev/null +++ b/tests/integration/test_backend_plugin_system.py @@ -0,0 +1,204 @@ +"""Integration tests for backend plugin system. + +Tests the complete plugin architecture in realistic scenarios, including workflow +execution through the plugin system. 
+""" + +import pytest + +import radical.asyncflow as asyncflow + + +class TestBackendPluginIntegration: + """Test backend plugin system integration with workflows.""" + + @pytest.mark.asyncio + async def test_workflow_with_noop_backend_via_factory(self): + """Test complete workflow execution using noop backend through factory.""" + # Create backend via plugin system + backend = await asyncflow.factory.create_backend("noop") + flow = await asyncflow.WorkflowEngine.create(backend=backend) + + @flow.function_task + async def simple_task(x: int) -> int: + return x * 2 + + # Submit task through the workflow (no await on task call) + future = simple_task(5) + result = await future + + # Noop backend returns dummy output, not actual computation + assert result == "Dummy Output" + + # Clean shutdown + await flow.shutdown() + + @pytest.mark.asyncio + async def test_workflow_with_concurrent_backend_via_factory(self): + """Test complete workflow execution using concurrent backend through factory.""" + # Create backend via plugin system with custom config + backend = await asyncflow.factory.create_backend( + "concurrent", config={"max_workers": 2} + ) + flow = await asyncflow.WorkflowEngine.create(backend=backend) + + @flow.function_task + async def compute_task(x: int) -> int: + return x**2 + + # Submit multiple tasks (no await on task calls) + tasks = [] + for i in range(3): + future = compute_task(i + 1) + tasks.append(future) + + # Wait for all results + results = [] + for task in tasks: + result = await task + results.append(result) + + assert results == [1, 4, 9] + + # Clean shutdown + await flow.shutdown() + + def test_backend_availability_discovery(self): + """Test that plugin system correctly reports backend availability.""" + available = asyncflow.registry.list_available() + + # Core backends should be available + assert "noop" in available + assert "concurrent" in available + assert available["noop"] is True + assert available["concurrent"] is True + + # Optional backends should be listed (may or may not be loadable) + assert "dask" in available + assert "radical_pilot" in available + + # Removed backends should not be present + assert "dragon" not in available + assert "flux" not in available + + @pytest.mark.asyncio + async def test_helpful_error_for_unavailable_backend(self): + """Test that attempting to use unavailable backend provides helpful guidance.""" + with pytest.raises(ValueError) as exc_info: + await asyncflow.factory.create_backend("nonexistent_backend") + + error_msg = str(exc_info.value) + + # Should provide comprehensive error information + assert "Backend 'nonexistent_backend' is not available" in error_msg + assert "Available backends:" in error_msg + assert "Available backends:" in error_msg + + # Should list working backends + assert "concurrent" in error_msg + assert "noop" in error_msg + + def test_backend_specs_accessibility(self): + """Test that backend specifications are properly configured.""" + specs = asyncflow.registry._backend_specs + + # Verify core backend specs + assert "noop" in specs + assert "concurrent" in specs + assert "radical.asyncflow.backends.execution.noop" in specs["noop"] + assert "radical.asyncflow.backends.execution.concurrent" in specs["concurrent"] + + # Verify rhapsody backend specs point to correct modules + assert "dask" in specs + assert "radical_pilot" in specs + assert "rhapsody.backends.execution.dask_parallel" in specs["dask"] + assert "rhapsody.backends.execution.radical_pilot" in specs["radical_pilot"] + + @pytest.mark.asyncio + 
async def test_factory_backend_lifecycle(self): + """Test complete backend lifecycle through factory.""" + # Create backend + backend = await asyncflow.factory.create_backend("concurrent") + + # Verify backend is properly initialized + assert hasattr(backend, "submit_tasks") + assert hasattr(backend, "shutdown") + assert hasattr(backend, "register_callback") + + # Backend should be in proper state + state = backend.state() + valid_states = ["CONNECTED", "READY", "INITIALIZED", "DISCONNECTED", "RUNNING"] + assert state in valid_states + + # Clean shutdown + await backend.shutdown() + + def test_optional_dependency_installation_hints(self): + """Test that installation hints match pyproject.toml configuration.""" + from radical.asyncflow.backends.factory import BackendFactory + + # Test specific backend installation hints + dask_hint = BackendFactory._suggest_installation("dask") + assert "radical.asyncflow[dask]" in dask_hint + + radicalpilot_hint = BackendFactory._suggest_installation("radical_pilot") + assert "radical.asyncflow[radicalpilot]" in radicalpilot_hint + + # Test generic HPC hint + unknown_hint = BackendFactory._suggest_installation("unknown_hpc_backend") + assert "radical.asyncflow[hpc]" in unknown_hint + + @pytest.mark.asyncio + async def test_multiple_backend_instances(self): + """Test that plugin system can create multiple backend instances.""" + # Create multiple instances of same backend type + backend1 = await asyncflow.factory.create_backend("noop") + backend2 = await asyncflow.factory.create_backend("noop") + + # Should be different instances + assert backend1 is not backend2 + assert id(backend1) != id(backend2) + + # Both should be functional + assert hasattr(backend1, "submit_tasks") + assert hasattr(backend2, "submit_tasks") + + # Clean shutdown both + await backend1.shutdown() + await backend2.shutdown() + + def test_registry_caching_behavior(self): + """Test that registry properly caches backend class loading.""" + registry = asyncflow.registry + + # First access - should load and cache + noop_class1 = registry.get_backend("noop") + assert noop_class1 is not None + + # Second access - should use cache + noop_class2 = registry.get_backend("noop") + assert noop_class2 is not None + assert noop_class1 is noop_class2 # Should be same cached class + + # Verify cache state + assert "noop" in registry._backends + assert registry._backends["noop"] is noop_class1 + + @pytest.mark.asyncio + async def test_backend_configuration_passing(self): + """Test that backend configuration is properly passed through factory.""" + # Test with config dict + config = {"max_workers": 3, "executor_type": "thread"} + backend = await asyncflow.factory.create_backend("concurrent", config=config) + + # Backend should be created successfully with config + assert backend is not None + await backend.shutdown() + + # Test with kwargs + backend2 = await asyncflow.factory.create_backend( + "concurrent", max_workers=2, executor_type="process" + ) + + assert backend2 is not None + await backend2.shutdown() diff --git a/tests/integration/test_workflow_failures.py b/tests/integration/test_workflow_failures.py index d573094..6c26eb0 100644 --- a/tests/integration/test_workflow_failures.py +++ b/tests/integration/test_workflow_failures.py @@ -8,8 +8,7 @@ @pytest.mark.asyncio async def test_task_failure_handling(): - """ - Test the workflow engine's ability to handle task failures. + """Test the workflow engine's ability to handle task failures. 
This test verifies that: - A successful task returns the expected result. @@ -53,9 +52,9 @@ async def dependent_task(prev_result): @pytest.mark.asyncio async def test_awaiting_failed_task_propagates_exception(): - """ - Test that awaiting a failed task in a workflow propagates - the exception to dependent tasks. + """Test that awaiting a failed task in a workflow propagates the exception to + dependent tasks. + This test verifies that when a task (`task1`) raises an exception, any subsequent task (`task2`) that awaits its result does not execute, and the exception is properly @@ -86,11 +85,11 @@ async def task2(x): @pytest.mark.asyncio async def test_independent_workflow_failures_do_not_affect_others(): - """ - Test that failure in one async workflow does not impact other - concurrently running workflows. Workflow 0 is designed to fail - at task1. Other workflows should complete successfully regardless - of the failure in workflow 0. + """Test that failure in one async workflow does not impact other concurrently + running workflows. + + Workflow 0 is designed to fail at task1. Other workflows should complete + successfully regardless of the failure in workflow 0. """ backend = await ConcurrentExecutionBackend(ThreadPoolExecutor()) diff --git a/tests/integration/test_workflow_runs_to_completion.py b/tests/integration/test_workflow_runs_to_completion.py index d5f45f2..01e8836 100644 --- a/tests/integration/test_workflow_runs_to_completion.py +++ b/tests/integration/test_workflow_runs_to_completion.py @@ -9,9 +9,10 @@ @pytest.mark.asyncio async def test_flow_function_tasks(): - """ - Integration test using `function_task`. Each task updates a shared workflow state, - which is verified at the end of execution. + """Integration test using `function_task`. + + Each task updates a shared workflow state, which is verified at the end of + execution. """ backend = await ConcurrentExecutionBackend(ThreadPoolExecutor()) flow = await WorkflowEngine.create(backend=backend) @@ -46,9 +47,7 @@ async def task5(state): return state async def run_wf(wf_id): - """ - Runs a chain of function tasks where each builds upon shared state. - """ + """Runs a chain of function tasks where each builds upon shared state.""" print(f"\n[WF {wf_id}] Start: {time.time():.2f}") s1 = await task1() s2 = await task2(s1) @@ -75,10 +74,10 @@ async def run_wf(wf_id): @pytest.mark.asyncio async def test_flow_executable_tasks(tmp_path): - """ - Integration test using `executable_task`. Each task appends - to a workflow-local file. - Final task output is used to validate execution order. + """Integration test using `executable_task`. + + Each task appends to a workflow-local file. Final task output is used to validate + execution order. """ backend = await ConcurrentExecutionBackend(ThreadPoolExecutor()) flow = await WorkflowEngine.create(backend=backend) @@ -105,9 +104,7 @@ async def task5(wf_file, t4): return f'echo "task5" >> {wf_file}' async def run_wf(wf_id): - """ - Runs executable tasks that log their execution to a local file. - """ + """Runs executable tasks that log their execution to a local file.""" wf_file = tmp_path / f"workflow_{wf_id}.log" wf_file_path = str(wf_file) @@ -142,8 +139,8 @@ async def run_wf(wf_id): @pytest.mark.asyncio async def test_flow_mixed_function_and_executable_tasks(tmp_path): - """ - Integration test mixing `function_task` and `executable_task`. + """Integration test mixing `function_task` and `executable_task`. + Function tasks modify state, while executable tasks log their invocation. 
""" backend = await ConcurrentExecutionBackend(ThreadPoolExecutor()) @@ -180,9 +177,7 @@ async def e_task3(wf_file, t2): return f'echo "e_task3" >> {wf_file}' async def run_wf(wf_id): - """ - Mixed function and executable tasks within one workflow. - """ + """Mixed function and executable tasks within one workflow.""" wf_file = tmp_path / f"wf_{wf_id}.log" wf_file_path = str(wf_file) diff --git a/tests/unit/test_backend_factory.py b/tests/unit/test_backend_factory.py new file mode 100644 index 0000000..9375276 --- /dev/null +++ b/tests/unit/test_backend_factory.py @@ -0,0 +1,446 @@ +"""Unit tests for BackendFactory.""" + +import concurrent.futures +import logging +from unittest.mock import Mock, patch + +import pytest +import pytest_asyncio + +from radical.asyncflow.backends.execution.base import BaseExecutionBackend +from radical.asyncflow.backends.factory import BackendFactory +from radical.asyncflow.backends.registry import registry + + +# Mock backend classes for testing +class MockAsyncBackend(BaseExecutionBackend): + """Mock backend with async initialization.""" + + def __init__(self, config=None, **kwargs): + self.config = config or {} + self.kwargs = kwargs + self.initialized = False + + async def __aenter__(self): + self.initialized = True + return self + + async def submit_tasks(self, tasks): + pass + + async def shutdown(self): + pass + + def state(self): + return "RUNNING" + + def task_state_cb(self, task, state): + pass + + def register_callback(self, func): + pass + + def get_task_states_map(self): + pass + + def build_task(self, uid, task_desc, task_specific_kwargs): + pass + + def link_implicit_data_deps(self, src_task, dst_task): + pass + + def link_explicit_data_deps( + self, src_task=None, dst_task=None, file_name=None, file_path=None + ): + pass + + async def cancel_task(self, uid: str) -> bool: + return True + + +class MockAwaitableBackend(BaseExecutionBackend): + """Mock backend that is awaitable for initialization.""" + + def __init__(self, config=None, **kwargs): + self.config = config or {} + self.kwargs = kwargs + self.initialized = False + + def __await__(self): + async def _init(): + self.initialized = True + return self + + return _init().__await__() + + async def submit_tasks(self, tasks): + pass + + async def shutdown(self): + pass + + def state(self): + return "RUNNING" + + def task_state_cb(self, task, state): + pass + + def register_callback(self, func): + pass + + def get_task_states_map(self): + pass + + def build_task(self, uid, task_desc, task_specific_kwargs): + pass + + def link_implicit_data_deps(self, src_task, dst_task): + pass + + def link_explicit_data_deps( + self, src_task=None, dst_task=None, file_name=None, file_path=None + ): + pass + + async def cancel_task(self, uid: str) -> bool: + return True + + +class MockInitializeBackend(BaseExecutionBackend): + """Mock backend with explicit initialize method.""" + + def __init__(self, config=None, **kwargs): + self.config = config or {} + self.kwargs = kwargs + self.initialized = False + + async def initialize(self): + self.initialized = True + + async def submit_tasks(self, tasks): + pass + + async def shutdown(self): + pass + + def state(self): + return "RUNNING" + + def task_state_cb(self, task, state): + pass + + def register_callback(self, func): + pass + + def get_task_states_map(self): + pass + + def build_task(self, uid, task_desc, task_specific_kwargs): + pass + + def link_implicit_data_deps(self, src_task, dst_task): + pass + + def link_explicit_data_deps( + self, src_task=None, 
dst_task=None, file_name=None, file_path=None + ): + pass + + async def cancel_task(self, uid: str) -> bool: + return True + + +class TestBackendFactory: + """Test suite for BackendFactory functionality.""" + + @pytest_asyncio.fixture + def factory(self): + """Create a BackendFactory instance for testing.""" + return BackendFactory() + + def test_suggest_installation_known_backend(self): + """Test installation suggestions for known backends.""" + suggestions = { + "dask": "pip install 'radical.asyncflow[dask]'", + "radical_pilot": "pip install 'radical.asyncflow[radicalpilot]'", + } + + for backend_type, expected in suggestions.items(): + result = BackendFactory._suggest_installation(backend_type) + assert expected in result + + def test_suggest_installation_unknown_backend(self): + """Test installation suggestion for unknown backend.""" + result = BackendFactory._suggest_installation("unknown") + assert "pip install 'radical.asyncflow[hpc]'" in result + + @pytest.mark.asyncio + async def test_create_noop_backend_success(self, factory): + """Test successful creation of noop backend.""" + with patch.object(registry, "get_backend") as mock_get_backend: + # Mock successful backend retrieval + from radical.asyncflow.backends.execution.noop import NoopExecutionBackend + + mock_get_backend.return_value = NoopExecutionBackend + + # Act + backend = await factory.create_backend("noop") + + # Assert + assert isinstance(backend, NoopExecutionBackend) + mock_get_backend.assert_called_once_with("noop") + + @pytest.mark.asyncio + async def test_create_concurrent_backend_success(self, factory): + """Test successful creation of concurrent backend with default config.""" + with patch.object(registry, "get_backend") as mock_get_backend: + # Mock concurrent backend class that accepts executor + mock_backend_class = Mock() + mock_backend_instance = Mock() + mock_backend_class.return_value = mock_backend_instance + mock_get_backend.return_value = mock_backend_class + + # Act + backend = await factory.create_backend("concurrent") + + # Assert + assert backend is mock_backend_instance + mock_get_backend.assert_called_once_with("concurrent") + # Verify that backend was created with an executor + args = mock_backend_class.call_args[0] + assert len(args) == 1 + assert isinstance(args[0], concurrent.futures.ThreadPoolExecutor) + + @pytest.mark.asyncio + async def test_create_concurrent_backend_with_process_executor(self, factory): + """Test creation of concurrent backend with process executor.""" + with patch.object(registry, "get_backend") as mock_get_backend: + mock_backend_class = Mock() + mock_backend_instance = Mock() + mock_backend_class.return_value = mock_backend_instance + mock_get_backend.return_value = mock_backend_class + + # Act + config = {"executor_type": "process", "max_workers": 2} + await factory.create_backend("concurrent", config=config) + + # Assert + args = mock_backend_class.call_args[0] + assert isinstance(args[0], concurrent.futures.ProcessPoolExecutor) + + @pytest.mark.asyncio + async def test_create_backend_not_available(self, factory): + """Test creation of unavailable backend raises ValueError.""" + with ( + patch.object(registry, "get_backend") as mock_get_backend, + patch.object(registry, "list_available") as mock_list_available, + patch.object(registry, "get_failure_reason") as mock_get_failure_reason, + ): + mock_get_backend.return_value = None + mock_list_available.return_value = { + "noop": True, + "concurrent": True, + "unavailable": False, + } + 
mock_get_failure_reason.return_value = "Import error" + + # Act & Assert + with pytest.raises( + ValueError, match="Backend 'unavailable' is not available" + ): + await factory.create_backend("unavailable") + + @pytest.mark.asyncio + async def test_create_backend_initialization_failure(self, factory): + """Test handling of backend initialization failure.""" + with patch.object(registry, "get_backend") as mock_get_backend: + mock_backend_class = Mock(side_effect=RuntimeError("Init failed")) + mock_get_backend.return_value = mock_backend_class + + # Act & Assert + with pytest.raises( + RuntimeError, match="Failed to initialize backend 'test'" + ): + await factory.create_backend("test") + + @pytest.mark.asyncio + async def test_create_backend_async_context_manager(self, factory): + """Test creation of backend that is an async context manager.""" + with patch.object(registry, "get_backend") as mock_get_backend: + mock_get_backend.return_value = MockAsyncBackend + + # Act + backend = await factory.create_backend("test_async") + + # Assert + assert isinstance(backend, MockAsyncBackend) + assert backend.initialized is True + + @pytest.mark.asyncio + async def test_create_backend_awaitable(self, factory): + """Test creation of backend that is awaitable.""" + with patch.object(registry, "get_backend") as mock_get_backend: + mock_get_backend.return_value = MockAwaitableBackend + + # Act + backend = await factory.create_backend("test_awaitable") + + # Assert + assert isinstance(backend, MockAwaitableBackend) + assert backend.initialized is True + + @pytest.mark.asyncio + async def test_create_backend_with_initialize_method(self, factory): + """Test creation of backend with explicit initialize method.""" + with patch.object(registry, "get_backend") as mock_get_backend: + mock_get_backend.return_value = MockInitializeBackend + + # Act + backend = await factory.create_backend("test_initialize") + + # Assert + assert isinstance(backend, MockInitializeBackend) + assert backend.initialized is True + + @pytest.mark.asyncio + async def test_create_backend_with_config_and_kwargs(self, factory): + """Test creation of backend with config and keyword arguments.""" + with patch.object(registry, "get_backend") as mock_get_backend: + mock_backend_class = Mock() + mock_backend_instance = Mock() + # Make sure the mock doesn't have initialize method to avoid the await issue + del mock_backend_instance.initialize + mock_backend_class.return_value = mock_backend_instance + mock_get_backend.return_value = mock_backend_class + + # Act + config = {"setting1": "value1"} + await factory.create_backend( + "test_config", config=config, extra_param="extra_value" + ) + + # Assert + mock_backend_class.assert_called_once_with( + config, extra_param="extra_value" + ) + + def test_list_available_backends(self, factory): + """Test listing available backends with detailed information.""" + mock_specs = {"available": "test:Available", "unavailable": "test:Unavailable"} + with ( + patch.object(registry, "get_backend") as mock_get_backend, + patch.object(registry, "get_failure_reason") as mock_get_failure_reason, + patch.object(registry, "_backend_specs", mock_specs), + ): + mock_backend_class = Mock() + mock_backend_class.__name__ = "MockBackend" + + mock_get_backend.side_effect = lambda name: ( + mock_backend_class if name == "available" else None + ) + mock_get_failure_reason.side_effect = lambda name: ( + None if name == "available" else "Import failed" + ) + + # Act + info = factory.list_available_backends() + + # Assert + assert 
"available" in info + assert "unavailable" in info + + available_info = info["available"] + assert available_info["available"] is True + assert available_info["class"] == "MockBackend" + assert available_info["failure_reason"] is None + assert available_info["installation_hint"] is None + + unavailable_info = info["unavailable"] + assert unavailable_info["available"] is False + assert unavailable_info["class"] is None + assert unavailable_info["failure_reason"] == "Import failed" + assert unavailable_info["installation_hint"] is not None + + def test_create_backend_sync_success(self, factory): + """Test synchronous backend creation.""" + with patch.object(registry, "get_backend") as mock_get_backend: + mock_backend_class = Mock() + mock_backend_instance = Mock() + mock_backend_class.return_value = mock_backend_instance + mock_get_backend.return_value = mock_backend_class + + # Act + config = {"test": "value"} + backend = factory.create_backend_sync("test", config=config) + + # Assert + assert backend is mock_backend_instance + mock_backend_class.assert_called_once_with(config) + + def test_create_backend_sync_not_available(self, factory): + """Test synchronous creation of unavailable backend.""" + with ( + patch.object(registry, "get_backend") as mock_get_backend, + patch.object(registry, "list_available") as mock_list_available, + ): + mock_get_backend.return_value = None + mock_list_available.return_value = {"noop": True} + + # Act & Assert + with pytest.raises( + ValueError, match="Backend 'unavailable' is not available" + ): + factory.create_backend_sync("unavailable") + + def test_create_backend_sync_initialization_failure(self, factory): + """Test handling of sync backend creation failure.""" + with patch.object(registry, "get_backend") as mock_get_backend: + mock_backend_class = Mock(side_effect=RuntimeError("Creation failed")) + mock_get_backend.return_value = mock_backend_class + + # Act & Assert + with pytest.raises(RuntimeError, match="Failed to create backend 'test'"): + factory.create_backend_sync("test") + + def test_logging_during_operations(self, factory, caplog): + """Test that appropriate log messages are generated.""" + with patch.object(registry, "get_backend") as mock_get_backend: + mock_backend_class = Mock() + mock_backend_instance = Mock() + mock_backend_class.return_value = mock_backend_instance + mock_get_backend.return_value = mock_backend_class + + with caplog.at_level(logging.DEBUG): + # Test sync creation + factory.create_backend_sync("test_logging") + + # Check that debug messages were logged + debug_messages = [ + record.message + for record in caplog.records + if record.levelno == logging.DEBUG + ] + assert any( + "Creating backend 'test_logging'" in msg for msg in debug_messages + ) + + with caplog.at_level(logging.INFO): + # Test sync creation + factory.create_backend_sync("test_logging_info") + + # Check that info messages were logged + info_messages = [ + record.message + for record in caplog.records + if record.levelno == logging.INFO + ] + assert any( + "Successfully created 'test_logging_info' backend" in msg + for msg in info_messages + ) + + def test_global_factory_instance(self): + """Test that the global factory instance is available.""" + from radical.asyncflow.backends.factory import factory + + assert isinstance(factory, BackendFactory) diff --git a/tests/unit/test_backend_registry.py b/tests/unit/test_backend_registry.py new file mode 100644 index 0000000..4f6c4dc --- /dev/null +++ b/tests/unit/test_backend_registry.py @@ -0,0 +1,342 @@ 
+"""Unit tests for BackendRegistry.""" + +import logging +from unittest.mock import Mock, patch + +import pytest + +from radical.asyncflow.backends.execution.base import BaseExecutionBackend +from radical.asyncflow.backends.registry import BackendRegistry + + +# Mock backend classes for testing +class MockValidBackend(BaseExecutionBackend): + """Valid mock backend for testing.""" + + async def submit_tasks(self, tasks): + pass + + async def shutdown(self): + pass + + def state(self): + return "IDLE" + + def task_state_cb(self, task, state): + pass + + def register_callback(self, func): + pass + + def get_task_states_map(self): + pass + + def build_task(self, uid, task_desc, task_specific_kwargs): + pass + + def link_implicit_data_deps(self, src_task, dst_task): + pass + + def link_explicit_data_deps( + self, src_task=None, dst_task=None, file_name=None, file_path=None + ): + pass + + async def cancel_task(self, uid: str) -> bool: + return True + + +class MockInvalidBackend: + """Invalid mock backend that doesn't inherit from BaseExecutionBackend.""" + + def some_method(self): + pass + + +class TestBackendRegistry: + """Test suite for BackendRegistry functionality.""" + + def test_registry_initialization(self): + """Test that registry initializes with correct backend specifications.""" + registry = BackendRegistry() + + # Check that registry has expected internal state + assert hasattr(registry, "_backends") + assert hasattr(registry, "_failed_backends") + assert hasattr(registry, "_backend_specs") + + # Check that expected backend specifications are present + expected_backends = [ + "noop", + "concurrent", + "dask", + "radical_pilot", + ] + for backend_name in expected_backends: + assert backend_name in registry._backend_specs + + @patch("importlib.import_module") + def test_successful_backend_loading(self, mock_import_module): + """Test successful loading of a valid backend.""" + # Arrange + registry = BackendRegistry() + mock_module = Mock() + mock_module.MockBackend = MockValidBackend + mock_import_module.return_value = mock_module + + # Add a test backend spec + registry.add_backend_spec("test_valid", "test.module:MockBackend") + + # Act + backend_class = registry.get_backend("test_valid") + + # Assert + assert backend_class is MockValidBackend + assert "test_valid" in registry._backends + assert "test_valid" not in registry._failed_backends + mock_import_module.assert_called_once_with("test.module") + + @patch("importlib.import_module") + def test_backend_loading_import_error(self, mock_import_module): + """Test handling of import errors during backend loading.""" + # Arrange + registry = BackendRegistry() + mock_import_module.side_effect = ImportError("Module not found") + + registry.add_backend_spec("test_missing", "missing.module:MissingBackend") + + # Act + backend_class = registry.get_backend("test_missing") + + # Assert + assert backend_class is None + assert "test_missing" in registry._failed_backends + assert "Import error" in registry._failed_backends["test_missing"] + assert "test_missing" not in registry._backends + + @patch("importlib.import_module") + def test_backend_loading_attribute_error(self, mock_import_module): + """Test handling of missing class in module.""" + # Arrange + registry = BackendRegistry() + mock_module = Mock() + # Remove the expected class from the module + del mock_module.MissingClass + mock_import_module.return_value = mock_module + + registry.add_backend_spec("test_missing_class", "test.module:MissingClass") + + # Act + backend_class = 
registry.get_backend("test_missing_class") + + # Assert + assert backend_class is None + assert "test_missing_class" in registry._failed_backends + assert "Class not found" in registry._failed_backends["test_missing_class"] + + @patch("importlib.import_module") + def test_invalid_backend_class_validation(self, mock_import_module): + """Test validation that backend class inherits from BaseExecutionBackend.""" + # Arrange + registry = BackendRegistry() + mock_module = Mock() + mock_module.InvalidBackend = MockInvalidBackend + mock_import_module.return_value = mock_module + + registry.add_backend_spec("test_invalid", "test.module:InvalidBackend") + + # Act + backend_class = registry.get_backend("test_invalid") + + # Assert + assert backend_class is None + assert "test_invalid" in registry._failed_backends + assert "missing required methods" in registry._failed_backends["test_invalid"] + + def test_cached_backend_retrieval(self): + """Test that successfully loaded backends are cached.""" + registry = BackendRegistry() + + # Add backend directly to cache + registry._backends["test_cached"] = MockValidBackend + + # Act + backend_class = registry.get_backend("test_cached") + + # Assert + assert backend_class is MockValidBackend + + def test_cached_failure_retrieval(self): + """Test that failed backends are not retried.""" + registry = BackendRegistry() + + # Add failure to cache + registry._failed_backends["test_failed"] = "Previously failed" + + # Act + backend_class = registry.get_backend("test_failed") + + # Assert + assert backend_class is None + + def test_unknown_backend_request(self): + """Test request for backend that doesn't exist in specifications.""" + registry = BackendRegistry() + + # Act + backend_class = registry.get_backend("nonexistent_backend") + + # Assert + assert backend_class is None + + def test_list_available_backends(self): + """Test listing of all available backends.""" + registry = BackendRegistry() + + # Add some test results + registry._backends["available_backend"] = MockValidBackend + registry._failed_backends["failed_backend"] = "Import error" + registry._backend_specs["available_backend"] = "test:Backend" + registry._backend_specs["failed_backend"] = "test:Backend" + + # Act + available = registry.list_available() + + # Assert + assert isinstance(available, dict) + # Note: The test will trigger actual backend loading for specs, + # so we mainly test the structure + assert "noop" in available # This should be available + assert "concurrent" in available # This should be available + + def test_list_loaded_backends(self): + """Test listing of currently loaded backends.""" + registry = BackendRegistry() + + # Add loaded backends + registry._backends["loaded1"] = MockValidBackend + registry._backends["loaded2"] = MockValidBackend + + # Act + loaded = registry.list_loaded() + + # Assert + assert loaded == {"loaded1": MockValidBackend, "loaded2": MockValidBackend} + # Ensure it returns a copy, not the original dict + loaded["new_item"] = "test" + assert "new_item" not in registry._backends + + def test_get_failure_reason(self): + """Test retrieving failure reasons for backends.""" + registry = BackendRegistry() + + # Add failure reason + test_reason = "Import error: Module not found" + registry._failed_backends["failed_backend"] = test_reason + + # Act & Assert + assert registry.get_failure_reason("failed_backend") == test_reason + assert registry.get_failure_reason("nonexistent_backend") is None + + def test_register_backend_direct(self): + """Test direct registration 
of a backend class.""" + registry = BackendRegistry() + + # Act + registry.register_backend("custom_backend", MockValidBackend) + + # Assert + assert registry._backends["custom_backend"] is MockValidBackend + assert "custom_backend" not in registry._failed_backends + + def test_register_backend_invalid_class(self): + """Test direct registration with invalid backend class.""" + registry = BackendRegistry() + + # Act & Assert + with pytest.raises(TypeError, match="must be a BaseExecutionBackend subclass"): + registry.register_backend("invalid_backend", MockInvalidBackend) + + def test_register_backend_clears_failure(self): + """Test that direct registration clears any previous failure.""" + registry = BackendRegistry() + + # Add failure first + registry._failed_backends["test_backend"] = "Previous failure" + + # Act + registry.register_backend("test_backend", MockValidBackend) + + # Assert + assert "test_backend" not in registry._failed_backends + assert registry._backends["test_backend"] is MockValidBackend + + def test_add_backend_spec(self): + """Test adding new backend specification.""" + registry = BackendRegistry() + + # Act + registry.add_backend_spec("new_backend", "new.module:NewBackend") + + # Assert + assert registry._backend_specs["new_backend"] == "new.module:NewBackend" + + def test_add_backend_spec_clears_cache(self): + """Test that adding spec clears cached results.""" + registry = BackendRegistry() + + # Add cached results + registry._backends["test_spec"] = MockValidBackend + registry._failed_backends["test_spec"] = "Old failure" + + # Act + registry.add_backend_spec("test_spec", "updated.module:UpdatedBackend") + + # Assert + assert "test_spec" not in registry._backends + assert "test_spec" not in registry._failed_backends + + @patch("importlib.import_module") + def test_unexpected_error_during_loading(self, mock_import_module): + """Test handling of unexpected errors during backend loading.""" + # Arrange + registry = BackendRegistry() + mock_import_module.side_effect = RuntimeError("Unexpected error") + + registry.add_backend_spec("test_error", "test.module:ErrorBackend") + + # Act + backend_class = registry.get_backend("test_error") + + # Assert + assert backend_class is None + assert "test_error" in registry._failed_backends + assert "Unexpected error" in registry._failed_backends["test_error"] + + def test_logging_during_operations(self, caplog): + """Test that appropriate log messages are generated.""" + registry = BackendRegistry() + + with caplog.at_level(logging.DEBUG): + # Test successful registration + registry.register_backend("log_test", MockValidBackend) + + # Test spec addition + registry.add_backend_spec("log_spec", "test:Backend") + + # Check that debug messages were logged + assert any( + "Registered backend 'log_test'" in record.message + for record in caplog.records + ) + assert any( + "Added backend spec 'log_spec'" in record.message + for record in caplog.records + ) + + def test_global_registry_instance(self): + """Test that the global registry instance is available.""" + from radical.asyncflow.backends.registry import registry + + assert isinstance(registry, BackendRegistry) + assert hasattr(registry, "_backend_specs") diff --git a/tests/unit/test_installation_mechanism.py b/tests/unit/test_installation_mechanism.py new file mode 100644 index 0000000..66358c0 --- /dev/null +++ b/tests/unit/test_installation_mechanism.py @@ -0,0 +1,225 @@ +"""Tests for the optional dependency installation mechanism. 
+
+Tests that verify the pyproject.toml configuration and expected behavior when users
+install different optional dependency groups.
+"""
+
+import sys
+from pathlib import Path
+
+import pytest
+
+
+class TestInstallationMechanism:
+    """Test the optional dependency installation mechanism."""
+
+    @pytest.fixture
+    def pyproject_path(self):
+        """Get path to pyproject.toml file."""
+        repo_root = Path(__file__).parent.parent.parent
+        return repo_root / "pyproject.toml"
+
+    def test_base_dependencies_minimal(self, pyproject_path):
+        """Test that base AsyncFlow has minimal dependencies."""
+        try:
+            import tomllib
+        except ImportError:
+            # Python < 3.11 fallback
+            import tomli as tomllib
+
+        with open(pyproject_path, "rb") as f:
+            config = tomllib.load(f)
+
+        base_deps = config["project"]["dependencies"]
+
+        # Should have minimal, essential dependencies only
+        expected_base = {"pydantic", "typeguard", "requests"}
+        actual_base = {dep.split()[0] for dep in base_deps}
+
+        assert actual_base == expected_base, (
+            f"Base dependencies should be minimal. "
+            f"Expected: {expected_base}, Got: {actual_base}"
+        )
+
+        # Should NOT have rhapsody as base dependency
+        assert not any("rhapsody" in dep for dep in base_deps)
+
+    def test_optional_dependencies_structure(self, pyproject_path):
+        """Test that optional dependencies are properly structured."""
+        try:
+            import tomllib
+        except ImportError:
+            # Python < 3.11 fallback
+            import tomli as tomllib
+
+        with open(pyproject_path, "rb") as f:
+            config = tomllib.load(f)
+
+        optional_deps = config["project"]["optional-dependencies"]
+
+        # Should have HPC optional dependency groups
+        expected_groups = {"hpc", "dask", "radicalpilot"}
+        actual_groups = set(optional_deps.keys()) - {"lint", "dev"}  # Exclude dev deps
+
+        assert expected_groups.issubset(actual_groups), (
+            f"Missing optional dependency groups. "
+            f"Expected: {expected_groups}, Got: {actual_groups}"
+        )
+
+    def test_hpc_dependencies_include_rhapsody(self, pyproject_path):
+        """Test that all HPC optional deps include rhapsody."""
+        try:
+            import tomllib
+        except ImportError:
+            # Python < 3.11 fallback
+            import tomli as tomllib
+
+        with open(pyproject_path, "rb") as f:
+            config = tomllib.load(f)
+
+        optional_deps = config["project"]["optional-dependencies"]
+
+        # All HPC groups should include rhapsody
+        hpc_groups = ["hpc", "dask", "radicalpilot"]
+        for group_name in hpc_groups:
+            if group_name in optional_deps:
+                deps = optional_deps[group_name]
+                rhapsody_present = any("rhapsody" in dep for dep in deps)
+                assert rhapsody_present, (
+                    f"Optional dependency group '{group_name}' should include rhapsody"
+                )
+
+    def test_dask_group_includes_dask_distributed(self, pyproject_path):
+        """Test that dask optional dependency includes dask[distributed]."""
+        try:
+            import tomllib
+        except ImportError:
+            # Python < 3.11 fallback
+            import tomli as tomllib
+
+        with open(pyproject_path, "rb") as f:
+            config = tomllib.load(f)
+
+        optional_deps = config["project"]["optional-dependencies"]
+
+        if "dask" in optional_deps:
+            deps = optional_deps["dask"]
+            dask_distributed_present = any("dask[distributed]" in dep for dep in deps)
+            assert dask_distributed_present, (
+                "dask optional dependency should include 'dask[distributed]'"
+            )
+
+    def test_radicalpilot_group_includes_radical_pilot(self, pyproject_path):
+        """Test that radicalpilot optional dependency includes radical.pilot."""
+        try:
+            import tomllib
+        except ImportError:
+            # Python < 3.11 fallback
+            import tomli as tomllib
+
+        with open(pyproject_path, "rb") as f:
+            config = tomllib.load(f)
+
+        optional_deps = config["project"]["optional-dependencies"]
+
+        if "radicalpilot" in optional_deps:
+            deps = optional_deps["radicalpilot"]
+            radical_pilot_present = any("radical.pilot" in dep for dep in deps)
+            assert radical_pilot_present, (
+                "radicalpilot optional dependency should include 'radical.pilot'"
+            )
+
+    def test_rhapsody_uses_dev_branch(self, pyproject_path):
+        """Test that rhapsody dependencies point to dev branch."""
+        try:
+            import tomllib
+        except ImportError:
+            # Python < 3.11 fallback
+            import tomli as tomllib
+
+        with open(pyproject_path, "rb") as f:
+            config = tomllib.load(f)
+
+        optional_deps = config["project"]["optional-dependencies"]
+
+        # All rhapsody references should use dev branch
+        for group_name, deps in optional_deps.items():
+            if group_name in {"hpc", "dask", "radicalpilot"}:
+                for dep in deps:
+                    if "rhapsody" in dep:
+                        assert "@dev" in dep, (
+                            f"rhapsody dependency in '{group_name}' should use "
+                            f"dev branch: {dep}"
+                        )
+
+    def test_no_removed_backends_in_optional_deps(self, pyproject_path):
+        """Test that removed backends (dragon, flux) are not in optional deps."""
+        try:
+            import tomllib
+        except ImportError:
+            # Python < 3.11 fallback
+            import tomli as tomllib
+
+        with open(pyproject_path, "rb") as f:
+            config = tomllib.load(f)
+
+        optional_deps = config["project"]["optional-dependencies"]
+
+        # Should not have removed backends
+        removed_backends = {"dragon", "flux"}
+        for backend_name in removed_backends:
+            assert backend_name not in optional_deps, (
+                f"Removed backend '{backend_name}' should not be in "
+                f"optional dependencies"
+            )
+
+    @pytest.mark.skipif(
+        sys.version_info < (3, 9),
+        reason="Python 3.9+ required for modern importlib features",
+    )
+    def test_importlib_availability_in_runtime(self):
+        """Test that importlib features work as expected in current Python."""
+        import importlib
+
+        # Test that importlib can handle missing modules gracefully
+        try:
+            importlib.import_module("non_existent_module_for_testing")
+        except ImportError as e:
+            assert "non_existent_module_for_testing" in str(e)
+
+        # Test that we can check for module existence
+        try:
+            import importlib.util
+
+            spec = importlib.util.find_spec("radical.asyncflow")
+            assert spec is not None  # Should exist since we're testing it
+        except (AttributeError, ImportError):
+            # Some Python versions might not have util.find_spec
+            pass
+
+    def test_backend_factory_installation_hints_consistency(self):
+        """Test that factory installation hints match pyproject.toml."""
+        from radical.asyncflow.backends.factory import BackendFactory
+
+        try:
+            import tomllib
+        except ImportError:
+            # Python < 3.11 fallback
+            import tomli as tomllib
+
+        repo_root = Path(__file__).parent.parent.parent
+        pyproject_path = repo_root / "pyproject.toml"
+
+        with open(pyproject_path, "rb") as f:
+            config = tomllib.load(f)
+
+        optional_deps = config["project"]["optional-dependencies"]
+
+        # Test that factory hints match pyproject.toml groups
+        if "dask" in optional_deps:
+            dask_hint = BackendFactory._suggest_installation("dask")
+            assert "radical.asyncflow[dask]" in dask_hint
+
+        if "radicalpilot" in optional_deps:
+            radicalpilot_hint = BackendFactory._suggest_installation("radical_pilot")
+            assert "radical.asyncflow[radicalpilot]" in radicalpilot_hint
diff --git a/tests/unit/test_plugin_architecture.py b/tests/unit/test_plugin_architecture.py
new file mode 100644
index 0000000..a74d727
--- /dev/null
+++ b/tests/unit/test_plugin_architecture.py
@@ -0,0 +1,172 @@
+"""Tests for the complete plugin architecture integration.
+
+Tests the full flow: factory -> registry -> backend loading, including
+optional dependency behavior and error handling.
+""" + +from unittest.mock import Mock, patch + +import pytest + +from radical.asyncflow.backends.factory import BackendFactory +from radical.asyncflow.backends.registry import BackendRegistry + + +class TestPluginArchitecture: + """Test the complete plugin architecture integration.""" + + def test_factory_registry_integration(self): + """Test that factory properly uses registry for backend discovery.""" + # Create fresh instances to avoid cached state + registry = BackendRegistry() + + # Registry should have core backends available + core_backends = ["noop", "concurrent"] + available = registry.list_available() + + for backend_name in core_backends: + assert backend_name in available + assert available[backend_name] is True # Should be loadable + + @pytest.mark.asyncio + async def test_end_to_end_core_backend_creation(self): + """Test complete flow for core backend creation.""" + # Test noop backend + noop_backend = await BackendFactory.create_backend("noop") + assert noop_backend is not None + assert hasattr(noop_backend, "submit_tasks") + assert hasattr(noop_backend, "shutdown") + + # Test concurrent backend + concurrent_backend = await BackendFactory.create_backend("concurrent") + assert concurrent_backend is not None + assert hasattr(concurrent_backend, "submit_tasks") + assert hasattr(concurrent_backend, "shutdown") + + @pytest.mark.asyncio + async def test_optional_backend_helpful_error_messages(self): + """Test that optional backends provide helpful error messages.""" + # Use truly unavailable backends instead of dask/radical_pilot + # which might be available now + unavailable_backends = ["nonexistent_backend", "fake_backend"] + + for backend_name in unavailable_backends: + with pytest.raises(ValueError) as exc_info: + await BackendFactory.create_backend(backend_name) + + error_msg = str(exc_info.value) + + # Should contain helpful information + assert f"Backend '{backend_name}' is not available" in error_msg + assert "Available backends:" in error_msg + assert "Installation hint:" in error_msg + assert "radical.asyncflow[" in error_msg # Should suggest optional install + + # Should list actual available backends + assert "noop" in error_msg + assert "concurrent" in error_msg + + def test_optional_backend_installation_hints(self): + """Test that installation hints are correct for each backend.""" + factory = BackendFactory() + + # Test specific backend hints + dask_hint = factory._suggest_installation("dask") + assert "radical.asyncflow[dask]" in dask_hint + + radical_pilot_hint = factory._suggest_installation("radical_pilot") + assert "radical.asyncflow[radicalpilot]" in radical_pilot_hint + + # Test fallback for unknown backends + unknown_hint = factory._suggest_installation("unknown_backend") + assert "radical.asyncflow[hpc]" in unknown_hint + + @patch("importlib.import_module") + def test_backend_validation_failure_handling(self, mock_import): + """Test handling of backends that load but fail validation.""" + # Simulate rhapsody backend that loads but has wrong base class + mock_module = Mock() + mock_backend_class = Mock() + mock_backend_class.__name__ = "MockRhapsodyBackend" + mock_module.MockRhapsodyBackend = mock_backend_class + mock_import.return_value = mock_module + + registry = BackendRegistry() + + # Add a mock rhapsody-style backend that will fail validation + registry.add_backend_spec("mock_rhapsody", "mock.module:MockRhapsodyBackend") + + # Should return None due to validation failure + backend_class = registry.get_backend("mock_rhapsody") + assert backend_class is 
None + + # Should have failure reason + failure_reason = registry.get_failure_reason("mock_rhapsody") + assert failure_reason is not None + assert "unexpected error" in failure_reason.lower() + + def test_registry_caching_behavior_with_optional_backends(self): + """Test that registry properly caches both successes and failures.""" + registry = BackendRegistry() + + # Test with concurrent backend which should always be available + result1 = registry.get_backend("concurrent") + result2 = registry.get_backend("concurrent") + + # Both should be the same backend class and consistent + assert result1 is not None + assert result2 is not None + assert result1 is result2 # Should be cached + + # Test with unknown backend (not in _backend_specs) + result3 = registry.get_backend("nonexistent_backend") + result4 = registry.get_backend("nonexistent_backend") + + # Both should be None (not available) but consistent + assert result3 is None + assert result4 is None + + # For unknown backends, no failure reason is recorded + # (they just return None without attempting to load) + failure_reason = registry.get_failure_reason("nonexistent_backend") + assert failure_reason is None # No failure reason for unknown backends + + def test_backend_specs_updated_for_phase2(self): + """Test that backend specs reflect Phase 2 changes.""" + registry = BackendRegistry() + specs = registry._backend_specs + + # Should have core backends + assert "noop" in specs + assert "concurrent" in specs + assert "radical.asyncflow.backends.execution" in specs["noop"] + assert "radical.asyncflow.backends.execution" in specs["concurrent"] + + # Should have rhapsody backends with correct module paths + assert "dask" in specs + assert "radical_pilot" in specs + assert "rhapsody.backends.execution.dask_parallel" in specs["dask"] + assert "rhapsody.backends.execution.radical_pilot" in specs["radical_pilot"] + + # Should NOT have removed backends + assert "dragon" not in specs + assert "flux" not in specs + + def test_list_available_includes_optional_backends(self): + """Test that registry lists optional backends even when not loadable.""" + registry = BackendRegistry() + available = registry.list_available() + + # Should include all backend specs + expected_backends = ["noop", "concurrent", "dask", "radical_pilot"] + for backend in expected_backends: + assert backend in available + + # Core backends should be available + assert available["noop"] is True + assert available["concurrent"] is True + + # Optional backends will be listed but may not be loadable + # (depends on whether rhapsody is installed in test environment) + assert "dask" in available + assert "radical_pilot" in available diff --git a/tests/unit/test_task_registeration_component.py b/tests/unit/test_task_registration_component.py similarity index 95% rename from tests/unit/test_task_registeration_component.py rename to tests/unit/test_task_registration_component.py index ad444be..0d58e8c 100644 --- a/tests/unit/test_task_registeration_component.py +++ b/tests/unit/test_task_registration_component.py @@ -94,10 +94,10 @@ async def dummy_block(): # only 1 block that is not unpacked yet assert len(engine.components) == 1 - buid = next(iter(engine.components.keys())) + build = next(iter(engine.components.keys())) # block gets registered first - assert BLOCK in buid + assert BLOCK in build await block_future @@ -112,7 +112,7 @@ async def test_dynamic_task_backend_specific_kwargs(): task_resources = {"ranks": 8} - @engine.function_task + @engine.function_task # noqa: B006 # 
Intentional mutable default for testing async def dummy_task(task_description=task_resources): return "dummy return value" diff --git a/tox.ini b/tox.ini index fd705b9..7299208 100644 --- a/tox.ini +++ b/tox.ini @@ -12,25 +12,30 @@ commands = pytest tests/unit {posargs} extras = dev, dask, radicalpilot setenv = RADICAL_VERBOSE=DEBUG -commands_pre = +commands_pre = radical-stack -commands = pytest tests/integration {posargs} +commands = pytest tests/integration --ignore=tests/integration/backends {posargs} # Linting [testenv:lint] extras = lint -commands = - ruff check src tests - ruff format --check src tests +commands = + ruff check src tests examples --exclude="*.ipynb" + ruff format --check src tests examples --exclude="*.ipynb" # Formatting [testenv:format] extras = lint -commands = - ruff format src tests - ruff check --fix src tests +commands = + ruff format src tests examples --exclude="*.ipynb" + ruff check --fix src tests examples --exclude="*.ipynb" # Coverage [testenv:coverage] extras = dev commands = pytest --cov=radical.asyncflow --cov-report=html --cov-report=term {posargs} + +# Pre-commit +[testenv:pre-commit] +extras = dev +commands = pre-commit run --all-files