Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
7bebc7b
feat: bumping devex branch changes ahead
justinmerrell Oct 3, 2023
66322c2
Update rp_job.py
justinmerrell Oct 3, 2023
ee85a2c
Update rp_job.py
justinmerrell Oct 3, 2023
a2714cc
Update rp_job.py
justinmerrell Oct 3, 2023
47340d8
Update rp_job.py
justinmerrell Oct 3, 2023
60097ca
Update test_ctl_commands.py
justinmerrell Oct 3, 2023
223ce56
Update test_ctl_commands.py
justinmerrell Oct 3, 2023
911bbe2
Update test_ctl_commands.py
justinmerrell Oct 3, 2023
f207482
Update test_ctl_commands.py
justinmerrell Oct 3, 2023
b09bb09
Update test_ctl_commands.py
justinmerrell Oct 3, 2023
c8fd8bd
Update tests.json
justinmerrell Oct 3, 2023
36f2b20
Update CI-e2e.yml
justinmerrell Oct 3, 2023
a36676d
Merge pull request #151 from runpod/devex-cherry-pick
justinmerrell Oct 3, 2023
629a25f
feat: added 1.2.2 changelog
justinmerrell Oct 4, 2023
32a0af4
Merge pull request #152 from runpod/1.2.2-changelog
justinmerrell Oct 4, 2023
20b3d49
fix: handle lists
justinmerrell Oct 4, 2023
9e99fa2
Update test_job.py
justinmerrell Oct 4, 2023
538a5e2
Update CHANGELOG.md
justinmerrell Oct 4, 2023
794e7b7
Merge pull request #153 from runpod/output-patch
justinmerrell Oct 4, 2023
d2f0f70
Update CI-e2e.yml
justinmerrell Oct 4, 2023
51ff1d4
Update tests.json
justinmerrell Oct 4, 2023
8177926
Update rp_job.py
justinmerrell Oct 4, 2023
f35c728
fix: test raised error
justinmerrell Oct 5, 2023
be7b4f2
Merge pull request #154 from runpod/catch-e2e-results
justinmerrell Oct 5, 2023
92add72
Update ctl_commands.py
justinmerrell Oct 5, 2023
135ef4c
Update rp_job.py
justinmerrell Oct 5, 2023
4844e80
fix: handler called twice
justinmerrell Oct 5, 2023
84258ea
Merge pull request #157 from runpod/ignore-template
justinmerrell Oct 5, 2023
c9057c8
Merge pull request #156 from runpod/ignore-template
justinmerrell Oct 5, 2023
cc5301c
Merge pull request #158 from runpod/fix-double-job
justinmerrell Oct 5, 2023
90e08b8
Update CHANGELOG.md
justinmerrell Oct 5, 2023
b891be9
Merge pull request #159 from runpod/anomaly-fix
justinmerrell Oct 5, 2023
1820d2b
initial code to upload files (& images)
MartinKlefas Oct 5, 2023
45fda50
Merge branch 'runpod:main' into fix-s3-region-issue
MartinKlefas Oct 5, 2023
7f17a73
remove blank line
MartinKlefas Oct 6, 2023
da60441
feat: force urllib3 logging level
justinmerrell Oct 6, 2023
0292e4b
Merge pull request #160 from MartinKlefas/add-file-upload
justinmerrell Oct 6, 2023
e97436e
Merge pull request #161 from runpod/force-logger
justinmerrell Oct 6, 2023
a659a3d
Update CHANGELOG.md
justinmerrell Oct 6, 2023
5348086
Merge pull request #162 from runpod/changelog-update
justinmerrell Oct 6, 2023
164e237
Bump setuptools-scm from 8.0.3 to 8.0.4
dependabot[bot] Oct 9, 2023
59edc46
Merge pull request #163 from runpod/dependabot/pip/setuptools-scm-8.0.4
justinmerrell Oct 9, 2023
8cece74
Merge branch 'devex' into main>devex
justinmerrell Oct 9, 2023
a1346e1
Update __init__.py
justinmerrell Oct 9, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/tests.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
{
"hardwareConfig": {
"endpointConfig": {
"gpuIds": "AMPERE_16",
"gpuIds": "ADA_24,AMPERE_16,AMPERE_24,AMPERE_48,AMPERE_80",
"name": "runpod-python E2E Test - Basic"
}
},
Expand All @@ -13,7 +13,7 @@
{
"hardwareConfig": {
"endpointConfig": {
"gpuIds": "AMPERE_16",
"gpuIds": "ADA_24,AMPERE_16,AMPERE_24,AMPERE_48,AMPERE_80",
"name": "runpod-python E2E Test - Long Job"
}
},
Expand All @@ -25,7 +25,7 @@
{
"hardwareConfig": {
"endpointConfig": {
"gpuIds": "AMPERE_16",
"gpuIds": "ADA_24,AMPERE_16,AMPERE_24,AMPERE_48,AMPERE_80",
"name": "runpod-python E2E Test - Generator Handler"
},
"templateConfig": {
Expand All @@ -43,7 +43,7 @@
{
"hardwareConfig": {
"endpointConfig": {
"gpuIds": "AMPERE_16",
"gpuIds": "ADA_24,AMPERE_16,AMPERE_24,AMPERE_48,AMPERE_80",
"name": "runpod-python E2E Test - Async Generator Handler"
},
"templateConfig": {
Expand Down
15 changes: 14 additions & 1 deletion .github/workflows/CI-e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,22 @@ jobs:

steps:
- uses: actions/checkout@v4

- name: Run Tests
id: run-tests
uses: direlines/[email protected]
with:
image-tag: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ needs.e2e-build.outputs.docker_tag }}
runpod-api-key: ${{ secrets.RUNPOD_API_KEY }}
request-timeout: 600
request-timeout: 1200

- name: Verify Tests
env:
TOTAL_TESTS: ${{ steps.run-tests.outputs.total-tests }}
SUCCESSFUL_TESTS: ${{ steps.run-tests.outputs.succeeded }}
run: |
echo "Total tests: $TOTAL_TESTS"
echo "Successful tests: $SUCCESSFUL_TESTS"
if [ "$TOTAL_TESTS" != "$SUCCESSFUL_TESTS" ]; then
exit 1
fi
51 changes: 50 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,53 @@
# Change Log

## Release 1.2.1 (89/22/23)
## Release 1.2.6 (10/6/23)

### Changes

- Force `urllib3` logging to `WARNING` level to avoid spamming the console if global logging level is set to `DEBUG`.

---

## Release 1.2.5 (10/5/23)

### Fixed

- Handler called twice.
- The default container disk size is no longer applied when a template is provided while creating a new pod.

---

## ~~Release (Patch) 1.2.3 (10/4/23)~~ Replaced by 1.2.5

### Bug Fix

- Job outputs that were not dictionaries, bool, or str were swallowed by the serverless worker. This has been fixed.

---

## ~~Release 1.2.2 (10/4/23)~~ Replaced by 1.2.5

### Added

- User queries and mutations are now available in the python API wrapper.
- `start_ssh` added with default `True` when creating new pods.
- `network_volume_id` can now be passed in when creating new pods; the correct data center is automatically selected.
- `template_id` can now be passed in when creating new pods.

### Changes

- Dependencies updated to latest versions.
- Reduced circular imports for version reference.
- `support_public_ip` now defaults to `True` when creating new pods.

### Fixed

- Reduce pool_connections for ping requests to 10.
- Double timeout for ping requests.

---

## Release 1.2.1 (9/22/23)

### Added

Expand All @@ -13,6 +60,8 @@

- Region is included when using S3 storage via rp_upload, automatically filled in for Amazon S3 buckets and Digital Ocean Spaces.

---

## Release 1.2.0 (8/29/23)

### Added
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ python-dotenv >= 1.0.0
requests >= 2.31.0
tomli >= 2.0.1
tqdm-loggable == 0.1.4
setuptools_scm == 8.0.3
setuptools_scm == 8.0.4
watchdog >= 3.0.0


fastapi[all] == 0.103.2

# Minimum versions for dependencies
Expand Down
8 changes: 7 additions & 1 deletion runpod/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
""" Allows runpod to be imported as a module. """

import os
import logging

from .version import __version__
from . import serverless
from .endpoint import Endpoint
from .endpoint import AsyncioEndpoint, AsyncioJob
from .version import __version__
from .api.ctl_commands import(
get_user, update_user_settings,
get_gpus, get_gpu,
Expand All @@ -30,3 +31,8 @@
api_url_base = "https://api.runpod.io" # pylint: disable=invalid-name

endpoint_url_base = "https://api.runpod.ai/v2" # pylint: disable=invalid-name


# --------------------------- Force Logging Levels --------------------------- #
logging.getLogger("urllib3").setLevel(logging.WARNING)

7 changes: 5 additions & 2 deletions runpod/api/ctl_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,9 @@ def create_pod(
cloud_type:str="ALL", support_public_ip:bool=True,
start_ssh:bool=True,
data_center_id : Optional[str]=None, country_code:Optional[str]=None,
gpu_count:int=1, volume_in_gb:int=0, container_disk_in_gb:int=5,
gpu_count:int=1, volume_in_gb:int=0, container_disk_in_gb:Optional[int]=None,
min_vcpu_count:int=1, min_memory_in_gb:int=1, docker_args:str="",
ports:Optional[str]=None, volume_mount_path:str="/runpod_volume",
ports:Optional[str]=None, volume_mount_path:str="/runpod-volume",
env:Optional[dict]=None, template_id:Optional[str]=None,
network_volume_id:Optional[str]=None
) -> dict:
Expand Down Expand Up @@ -118,6 +118,9 @@ def create_pod(
data_center_id = network_volume["dataCenterId"]
break

if container_disk_in_gb is None and template_id is None:
container_disk_in_gb = 10

raw_response = run_graphql_query(
pod_mutations.generate_pod_deployment_mutation(
name, image_name, gpu_type_id,
Expand Down
5 changes: 3 additions & 2 deletions runpod/serverless/modules/rp_fastapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
from .rp_job import run_job
from .worker_state import Jobs
from .rp_ping import Heartbeat
from ...version import __version__ as runpod_Version
from ...version import __version__ as runpod_version


RUNPOD_ENDPOINT_ID = os.environ.get("RUNPOD_ENDPOINT_ID", None)

Expand Down Expand Up @@ -68,7 +69,7 @@ def __init__(self, handler=None):
self.rp_app = FastAPI(
title="RunPod | Test Worker | API",
description=DESCRIPTION,
version=runpod_Version
version=runpod_version,
)

# Create an APIRouter and add the route for processing jobs.
Expand Down
52 changes: 28 additions & 24 deletions runpod/serverless/modules/rp_job.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from runpod.serverless.modules.rp_logger import RunPodLogger
from .worker_state import WORKER_ID, Jobs
from .rp_tips import check_return_size
from ...version import __version__ as runpod_version

JOB_GET_URL = str(os.environ.get('RUNPOD_WEBHOOK_GET_JOB')).replace('$ID', WORKER_ID)

Expand Down Expand Up @@ -108,51 +109,54 @@ async def run_job(handler: Callable, job: Dict[str, Any]) -> Dict[str, Any]:
Returns the job output or error.
"""
log.info(f'{job["id"]} | Started')
run_result = {"error": "No output from handler."}

try:
result = handler(job)
job_output = await result if inspect.isawaitable(result) else result
handler_return = handler(job)
job_output = await handler_return if inspect.isawaitable(handler_return) else handler_return

log.debug(f'{job["id"]} | Handler output: {job_output}')

run_result = {"output": job_output}

if isinstance(job_output, dict):
error_msg = job_output.pop("error", None)
refresh_worker = job_output.pop("refresh_worker", None)

if job_output.get("error", False):
run_result["error"] = str(run_result["output"].pop("error"))
run_result = {"output": job_output}

if job_output.get("refresh_worker", False):
if error_msg:
run_result["error"] = error_msg
if refresh_worker:
run_result["stopPod"] = True
run_result["output"].pop("refresh_worker")

if run_result["output"] == {}:
run_result.pop("output")

elif isinstance(job_output, bool):
run_result = {"output": job_output}

else:
run_result = {"output": job_output}

if run_result.get("output") == {}:
run_result.pop("output")

check_return_size(run_result) # Checks the size of the return body.

except Exception as err: # pylint: disable=broad-except
from runpod import __version__ as runpod_version # pylint: disable=import-outside-toplevel,cyclic-import
error_content = json.dumps(
{
"error_type": str(type(err)),
"error_message": str(err),
"error_traceback": traceback.format_exc(),
"hostname": os.environ.get("RUNPOD_POD_HOSTNAME", "unknown"),
"worker_id": os.environ.get("RUNPOD_POD_ID", "unknown"),
"runpod_version": runpod_version
}, indent=4)
error_info = {
"error_type": str(type(err)),
"error_message": str(err),
"error_traceback": traceback.format_exc(),
"hostname": os.environ.get("RUNPOD_POD_HOSTNAME", "unknown"),
"worker_id": os.environ.get("RUNPOD_POD_ID", "unknown"),
"runpod_version": runpod_version
}

log.error(f'{job["id"]} | Captured Handler Exception')
log.error(error_content)
log.error(json.dumps(error_info, indent=4))
run_result = {"error": json.dumps(error_info)}

run_result = {"error": error_content}
finally:
log.debug(f'{job["id"]} | run_job return: {run_result}')

return run_result # pylint: disable=lost-exception
return run_result


async def run_job_generator(
Expand Down
8 changes: 2 additions & 6 deletions runpod/serverless/modules/rp_ping.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from runpod.serverless.modules.rp_logger import RunPodLogger
from .worker_state import Jobs, WORKER_ID
from ...version import __version__ as runpod_version

log = RunPodLogger()
jobs = Jobs() # Contains the list of jobs that are currently running.
Expand Down Expand Up @@ -47,8 +48,6 @@ def __init__(self, pool_connections=10, retries=3) -> None:
self._session.mount('http://', adapter)
self._session.mount('https://', adapter)

self.runpod_version = None

def start_ping(self, test=False):
'''
Sends heartbeat pings to the Runpod server.
Expand All @@ -58,9 +57,6 @@ def start_ping(self, test=False):
return

if not Heartbeat._thread_started:
from runpod import __version__ as runpod_version # pylint: disable=import-outside-toplevel,cyclic-import
self.runpod_version = runpod_version

threading.Thread(target=self.ping_loop, daemon=True, args=(test,)).start()
Heartbeat._thread_started = True

Expand All @@ -82,7 +78,7 @@ def _send_ping(self):
job_ids = jobs.get_job_list()
ping_params = {
'job_id': job_ids,
'runpod_version': self.runpod_version
'runpod_version': runpod_version
}

try:
Expand Down
Loading