Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
# Changelog

## [0.11.0](https://github.com/runpod/tetra-rp/compare/v0.10.0...v0.11.0) (2025-08-19)


### Features

* Add download acceleration for dependencies and HuggingFace models ([#83](https://github.com/runpod/tetra-rp/issues/83)) ([e47c9e3](https://github.com/runpod/tetra-rp/commit/e47c9e37030ead1831893dd70a1322421befbaad))

## [0.10.0](https://github.com/runpod/tetra-rp/compare/v0.9.0...v0.10.0) (2025-08-07)


### Features

* Add idempotent network volume deployment ([#79](https://github.com/runpod/tetra-rp/issues/79)) ([289d333](https://github.com/runpod/tetra-rp/commit/289d333aaaf48e00bfdad2a5f6356bdfc6bcf286))

## [0.9.0](https://github.com/runpod/tetra-rp/compare/v0.8.0...v0.9.0) (2025-08-04)


### Features

* AE-961 Add class serialization caching for remote execution ([#76](https://github.com/runpod/tetra-rp/issues/76)) ([95f9eed](https://github.com/runpod/tetra-rp/commit/95f9eed1810e6a623091348c326e2ea571c6dddf))

## [0.8.0](https://github.com/runpod/tetra-rp/compare/v0.7.0...v0.8.0) (2025-07-22)


Expand Down
4 changes: 4 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ help: # Show this help menu
dev: # Install development dependencies
uv sync --all-groups

update: # Upgrade all dependency groups and refresh the lockfile
	uv sync --upgrade --all-groups
	uv lock --upgrade

proto: # TODO: auto-generate proto files
@echo "TODO"

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -785,6 +785,6 @@ def fetch_data(url):
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

<p align="center">
<a href="https://github.com/yourusername/tetra">Tetra</a> •
<a href="https://github.com/runpod/tetra-rp">Tetra</a> •
<a href="https://runpod.io">Runpod</a>
</p>
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "tetra_rp"
version = "0.8.0"
version = "0.11.0"
description = "A Python library for distributed inference and serving of machine learning models"
authors = [
{ name = "Marut Pandya", email = "[email protected]" },
Expand All @@ -21,6 +21,8 @@ dependencies = [
"cloudpickle>=3.1.1",
"runpod",
"python-dotenv>=1.0.0",
"aiohttp>=3.9.0",
"pydantic>=2.11.4",
]

[dependency-groups]
Expand Down
69 changes: 69 additions & 0 deletions simple_deployment_runtime_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""
Simple LoadBalancerSls Example

Shows basic usage of LoadBalancerSls with LoadBalancerSlsResource
"""

import asyncio
import logging

# Add tetra-rp to Python path
# sys.path.insert(0, "/Users/marut/tetra/tetra-rp/src")

from tetra_rp import remote, LoadBalancerSlsResource, endpoint

# Reduce logging noise
logging.getLogger("tetra_rp").setLevel(logging.WARNING)

# Configure LoadBalancerSls resource
lb_config = LoadBalancerSlsResource(name="loadbalancer-sls-test")


# Define class outside main function
@remote(
    resource_config=lb_config,  # resource-based configuration
    dependencies=["numpy"],
)
class MLModel:
    """Demo model exposing one HTTP endpoint and one plain remote method.

    A single call counter is shared across both entry points so each
    response reports how many requests this instance has served.
    """

    def __init__(self):
        # Total calls served so far, across predict() and compute().
        self.counter = 0

    @endpoint(methods=["POST"])
    def predict(self, text):
        """HTTP endpoint: echo the input alongside the running call count."""
        self.counter = self.counter + 1
        return {"input": text, "result": f"Processed: {text}", "count": self.counter}

    def compute(self, x, y):
        """Remote-execution method: add two numbers and report the call count."""
        self.counter = self.counter + 1
        return {"sum": x + y, "count": self.counter}


async def main():
    """Exercise MLModel through both its remote method and its HTTP endpoint.

    Instantiating the decorated class triggers deployment; each call path is
    wrapped in its own try/except so one failure does not stop the other.
    """
    print("🚀 LoadBalancerSls Test")
    print("=" * 25)

    # Deployment and health check happen on construction.
    print("\n📦 Creating model...")
    model = MLModel()

    # Path 1: plain remote method call.
    print("\n🔄 Remote execution...")
    try:
        outcome = await model.compute(10, 5)
        print(f"✅ Sum: {outcome['sum']}, Count: {outcome['count']}")
    except Exception as err:
        print(f"❌ Remote failed: {err}")

    # Path 2: HTTP endpoint call.
    print("\n🌐 HTTP endpoint...")
    try:
        outcome = await model.predict("hello")
        print(f"✅ Result: {outcome['result']}, Count: {outcome['count']}")
    except Exception as err:
        print(f"❌ HTTP failed: {err}")

    print("\n🎉 Test completed!")


# Run the async demo only when executed directly as a script.
if __name__ == "__main__":
    asyncio.run(main())
11 changes: 10 additions & 1 deletion src/tetra_rp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,26 +14,35 @@
CpuServerlessEndpoint,
CpuInstanceType,
CudaVersion,
DataCenter,
GpuGroup,
LiveServerless,
LoadBalancerSlsResource,
PodTemplate,
ResourceManager,
ServerlessResource,
ServerlessEndpoint,
runpod,
NetworkVolume,
)
from .core.resources.load_balancer_sls import LoadBalancerSls, endpoint # noqa: E402


__all__ = [
"remote",
"CpuServerlessEndpoint",
"CpuInstanceType",
"CudaVersion",
"DataCenter",
"GpuGroup",
"LiveServerless",
"LiveServerless",
"LoadBalancerSlsResource",
"PodTemplate",
"ResourceManager",
"ServerlessResource",
"ServerlessEndpoint",
"runpod",
"NetworkVolume",
"LoadBalancerSls",
"endpoint",
]
68 changes: 61 additions & 7 deletions src/tetra_rp/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@
from functools import wraps
from typing import List, Optional

from .core.resources import ResourceManager, ServerlessResource
from .core.resources import ResourceManager, ServerlessResource, LoadBalancerSlsResource
from .execute_class import create_remote_class
from .core.resources.load_balancer_sls.integration import create_load_balancer_sls_class
from .stubs import stub_resource

log = logging.getLogger(__name__)
Expand All @@ -14,6 +15,8 @@ def remote(
resource_config: ServerlessResource,
dependencies: Optional[List[str]] = None,
system_dependencies: Optional[List[str]] = None,
accelerate_downloads: bool = True,
hf_models_to_cache: Optional[List[str]] = None,
**extra,
):
"""
Expand All @@ -22,10 +25,17 @@ def remote(
This decorator allows a function to be executed in a remote serverless environment, with support for
dynamic resource provisioning and installation of required dependencies.

Args:
resource_config (ServerlessResource): Configuration object specifying the serverless resource
to be provisioned or used.
to be provisioned or used. Set resource_config.type="LB" for LoadBalancerSls mode.
dependencies (List[str], optional): A list of pip package names to be installed in the remote
environment before executing the function. Defaults to None.
system_dependencies (List[str], optional): A list of system packages to be installed in the remote
environment before executing the function. Defaults to None.
accelerate_downloads (bool, optional): Enable download acceleration for dependencies and models.
Defaults to True.
hf_models_to_cache (List[str], optional): List of HuggingFace model IDs to pre-cache using
download acceleration. Defaults to None.
extra (dict, optional): Additional parameters for the execution of the resource. Defaults to an empty dict.

Returns:
Expand All @@ -34,25 +44,63 @@ def remote(

Example:
```python
# Traditional serverless execution
@remote(
resource_config=my_resource_config,
dependencies=["numpy", "pandas"],
sync=True # Optional, to run synchronously
accelerate_downloads=True,
hf_models_to_cache=["gpt2", "bert-base-uncased"]
)
async def my_function(data):
# Function logic here
pass

# LoadBalancerSls execution (Load Balancer mode)
@remote(
resource_config=my_resource_config,
type="LB",
dependencies=["torch", "transformers"]
)
class MLModel:
@endpoint(methods=['POST'])
def predict(self, data):
return result
```
"""

def decorator(func_or_class):
if inspect.isclass(func_or_class):
# Handle class decoration
return create_remote_class(
func_or_class, resource_config, dependencies, system_dependencies, extra
)
if isinstance(resource_config, LoadBalancerSlsResource):
# Use LoadBalancerSls (Load Balancer) mode
log.info(
f"Using LoadBalancerSls mode for class {func_or_class.__name__}"
)
return create_load_balancer_sls_class(
func_or_class,
resource_config,
dependencies,
system_dependencies,
extra,
)
else:
# Use traditional serverless execution
return create_remote_class(
func_or_class,
resource_config,
dependencies,
system_dependencies,
accelerate_downloads,
hf_models_to_cache,
extra,
)
else:
# Handle function decoration (unchanged)
if isinstance(resource_config, LoadBalancerSlsResource):
raise ValueError(
"LoadBalancerSlsResource can only be used with classes, not functions"
)

@wraps(func_or_class)
async def wrapper(*args, **kwargs):
resource_manager = ResourceManager()
Expand All @@ -62,7 +110,13 @@ async def wrapper(*args, **kwargs):

stub = stub_resource(remote_resource, **extra)
return await stub(
func_or_class, dependencies, system_dependencies, *args, **kwargs
func_or_class,
dependencies,
system_dependencies,
accelerate_downloads,
hf_models_to_cache,
*args,
**kwargs,
)

return wrapper
Expand Down
24 changes: 24 additions & 0 deletions src/tetra_rp/core/api/runpod.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,30 @@ async def create_network_volume(self, payload: Dict[str, Any]) -> Dict[str, Any]

return result

async def list_network_volumes(self) -> Dict[str, Any]:
    """List all network volumes in Runpod.

    Returns:
        List of network volume objects, or a dict containing a
        ``networkVolumes`` key — the API may return either format
        depending on version.
    """
    log.debug("Listing network volumes")

    result = await self._execute_rest(
        "GET", f"{RUNPOD_REST_API_URL}/networkvolumes"
    )

    # The REST API returns either a bare list or a wrapper dict; count
    # volumes accordingly so the debug log is accurate in both cases.
    if isinstance(result, list):
        volume_count = len(result)
    else:
        volume_count = len(result.get("networkVolumes", []))

    # Lazy %-style args: the message is only formatted if DEBUG is enabled.
    log.debug("Listed %d network volumes", volume_count)

    return result

async def close(self):
"""Close the HTTP session."""
if self.session and not self.session.closed:
Expand Down
5 changes: 4 additions & 1 deletion src/tetra_rp/core/resources/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from .gpu import GpuGroup, GpuType, GpuTypeDetail
from .resource_manager import ResourceManager
from .live_serverless import LiveServerless
from .load_balancer_sls_resource import LoadBalancerSlsResource
from .serverless import (
CpuServerlessEndpoint,
ServerlessResource,
Expand All @@ -12,7 +13,7 @@
CudaVersion,
)
from .template import PodTemplate
from .network_volume import NetworkVolume
from .network_volume import NetworkVolume, DataCenter


__all__ = [
Expand All @@ -21,12 +22,14 @@
"CpuInstanceType",
"CpuServerlessEndpoint",
"CudaVersion",
"DataCenter",
"DeployableResource",
"GpuGroup",
"GpuType",
"GpuTypeDetail",
"JobOutput",
"LiveServerless",
"LoadBalancerSlsResource",
"ResourceManager",
"ServerlessResource",
"ServerlessEndpoint",
Expand Down
26 changes: 26 additions & 0 deletions src/tetra_rp/core/resources/load_balancer_sls/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
"""
LoadBalancerSls Package

This package provides LoadBalancerSls functionality for dual-capability remote execution,
supporting both HTTP endpoints and remote execution through a unified interface.
"""

from .client import LoadBalancerSls
from .endpoint import endpoint
from .exceptions import (
LoadBalancerSlsError,
LoadBalancerSlsConnectionError,
LoadBalancerSlsAuthenticationError,
LoadBalancerSlsExecutionError,
LoadBalancerSlsConfigurationError,
)

__all__ = [
"LoadBalancerSls",
"endpoint",
"LoadBalancerSlsError",
"LoadBalancerSlsConnectionError",
"LoadBalancerSlsAuthenticationError",
"LoadBalancerSlsExecutionError",
"LoadBalancerSlsConfigurationError",
]
Loading