Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ packaging = ">=16.0.0"
opentelemetry-api = ">=1.27.0"
opentelemetry-sdk = ">=1.27.0"
opentelemetry-exporter-otlp = ">=1.27.0"
cachetools = "^6.2.1"

[tool.poetry.group.dev.dependencies]
python-dotenv = "^1.0.1"
Expand Down
1 change: 1 addition & 0 deletions src/rapidata/rapidata_client/config/upload_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ class UploadConfig(BaseModel):
maxWorkers: int = Field(default=10)
maxRetries: int = Field(default=3)
chunkSize: int = Field(default=50)
cacheUploads: bool = Field(default=True)
27 changes: 25 additions & 2 deletions src/rapidata/rapidata_client/datapoints/_asset_uploader.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,27 +9,50 @@
from rapidata.service.openapi_service import OpenAPIService
from rapidata.rapidata_client.config import logger
from rapidata.rapidata_client.config import tracer
from rapidata.rapidata_client.config import rapidata_config
from cachetools import LRUCache


class AssetUploader:
_shared_upload_cache: LRUCache = LRUCache(maxsize=100_000)

def __init__(self, openapi_service: OpenAPIService):
    """Keep a reference to the API service used by this uploader.

    Args:
        openapi_service: Service object whose ``asset_api`` is used by
            ``upload_asset`` to post URLs and files.
    """
    self.openapi_service = openapi_service

def _get_cache_key(self, asset: str) -> str:
"""Generate cache key for an asset."""
if re.match(r"^https?://", asset):
return asset
else:
if not os.path.exists(asset):
raise FileNotFoundError(f"File not found: {asset}")

stat = os.stat(asset)
Copy link

Copilot AI Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The _get_cache_key method calls os.stat(asset) without checking if the file exists first. This will raise FileNotFoundError for non-existent files. However, the file existence check happens later in upload_asset at line 44. This causes cache key generation to fail before the proper error handling. Consider adding existence check in _get_cache_key or handling the exception appropriately.

Suggested change
stat = os.stat(asset)
try:
stat = os.stat(asset)
except FileNotFoundError:
raise FileNotFoundError(f"File not found: {asset}")

Copilot uses AI. Check for mistakes.
# Combine path, size, and modification time
return f"{asset}:{stat.st_size}:{stat.st_mtime_ns}"

def upload_asset(self, asset: str) -> str:
    """Upload an asset and return the file name reported by the API.

    ``asset`` is either an ``http://``/``https://`` URL, which is sent via
    ``asset_api.asset_url_post``, or a local file path, which is sent via
    ``asset_api.asset_file_post``.

    When ``rapidata_config.upload.cacheUploads`` is true, the result is
    looked up in and stored to the class-level ``_shared_upload_cache``
    under the key produced by ``_get_cache_key``. When it is false the
    cache is bypassed entirely: neither read nor written.

    Args:
        asset: URL or local path of the asset to upload.

    Returns:
        The ``file_name`` attribute of the upload response (or the value
        cached from an earlier identical upload).

    Raises:
        AssertionError: If ``asset`` is not a ``str``.
        FileNotFoundError: If ``asset`` is a local path that does not exist.
    """
    with tracer.start_as_current_span("AssetUploader.upload_asset"):
        logger.debug("Uploading asset: %s", asset)
        assert isinstance(asset, str), "Asset must be a string"

        # Read the flag once so lookup and store agree even if the config
        # is changed while the upload is in flight.
        use_cache = rapidata_config.upload.cacheUploads
        asset_key = None
        if use_cache:
            asset_key = self._get_cache_key(asset)
            # Single EAFP lookup instead of `in` followed by `[]`: an entry
            # evicted between the two operations cannot raise here.
            # NOTE(review): the cache is shared at class level; cachetools
            # documents its caches as not thread-safe - confirm whether
            # callers upload from multiple threads and need a lock.
            try:
                cached_name = self._shared_upload_cache[asset_key]
            except KeyError:
                pass
            else:
                logger.debug("Asset found in cache")
                return cached_name

        if re.match(r"^https?://", asset):
            response = self.openapi_service.asset_api.asset_url_post(
                url=asset,
            )
        else:
            # Still required here: with caching disabled `_get_cache_key`
            # is not called, so this is the only existence check.
            if not os.path.exists(asset):
                raise FileNotFoundError(f"File not found: {asset}")
            response = self.openapi_service.asset_api.asset_file_post(
                file=asset,
            )
        logger.info("Asset uploaded: %s", response.file_name)
        if use_cache:
            self._shared_upload_cache[asset_key] = response.file_name
            logger.debug("Asset added to cache")
        return response.file_name

def get_uploaded_text_input(
Expand Down
Loading