Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
5c18471
feat(dataset): add support for file cells in datasets with upload and…
galzilber Nov 24, 2025
cc20bc1
nl
galzilber Nov 24, 2025
70ba304
fix(dataset): handle request exceptions during file upload to S3
galzilber Nov 24, 2025
f21926c
refactor(dataset): enhance dataset module by adding file cell types a…
galzilber Nov 24, 2025
a8ad5fd
fx
galzilber Nov 24, 2025
c284acf
rm
galzilber Nov 24, 2025
469700c
refactor(tests): remove unused Mock import and clean up assertion for…
galzilber Nov 24, 2025
01d0f22
format
galzilber Nov 24, 2025
cc0c5b6
fix
galzilber Nov 24, 2025
fd39dfe
fx
galzilber Nov 24, 2025
443c64f
rm
galzilber Nov 24, 2025
94659ab
fx
galzilber Nov 24, 2025
bfe3250
fx
galzilber Nov 24, 2025
32e15b8
fx
galzilber Nov 24, 2025
3543f6a
fx
galzilber Nov 24, 2025
4ba485c
fx
galzilber Nov 25, 2025
cee7fb7
fx
galzilber Nov 25, 2025
77b8c26
Refactor code for improved functionality and performance
galzilber Nov 25, 2025
8b13f98
fx
galzilber Nov 25, 2025
65c207d
fx
galzilber Nov 25, 2025
cd925a4
Merge branch 'main' into gz/add-files-to-dataset
galzilber Nov 25, 2025
bc8a5ca
fx
galzilber Nov 25, 2025
e08cf4b
fx
galzilber Nov 25, 2025
5cee01d
fix(traceloop-sdk): enhance type annotations and improve data handlin…
galzilber Nov 25, 2025
9b00146
fx
galzilber Nov 25, 2025
2d5f510
Merge branch 'main' into gz/add-files-to-dataset
galzilber Nov 25, 2025
75d7541
fx
galzilber Nov 25, 2025
8157cfe
Merge branch 'gz/add-files-to-dataset' of github.com:traceloop/openll…
galzilber Nov 25, 2025
fb871d9
refactor(sdk): migrate dataset-related classes to a new 'datasets' mo…
galzilber Nov 25, 2025
992c75c
Merge branch 'main' into gz/add-files-to-dataset
galzilber Nov 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
180 changes: 180 additions & 0 deletions packages/traceloop-sdk/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,183 @@ def create_joke():

return completion.choices[0].message.content
```

## Working with Attachments in Datasets

Datasets now support file attachments through a declarative API inspired by Braintrust. Use the `Attachment` class to upload files to internal storage (S3), or the `ExternalAttachment` class to link to external URLs.

### File Types Supported

- `FileCellType.IMAGE` - Images (PNG, JPEG, GIF, etc.)
- `FileCellType.VIDEO` - Video files (MP4, AVI, etc.) or video URLs (YouTube, Vimeo)
- `FileCellType.AUDIO` - Audio files (MP3, WAV, etc.) or audio URLs (Spotify, SoundCloud)
- `FileCellType.FILE` - General files (PDF, TXT, DOC, etc.)

### Creating Datasets with Initial Attachments

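You can create a dataset with attachments placed directly in the row values. In the example below, `image_bytes` stands for image content you have already loaded into memory as `bytes`: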

```python
from traceloop.sdk import Traceloop
from traceloop.sdk.dataset import Attachment, ExternalAttachment, FileCellType
from traceloop.sdk.dataset.model import CreateDatasetRequest, ColumnDefinition, ColumnType

Traceloop.init(api_key="your-api-key")
datasets = Traceloop.get_datasets()

# Create dataset request with attachments in row values
dataset_request = CreateDatasetRequest(
slug="product-catalog",
name="Product Catalog with Media",
description="Products with images and videos",
columns=[
ColumnDefinition(slug="name", name="Product Name", type=ColumnType.STRING),
ColumnDefinition(slug="price", name="Price", type=ColumnType.NUMBER),
ColumnDefinition(slug="image", name="Product Image", type=ColumnType.FILE),
ColumnDefinition(slug="video", name="Demo Video", type=ColumnType.FILE),
ColumnDefinition(slug="manual", name="User Manual", type=ColumnType.FILE),
],
rows=[
{
"name": "Smart Watch Pro",
"price": 299.99,
"image": Attachment(
file_path="/path/to/watch.jpg",
file_type=FileCellType.IMAGE,
metadata={"alt_text": "Smart Watch Pro"}
),
"video": ExternalAttachment(
url="https://www.youtube.com/watch?v=demo123",
file_type=FileCellType.VIDEO,
metadata={"duration": "2:30"}
),
"manual": Attachment(
file_path="/path/to/manual.pdf",
file_type=FileCellType.FILE
),
},
{
"name": "Wireless Earbuds",
"price": 149.99,
"image": Attachment(
data=image_bytes, # From memory
filename="earbuds.png",
content_type="image/png",
file_type=FileCellType.IMAGE
),
"video": ExternalAttachment(
url="https://vimeo.com/demo456",
file_type=FileCellType.VIDEO
),
"manual": None, # No manual for this product
},
]
)

# Create the dataset - attachments are automatically processed
dataset = datasets.create(dataset_request)
print(f"Created dataset: {dataset.slug}")

# The dataset is created with all attachments processed
for row in dataset.rows:
print(f"Product: {row.values['name']}")
if row.values.get('image'):
print(f" Image: {row.values['image']['status']}") # 'success' or 'failed'
if row.values.get('video'):
print(f" Video URL: {row.values['video']['url']}")
```

### Adding Attachments to Existing Datasets

For existing datasets, you can upload a file or link an external URL to a specific cell:

```python
from traceloop.sdk import Traceloop
from traceloop.sdk.dataset import Attachment, ExternalAttachment, FileCellType

Traceloop.init(api_key="your-api-key")
datasets = Traceloop.get_datasets()
dataset = datasets.get_by_slug("my-dataset")
row = dataset.rows[0]

# Upload a file attachment
attachment = Attachment(
file_path="/path/to/document.pdf",
file_type=FileCellType.FILE,
metadata={"version": "1.0", "pages": 10}
)
ref = attachment.upload(datasets._http, dataset.slug, row.id, "document")

# Link an external URL
external = ExternalAttachment(
url="https://docs.google.com/document/d/abc123",
file_type=FileCellType.FILE,
metadata={"source": "Google Docs"}
)
ref = external.attach(datasets._http, dataset.slug, row.id, "specifications")
```

### Working with In-Memory Data

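You can create attachments from in-memory bytes without writing them to disk. The example below reuses the imports and the `datasets` client from the dataset-creation example above: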

```python
# Generate or fetch data
image_data = generate_chart() # Returns bytes
pdf_data = generate_report() # Returns bytes

# Create attachments from memory
image_attachment = Attachment(
data=image_data,
filename="chart.png",
content_type="image/png",
file_type=FileCellType.IMAGE,
metadata={"chart_type": "bar", "date": "2024-01-15"}
)

pdf_attachment = Attachment(
data=pdf_data,
filename="report.pdf",
content_type="application/pdf",
file_type=FileCellType.FILE,
metadata={"report_type": "quarterly", "q": "Q4-2023"}
)

# Use in dataset creation
dataset_request = CreateDatasetRequest(
slug="reports",
name="Generated Reports",
columns=[
ColumnDefinition(slug="title", name="Title", type=ColumnType.STRING),
ColumnDefinition(slug="chart", name="Chart", type=ColumnType.FILE),
ColumnDefinition(slug="report", name="Report", type=ColumnType.FILE),
],
rows=[{
"title": "Q4 2023 Results",
"chart": image_attachment,
"report": pdf_attachment,
}]
)

dataset = datasets.create(dataset_request)
```

### Attachment Validation

The `Attachment` class validates its arguments so that exactly one of `file_path` or `data` is provided:

```python
# This will raise ValueError - can't provide both file_path and data
attachment = Attachment(
file_path="/path/to/file.txt",
data=b"test data" # Error!
)

# This will raise ValueError - must provide either file_path or data
attachment = Attachment() # Error!

# This will raise FileNotFoundError when uploading if file doesn't exist
attachment = Attachment(
file_path="/nonexistent/file.txt" # Error during upload!
)
```
Loading
Loading