diff --git a/packages/sample-app/sample_app/dataset_attachments_example.py b/packages/sample-app/sample_app/dataset_attachments_example.py
new file mode 100644
index 0000000000..6606cbb77b
--- /dev/null
+++ b/packages/sample-app/sample_app/dataset_attachments_example.py
@@ -0,0 +1,358 @@
+#!/usr/bin/env python3
+"""
+Sample application demonstrating how to use the attachment feature in Traceloop SDK.
+
+This example shows:
+1. Creating datasets with external URL attachments (YouTube videos, Google Docs)
+2. Creating datasets with file uploads (local images, PDFs)
+3. Creating datasets with in-memory data attachments
+4. Mixed attachment types in a single dataset
+"""
+
+import os
+import tempfile
+
+from traceloop.sdk import Traceloop
+from traceloop.sdk.datasets import (
+    Attachment,
+    Datasets,
+    ExternalAttachment,
+    FileCellType,
+)
+from traceloop.sdk.datasets.model import (
+    ColumnDefinition,
+    ColumnType,
+    CreateDatasetRequest,
+)
+
+
+def example_external_attachments():
+    """Example: Creating a dataset with external URL attachments."""
+    print("\n=== Example 1: External URL Attachments ===")
+
+    # Initialize Traceloop
+    Traceloop.init(app_name="attachment-demo")
+    datasets = Datasets()
+
+    # Create a product catalog with external media
+    dataset_request = CreateDatasetRequest(
+        slug="product-catalog-with-media",
+        name="Product Catalog with Media",
+        description="Product catalog with videos and documentation links",
+        columns=[
+            ColumnDefinition(
+                slug="product_name", name="Product Name", type=ColumnType.STRING
+            ),
+            ColumnDefinition(slug="price", name="Price", type=ColumnType.NUMBER),
+            ColumnDefinition(
+                slug="demo_video", name="Demo Video", type=ColumnType.FILE
+            ),
+            ColumnDefinition(
+                slug="user_manual", name="User Manual", type=ColumnType.FILE
+            ),
+        ],
+        rows=[
+            {
+                "product_name": "Smart Widget Pro",
+                "price": 299.99,
+                "demo_video": ExternalAttachment(
+                    url="https://www.youtube.com/watch?v=dQw4w9WgXcQ",
+                    file_type=FileCellType.VIDEO,
+                    metadata={
+                        "title": "Smart Widget Pro Demo",
+                        "duration": "5:32",
+                        "resolution": "1080p",
+                    },
+                ),
+                "user_manual": ExternalAttachment(
+                    url="https://docs.google.com/document/d/example-manual-id",
+                    file_type=FileCellType.FILE,
+                    metadata={"pages": 45, "format": "Google Docs", "version": "2.1"},
+                ),
+            },
+            {
+                "product_name": "EcoGadget Plus",
+                "price": 199.99,
+                "demo_video": ExternalAttachment(
+                    url="https://vimeo.com/123456789",
+                    file_type=FileCellType.VIDEO,
+                    metadata={"title": "EcoGadget Plus Overview", "duration": "3:15"},
+                ),
+                "user_manual": ExternalAttachment(
+                    url="https://example.com/manuals/ecogadget-plus.pdf",
+                    file_type=FileCellType.FILE,
+                    metadata={"pages": 30, "format": "PDF"},
+                ),
+            },
+        ],
+    )
+
+    # Create the dataset
+    dataset = datasets.create(dataset_request)
+    print(f"Created dataset: {dataset.slug}")
+    print(f"Total rows: {len(dataset.rows)}")
+
+    # Access the attachment information
+    for row in dataset.rows:
+        print(f"\nProduct: {row.values['product_name']}")
+        video = row.values.get("demo_video")
+        if video:
+            print(f" Video URL: {video.get('url')}")
+            print(f" Video Type: {video.get('type')}")
+
+
+def example_file_uploads():
+    """Example: Creating a dataset with file uploads."""
+    print("\n=== Example 2: File Upload Attachments ===")
+
+    # Create temporary test files
+    # In a real application, these would be actual files
+    image_file = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False)
+    image_file.write(b"fake image data for demo")
+    image_file.close()
+
+    pdf_file = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
+    pdf_file.write(b"fake pdf data for demo")
+    pdf_file.close()
+
+    try:
+        datasets = Datasets()
+
+        # Create a dataset with file uploads
+        dataset_request = CreateDatasetRequest(
+            slug="employee-records-with-photos",
+            name="Employee Records with Photos",
+            description="Employee database with profile photos and resumes",
+            columns=[
+                ColumnDefinition(
+                    slug="employee_id", name="Employee ID", type=ColumnType.STRING
+                ),
+                ColumnDefinition(slug="name", name="Full Name", type=ColumnType.STRING),
+                ColumnDefinition(
+                    slug="profile_photo", name="Profile Photo", type=ColumnType.FILE
+                ),
+                ColumnDefinition(slug="resume", name="Resume", type=ColumnType.FILE),
+            ],
+            rows=[
+                {
+                    "employee_id": "EMP001",
+                    "name": "Alice Johnson",
+                    "profile_photo": Attachment(
+                        file_path=image_file.name,
+                        file_type=FileCellType.IMAGE,
+                        metadata={
+                            "alt_text": "Alice Johnson profile photo",
+                            "photographer": "Company Photo Services",
+                            "date_taken": "2024-01-15",
+                        },
+                    ),
+                    "resume": Attachment(
+                        file_path=pdf_file.name,
+                        file_type=FileCellType.FILE,
+                        content_type="application/pdf",
+                        metadata={
+                            "version": "3.0",
+                            "last_updated": "2024-03-01",
+                            "pages": 2,
+                        },
+                    ),
+                },
+            ],
+        )
+
+        # Create the dataset (uploads will happen automatically)
+        dataset = datasets.create(dataset_request)
+        print(f"Created dataset: {dataset.slug}")
+
+        # Check upload status
+        for row in dataset.rows:
+            print(f"\nEmployee: {row.values['name']}")
+            photo = row.values.get("profile_photo")
+            if photo:
+                print(f" Photo Status: {photo.get('status')}")
+                print(f" Storage Type: {photo.get('storage')}")
+
+            resume = row.values.get("resume")
+            if resume:
+                print(f" Resume Status: {resume.get('status')}")
+                print(f" Storage Type: {resume.get('storage')}")
+
+    finally:
+        # Clean up temporary files
+        os.unlink(image_file.name)
+        os.unlink(pdf_file.name)
+
+
+def example_in_memory_attachments():
+    """Example: Creating a dataset with in-memory data attachments."""
+    print("\n=== Example 3: In-Memory Attachments ===")
+
+    datasets = Datasets()
+
+    # Generate some in-memory data
+    # This could be data generated by your application
+    csv_data = b"name,score\nAlice,95\nBob,87\nCarol,92"
+    json_data = b'{"config": "example", "version": "1.0"}'
+
+    # Create dataset with in-memory attachments
+    dataset_request = CreateDatasetRequest(
+        slug="analysis-results",
+        name="Analysis Results",
+        description="Results from data analysis with generated reports",
+        columns=[
+            ColumnDefinition(
+                slug="analysis_id", name="Analysis ID", type=ColumnType.STRING
+            ),
+            ColumnDefinition(
+                slug="dataset_name", name="Dataset Name", type=ColumnType.STRING
+            ),
+            ColumnDefinition(
+                slug="results_csv", name="Results CSV", type=ColumnType.FILE
+            ),
+            ColumnDefinition(
+                slug="config_json", name="Configuration", type=ColumnType.FILE
+            ),
+        ],
+        rows=[
+            {
+                "analysis_id": "ANA001",
+                "dataset_name": "Q4 Sales Analysis",
+                "results_csv": Attachment(
+                    data=csv_data,
+                    filename="q4_results.csv",
+                    content_type="text/csv",
+                    file_type=FileCellType.FILE,
+                    metadata={"rows": 3, "columns": 2, "analysis_date": "2024-01-10"},
+                ),
+                "config_json": Attachment(
+                    data=json_data,
+                    filename="analysis_config.json",
+                    content_type="application/json",
+                    file_type=FileCellType.FILE,
+                    metadata={"version": "1.0", "algorithm": "regression"},
+                ),
+            },
+        ],
+    )
+
+    dataset = datasets.create(dataset_request)
+    print(f"Created dataset: {dataset.slug}")
+    print("Attachments uploaded from memory")
+
+
+def example_mixed_attachments():
+    """Example: Creating a dataset with mixed attachment types."""
+    print("\n=== Example 4: Mixed Attachment Types ===")
+
+    # Create a temporary file for local attachment
+    local_file = tempfile.NamedTemporaryFile(suffix=".txt", delete=False)
+    local_file.write(b"Sample report content")
+    local_file.close()
+
+    try:
+        datasets = Datasets()
+
+        # Create dataset with different attachment types
+        dataset_request = CreateDatasetRequest(
+            slug="project-documentation",
+            name="Project Documentation",
+            description="Project docs with various attachment types",
+            columns=[
+                ColumnDefinition(
+                    slug="doc_id", name="Document ID", type=ColumnType.STRING
+                ),
+                ColumnDefinition(slug="title", name="Title", type=ColumnType.STRING),
+                ColumnDefinition(
+                    slug="attachment", name="Attachment", type=ColumnType.FILE
+                ),
+                ColumnDefinition(
+                    slug="reference_video", name="Reference Video", type=ColumnType.FILE
+                ),
+            ],
+            rows=[
+                {
+                    "doc_id": "DOC001",
+                    "title": "Project Overview",
+                    "attachment": Attachment(
+                        file_path=local_file.name,
+                        file_type=FileCellType.FILE,
+                        metadata={"author": "Team Lead"},
+                    ),
+                    "reference_video": ExternalAttachment(
+                        url="https://www.youtube.com/watch?v=example",
+                        file_type=FileCellType.VIDEO,
+                        metadata={"relevance": "high"},
+                    ),
+                },
+                {
+                    "doc_id": "DOC002",
+                    "title": "Technical Specs",
+                    "attachment": Attachment(
+                        data=b"Technical specification details...",
+                        filename="tech_specs.txt",
+                        file_type=FileCellType.FILE,
+                    ),
+                    "reference_video": None,  # No video for this document
+                },
+                {
+                    "doc_id": "DOC003",
+                    "title": "External Resources",
+                    "attachment": ExternalAttachment(
+                        url="https://github.com/example/repo/blob/main/README.md",
+                        file_type=FileCellType.FILE,
+                        metadata={"type": "markdown"},
+                    ),
+                    "reference_video": ExternalAttachment(
+                        url="https://vimeo.com/example-tutorial",
+                        file_type=FileCellType.VIDEO,
+                    ),
+                },
+            ],
+        )
+
+        dataset = datasets.create(dataset_request)
+        print(f"Created dataset: {dataset.slug}")
+
+        # Show the different storage types
+        for row in dataset.rows:
+            print(f"\nDocument: {row.values['title']}")
+            attachment = row.values.get("attachment")
+            if attachment:
+                print(f" Attachment Storage: {attachment.get('storage')}")
+                if attachment.get("storage") == "external":
+                    print(f" URL: {attachment.get('url')}")
+
+    finally:
+        os.unlink(local_file.name)
+
+
+def main():
+    """Run all examples."""
+    print("=" * 60)
+    print("Traceloop SDK Attachment Feature Examples")
+    print("=" * 60)
+
+    # Set your API key
+    # os.environ["TRACELOOP_API_KEY"] = "your-api-key-here"
+
+    # Note: the attachment contents are placeholder bytes, not real media, but
+    # datasets.create() is not mocked here - it creates datasets and uploads files.
+
+    try:
+        # Run examples
+        example_external_attachments()
+        example_file_uploads()
+        example_in_memory_attachments()
+        example_mixed_attachments()
+
+        print("\n" + "=" * 60)
+        print("All examples completed successfully!")
+        print("=" * 60)
+
+    except Exception as e:
+        print(f"\nError: {e}")
+        print("Make sure to set TRACELOOP_API_KEY environment variable")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/packages/sample-app/sample_app/dataset_example.py b/packages/sample-app/sample_app/dataset_example.py index 03d44cdb27..3b36f0a71a 100644 --- a/packages/sample-app/sample_app/dataset_example.py +++ b/packages/sample-app/sample_app/dataset_example.py @@ -7,7 +7,7 @@ from typing import Optional from datetime import datetime from traceloop.sdk import Traceloop -from traceloop.sdk.dataset import Dataset, ColumnType, Column, Row +from traceloop.sdk.datasets import Dataset, ColumnType, Column, Row import pandas as pd import openai diff --git a/packages/traceloop-sdk/tests/dataset/__init__.py b/packages/traceloop-sdk/tests/dataset/__init__.py deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/packages/traceloop-sdk/tests/dataset/cassettes/test_columns_operations/test_create_dataset_with_columns.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_columns_operations/test_create_dataset_with_columns.yaml similarity index 100% rename from packages/traceloop-sdk/tests/dataset/cassettes/test_columns_operations/test_create_dataset_with_columns.yaml rename to packages/traceloop-sdk/tests/datasets/cassettes/test_columns_operations/test_create_dataset_with_columns.yaml diff --git a/packages/traceloop-sdk/tests/dataset/cassettes/test_columns_operations/test_dataset_operations_errors.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_columns_operations/test_dataset_operations_errors.yaml similarity index 100% rename from packages/traceloop-sdk/tests/dataset/cassettes/test_columns_operations/test_dataset_operations_errors.yaml rename to packages/traceloop-sdk/tests/datasets/cassettes/test_columns_operations/test_dataset_operations_errors.yaml diff --git a/packages/traceloop-sdk/tests/dataset/cassettes/test_columns_operations/test_get_dataset_with_columns.yaml
b/packages/traceloop-sdk/tests/datasets/cassettes/test_columns_operations/test_get_dataset_with_columns.yaml similarity index 100% rename from packages/traceloop-sdk/tests/dataset/cassettes/test_columns_operations/test_get_dataset_with_columns.yaml rename to packages/traceloop-sdk/tests/datasets/cassettes/test_columns_operations/test_get_dataset_with_columns.yaml diff --git a/packages/traceloop-sdk/tests/dataset/cassettes/test_dataset_operations/test_get_dataset_by_version.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_operations/test_get_dataset_by_version.yaml similarity index 100% rename from packages/traceloop-sdk/tests/dataset/cassettes/test_dataset_operations/test_get_dataset_by_version.yaml rename to packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_operations/test_get_dataset_by_version.yaml diff --git a/packages/traceloop-sdk/tests/dataset/cassettes/test_dataset_operations/test_publish_dataset.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_operations/test_publish_dataset.yaml similarity index 100% rename from packages/traceloop-sdk/tests/dataset/cassettes/test_dataset_operations/test_publish_dataset.yaml rename to packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_operations/test_publish_dataset.yaml diff --git a/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_external_attachments.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_external_attachments.yaml new file mode 100644 index 0000000000..dae34ba687 --- /dev/null +++ b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_external_attachments.yaml @@ -0,0 +1,245 @@ +interactions: +- request: + body: '{"slug": "test-dataset-external-attachments", "name": "Products with External + Media", "description": "Test dataset with external URLs", "columns": [{"slug": + "name", "name": 
"Product Name", "type": "string"}, {"slug": "price", "name": + "Price", "type": "number"}, {"slug": "video", "name": "Demo Video", "type": + "file"}, {"slug": "manual", "name": "Manual", "type": "file"}], "rows": [{"name": + "Widget Pro", "price": 99.99, "video": null, "manual": null}, {"name": "Gadget + Plus", "price": 149.99, "video": null, "manual": null}]}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '527' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets + response: + body: + string: '{"id":"cmidi6rjt001x01r5bsu7xlhr","slug":"test-dataset-external-attachments","name":"Products + with External Media","description":"Test dataset with external URLs","columns":{"manual":{"slug":"manual","name":"Manual","type":"file"},"name":{"slug":"name","name":"Product + Name","type":"string"},"price":{"slug":"price","name":"Price","type":"number"},"video":{"slug":"video","name":"Demo + Video","type":"file"}},"total_columns":4,"created_at":"2025-11-24T18:51:41.177964302Z","updated_at":"2025-11-24T18:51:41.177964356Z","rows":[{"id":"cmidi6rk3001y01r56v1z7q7k","row_index":1,"values":{"manual":null,"name":"Widget + Pro","price":99.99,"video":null},"created_at":"2025-11-24T18:51:41.191181097Z","updated_at":"2025-11-24T18:51:41.191181097Z"},{"id":"cmidi6rk3001z01r58myok3jg","row_index":2,"values":{"manual":null,"name":"Gadget + Plus","price":149.99,"video":null},"created_at":"2025-11-24T18:51:41.191181097Z","updated_at":"2025-11-24T18:51:41.191181097Z"}]}' + headers: + CF-RAY: + - 9a3b195dcaffd0ed-TLV + Connection: + - keep-alive + Content-Length: + - '955' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:51:41 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + cf-cache-status: + - DYNAMIC + 
referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - 892f1a76b9e9da2d11beb336f90f3e17 + x-kong-upstream-latency: + - '25' + status: + code: 201 + message: Created +- request: + body: '{"type": "video", "url": "https://www.youtube.com/watch?v=demo1", "metadata": + {"title": "Widget Pro Demo", "duration": "3:45"}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '127' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-external-attachments/rows/cmidi6rk3001y01r56v1z7q7k/cells/video/external-url + response: + body: + string: '{"storage":"external","success":true,"url":"https://www.youtube.com/watch?v=demo1"}' + headers: + CF-RAY: + - 9a3b195f2d7c9b09-TLV + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:51:41 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + Transfer-Encoding: + - chunked + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - 57bd7938e041191b32381f0d3f3f9e73 + x-kong-upstream-latency: + - '17' + status: + code: 200 + message: OK +- request: + body: '{"type": "file", "url": "https://docs.google.com/document/d/widget-manual", + "metadata": {"pages": 25}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '102' + Content-Type: + - application/json + User-Agent: + - 
python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-external-attachments/rows/cmidi6rk3001y01r56v1z7q7k/cells/manual/external-url + response: + body: + string: '{"storage":"external","success":true,"url":"https://docs.google.com/document/d/widget-manual"}' + headers: + CF-RAY: + - 9a3b19607f0b9d70-TLV + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:51:41 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + Transfer-Encoding: + - chunked + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - dc26aa33261e190afe72264d77ef703f + x-kong-upstream-latency: + - '14' + status: + code: 200 + message: OK +- request: + body: '{"type": "video", "url": "https://vimeo.com/demo2", "metadata": {}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '67' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-external-attachments/rows/cmidi6rk3001z01r58myok3jg/cells/video/external-url + response: + body: + string: '{"storage":"external","success":true,"url":"https://vimeo.com/demo2"}' + headers: + CF-RAY: + - 9a3b1961cfca9384-TLV + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:51:41 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + Transfer-Encoding: + - chunked + cf-cache-status: + - DYNAMIC + referrer-policy: + - 
strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - d40f6a00071a0219db2d8a3220af8460 + x-kong-upstream-latency: + - '15' + status: + code: 200 + message: OK +version: 1 diff --git a/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_file_attachments_mocked.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_file_attachments_mocked.yaml new file mode 100644 index 0000000000..12e8bf99b4 --- /dev/null +++ b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_file_attachments_mocked.yaml @@ -0,0 +1,295 @@ +interactions: +- request: + body: '{"slug": "test-dataset-file-attachments", "name": "Products with Files", + "description": null, "columns": [{"slug": "name", "name": "Name", "type": "string"}, + {"slug": "image", "name": "Image", "type": "file"}, {"slug": "manual", "name": + "Manual", "type": "file"}], "rows": [{"name": "Product A", "image": null, "manual": + null}]}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '328' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets + response: + body: + string: '{"id":"cmidi707h002001r5flvjw2kb","slug":"test-dataset-file-attachments","name":"Products + with 
Files","columns":{"image":{"slug":"image","name":"Image","type":"file"},"manual":{"slug":"manual","name":"Manual","type":"file"},"name":{"slug":"name","name":"Name","type":"string"}},"total_columns":3,"created_at":"2025-11-24T18:51:52.397956969Z","updated_at":"2025-11-24T18:51:52.397957046Z","rows":[{"id":"cmidi707m002101r55mui89ue","row_index":1,"values":{"image":null,"manual":null,"name":"Product + A"},"created_at":"2025-11-24T18:51:52.405313955Z","updated_at":"2025-11-24T18:51:52.405313955Z"}]}' + headers: + CF-RAY: + - 9a3b19a3fc8a935b-TLV + Connection: + - keep-alive + Content-Length: + - '595' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:51:52 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - 09b8232cac028f2ce957157a14730136 + x-kong-upstream-latency: + - '14' + status: + code: 201 + message: Created +- request: + body: '{"type": "image", "file_name": "tmpdch0oqhw.jpg", "content_type": "image/jpeg", + "with_thumbnail": false, "metadata": {"alt_text": "Product A image"}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '149' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-file-attachments/rows/cmidi707m002101r55mui89ue/cells/image/upload-url + response: + body: + string: 
'{"upload_url":"https://traceloop-staging-files.s3.amazonaws.com/c108269c-cf1e-4ac6-a7e4-5a456cc9fdb7/8a72d3a2-42fc-4128-b145-a6d9ae98cd94.jpg?X-Amz-Algorithm=AWS4-HMAC-SHA256\u0026X-Amz-Credential=ASIAQEMAC2MS4BWMXJDU%2F20251124%2Fus-east-1%2Fs3%2Faws4_request\u0026X-Amz-Date=20251124T185152Z\u0026X-Amz-Expires=3600\u0026X-Amz-Security-Token=IQoJb3JpZ2luX2VjEJP%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJIMEYCIQCONTCaBRlO5kvg8sAs%2Ff9Y6jTWVoxZYZD%2BLI9IhRMmzQIhAOtze2VPR8dSsR7%2BIHtuSdo2rO0er2cHV%2F2ANvAdVPnnKvkECFsQABoMMDA5Mzk1NDI2MDg1IgwNq0aTuMvw1ZbN%2BNQq1gQhtdA%2BEFf%2B1xvs%2FuaGdLHYlqH7Q%2BaCO3or%2B3lycbLEOv7bgnXE7avgnE37GeiVXODMLVPLuuHK5hQAdCQWN4nqGFU%2F9H3u7P0TPiyRhfPhmDqf2d6cDSaedp%2BBP%2F21iXDIZiSEI%2BTqef6PyVblw3d6wWWfhEN0qhNs%2FvVwlG69yfZuWa2VUoXLi4nJRNbZRYa0iARamvFmdPBR%2FywKYIXnkhsKranY6%2FZB5b1eRkXwCcrv3%2FvXlONQL2F1kkesrE4cJ83%2FUUN4LvKE27JENyMRSHNBiiZ1ADMfvtQc95G9zxAWSf6Ua5lSe2bQaC%2B9cB5nRreVIV%2F1Nnc1%2BNYvimEI7c7BVGXd99ELmGcoRqigisAEiJkTk0uxQ7rVlC5xpG8QIe7kdCDdXVMQl1c3UyubFeo2%2FYbNrSnVL23J1iceRbnbULtcwwvM67V4%2BPu%2BkK1Naqv47D5X3oKex%2B%2BcQkNW0KpYJBfDIfkTjvqnzK3JTKFJR2%2FXdZYj51%2FXfJFRpU4qDqO1L1lY9ZcLdltTv6OlkKp0g0iW716SH07i28arIGK5ZQVxJ%2FlxNHHSd6H4TGdiDyJJG%2FwhzAppOX732junQjTSxf2MG8DKYvcZQ3A9UAj700WRKzgObjdvyLDV76LmjxbtmYbXYODIqrBe8dDXiTuEO89QQCZtbLUmzPiIxZ%2BDdvKqBSoxJUQ%2BkI4joejm3QmbUpWXPUXGSJEuY86LvCHBBKCm%2B2Nf%2BrA7kse00OIXFNlwrmcypjEeqf%2Br12aAEtwarGL3lfJVaZRdJmxvgVNXsyYLMLfDkskGOpcBVMVnUCa%2FgQaCqJFcVSLdKCrwZSjtBJ5REA6s7OK81wrClfA6uvMDbmCeffxvbqPaOAF%2BetvyxRjBRw30cZmpM1bgagB7Dw5LgoiJF2O8oKgn3LcrCqJk%2FQeXf2w7v1Dmv3vQ%2FS0Fzuooax0O3MFHA7T6%2BpL1ckZ%2Fv0J%2BdGItqYDyoOXk6NBvx2Qymtu8u98DY0VZFm140g%3D%3D\u0026X-Amz-SignedHeaders=content-type%3Bhost\u0026X-Amz-Signature=b6b9c0ad1a888fe3d17c6e4b0f643e147ba18685e2e03fe47d9d4dcff9a09904","storage_key":"c108269c-cf1e-4ac6-a7e4-5a456cc9fdb7/8a72d3a2-42fc-4128-b145-a6d9ae98cd94.jpg","expires_at":"2025-11-24T19:51:52.613303009Z","method":"PUT"}' + headers: + CF-RAY: + - 9a3b19a538750bed-TLV 
+ Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:51:52 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + Transfer-Encoding: + - chunked + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - 40c5c756858e7f7ee5a55f21c9f4e14c + x-kong-upstream-latency: + - '15' + status: + code: 200 + message: OK +- request: + body: '{"status": "success", "metadata": {"alt_text": "Product A image", "size_bytes": + 15}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '84' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: PUT + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-file-attachments/rows/cmidi707m002101r55mui89ue/cells/image/upload-status + response: + body: + string: '{"success":true,"status":"success"}' + headers: + CF-RAY: + - 9a3b19a68ea5d0ed-TLV + Connection: + - keep-alive + Content-Length: + - '35' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:51:52 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - 3e785a2cf7631358c20420649635d464 + x-kong-upstream-latency: + - '14' + status: + code: 200 + message: OK +- request: + body: '{"type": "file", "file_name": "tmpq7gdzjjq.pdf", "content_type": "application/pdf", + "with_thumbnail": false, 
"metadata": {"version": "1.0"}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '140' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-file-attachments/rows/cmidi707m002101r55mui89ue/cells/manual/upload-url + response: + body: + string: '{"upload_url":"https://traceloop-staging-files.s3.amazonaws.com/c108269c-cf1e-4ac6-a7e4-5a456cc9fdb7/b1dbc28c-09f8-42e3-9492-b0b42832fd13.pdf?X-Amz-Algorithm=AWS4-HMAC-SHA256\u0026X-Amz-Credential=ASIAQEMAC2MS4BWMXJDU%2F20251124%2Fus-east-1%2Fs3%2Faws4_request\u0026X-Amz-Date=20251124T185153Z\u0026X-Amz-Expires=3600\u0026X-Amz-Security-Token=IQoJb3JpZ2luX2VjEJP%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJIMEYCIQCONTCaBRlO5kvg8sAs%2Ff9Y6jTWVoxZYZD%2BLI9IhRMmzQIhAOtze2VPR8dSsR7%2BIHtuSdo2rO0er2cHV%2F2ANvAdVPnnKvkECFsQABoMMDA5Mzk1NDI2MDg1IgwNq0aTuMvw1ZbN%2BNQq1gQhtdA%2BEFf%2B1xvs%2FuaGdLHYlqH7Q%2BaCO3or%2B3lycbLEOv7bgnXE7avgnE37GeiVXODMLVPLuuHK5hQAdCQWN4nqGFU%2F9H3u7P0TPiyRhfPhmDqf2d6cDSaedp%2BBP%2F21iXDIZiSEI%2BTqef6PyVblw3d6wWWfhEN0qhNs%2FvVwlG69yfZuWa2VUoXLi4nJRNbZRYa0iARamvFmdPBR%2FywKYIXnkhsKranY6%2FZB5b1eRkXwCcrv3%2FvXlONQL2F1kkesrE4cJ83%2FUUN4LvKE27JENyMRSHNBiiZ1ADMfvtQc95G9zxAWSf6Ua5lSe2bQaC%2B9cB5nRreVIV%2F1Nnc1%2BNYvimEI7c7BVGXd99ELmGcoRqigisAEiJkTk0uxQ7rVlC5xpG8QIe7kdCDdXVMQl1c3UyubFeo2%2FYbNrSnVL23J1iceRbnbULtcwwvM67V4%2BPu%2BkK1Naqv47D5X3oKex%2B%2BcQkNW0KpYJBfDIfkTjvqnzK3JTKFJR2%2FXdZYj51%2FXfJFRpU4qDqO1L1lY9ZcLdltTv6OlkKp0g0iW716SH07i28arIGK5ZQVxJ%2FlxNHHSd6H4TGdiDyJJG%2FwhzAppOX732junQjTSxf2MG8DKYvcZQ3A9UAj700WRKzgObjdvyLDV76LmjxbtmYbXYODIqrBe8dDXiTuEO89QQCZtbLUmzPiIxZ%2BDdvKqBSoxJUQ%2BkI4joejm3QmbUpWXPUXGSJEuY86LvCHBBKCm%2B2Nf%2BrA7kse00OIXFNlwrmcypjEeqf%2Br12aAEtwarGL3lfJVaZRdJmxvgVNXsyYLMLfDkskGOpcBVMVnUCa%2FgQaCqJFcVSLdKCrwZSjtBJ5REA6s7OK81wrClfA6uvMDbmCeffxvbqPaOAF%2BetvyxRjBRw30cZmpM1bgagB7Dw5Lgoi
JF2O8oKgn3LcrCqJk%2FQeXf2w7v1Dmv3vQ%2FS0Fzuooax0O3MFHA7T6%2BpL1ckZ%2Fv0J%2BdGItqYDyoOXk6NBvx2Qymtu8u98DY0VZFm140g%3D%3D\u0026X-Amz-SignedHeaders=content-type%3Bhost\u0026X-Amz-Signature=a80916a4f8087f32758ffc40c602de46bd9af2a6f91c4813d1fcc225a4940ac9","storage_key":"c108269c-cf1e-4ac6-a7e4-5a456cc9fdb7/b1dbc28c-09f8-42e3-9492-b0b42832fd13.pdf","expires_at":"2025-11-24T19:51:53.021374552Z","method":"PUT"}' + headers: + CF-RAY: + - 9a3b19a7d8919b09-TLV + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:51:53 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + Transfer-Encoding: + - chunked + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - d7a52c73f43c3f71d5747258b3ccd626 + x-kong-upstream-latency: + - '19' + status: + code: 200 + message: OK +- request: + body: '{"status": "success", "metadata": {"version": "1.0", "size_bytes": 13}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '71' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: PUT + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-file-attachments/rows/cmidi707m002101r55mui89ue/cells/manual/upload-status + response: + body: + string: '{"success":true,"status":"success"}' + headers: + CF-RAY: + - 9a3b19a92ad1d31d-TLV + Connection: + - keep-alive + Content-Length: + - '35' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:51:53 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + cf-cache-status: + - DYNAMIC + referrer-policy: + - 
strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - 1a5b587bd4e798252abc4766ef6ac27b + x-kong-upstream-latency: + - '19' + status: + code: 200 + message: OK +version: 1 diff --git a/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_in_memory_attachment.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_in_memory_attachment.yaml new file mode 100644 index 0000000000..e4445440c9 --- /dev/null +++ b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_in_memory_attachment.yaml @@ -0,0 +1,178 @@ +interactions: +- request: + body: '{"slug": "test-dataset-memory-attachment", "name": "Memory Attachment Dataset", + "description": null, "columns": [{"slug": "name", "name": "Name", "type": "string"}, + {"slug": "image", "name": "Image", "type": "file"}], "rows": [{"name": "Test + Item", "image": null}]}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '265' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets + response: + body: + string: '{"id":"cmidno3nq002t01r53voiw01a","slug":"test-dataset-memory-attachment","name":"Memory + Attachment Dataset","columns":{"image":{"slug":"image","name":"Image","type":"file"},"name":{"slug":"name","name":"Name","type":"string"}},"total_columns":2,"created_at":"2025-11-24T21:25:08.102105041Z","updated_at":"2025-11-24T21:25:08.102105127Z","rows":[{"id":"cmidno3nx002u01r55spkw8dn","row_index":1,"values":{"image":null,"name":"Test + 
Item"},"created_at":"2025-11-24T21:25:08.113547367Z","updated_at":"2025-11-24T21:25:08.113547367Z"}]}' + headers: + CF-RAY: + - 9a3bfa250a8b7546-TLV + Connection: + - keep-alive + Content-Length: + - '531' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 21:25:08 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '1' + x-kong-request-id: + - b0f0e6271ec3e30f1fa55322d8148871 + x-kong-upstream-latency: + - '22' + status: + code: 201 + message: Created +- request: + body: '{"type": "image", "file_name": "test.jpg", "content_type": "image/jpeg", + "with_thumbnail": false, "metadata": {}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '113' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-memory-attachment/rows/cmidno3nx002u01r55spkw8dn/cells/image/upload-url + response: + body: + string: 
'{"upload_url":"https://traceloop-staging-files.s3.amazonaws.com/c108269c-cf1e-4ac6-a7e4-5a456cc9fdb7/744992d1-7f89-4bf7-9978-38db53dd12ae.jpg?X-Amz-Algorithm=AWS4-HMAC-SHA256\u0026X-Amz-Credential=ASIAQEMAC2MSQIN6EEGS%2F20251124%2Fus-east-1%2Fs3%2Faws4_request\u0026X-Amz-Date=20251124T212508Z\u0026X-Amz-Expires=3600\u0026X-Amz-Security-Token=IQoJb3JpZ2luX2VjEJb%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJGMEQCIH3%2Bh%2BsH0KvYJqXGuQ0GF0xk8%2BkRicgYH8Ltvo08DLW%2FAiAfcHu0cnrxCIkB7hd2UBATZ1ZSbOQq8L38mh6%2BkJf18Cr5BAheEAAaDDAwOTM5NTQyNjA4NSIMCJbAo9CFNseeyr20KtYEipuqeoYbzFZDx8VMzfnyRA58M7AkLLzZ14X6E8WNz3Fo%2BRbtOZeDaIgG6Q1vvqFtg%2B3JxRcNUyU3Q372FdJsfQZnWlI%2BpmZLykoja%2FasIHEz%2B8h9eTz8tl97XYnWafZTgVHFWD2SJXmFchA0rLVVyRi0YvG6gq4X02ns0Xgn718hzuBvSqbnJ82oa0RvhrAyKeGvCPQwxhghqyNpG0ntXxJzmkgezFZSelFnz7T4FwKXr8pKgEu%2BRCPBG%2BSHGDT1DhZ3sG2F5ORq8aJyj%2FKkGhENcyogtju9Kgaq8oVMJtV%2FrQEkxuftMFrnvaO9vR4e2tgk6p%2FJLv7akwI61%2Fn89pdSBjB0pklC4gGk5zBWwnPq814OhIGcCzmlf1gdD8k4DR1AUCy9Q2wGwnWScpsSEipr3r7i0kvXZIRlx2YVGXOnk0XJHKd32RM9wrpcFb0LSxkABelmMGsYll%2BTktuBkOKJidBLBjzE2kU4oKbbmtcv8FKyXTwXQALerpO54UVzuABh6gmLAWAZspWEcJ%2B2M8tC1n5GxFICqvhgXzFHL7x%2FilwJcjhH9I%2Bt28K7RGIqQpQfTffC2gJtUAUcohLFtmdahNQ6DHLuPiBSKk9XUFd8l5URLndJVNhzMos580RO%2FrcIHQv8FU95kEyrNuaCH6nxk3xC2ef97AyikYu%2BPrdfRDn2JPENF86x4q0Qu1FzErRBQX%2Ft6osQ9TVP3wdnswk41b9EQO4nxzikMp9h1MEoZUETf1nWpelQNxgva7PvCm7ci0NVDNrm20PfbIdNSmnaoYxFsjDykJPJBjqZASCiHdsuYM4lR%2FDje4Zp0IzfS2KDr8069je%2FGD91v%2FurCR5IQMToDFUyIiHRBZ5F%2FW93oSK6z3OpqOXoxyG2iDmmmY5VxR9iRNqjVFu%2FjZLZh53RetQgplscJRgnqD3q8ICez2qwQJY%2FcPwnVddN6CngNOz%2FPbjZ33eETP8V9IizhuKqBA4ExS75x2n50DpzQZei%2ByGCuvYeZw%3D%3D\u0026X-Amz-SignedHeaders=content-type%3Bhost\u0026X-Amz-Signature=e961a868ba46c0a58b13af2d7a2f56eaeec39c6f177f20050341ec45af00f1fb","storage_key":"c108269c-cf1e-4ac6-a7e4-5a456cc9fdb7/744992d1-7f89-4bf7-9978-38db53dd12ae.jpg","expires_at":"2025-11-24T22:25:08.316243221Z","method":"PUT"}' + headers: + CF-RAY: + - 9a3bfa266dfa9d70-TLV + Connection: + - 
keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 21:25:08 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + Transfer-Encoding: + - chunked + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - a55d3d2fcc57500eaca0204e73fbb731 + x-kong-upstream-latency: + - '22' + status: + code: 200 + message: OK +- request: + body: '{"status": "success", "metadata": {"size_bytes": 16}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '53' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: PUT + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-memory-attachment/rows/cmidno3nx002u01r55spkw8dn/cells/image/upload-status + response: + body: + string: '{"success":true,"status":"success"}' + headers: + CF-RAY: + - 9a3bfa27aa82f9c6-TLV + Connection: + - keep-alive + Content-Length: + - '35' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 21:25:08 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - 45288e44ceb94d4c5e58c12e9942955e + x-kong-upstream-latency: + - '15' + status: + code: 200 + message: OK +version: 1 diff --git a/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_mixed_attachments.yaml 
b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_mixed_attachments.yaml new file mode 100644 index 0000000000..4c432e149c --- /dev/null +++ b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_with_mixed_attachments.yaml @@ -0,0 +1,295 @@ +interactions: +- request: + body: '{"slug": "test-dataset-mixed-attachments", "name": "Mixed Attachments Dataset", + "description": null, "columns": [{"slug": "id", "name": "ID", "type": "string"}, + {"slug": "file", "name": "File", "type": "file"}, {"slug": "video", "name": + "Video", "type": "file"}], "rows": [{"id": "001", "file": null, "video": null}, + {"id": "002", "file": null, "video": null}]}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '361' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets + response: + body: + string: '{"id":"cmidi78u3002201r54gpwb6t2","slug":"test-dataset-mixed-attachments","name":"Mixed + Attachments Dataset","columns":{"file":{"slug":"file","name":"File","type":"file"},"id":{"slug":"id","name":"ID","type":"string"},"video":{"slug":"video","name":"Video","type":"file"}},"total_columns":3,"created_at":"2025-11-24T18:52:03.579985772Z","updated_at":"2025-11-24T18:52:03.579985844Z","rows":[{"id":"cmidi78um002301r52ayx2y15","row_index":1,"values":{"file":null,"id":"001","video":null},"created_at":"2025-11-24T18:52:03.601881471Z","updated_at":"2025-11-24T18:52:03.601881471Z"},{"id":"cmidi78um002401r5y67rwgzu","row_index":2,"values":{"file":null,"id":"002","video":null},"created_at":"2025-11-24T18:52:03.601881471Z","updated_at":"2025-11-24T18:52:03.601881471Z"}]}' + headers: + CF-RAY: + - 9a3b19e9cf513120-TLV + Connection: + - keep-alive + Content-Length: + - '768' + Content-Type: + - 
application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:52:03 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - b1f5b90cc7d3005b699e1752676c8816 + x-kong-upstream-latency: + - '46' + status: + code: 201 + message: Created +- request: + body: '{"type": "file", "file_name": "tmp1xhli07m.txt", "content_type": "text/plain", + "with_thumbnail": false, "metadata": {}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '119' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-mixed-attachments/rows/cmidi78um002301r52ayx2y15/cells/file/upload-url + response: + body: + string: 
'{"upload_url":"https://traceloop-staging-files.s3.amazonaws.com/c108269c-cf1e-4ac6-a7e4-5a456cc9fdb7/12202af5-fb12-4f17-a1e9-687be9f20c83.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256\u0026X-Amz-Credential=ASIAQEMAC2MS4BWMXJDU%2F20251124%2Fus-east-1%2Fs3%2Faws4_request\u0026X-Amz-Date=20251124T185203Z\u0026X-Amz-Expires=3600\u0026X-Amz-Security-Token=IQoJb3JpZ2luX2VjEJP%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJIMEYCIQCONTCaBRlO5kvg8sAs%2Ff9Y6jTWVoxZYZD%2BLI9IhRMmzQIhAOtze2VPR8dSsR7%2BIHtuSdo2rO0er2cHV%2F2ANvAdVPnnKvkECFsQABoMMDA5Mzk1NDI2MDg1IgwNq0aTuMvw1ZbN%2BNQq1gQhtdA%2BEFf%2B1xvs%2FuaGdLHYlqH7Q%2BaCO3or%2B3lycbLEOv7bgnXE7avgnE37GeiVXODMLVPLuuHK5hQAdCQWN4nqGFU%2F9H3u7P0TPiyRhfPhmDqf2d6cDSaedp%2BBP%2F21iXDIZiSEI%2BTqef6PyVblw3d6wWWfhEN0qhNs%2FvVwlG69yfZuWa2VUoXLi4nJRNbZRYa0iARamvFmdPBR%2FywKYIXnkhsKranY6%2FZB5b1eRkXwCcrv3%2FvXlONQL2F1kkesrE4cJ83%2FUUN4LvKE27JENyMRSHNBiiZ1ADMfvtQc95G9zxAWSf6Ua5lSe2bQaC%2B9cB5nRreVIV%2F1Nnc1%2BNYvimEI7c7BVGXd99ELmGcoRqigisAEiJkTk0uxQ7rVlC5xpG8QIe7kdCDdXVMQl1c3UyubFeo2%2FYbNrSnVL23J1iceRbnbULtcwwvM67V4%2BPu%2BkK1Naqv47D5X3oKex%2B%2BcQkNW0KpYJBfDIfkTjvqnzK3JTKFJR2%2FXdZYj51%2FXfJFRpU4qDqO1L1lY9ZcLdltTv6OlkKp0g0iW716SH07i28arIGK5ZQVxJ%2FlxNHHSd6H4TGdiDyJJG%2FwhzAppOX732junQjTSxf2MG8DKYvcZQ3A9UAj700WRKzgObjdvyLDV76LmjxbtmYbXYODIqrBe8dDXiTuEO89QQCZtbLUmzPiIxZ%2BDdvKqBSoxJUQ%2BkI4joejm3QmbUpWXPUXGSJEuY86LvCHBBKCm%2B2Nf%2BrA7kse00OIXFNlwrmcypjEeqf%2Br12aAEtwarGL3lfJVaZRdJmxvgVNXsyYLMLfDkskGOpcBVMVnUCa%2FgQaCqJFcVSLdKCrwZSjtBJ5REA6s7OK81wrClfA6uvMDbmCeffxvbqPaOAF%2BetvyxRjBRw30cZmpM1bgagB7Dw5LgoiJF2O8oKgn3LcrCqJk%2FQeXf2w7v1Dmv3vQ%2FS0Fzuooax0O3MFHA7T6%2BpL1ckZ%2Fv0J%2BdGItqYDyoOXk6NBvx2Qymtu8u98DY0VZFm140g%3D%3D\u0026X-Amz-SignedHeaders=content-type%3Bhost\u0026X-Amz-Signature=6053a316b72cab371a8881c8d824c50c1baca0a834eec11f627255458a6a0391","storage_key":"c108269c-cf1e-4ac6-a7e4-5a456cc9fdb7/12202af5-fb12-4f17-a1e9-687be9f20c83.txt","expires_at":"2025-11-24T19:52:03.819615102Z","method":"PUT"}' + headers: + CF-RAY: + - 9a3b19eb4f055591-TLV 
+ Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:52:03 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + Transfer-Encoding: + - chunked + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - e00b261baf4d0a0abdc73d341d2e2f08 + x-kong-upstream-latency: + - '20' + status: + code: 200 + message: OK +- request: + body: '{"status": "success", "metadata": {"size_bytes": 12}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '53' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: PUT + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-mixed-attachments/rows/cmidi78um002301r52ayx2y15/cells/file/upload-status + response: + body: + string: '{"success":true,"status":"success"}' + headers: + CF-RAY: + - 9a3b19ecaa2e3120-TLV + Connection: + - keep-alive + Content-Length: + - '35' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:52:04 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '1' + x-kong-request-id: + - a4b19d4245b0da60828263406c283c6d + x-kong-upstream-latency: + - '22' + status: + code: 200 + message: OK +- request: + body: '{"type": "video", "url": "https://youtube.com/watch?v=test", "metadata": + {}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + 
Connection: + - keep-alive + Content-Length: + - '76' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-mixed-attachments/rows/cmidi78um002301r52ayx2y15/cells/video/external-url + response: + body: + string: '{"storage":"external","success":true,"url":"https://youtube.com/watch?v=test"}' + headers: + CF-RAY: + - 9a3b19eded6f1833-TLV + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:52:04 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + Transfer-Encoding: + - chunked + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '1' + x-kong-request-id: + - 21f33a7abb50627f0c869d5f03382e0e + x-kong-upstream-latency: + - '34' + status: + code: 200 + message: OK +- request: + body: '{"type": "video", "url": "https://vimeo.com/test", "metadata": {}}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '66' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets/test-dataset-mixed-attachments/rows/cmidi78um002401r5y67rwgzu/cells/video/external-url + response: + body: + string: '{"storage":"external","success":true,"url":"https://vimeo.com/test"}' + headers: + CF-RAY: + - 9a3b19ef4a669b09-TLV + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 18:52:04 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + 
Transfer-Encoding: + - chunked + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '1' + x-kong-request-id: + - bcb91f1be2654f7fbb28e6216dafd927 + x-kong-upstream-latency: + - '20' + status: + code: 200 + message: OK +version: 1 diff --git a/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_without_attachments.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_without_attachments.yaml new file mode 100644 index 0000000000..f74b4ab48e --- /dev/null +++ b/packages/traceloop-sdk/tests/datasets/cassettes/test_dataset_with_attachments/test_create_dataset_without_attachments.yaml @@ -0,0 +1,62 @@ +interactions: +- request: + body: '{"slug": "test-dataset-no-attachments", "name": "Regular Dataset", "description": + null, "columns": [{"slug": "id", "name": "ID", "type": "string"}, {"slug": "value", + "name": "Value", "type": "number"}], "rows": [{"id": "A", "value": 100}, {"id": + "B", "value": 200}]}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '266' + Content-Type: + - application/json + User-Agent: + - python-requests/2.32.3 + X-Traceloop-SDK-Version: + - 1.0.0 + method: POST + uri: https://api-staging.traceloop.com/v2/datasets + response: + body: + string: '{"id":"cmidno458002v01r5hk9xpcyo","slug":"test-dataset-no-attachments","name":"Regular + 
Dataset","columns":{"id":{"slug":"id","name":"ID","type":"string"},"value":{"slug":"value","name":"Value","type":"number"}},"total_columns":2,"created_at":"2025-11-24T21:25:08.732664156Z","updated_at":"2025-11-24T21:25:08.73266424Z","rows":[{"id":"cmidno45d002w01r5c1k3jma2","row_index":1,"values":{"id":"A","value":100},"created_at":"2025-11-24T21:25:08.740245074Z","updated_at":"2025-11-24T21:25:08.740245074Z"},{"id":"cmidno45d002x01r5peiayzfx","row_index":2,"values":{"id":"B","value":200},"created_at":"2025-11-24T21:25:08.740245074Z","updated_at":"2025-11-24T21:25:08.740245074Z"}]}' + headers: + CF-RAY: + - 9a3bfa290e3c9384-TLV + Connection: + - keep-alive + Content-Length: + - '675' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 24 Nov 2025 21:25:08 GMT + Permissions-Policy: + - geolocation=(self), microphone=() + Server: + - cloudflare + cf-cache-status: + - DYNAMIC + referrer-policy: + - strict-origin-when-cross-origin + strict-transport-security: + - max-age=7776000; includeSubDomains + via: + - kong/3.7.1 + x-content-type: + - nosniff + x-kong-proxy-latency: + - '0' + x-kong-request-id: + - c2aa7832bed4932664eb0348d36b923c + x-kong-upstream-latency: + - '16' + status: + code: 201 + message: Created +version: 1 diff --git a/packages/traceloop-sdk/tests/dataset/cassettes/test_rows_operations/test_add_rows.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_rows_operations/test_add_rows.yaml similarity index 100% rename from packages/traceloop-sdk/tests/dataset/cassettes/test_rows_operations/test_add_rows.yaml rename to packages/traceloop-sdk/tests/datasets/cassettes/test_rows_operations/test_add_rows.yaml diff --git a/packages/traceloop-sdk/tests/dataset/cassettes/test_rows_operations/test_create_dataset_and_add_rows.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_rows_operations/test_create_dataset_and_add_rows.yaml similarity index 100% rename from 
packages/traceloop-sdk/tests/dataset/cassettes/test_rows_operations/test_create_dataset_and_add_rows.yaml rename to packages/traceloop-sdk/tests/datasets/cassettes/test_rows_operations/test_create_dataset_and_add_rows.yaml diff --git a/packages/traceloop-sdk/tests/dataset/cassettes/test_rows_operations/test_dataset_deletion.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_rows_operations/test_dataset_deletion.yaml similarity index 100% rename from packages/traceloop-sdk/tests/dataset/cassettes/test_rows_operations/test_dataset_deletion.yaml rename to packages/traceloop-sdk/tests/datasets/cassettes/test_rows_operations/test_dataset_deletion.yaml diff --git a/packages/traceloop-sdk/tests/dataset/cassettes/test_rows_operations/test_dataset_row_operations_api_errors.yaml b/packages/traceloop-sdk/tests/datasets/cassettes/test_rows_operations/test_dataset_row_operations_api_errors.yaml similarity index 100% rename from packages/traceloop-sdk/tests/dataset/cassettes/test_rows_operations/test_dataset_row_operations_api_errors.yaml rename to packages/traceloop-sdk/tests/datasets/cassettes/test_rows_operations/test_dataset_row_operations_api_errors.yaml diff --git a/packages/traceloop-sdk/tests/dataset/test_columns_operations.py b/packages/traceloop-sdk/tests/datasets/test_columns_operations.py similarity index 100% rename from packages/traceloop-sdk/tests/dataset/test_columns_operations.py rename to packages/traceloop-sdk/tests/datasets/test_columns_operations.py diff --git a/packages/traceloop-sdk/tests/datasets/test_create_dataset.py b/packages/traceloop-sdk/tests/datasets/test_create_dataset.py index 6ca58effbf..424eed3843 100644 --- a/packages/traceloop-sdk/tests/datasets/test_create_dataset.py +++ b/packages/traceloop-sdk/tests/datasets/test_create_dataset.py @@ -9,7 +9,7 @@ except ImportError: PANDAS_AVAILABLE = False -from traceloop.sdk.dataset.dataset import Dataset +from traceloop.sdk.datasets.dataset import Dataset from .test_constants import 
TestConstants diff --git a/packages/traceloop-sdk/tests/dataset/test_dataset_operations.py b/packages/traceloop-sdk/tests/datasets/test_dataset_operations.py similarity index 100% rename from packages/traceloop-sdk/tests/dataset/test_dataset_operations.py rename to packages/traceloop-sdk/tests/datasets/test_dataset_operations.py diff --git a/packages/traceloop-sdk/tests/datasets/test_dataset_with_attachments.py b/packages/traceloop-sdk/tests/datasets/test_dataset_with_attachments.py new file mode 100644 index 0000000000..5761db94cd --- /dev/null +++ b/packages/traceloop-sdk/tests/datasets/test_dataset_with_attachments.py @@ -0,0 +1,288 @@ +"""Tests for creating datasets with initial attachments.""" + +import os +import tempfile +from unittest.mock import patch + +import pytest +from traceloop.sdk.datasets import ( + Attachment, + ExternalAttachment, + FileCellType, +) +from traceloop.sdk.datasets.model import ( + ColumnDefinition, + ColumnType, + CreateDatasetRequest, +) + + +@pytest.mark.vcr +def test_create_dataset_with_external_attachments(datasets): + """Test creating dataset with ExternalAttachment objects in rows.""" + + # Create dataset request with external attachments + dataset_request = CreateDatasetRequest( + slug="test-dataset-external-attachments", + name="Products with External Media", + description="Test dataset with external URLs", + columns=[ + ColumnDefinition(slug="name", name="Product Name", type=ColumnType.STRING), + ColumnDefinition(slug="price", name="Price", type=ColumnType.NUMBER), + ColumnDefinition(slug="video", name="Demo Video", type=ColumnType.FILE), + ColumnDefinition(slug="manual", name="Manual", type=ColumnType.FILE), + ], + rows=[ + { + "name": "Widget Pro", + "price": 99.99, + "video": ExternalAttachment( + url="https://www.youtube.com/watch?v=demo1", + file_type=FileCellType.VIDEO, + metadata={"title": "Widget Pro Demo", "duration": "3:45"}, + ), + "manual": ExternalAttachment( + 
url="https://docs.google.com/document/d/widget-manual", + file_type=FileCellType.FILE, + metadata={"pages": 25}, + ), + }, + { + "name": "Gadget Plus", + "price": 149.99, + "video": ExternalAttachment( + url="https://vimeo.com/demo2", file_type=FileCellType.VIDEO + ), + "manual": None, # No manual for this product + }, + ], + ) + + # Create the dataset + dataset = datasets.create(dataset_request) + + # Verify dataset was created + assert dataset.slug == "test-dataset-external-attachments" + assert len(dataset.rows) == 2 + + # Verify first row attachments + row1 = dataset.rows[0] + assert row1.values["name"] == "Widget Pro" + assert row1.values["price"] == 99.99 + + # Check video attachment + video_cell = row1.values.get("video") + assert video_cell is not None + assert video_cell["storage"] == "external" + assert video_cell["url"] == "https://www.youtube.com/watch?v=demo1" + assert video_cell["type"] == "video" + assert video_cell["status"] == "success" + + # Check manual attachment + manual_cell = row1.values.get("manual") + assert manual_cell is not None + assert manual_cell["storage"] == "external" + assert manual_cell["url"] == "https://docs.google.com/document/d/widget-manual" + + # Verify second row + row2 = dataset.rows[1] + assert row2.values["name"] == "Gadget Plus" + video_cell2 = row2.values.get("video") + assert video_cell2 is not None + assert video_cell2["url"] == "https://vimeo.com/demo2" + assert row2.values.get("manual") is None # Should remain None + + +@pytest.mark.vcr +def test_create_dataset_with_file_attachments_mocked(datasets): + """Test creating dataset with Attachment objects (file uploads) using mocked S3.""" + + # Create test files + test_image = tempfile.NamedTemporaryFile(suffix=".jpg", delete=False) + test_image.write(b"fake image data") + test_image.close() + + test_pdf = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) + test_pdf.write(b"fake pdf data") + test_pdf.close() + + # Create dataset request with file attachments + 
dataset_request = CreateDatasetRequest( + slug="test-dataset-file-attachments", + name="Products with Files", + columns=[ + ColumnDefinition(slug="name", name="Name", type=ColumnType.STRING), + ColumnDefinition(slug="image", name="Image", type=ColumnType.FILE), + ColumnDefinition(slug="manual", name="Manual", type=ColumnType.FILE), + ], + rows=[ + { + "name": "Product A", + "image": Attachment( + file_path=test_image.name, + file_type=FileCellType.IMAGE, + metadata={"alt_text": "Product A image"}, + ), + "manual": Attachment( + file_path=test_pdf.name, + file_type=FileCellType.FILE, + metadata={"version": "1.0"}, + ), + } + ], + ) + + # Mock the S3 upload to succeed + with patch.object(Attachment, "_upload_to_s3", return_value=True): + dataset = datasets.create(dataset_request) + + # Verify dataset was created + assert dataset.slug == "test-dataset-file-attachments" + assert len(dataset.rows) == 1 + + # Verify attachments were processed + row = dataset.rows[0] + assert row.values["name"] == "Product A" + + image_cell = row.values.get("image") + assert image_cell is not None + assert image_cell["storage"] == "internal" + assert image_cell["type"] == "image" + assert image_cell["status"] == "success" + assert "storage_key" in image_cell + + manual_cell = row.values.get("manual") + assert manual_cell is not None + assert manual_cell["storage"] == "internal" + assert manual_cell["type"] == "file" + assert manual_cell["status"] == "success" + + # Clean up + os.unlink(test_image.name) + os.unlink(test_pdf.name) + + +@pytest.mark.vcr +def test_create_dataset_with_mixed_attachments(datasets): + """Test creating dataset with both Attachment and ExternalAttachment objects.""" + + # Create a test file + test_file = tempfile.NamedTemporaryFile(suffix=".txt", delete=False) + test_file.write(b"test content") + test_file.close() + + dataset_request = CreateDatasetRequest( + slug="test-dataset-mixed-attachments", + name="Mixed Attachments Dataset", + columns=[ + 
ColumnDefinition(slug="id", name="ID", type=ColumnType.STRING), + ColumnDefinition(slug="file", name="File", type=ColumnType.FILE), + ColumnDefinition(slug="video", name="Video", type=ColumnType.FILE), + ], + rows=[ + { + "id": "001", + "file": Attachment( + file_path=test_file.name, file_type=FileCellType.FILE + ), + "video": ExternalAttachment( + url="https://youtube.com/watch?v=test", file_type=FileCellType.VIDEO + ), + }, + { + "id": "002", + "file": None, # No file for this row + "video": ExternalAttachment( + url="https://vimeo.com/test", file_type=FileCellType.VIDEO + ), + }, + ], + ) + + # Mock S3 upload + with patch.object(Attachment, "_upload_to_s3", return_value=True): + dataset = datasets.create(dataset_request) + + # Verify both rows + assert len(dataset.rows) == 2 + + # First row should have both attachments + row1 = dataset.rows[0] + assert row1.values["file"]["storage"] == "internal" + assert row1.values["video"]["storage"] == "external" + assert row1.values["video"]["url"] == "https://youtube.com/watch?v=test" + + # Second row should have only video + row2 = dataset.rows[1] + assert row2.values["file"] is None + assert row2.values["video"]["storage"] == "external" + + # Clean up + os.unlink(test_file.name) + + +@pytest.mark.vcr +def test_create_dataset_with_in_memory_attachment(datasets): + """Test creating dataset with Attachment from in-memory data.""" + + # Create attachment from bytes + image_data = b"fake image bytes" + + dataset_request = CreateDatasetRequest( + slug="test-dataset-memory-attachment", + name="Memory Attachment Dataset", + columns=[ + ColumnDefinition(slug="name", name="Name", type=ColumnType.STRING), + ColumnDefinition(slug="image", name="Image", type=ColumnType.FILE), + ], + rows=[ + { + "name": "Test Item", + "image": Attachment( + data=image_data, + filename="test.jpg", + content_type="image/jpeg", + file_type=FileCellType.IMAGE, + ), + } + ], + ) + + # Mock S3 upload + with patch.object(Attachment, "_upload_to_s3", 
return_value=True): + dataset = datasets.create(dataset_request) + + # Verify + assert len(dataset.rows) == 1 + row = dataset.rows[0] + assert row.values["image"]["storage"] == "internal" + assert row.values["image"]["type"] == "image" + assert row.values["image"]["status"] == "success" + + +@pytest.mark.vcr +def test_create_dataset_without_attachments(datasets): + """Test that create() method works normally for datasets without attachments.""" + + dataset_request = CreateDatasetRequest( + slug="test-dataset-no-attachments", + name="Regular Dataset", + columns=[ + ColumnDefinition(slug="id", name="ID", type=ColumnType.STRING), + ColumnDefinition(slug="value", name="Value", type=ColumnType.NUMBER), + ], + rows=[ + {"id": "A", "value": 100}, + {"id": "B", "value": 200}, + ], + ) + + dataset = datasets.create(dataset_request) + + # Verify normal dataset creation + assert dataset.slug == "test-dataset-no-attachments" + assert len(dataset.rows) == 2 + assert dataset.rows[0].values["id"] == "A" + assert dataset.rows[0].values["value"] == 100 + assert dataset.rows[1].values["id"] == "B" + assert dataset.rows[1].values["value"] == 200 diff --git a/packages/traceloop-sdk/tests/datasets/test_datasets_operations.py b/packages/traceloop-sdk/tests/datasets/test_datasets_operations.py index 581621450d..342972c6dc 100644 --- a/packages/traceloop-sdk/tests/datasets/test_datasets_operations.py +++ b/packages/traceloop-sdk/tests/datasets/test_datasets_operations.py @@ -1,6 +1,6 @@ import pytest -from traceloop.sdk.dataset.dataset import Dataset -from traceloop.sdk.dataset.model import DatasetMetadata +from traceloop.sdk.datasets.dataset import Dataset +from traceloop.sdk.datasets.model import DatasetMetadata @pytest.mark.vcr diff --git a/packages/traceloop-sdk/tests/dataset/test_rows_operations.py b/packages/traceloop-sdk/tests/datasets/test_rows_operations.py similarity index 100% rename from packages/traceloop-sdk/tests/dataset/test_rows_operations.py rename to 
packages/traceloop-sdk/tests/datasets/test_rows_operations.py diff --git a/packages/traceloop-sdk/traceloop/sdk/dataset/__init__.py b/packages/traceloop-sdk/traceloop/sdk/dataset/__init__.py deleted file mode 100644 index 09a13c32f8..0000000000 --- a/packages/traceloop-sdk/traceloop/sdk/dataset/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from traceloop.sdk.dataset.dataset import Dataset -from traceloop.sdk.dataset.column import Column -from traceloop.sdk.dataset.row import Row -from traceloop.sdk.dataset.base import BaseDatasetEntity -from traceloop.sdk.dataset.model import ColumnType, DatasetMetadata - -__all__ = ["Dataset", "Column", "Row", "BaseDatasetEntity", "ColumnType", "DatasetMetadata"] diff --git a/packages/traceloop-sdk/traceloop/sdk/datasets/__init__.py b/packages/traceloop-sdk/traceloop/sdk/datasets/__init__.py index e69de29bb2..82cd3e91f5 100644 --- a/packages/traceloop-sdk/traceloop/sdk/datasets/__init__.py +++ b/packages/traceloop-sdk/traceloop/sdk/datasets/__init__.py @@ -0,0 +1,29 @@ +from traceloop.sdk.datasets.attachment import ( + Attachment, + AttachmentReference, + ExternalAttachment, +) +from traceloop.sdk.datasets.base import BaseDatasetEntity +from traceloop.sdk.datasets.column import Column +from traceloop.sdk.datasets.dataset import Dataset +from traceloop.sdk.datasets.model import ( + ColumnType, + DatasetMetadata, + FileCellType, + FileStorageType, +) +from traceloop.sdk.datasets.row import Row + +__all__ = [ + "Dataset", + "Column", + "Row", + "BaseDatasetEntity", + "ColumnType", + "DatasetMetadata", + "FileCellType", + "FileStorageType", + "Attachment", + "ExternalAttachment", + "AttachmentReference", +] diff --git a/packages/traceloop-sdk/traceloop/sdk/datasets/attachment.py b/packages/traceloop-sdk/traceloop/sdk/datasets/attachment.py new file mode 100644 index 0000000000..df76905da6 --- /dev/null +++ b/packages/traceloop-sdk/traceloop/sdk/datasets/attachment.py @@ -0,0 +1,288 @@ +""" +Attachment classes for handling file uploads 
import mimetypes
import os
from pathlib import Path
from typing import Any, Dict, Optional
from urllib.parse import urlsplit

import requests
from traceloop.sdk.client.http import HTTPClient

from .model import (
    ExternalURLRequest,
    FileCellType,
    FileStorageType,
    UploadStatusRequest,
    UploadURLRequest,
    UploadURLResponse,
)


class Attachment:
    """
    A file to be uploaded into a dataset cell.

    The content comes from exactly one source: a path on disk (``file_path``)
    or bytes already held in memory (``data``).
    """

    def __init__(
        self,
        file_path: Optional[str] = None,
        data: Optional[bytes] = None,
        filename: Optional[str] = None,
        content_type: Optional[str] = None,
        file_type: Optional[FileCellType] = None,
        metadata: Optional[Dict[str, Any]] = None,
        with_thumbnail: bool = False,
        thumbnail_path: Optional[str] = None,
        thumbnail_data: Optional[bytes] = None,
    ):
        """
        Args:
            file_path: Path of the file to upload. Mutually exclusive with
                ``data``.
            data: In-memory content to upload. Empty bytes are accepted.
            filename: Name sent to the API. Defaults to the basename of
                ``file_path``, or ``"attachment"`` for in-memory data.
            content_type: MIME type. Defaults to a guess based on
                ``file_path``, falling back to ``"application/octet-stream"``.
            file_type: Cell file category. Defaults to a guess derived from
                the content type.
            metadata: Extra metadata sent with the upload requests.
            with_thumbnail: Also request a thumbnail upload URL.
            thumbnail_path: Path of a thumbnail file to upload.
            thumbnail_data: In-memory thumbnail; takes precedence over
                ``thumbnail_path``.

        Raises:
            ValueError: If both, or neither, of ``file_path`` and ``data``
                are provided.
        """
        # ``data`` is compared against None rather than tested for truthiness
        # so that an empty payload (b"") counts as "provided". This is the
        # same rule _get_file_data() and _get_file_size() apply; testing
        # truthiness here would reject b"" on its own and would let
        # file_path + b"" through, silently uploading the empty bytes.
        has_path = bool(file_path)
        has_data = data is not None
        if has_path and has_data:
            raise ValueError("Cannot provide both file_path and data")
        if not has_path and not has_data:
            raise ValueError("Must provide either file_path or data")

        self.file_path = file_path
        self.data = data
        self.metadata = metadata or {}
        self.with_thumbnail = with_thumbnail
        self.thumbnail_path = thumbnail_path
        self.thumbnail_data = thumbnail_data

        # Set filename
        if filename:
            self.filename = filename
        elif file_path:
            self.filename = os.path.basename(file_path)
        else:
            self.filename = "attachment"

        # Set content type
        if content_type:
            self.content_type = content_type
        elif file_path:
            self.content_type = (
                mimetypes.guess_type(file_path)[0] or "application/octet-stream"
            )
        else:
            self.content_type = "application/octet-stream"

        # Set file type (must run after content_type is resolved)
        if file_type:
            self.file_type = file_type
        else:
            self.file_type = self._guess_file_type()

    def _guess_file_type(self) -> FileCellType:
        """Map the MIME type prefix (image/video/audio) to a FileCellType."""
        if self.content_type.startswith("image/"):
            return FileCellType.IMAGE
        elif self.content_type.startswith("video/"):
            return FileCellType.VIDEO
        elif self.content_type.startswith("audio/"):
            return FileCellType.AUDIO
        else:
            return FileCellType.FILE

    def _get_file_data(self) -> bytes:
        """
        Return the attachment content as bytes.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
            ValueError: If neither in-memory data nor a file path is set.
        """
        if self.data is not None:
            return self.data
        elif self.file_path:
            if not os.path.exists(self.file_path):
                raise FileNotFoundError(f"File not found: {self.file_path}")
            with open(self.file_path, "rb") as f:
                return f.read()
        raise ValueError("No file data available")

    def _get_file_size(self) -> int:
        """Return the content size in bytes, or 0 if it cannot be determined."""
        if self.data is not None:
            return len(self.data)
        elif self.file_path and os.path.exists(self.file_path):
            return os.path.getsize(self.file_path)
        return 0

    def upload(
        self,
        http_client: HTTPClient,
        dataset_slug: str,
        row_id: str,
        column_slug: str,
    ) -> "AttachmentReference":
        """
        Upload the attachment to a dataset cell.

        Steps: request an upload URL from the API, PUT the content to that
        URL, optionally PUT a thumbnail, then report the upload status.

        Args:
            http_client: Client used for the API calls.
            dataset_slug: Slug of the dataset that owns the cell.
            row_id: ID of the row that owns the cell.
            column_slug: Slug of the file column.

        Returns:
            An AttachmentReference with internal storage and the storage key
            returned by the API.

        Raises:
            Exception: If no upload URL is returned or the content upload
                fails.
        """
        # Request upload URL
        request = UploadURLRequest(
            type=self.file_type,
            file_name=self.filename,
            content_type=self.content_type,
            with_thumbnail=self.with_thumbnail,
            metadata=self.metadata,
        )

        result = http_client.post(
            f"datasets/{dataset_slug}/rows/{row_id}/cells/{column_slug}/upload-url",
            request.model_dump(),
        )

        if not result:
            raise Exception(f"Failed to get upload URL for {column_slug}")

        upload_response = UploadURLResponse(**result)

        # Upload to S3
        if not self._upload_to_s3(upload_response.upload_url):
            raise Exception(f"Failed to upload {self.filename}")

        # Upload thumbnail if provided
        if self.with_thumbnail and upload_response.thumbnail_upload_url:
            if self.thumbnail_data is not None:
                thumb_bytes = self.thumbnail_data
            elif self.thumbnail_path:
                with open(self.thumbnail_path, "rb") as f:
                    thumb_bytes = f.read()
            else:
                thumb_bytes = None
            if thumb_bytes is not None:
                # The response is not inspected, so an HTTP error status for
                # the thumbnail does not fail the overall upload.
                requests.put(upload_response.thumbnail_upload_url, data=thumb_bytes)

        # Confirm upload; copy so the caller-supplied metadata is not mutated
        metadata = self.metadata.copy()
        metadata["size_bytes"] = self._get_file_size()

        status_request = UploadStatusRequest(status="success", metadata=metadata)
        http_client.put(
            f"datasets/{dataset_slug}/rows/{row_id}/cells/{column_slug}/upload-status",
            status_request.model_dump(),
        )

        return AttachmentReference(
            storage_type=FileStorageType.INTERNAL,
            storage_key=upload_response.storage_key,
            file_type=self.file_type,
            metadata=metadata,
        )

    def _upload_to_s3(self, upload_url: str) -> bool:
        """
        PUT the content to ``upload_url``.

        Returns:
            True when the response status is 200, 201 or 204; False for any
            other status or if reading/sending the content raises.
        """
        try:
            file_data = self._get_file_data()
            response = requests.put(
                upload_url, data=file_data, headers={"Content-Type": self.content_type}
            )
            return response.status_code in [200, 201, 204]
        except Exception:
            # Best-effort: upload() turns False into a single failure error.
            return False


class ExternalAttachment:
    """
    An external file URL to be linked to a dataset cell.
    """

    def __init__(
        self,
        url: str,
        filename: Optional[str] = None,
        content_type: Optional[str] = None,
        file_type: FileCellType = FileCellType.FILE,
        metadata: Optional[Dict[str, Any]] = None,
    ):
        """
        Args:
            url: URL of the external file.
            filename: Display name. Defaults to the last path segment of
                ``url``.
            content_type: Optional MIME type; stored on the instance only.
            file_type: Cell file category sent to the API.
            metadata: Extra metadata sent to the API.
        """
        self.url = url
        self.filename = filename or self._filename_from_url(url)
        self.content_type = content_type
        self.file_type = file_type
        self.metadata = metadata or {}

    @staticmethod
    def _filename_from_url(url: str) -> str:
        """Derive a filename from the last path segment of ``url``."""
        # Look only at the URL path so that query strings and fragments
        # ("watch?v=...", "file.pdf#page=2") do not leak into the name.
        try:
            path = urlsplit(url).path
        except ValueError:
            # urlsplit rejects some malformed URLs; fall through to the
            # plain split below.
            path = ""
        segment = path.rstrip("/").rsplit("/", 1)[-1]
        # No usable path segment (e.g. a bare host): keep the earlier
        # behaviour of taking whatever follows the final "/".
        return segment or url.split("/")[-1]

    def attach(
        self,
        http_client: HTTPClient,
        dataset_slug: str,
        row_id: str,
        column_slug: str,
    ) -> "AttachmentReference":
        """
        Attach the external URL to a dataset cell.

        Args:
            http_client: Client used for the API call.
            dataset_slug: Slug of the dataset that owns the cell.
            row_id: ID of the row that owns the cell.
            column_slug: Slug of the file column.

        Returns:
            An AttachmentReference with external storage pointing at ``url``.

        Raises:
            Exception: If the API call returns a falsy result.
        """
        request = ExternalURLRequest(
            type=self.file_type,
            url=self.url,
            metadata=self.metadata,
        )

        result = http_client.post(
            f"datasets/{dataset_slug}/rows/{row_id}/cells/{column_slug}/external-url",
            request.model_dump(),
        )

        if not result:
            raise Exception(f"Failed to set external URL for {column_slug}")

        return AttachmentReference(
            storage_type=FileStorageType.EXTERNAL,
            url=self.url,
            file_type=self.file_type,
            metadata=self.metadata,
        )


class AttachmentReference:
    """
    Reference to an attachment stored in a dataset cell.
    """

    def __init__(
        self,
        storage_type: FileStorageType,
        storage_key: Optional[str] = None,
        url: Optional[str] = None,
        file_type: Optional[FileCellType] = None,
        metadata: Optional[Dict[str, Any]] = None,
        http_client: Optional[HTTPClient] = None,
        dataset_slug: Optional[str] = None,
    ):
        """
        Args:
            storage_type: Whether the file is stored internally or externally.
            storage_key: Storage key of an internally stored file.
            url: URL of an externally stored file.
            file_type: Cell file category.
            metadata: Metadata associated with the file.
            http_client: Stored on the instance; not used by the methods of
                this class.
            dataset_slug: Stored on the instance; not used by the methods of
                this class.
        """
        self.storage_type = storage_type
        self.storage_key = storage_key
        self.url = url
        self.file_type = file_type
        self.metadata = metadata or {}
        self.http_client = http_client
        self.dataset_slug = dataset_slug
        self._cached_data: Optional[bytes] = None

    @property
    def data(self) -> bytes:
        """
        Download and return the attachment content as bytes.

        The content is fetched on first access and cached on the instance.

        Raises:
            Exception: If get_url() yields no URL (currently always the case
                for internal storage).
            requests.HTTPError: If the download returns an error status.
        """
        if self._cached_data is None:
            download_url = self.get_url()
            if not download_url:
                raise Exception("No download URL available")
            response = requests.get(download_url)
            response.raise_for_status()
            self._cached_data = response.content
        return self._cached_data

    def download(self, file_path: Optional[str] = None) -> Optional[bytes]:
        """
        Download the attachment.

        Args:
            file_path: If given, write the content there (creating parent
                directories as needed) instead of returning it.

        Returns:
            The content as bytes, or None when it was written to
            ``file_path``.
        """
        file_data = self.data
        if file_path:
            Path(file_path).parent.mkdir(parents=True, exist_ok=True)
            with open(file_path, "wb") as f:
                f.write(file_data)
            return None
        return file_data

    def get_url(self) -> Optional[str]:
        """Return the download URL, or None for internally stored files."""
        if self.storage_type == FileStorageType.EXTERNAL:
            return self.url
        # For internal storage, would need to implement presigned URL generation
        return None

    def __repr__(self) -> str:
        """Return a debug representation naming the storage kind and location."""
        if self.storage_type == FileStorageType.EXTERNAL:
            return f"<AttachmentReference external url={self.url!r}>"
        return f"<AttachmentReference internal key={self.storage_key!r}>"
b/packages/traceloop-sdk/traceloop/sdk/datasets/column.py similarity index 100% rename from packages/traceloop-sdk/traceloop/sdk/dataset/column.py rename to packages/traceloop-sdk/traceloop/sdk/datasets/column.py diff --git a/packages/traceloop-sdk/traceloop/sdk/dataset/dataset.py b/packages/traceloop-sdk/traceloop/sdk/datasets/dataset.py similarity index 98% rename from packages/traceloop-sdk/traceloop/sdk/dataset/dataset.py rename to packages/traceloop-sdk/traceloop/sdk/datasets/dataset.py index 1506ff0aaf..949ffe83a4 100644 --- a/packages/traceloop-sdk/traceloop/sdk/dataset/dataset.py +++ b/packages/traceloop-sdk/traceloop/sdk/datasets/dataset.py @@ -1,7 +1,7 @@ from typing import List, Optional, Dict from pydantic import Field -from traceloop.sdk.dataset.model import ( +from traceloop.sdk.datasets.model import ( ColumnDefinition, ValuesMap, CreateDatasetResponse, diff --git a/packages/traceloop-sdk/traceloop/sdk/datasets/datasets.py b/packages/traceloop-sdk/traceloop/sdk/datasets/datasets.py index 1d3d9ae87e..8ec71fb18c 100644 --- a/packages/traceloop-sdk/traceloop/sdk/datasets/datasets.py +++ b/packages/traceloop-sdk/traceloop/sdk/datasets/datasets.py @@ -1,6 +1,7 @@ import csv -from typing import List, Optional, cast +import logging from pathlib import Path +from typing import Any, Dict, List, Optional, cast try: import pandas as pd @@ -10,16 +11,19 @@ PANDAS_AVAILABLE = False -from traceloop.sdk.dataset.model import ( +from traceloop.sdk.client.http import HTTPClient +from traceloop.sdk.datasets.attachment import Attachment, ExternalAttachment +from traceloop.sdk.datasets.dataset import Dataset +from traceloop.sdk.datasets.model import ( ColumnDefinition, - ValuesMap, + ColumnType, CreateDatasetRequest, CreateDatasetResponse, - ColumnType, DatasetMetadata, + ValuesMap, ) -from traceloop.sdk.dataset.dataset import Dataset -from traceloop.sdk.client.http import HTTPClient + +logger = logging.getLogger(__name__) class Datasets: @@ -57,6 +61,50 @@ def 
def create(self, dataset_request: CreateDatasetRequest) -> Dataset:
    """
    Create a dataset, handling any attachments found in its initial rows.

    Row values that are Attachment or ExternalAttachment objects are sent
    as None in the creation request and are uploaded/attached afterwards,
    once the rows exist.

    Args:
        dataset_request: Dataset creation request; row values may contain
            Attachment or ExternalAttachment objects.

    Returns:
        The created dataset, after its attachments have been processed.

    Example:
        dataset_request = CreateDatasetRequest(
            slug="products",
            name="Product Catalog",
            columns=[
                ColumnDefinition(slug="name", name="Name", type=ColumnType.STRING),
                ColumnDefinition(slug="image", name="Image", type=ColumnType.FILE),
            ],
            rows=[{
                "name": "Product A",
                "image": Attachment(file_path="/path/to/image.jpg", file_type=FileCellType.IMAGE)
            }]
        )
        dataset = datasets.create(dataset_request)
    """
    # Remember where the attachment objects sit before they are blanked out.
    pending = self._extract_attachments(dataset_request)
    creation_request = self._prepare_request_for_creation(dataset_request)

    created = Dataset.from_create_dataset_response(
        self._create_dataset(creation_request), self._http
    )

    if not pending:
        return created

    self._process_attachments(created, pending)
    return created


def _extract_attachments(
    self, request: CreateDatasetRequest
) -> Dict[int, Dict[str, Any]]:
    """
    Collect the attachment objects present in the request's rows.

    Returns:
        Mapping of row index -> column slug -> attachment object. Rows
        without attachments have no entry.
    """
    found: Dict[int, Dict[str, Any]] = {}
    for row_idx, row in enumerate(request.rows or []):
        for col_slug, cell in row.items():
            if not isinstance(cell, (Attachment, ExternalAttachment)):
                continue
            found.setdefault(row_idx, {})[col_slug] = cell
    return found


def _prepare_request_for_creation(
    self, request: CreateDatasetRequest
) -> CreateDatasetRequest:
    """
    Build the request actually sent for dataset creation.

    Args:
        request: Original dataset request, possibly holding attachment
            objects in its row values.

    Returns:
        The request itself when it has no rows; otherwise a new request
        whose rows are fresh dicts with every attachment object replaced
        by None. Other values are carried over as-is (not copied), and the
        original request is left untouched.
    """
    if not request.rows:
        return request

    attachment_types = (Attachment, ExternalAttachment)
    blanked_rows = [
        {
            col_slug: None if isinstance(cell, attachment_types) else cell
            for col_slug, cell in row.items()
        }
        for row in request.rows
    ]

    return CreateDatasetRequest(
        slug=request.slug,
        name=request.name,
        description=request.description,
        columns=request.columns,
        rows=blanked_rows,
    )


def _process_attachments(
    self, dataset: Dataset, attachments: Dict[int, Dict[str, Any]]
) -> None:
    """
    Upload or attach each attachment to its cell and update local row values.

    A failure for one cell is logged and recorded in that cell's value
    ("status": "failed"); processing then continues with the next cell.

    Args:
        dataset: The created dataset.
        attachments: Mapping of row index -> column slug -> attachment.
    """
    if not dataset.rows:
        return

    for row_idx, cells in attachments.items():
        if row_idx >= len(dataset.rows):
            logger.warning(
                f"Warning: Row index {row_idx} out of range, skipping attachments"
            )
            continue

        target_row = dataset.rows[row_idx]
        for col_slug, item in cells.items():
            try:
                if isinstance(item, Attachment):
                    ref = item.upload(
                        self._http, dataset.slug, target_row.id, col_slug
                    )
                elif isinstance(item, ExternalAttachment):
                    ref = item.attach(
                        self._http, dataset.slug, target_row.id, col_slug
                    )
                else:
                    continue

                # Mirror the new cell state in the local row values.
                if ref.file_type:
                    cell_type = ref.file_type.value
                else:
                    cell_type = "file"
                cell_value = {
                    "type": cell_type,
                    "status": "success",
                    "storage": ref.storage_type.value,
                    "storage_key": getattr(ref, "storage_key", None),
                    "url": getattr(ref, "url", None),
                    "metadata": ref.metadata,
                }
                target_row.values[col_slug] = cell_value
            except Exception as e:
                logger.warning(
                    f"Warning: Failed to process attachment for row {row_idx}, column {col_slug}: {e}"
                )
                # Record the failure locally so callers can see it.
                failure = {
                    "type": "file",
                    "status": "failed",
                    "error": str(e),
                }
                target_row.values[col_slug] = failure
class FileCellType(str, Enum):
    """Category of file held in a file cell."""

    IMAGE = "image"
    VIDEO = "video"
    AUDIO = "audio"
    FILE = "file"


class FileStorageType(str, Enum):
    """Where a file cell's content is stored."""

    INTERNAL = "internal"
    EXTERNAL = "external"


class FileCellMetadata(BaseModel):
    """Optional descriptive metadata for a file cell; every field may be absent."""

    file_name: Optional[str] = None
    content_type: Optional[str] = None
    size_bytes: Optional[int] = None
    uploaded_at: Optional[datetime.datetime] = None
    thumbnail_url: Optional[str] = None
    thumbnail_key: Optional[str] = None


class FileCellValue(BaseModel):
    """Value of a file cell: file category, upload status and storage location."""

    type: FileCellType
    status: str  # "in_progress", "success", "failed"
    storage: FileStorageType
    # NOTE(review): presumably storage_key is set for internal storage and
    # url for external storage — confirm against the API.
    storage_key: Optional[str] = None
    url: Optional[str] = None
    metadata: Optional[FileCellMetadata] = None


# Internal models for API communication (not exposed directly to users)
class UploadURLRequest(BaseModel):
    """Request body used to ask the API for an upload URL for a file cell."""

    type: FileCellType
    file_name: str
    content_type: Optional[str] = None
    with_thumbnail: bool = False
    metadata: Optional[Dict[str, Any]] = None


class UploadURLResponse(BaseModel):
    """Response to an upload-URL request: where to send the file and its storage key."""

    upload_url: str
    thumbnail_upload_url: Optional[str] = None
    storage_key: str
    thumbnail_key: Optional[str] = None
    # NOTE(review): presumably the expiry time of the upload URL(s) — confirm.
    expires_at: datetime.datetime
    # Defaults to "PUT" when the response omits it.
    method: str = "PUT"


class UploadStatusRequest(BaseModel):
    """Request body reporting the outcome of a file upload."""

    status: str  # "success", "failed"
    metadata: Optional[Dict[str, Any]] = None


class ExternalURLRequest(BaseModel):
    """Request body used to link an external URL to a file cell."""

    type: FileCellType
    url: str
    metadata: Optional[Dict[str, Any]] = None