Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: improve file name generation in upload_file function to prevent collisions between files with the same name #3550

Merged
merged 9 commits into from
Aug 26, 2024
7 changes: 5 additions & 2 deletions src/backend/base/langflow/api/v1/files.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import datetime
import hashlib
from http import HTTPStatus
from io import BytesIO
Expand Down Expand Up @@ -45,10 +46,12 @@ async def upload_file(
try:
flow_id_str = str(flow_id)
file_content = await file.read()
timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
file_name = file.filename or hashlib.sha256(file_content).hexdigest()
full_file_name = f"{timestamp}_{file_name}"
folder = flow_id_str
await storage_service.save_file(flow_id=folder, file_name=file_name, data=file_content)
return UploadFileResponse(flowId=flow_id_str, file_path=f"{folder}/{file_name}")
await storage_service.save_file(flow_id=folder, file_name=full_file_name, data=file_content)
return UploadFileResponse(flowId=flow_id_str, file_path=f"{folder}/{full_file_name}")
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))

Expand Down
39 changes: 23 additions & 16 deletions src/backend/tests/unit/test_files.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from unittest.mock import MagicMock

import pytest
Expand Down Expand Up @@ -29,10 +30,13 @@ def test_upload_file(client, mock_storage_service, created_api_key, flow):
headers=headers,
)
assert response.status_code == 201
assert response.json() == {
"flowId": str(flow.id),
"file_path": f"{flow.id}/test.txt",
}

response_json = response.json()
assert response_json["flowId"] == str(flow.id)

# Check that the file_path matches the expected pattern
file_path_pattern = re.compile(rf"{flow.id}/\d{{4}}-\d{{2}}-\d{{2}}_\d{{2}}-\d{{2}}-\d{{2}}_test\.txt")
assert file_path_pattern.match(response_json["file_path"])


def test_download_file(client, mock_storage_service, created_api_key, flow):
Expand Down Expand Up @@ -75,30 +79,33 @@ def test_file_operations(client, created_api_key, flow):
headers=headers,
)
assert response.status_code == 201
assert response.json() == {
"flowId": str(flow_id),
"file_path": f"{flow_id}/{file_name}",
}

response_json = response.json()
assert response_json["flowId"] == str(flow_id)

# Check that the file_path matches the expected pattern
file_path_pattern = re.compile(rf"{flow_id}/\d{{4}}-\d{{2}}-\d{{2}}_\d{{2}}-\d{{2}}-\d{{2}}_{file_name}")
assert file_path_pattern.match(response_json["file_path"])

# Extract the full file name with timestamp from the response
full_file_name = response_json["file_path"].split("/")[-1]

# Step 2: List files in the folder
response = client.get(f"api/v1/files/list/{flow_id}", headers=headers)
assert response.status_code == 200
assert file_name in response.json()["files"]
assert full_file_name in response.json()["files"]

# Step 3: Download the file and verify its content

response = client.get(f"api/v1/files/download/{flow_id}/{file_name}", headers=headers)
response = client.get(f"api/v1/files/download/{flow_id}/{full_file_name}", headers=headers)
assert response.status_code == 200
assert response.content == file_content
# the headers are application/octet-stream
assert response.headers["content-type"] == "application/octet-stream"
# mime_type is inside media_type

# Step 4: Delete the file
response = client.delete(f"api/v1/files/delete/{flow_id}/{file_name}", headers=headers)
response = client.delete(f"api/v1/files/delete/{flow_id}/{full_file_name}", headers=headers)
assert response.status_code == 200
assert response.json() == {"message": f"File {file_name} deleted successfully"}
assert response.json() == {"message": f"File {full_file_name} deleted successfully"}

# Verify that the file is indeed deleted
response = client.get(f"api/v1/files/list/{flow_id}", headers=headers)
assert file_name not in response.json()["files"]
assert full_file_name not in response.json()["files"]