From 26cbc1aead1f6632d05b0e031e0bf172b4610f88 Mon Sep 17 00:00:00 2001 From: Siqi Chen Date: Mon, 24 Nov 2025 15:36:25 +0100 Subject: [PATCH] Fix memory exhaustion when downloading large files Enable streaming for file downloads by passing stream=True to requests. This prevents loading entire files into memory when downloading datasets, competitions, models, and kernel outputs. Fixes #754 --- src/kagglesdk/kaggle_http_client.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/kagglesdk/kaggle_http_client.py b/src/kagglesdk/kaggle_http_client.py index 16390d5..bb65d4d 100644 --- a/src/kagglesdk/kaggle_http_client.py +++ b/src/kagglesdk/kaggle_http_client.py @@ -16,6 +16,8 @@ KaggleEnv, ) from kagglesdk.kaggle_object import KaggleObject +from kagglesdk.common.types.file_download import FileDownload +from kagglesdk.common.types.http_redirect import HttpRedirect from typing import Type # TODO (http://b/354237483) Generate the client from the existing one. @@ -81,6 +83,12 @@ def call( # Merge environment settings into session settings = self._session.merge_environment_settings(http_request.url, {}, None, None, None) + + # Use stream=True for file downloads to avoid loading entire file into memory + # See: https://github.com/Kaggle/kaggle-api/issues/754 + if response_type is not None and (response_type == FileDownload or response_type == HttpRedirect): + settings['stream'] = True + http_response = self._session.send(http_request, **settings) response = self._prepare_response(response_type, http_response)