From 9c5d09e43999c3bdbb450ed03bf71cc0dba6792a Mon Sep 17 00:00:00 2001 From: lsocha Date: Fri, 28 Mar 2025 15:09:25 +0100 Subject: [PATCH 1/4] feat: Add `stream_file_content` parameter to upload methods --- boxsdk/object/folder.py | 11 ++++- boxsdk/session/session.py | 14 +++--- test/integration_new/object/folder_itest.py | 17 +++++-- test/unit/object/test_folder.py | 32 +++++++++++++- test/unit/session/test_session.py | 49 ++++++++++++++++++++- 5 files changed, 110 insertions(+), 13 deletions(-) diff --git a/boxsdk/object/folder.py b/boxsdk/object/folder.py index 037ae575a..d20f5b008 100644 --- a/boxsdk/object/folder.py +++ b/boxsdk/object/folder.py @@ -263,6 +263,7 @@ def upload_stream( additional_attributes: Optional[dict] = None, sha1: Optional[str] = None, etag: Optional[str] = None, + stream_file_content: bool = True, ) -> 'File': """ Upload a file to the folder. @@ -298,6 +299,9 @@ def upload_stream( A sha1 checksum for the file. :param etag: If specified, instruct the Box API to update the item only if the current version's etag matches. + :param stream_file_content: + If True, the upload will be performed as a stream request. If False, the file will be read into memory + before being uploaded, but this may be required if using some proxy servers to handle redirects correctly. :returns: The newly uploaded file. """ @@ -335,7 +339,7 @@ def upload_stream( if not headers: headers = None file_response = self._session.post( - url, data=data, files=files, expect_json_response=False, headers=headers + url, data=data, files=files, expect_json_response=False, headers=headers, stream_file_content=stream_file_content, ).json() if 'entries' in file_response: file_response = file_response['entries'][0] @@ -358,6 +362,7 @@ def upload( additional_attributes: Optional[dict] = None, sha1: Optional[str] = None, etag: Optional[str] = None, + stream_file_content: bool = True, ) -> 'File': """ Upload a file to the folder. @@ -394,6 +399,9 @@ def upload( A sha1 checksum for the new content. :param etag: If specified, instruct the Box API to update the item only if the current version's etag matches. + :param stream_file_content: + If True, the upload will be performed as a stream request. If False, the file will be read into memory + before being uploaded, but this may be required if using some proxy servers to handle redirects correctly. :returns: The newly uploaded file. """ @@ -412,6 +420,7 @@ def upload( additional_attributes=additional_attributes, sha1=sha1, etag=etag, + stream_file_content=stream_file_content, ) @api_call diff --git a/boxsdk/session/session.py b/boxsdk/session/session.py index ee039bd35..e60f5f0a6 100644 --- a/boxsdk/session/session.py +++ b/boxsdk/session/session.py @@ -468,7 +468,8 @@ def _send_request(self, request: '_BoxRequest', **kwargs: Any) -> 'NetworkRespon """ # Reset stream positions to what they were when the request was made so the same data is sent even if this # is a retried attempt. - files, file_stream_positions = kwargs.get('files'), kwargs.pop('file_stream_positions') + files, file_stream_positions, stream_file_content = ( + kwargs.get('files'), kwargs.pop('file_stream_positions'), kwargs.pop('stream_file_content', True)) request_kwargs = self._default_network_request_kwargs.copy() request_kwargs.update(kwargs) proxy_dict = self._prepare_proxy() @@ -477,11 +478,12 @@ def _send_request(self, request: '_BoxRequest', **kwargs: Any) -> 'NetworkRespon if files and file_stream_positions: for name, position in file_stream_positions.items(): files[name][1].seek(position) - data = request_kwargs.pop('data', {}) - multipart_stream = MultipartStream(data, files) - request_kwargs['data'] = multipart_stream - del request_kwargs['files'] - request.headers['Content-Type'] = multipart_stream.content_type + if stream_file_content: + data = request_kwargs.pop('data', {}) + multipart_stream = MultipartStream(data, files) + request_kwargs['data'] = multipart_stream + del request_kwargs['files'] + request.headers['Content-Type'] = multipart_stream.content_type request.access_token = request_kwargs.pop('access_token', None) # send the request diff --git a/test/integration_new/object/folder_itest.py b/test/integration_new/object/folder_itest.py index 016ac8482..8da6d69b7 100644 --- a/test/integration_new/object/folder_itest.py +++ b/test/integration_new/object/folder_itest.py @@ -103,8 +103,8 @@ def test_auto_chunked_upload_NOT_using_upload_session_urls(parent_folder, large_ def test_get_items(parent_folder, small_file_path): - with BoxTestFolder(parent_folder=parent_folder) as subfolder,\ - BoxTestFile(parent_folder=parent_folder, file_path=small_file_path) as file,\ + with BoxTestFolder(parent_folder=parent_folder) as subfolder, \ + BoxTestFile(parent_folder=parent_folder, file_path=small_file_path) as file, \ BoxTestWebLink(parent_folder=parent_folder, url='https://box.com') as web_link: assert set(parent_folder.get_items()) == {subfolder, file, web_link} @@ -130,6 +130,17 @@ def test_upload_small_file_to_folder(parent_folder, small_file_name, small_file_ util.permanently_delete(uploaded_file) +def test_upload_small_file_to_folder_with_disabled_streaming_file_content( + parent_folder, small_file_name, small_file_path +): + uploaded_file = parent_folder.upload(file_path=small_file_path, file_name=small_file_name, stream_file_content=False) + try: + assert uploaded_file.id + assert uploaded_file.parent == parent_folder + finally: + util.permanently_delete(uploaded_file) + + def test_create_subfolder(parent_folder): created_subfolder = parent_folder.create_subfolder(name=util.random_name()) try: @@ -199,7 +210,7 @@ def test_delete_folder(parent_folder): def test_cascade_and_get_metadata_cascade_policies(parent_folder): - with BoxTestMetadataTemplate(display_name="test_template") as metadata_template,\ + with BoxTestMetadataTemplate(display_name="test_template") as metadata_template, \ BoxTestFolder(parent_folder=parent_folder) as folder: folder.cascade_metadata(metadata_template) diff --git a/test/unit/object/test_folder.py b/test/unit/object/test_folder.py index acf278417..9d067d3aa 100644 --- a/test/unit/object/test_folder.py +++ b/test/unit/object/test_folder.py @@ -2,7 +2,7 @@ from datetime import datetime from io import BytesIO from os.path import basename -from unittest.mock import mock_open, patch, Mock, MagicMock +from unittest.mock import mock_open, patch, Mock, MagicMock, ANY import pytest import pytz @@ -334,7 +334,14 @@ def test_upload( # in Python 2 tests attributes.update(additional_attributes) data = {'attributes': json.dumps(attributes)} - mock_box_session.post.assert_called_once_with(expected_url, expect_json_response=False, files=mock_files, data=data, headers=if_match_sha1_header) + mock_box_session.post.assert_called_once_with( + expected_url, + expect_json_response=False, + files=mock_files, + data=data, + headers=if_match_sha1_header, + stream_file_content=True + ) assert isinstance(new_file, File) assert new_file.object_id == mock_object_id assert 'id' in new_file @@ -438,6 +445,27 @@ def test_upload_does_preflight_check_if_specified( assert not test_folder.preflight_check.called +@patch('boxsdk.object.folder.open', mock_open(read_data=b'some bytes'), create=True) +@pytest.mark.parametrize('stream_file_content', (True, False)) +def test_upload_if_flag_stream_file_content_is_passed_to_session( + mock_box_session, + test_folder, + stream_file_content, +): + expected_url = f'{API.UPLOAD_URL}/files/content' + + test_folder.upload('foo.txt', file_name='foo.txt', stream_file_content=stream_file_content) + + mock_files = {'file': ('unused', ANY)} + mock_box_session.post.assert_called_once_with( + expected_url, + data=ANY, + files=mock_files, + expect_json_response=False, + headers=None, + stream_file_content=stream_file_content) + + def test_create_subfolder(test_folder, mock_box_session, mock_object_id, mock_folder_response): expected_url = test_folder.get_type_url() mock_box_session.post.return_value = mock_folder_response diff --git a/test/unit/session/test_session.py b/test/unit/session/test_session.py index d6d9e55bb..623f773ff 100644 --- a/test/unit/session/test_session.py +++ b/test/unit/session/test_session.py @@ -1,8 +1,10 @@ from functools import partial -from io import IOBase +from io import IOBase, BytesIO from numbers import Number +import os from unittest.mock import MagicMock, Mock, PropertyMock, call, patch, ANY from requests.exceptions import RequestException, SSLError, ConnectionError as RequestsConnectionError +from requests_toolbelt import MultipartEncoder import pytest @@ -449,3 +451,48 @@ def test_proxy_malformed_dict_does_not_attach(box_session, monkeypatch, mock_net def test_proxy_network_config_property(box_session): assert isinstance(box_session.proxy_config, Proxy) + + +def test_multipart_request_with_disabled_streaming_file_content( + box_session, mock_network_layer, generic_successful_response): + test_url = 'https://example.com' + file_bytes = os.urandom(1024) + mock_network_layer.request.side_effect = [generic_successful_response] + box_session.post( + url=test_url, + files={'file': ('unused', BytesIO(file_bytes))}, + data={'attributes': '{"name": "test_file"}'}, + stream_file_content=False + ) + mock_network_layer.request.assert_called_once_with( + 'POST', + test_url, + access_token='fake_access_token', + headers=ANY, + log_response_content=True, + files={'file': ('unused', ANY)}, + data={'attributes': '{"name": "test_file"}'}, + ) + + +def test_multipart_request_with_enabled_streaming_file_content( + box_session, mock_network_layer, generic_successful_response): + test_url = 'https://example.com' + file_bytes = os.urandom(1024) + mock_network_layer.request.side_effect = [generic_successful_response] + box_session.post( + url=test_url, + files={'file': ('unused', BytesIO(file_bytes))}, + data={'attributes': '{"name": "test_file"}'}, + stream_file_content=True + ) + call_args = mock_network_layer.request.call_args[0] + call_kwargs = mock_network_layer.request.call_args[1] + assert call_args[0] == 'POST' + assert call_args[1] == test_url + assert call_kwargs['access_token'] == 'fake_access_token' + assert call_kwargs['log_response_content'] is True + assert isinstance(call_kwargs['data'], MultipartEncoder) + assert call_kwargs['data'].fields['attributes'] == '{"name": "test_file"}' + assert call_kwargs['data'].fields['file'][0] == 'unused' + assert isinstance(call_kwargs['data'].fields['file'][1], BytesIO) From 157d3e995283138525faca8f1c6d7f9c5fbf9d69 Mon Sep 17 00:00:00 2001 From: lsocha Date: Fri, 28 Mar 2025 15:15:51 +0100 Subject: [PATCH 2/4] chore --- .github/workflows/build.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fe792be79..570266547 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,6 @@ jobs: - '3.6' - '3.7' - '3.8' - - 'pypy-3.8' - '3.9' - '3.10' - '3.11' From fcb1af35038046faa3c1410d761b488b4719f3c4 Mon Sep 17 00:00:00 2001 From: lsocha Date: Fri, 28 Mar 2025 16:04:50 +0100 Subject: [PATCH 3/4] fix integration tests --- test/integration_new/object/ai_itest.py | 19 ++----------------- test/integration_new/object/trash_itest.py | 14 +++++++------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/test/integration_new/object/ai_itest.py b/test/integration_new/object/ai_itest.py index 53deaa4e3..8b478d4fe 100644 --- a/test/integration_new/object/ai_itest.py +++ b/test/integration_new/object/ai_itest.py @@ -22,17 +22,10 @@ def test_send_ai_question(parent_folder, small_file_path): 'type': 'file', 'content': 'The sun raises in the east.' }] - ai_agent = { - 'type': 'ai_agent_ask', - 'basic_text_multi': { - 'model': 'openai__gpt_3_5_turbo' - } - } answer = CLIENT.send_ai_question( items=items, prompt='Which direction does the sun raise?', mode='single_item_qa', - ai_agent=ai_agent ) assert 'east' in answer['answer'].lower() assert answer['completion_reason'] == 'done' @@ -54,17 +47,10 @@ def test_send_ai_text_gen(parent_folder, small_file_path): 'answer': 'It takes 24 hours for the sun to rise.', 'created_at': '2013-12-12T11:20:43-08:00' }] - ai_agent = { - 'type': 'ai_agent_text_gen', - 'basic_gen': { - 'model': 'openai__gpt_3_5_turbo_16k' - } - } answer = CLIENT.send_ai_text_gen( dialogue_history=dialogue_history, items=items, prompt='Which direction does the sun raise?', - ai_agent=ai_agent ) assert 'east' in answer['answer'].lower() assert answer['completion_reason'] == 'done' @@ -73,8 +59,7 @@ def test_send_ai_text_gen(parent_folder, small_file_path): def test_get_ai_agent_default_config(): config = CLIENT.get_ai_agent_default_config( mode='text_gen', - language='en', - model='openai__gpt_3_5_turbo' + language='en' ) assert config['type'] == 'ai_agent_text_gen' - assert config['basic_gen']['model'] == 'openai__gpt_3_5_turbo' + assert config['basic_gen']['model'] != '' diff --git a/test/integration_new/object/trash_itest.py b/test/integration_new/object/trash_itest.py index d330aac3e..26b9e88e0 100644 --- a/test/integration_new/object/trash_itest.py +++ b/test/integration_new/object/trash_itest.py @@ -23,8 +23,8 @@ def test_trash_get_items(parent_folder, small_file_path): test_file = parent_folder.upload(file_path=small_file_path, file_name=name) test_file.delete() try: - trash_items = CLIENT.trash().get_items() - assert test_file.id in [item.id for item in trash_items] + trashed_file = test_file.get() + assert trashed_file.item_status == 'trashed' finally: CLIENT.trash().permanently_delete_item(test_file) @@ -32,8 +32,8 @@ def test_trash_get_items(parent_folder, small_file_path): def test_trash_restore_item(parent_folder, small_file_path): with BoxTestFile(parent_folder=parent_folder, file_path=small_file_path) as test_file: test_file.delete() - trash_items = CLIENT.trash().get_items() - assert test_file.id in [item.id for item in trash_items] + folder_items = parent_folder.get_items() + assert test_file.id not in [item.id for item in folder_items] CLIENT.trash().restore_item(test_file) folder_items = parent_folder.get_items() assert test_file.id in [item.id for item in folder_items] @@ -46,7 +46,7 @@ def test_trash_get_items_with_offset(parent_folder, small_file_path): try: trash_items = CLIENT.trash().get_items() assert isinstance(trash_items, LimitOffsetBasedObjectCollection) - assert test_file.id in [item.id for item in trash_items] + assert trash_items.next() is not None finally: CLIENT.trash().permanently_delete_item(test_file) @@ -56,8 +56,8 @@ def test_trash_get_items_with_marker(parent_folder, small_file_path): test_file = parent_folder.upload(file_path=small_file_path, file_name=name) test_file.delete() try: - trash_items = CLIENT.trash().get_items(limit=100, use_marker=True) + trash_items = CLIENT.trash().get_items(limit=5, use_marker=True) assert isinstance(trash_items, MarkerBasedObjectCollection) - assert test_file.id in [item.id for item in trash_items] + assert trash_items.next() is not None finally: CLIENT.trash().permanently_delete_item(test_file) From f1c44f25ca62c5937d30dbf2c3411338e9173d6c Mon Sep 17 00:00:00 2001 From: lsocha Date: Mon, 7 Apr 2025 17:46:00 +0200 Subject: [PATCH 4/4] add docs --- docs/usage/files.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/docs/usage/files.md b/docs/usage/files.md index 73dab7a4e..a4f67de87 100644 --- a/docs/usage/files.md +++ b/docs/usage/files.md @@ -187,9 +187,32 @@ new_file = client.folder(folder_id).upload_stream(stream, file_name) print(f'File "{new_file.name}" uploaded to Box with file ID {new_file.id}') ``` +---- +**NOTE:** + +Both methods `folder.upload()` and `folder.upload_stream()` include the `stream_file_content` parameter, +which controls how the file content is uploaded. + +If you are uploading a large file, you may want to stream the request to avoid excessive memory usage. +According to `requests'` library [docs][request_docs], by default, the `requests` library does not support streaming uploads, +and all the data must be read into memory before being sent to the server. +However, the `requests-toolbelt` package includes a `MultipartEncoder` class, which enables file uploads without +loading the entire file into memory. This approach is the default in the Box Python SDK. + +That said, handling 307 Temporary Redirects presents a challenge with streamed file uploads. +307 redirect requires that both the request method and body remain unchanged. +This can be problematic when uploading a file stream because the stream will already be exhausted when the redirect occurs. + +To address this issue, the `stream_file_content` parameter has been introduced in upload methods. This allows you to choose between: + - Streaming the file (`stream_file_content=True`): Optimizes memory usage but may cause issues with redirects. + + - Using the default `requests'` library behavior (`stream_file_content=False`): Ensures the file can be re-read if a + redirect occurs but may consume more memory. This is especially important when working with proxy servers. + [folder_class]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.folder.Folder [upload]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.folder.Folder.upload [upload_stream]: https://box-python-sdk.readthedocs.io/en/latest/boxsdk.object.html#boxsdk.object.folder.Folder.upload_stream +[request_docs]: https://docs.python-requests.org/en/latest/user/quickstart/#post-a-multipart-encoded-file Chunked Upload --------------