From d8f5542d006e65a6419c173a1bdc13976dc3c4d3 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 26 Feb 2026 13:16:21 +0000 Subject: [PATCH 1/5] Use STD_TORCH_CHECK in io/ --- torchvision/csrc/io/image/common.cpp | 7 +- torchvision/csrc/io/image/common.h | 1 + torchvision/csrc/io/image/cpu/decode_gif.cpp | 11 +-- .../csrc/io/image/cpu/decode_image.cpp | 16 ++-- torchvision/csrc/io/image/cpu/decode_jpeg.cpp | 7 +- torchvision/csrc/io/image/cpu/decode_png.cpp | 21 ++--- torchvision/csrc/io/image/cpu/decode_webp.cpp | 8 +- torchvision/csrc/io/image/cpu/encode_jpeg.cpp | 17 ++-- torchvision/csrc/io/image/cpu/encode_png.cpp | 19 ++-- torchvision/csrc/io/image/cpu/exif.h | 3 +- .../csrc/io/image/cpu/read_write_file.cpp | 21 +++-- .../csrc/io/image/cuda/decode_jpegs_cuda.cpp | 88 +++++++++---------- .../csrc/io/image/cuda/encode_jpegs_cuda.cpp | 42 ++++----- 13 files changed, 142 insertions(+), 119 deletions(-) diff --git a/torchvision/csrc/io/image/common.cpp b/torchvision/csrc/io/image/common.cpp index 7743961a09d..0be5f67532f 100644 --- a/torchvision/csrc/io/image/common.cpp +++ b/torchvision/csrc/io/image/common.cpp @@ -13,12 +13,13 @@ namespace vision { namespace image { void validate_encoded_data(const torch::Tensor& encoded_data) { - TORCH_CHECK(encoded_data.is_contiguous(), "Input tensor must be contiguous."); - TORCH_CHECK( + STD_TORCH_CHECK( + encoded_data.is_contiguous(), "Input tensor must be contiguous."); + STD_TORCH_CHECK( encoded_data.dtype() == torch::kU8, "Input tensor must have uint8 data type, got ", encoded_data.dtype()); - TORCH_CHECK( + STD_TORCH_CHECK( encoded_data.dim() == 1 && encoded_data.numel() > 0, "Input tensor must be 1-dimensional and non-empty, got ", encoded_data.dim(), diff --git a/torchvision/csrc/io/image/common.h b/torchvision/csrc/io/image/common.h index d81acfda7d4..1b459c5d7ea 100644 --- a/torchvision/csrc/io/image/common.h +++ b/torchvision/csrc/io/image/common.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include namespace vision { diff --git a/torchvision/csrc/io/image/cpu/decode_gif.cpp b/torchvision/csrc/io/image/cpu/decode_gif.cpp index ae757ff44ed..e3a194b8845 100644 --- a/torchvision/csrc/io/image/cpu/decode_gif.cpp +++ b/torchvision/csrc/io/image/cpu/decode_gif.cpp @@ -63,7 +63,7 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) { GifFileType* gifFile = DGifOpen(static_cast(&reader_helper), read_from_tensor, &error); - TORCH_CHECK( + STD_TORCH_CHECK( (gifFile != nullptr) && (error == D_GIF_SUCCEEDED), "DGifOpenFileName() failed - ", error); @@ -71,12 +71,13 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) { if (DGifSlurp(gifFile) == GIF_ERROR) { auto gifFileError = gifFile->Error; DGifCloseFile(gifFile, &error); - TORCH_CHECK(false, "DGifSlurp() failed - ", gifFileError); + STD_TORCH_CHECK(false, "DGifSlurp() failed - ", gifFileError); } auto num_images = gifFile->ImageCount; // This check should already done within DGifSlurp(), just to be safe - TORCH_CHECK(num_images > 0, "GIF file should contain at least one image!"); + STD_TORCH_CHECK( + num_images > 0, "GIF file should contain at least one image!"); GifColorType bg = {0, 0, 0}; if (gifFile->SColorMap) { @@ -109,7 +110,7 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) { const GifImageDesc& desc = img.ImageDesc; const ColorMapObject* cmap = desc.ColorMap ? desc.ColorMap : gifFile->SColorMap; - TORCH_CHECK( + STD_TORCH_CHECK( cmap != nullptr, "Global and local color maps are missing. This should never happen!"); @@ -161,7 +162,7 @@ torch::Tensor decode_gif(const torch::Tensor& encoded_data) { out = out.squeeze(0); // remove batch dim if there's only one image DGifCloseFile(gifFile, &error); - TORCH_CHECK(error == D_GIF_SUCCEEDED, "DGifCloseFile() failed - ", error); + STD_TORCH_CHECK(error == D_GIF_SUCCEEDED, "DGifCloseFile() failed - ", error); return out; } diff --git a/torchvision/csrc/io/image/cpu/decode_image.cpp b/torchvision/csrc/io/image/cpu/decode_image.cpp index 43a688604f6..a1674993bfc 100644 --- a/torchvision/csrc/io/image/cpu/decode_image.cpp +++ b/torchvision/csrc/io/image/cpu/decode_image.cpp @@ -13,11 +13,11 @@ torch::Tensor decode_image( ImageReadMode mode, bool apply_exif_orientation) { // Check that tensor is a CPU tensor - TORCH_CHECK(data.device() == torch::kCPU, "Expected a CPU tensor"); + STD_TORCH_CHECK(data.device() == torch::kCPU, "Expected a CPU tensor"); // Check that the input tensor dtype is uint8 - TORCH_CHECK(data.dtype() == torch::kU8, "Expected a torch.uint8 tensor"); + STD_TORCH_CHECK(data.dtype() == torch::kU8, "Expected a torch.uint8 tensor"); // Check that the input tensor is 1-dimensional - TORCH_CHECK( + STD_TORCH_CHECK( data.dim() == 1 && data.numel() > 0, "Expected a non empty 1-dimensional tensor"); @@ -27,13 +27,13 @@ torch::Tensor decode_image( auto datap = data.data_ptr(); const uint8_t jpeg_signature[3] = {255, 216, 255}; // == "\xFF\xD8\xFF" - TORCH_CHECK(data.numel() >= 3, err_msg); + STD_TORCH_CHECK(data.numel() >= 3, err_msg); if (memcmp(jpeg_signature, datap, 3) == 0) { return decode_jpeg(data, mode, apply_exif_orientation); } const uint8_t png_signature[4] = {137, 80, 78, 71}; // == "\211PNG" - TORCH_CHECK(data.numel() >= 4, err_msg); + STD_TORCH_CHECK(data.numel() >= 4, err_msg); if (memcmp(png_signature, datap, 4) == 0) { return decode_png(data, mode, apply_exif_orientation); } @@ -42,7 +42,7 @@ torch::Tensor decode_image( 0x47, 0x49, 0x46, 0x38, 0x39, 0x61}; // == "GIF89a" const uint8_t gif_signature_2[6] = { 0x47, 0x49, 0x46, 0x38, 0x37, 0x61}; // == "GIF87a" - TORCH_CHECK(data.numel() >= 6, err_msg); + STD_TORCH_CHECK(data.numel() >= 6, err_msg); if (memcmp(gif_signature_1, datap, 6) == 0 || memcmp(gif_signature_2, datap, 6) == 0) { return decode_gif(data); @@ -51,13 +51,13 @@ torch::Tensor decode_image( const uint8_t webp_signature_begin[4] = {0x52, 0x49, 0x46, 0x46}; // == "RIFF" const uint8_t webp_signature_end[7] = { 0x57, 0x45, 0x42, 0x50, 0x56, 0x50, 0x38}; // == "WEBPVP8" - TORCH_CHECK(data.numel() >= 15, err_msg); + STD_TORCH_CHECK(data.numel() >= 15, err_msg); if ((memcmp(webp_signature_begin, datap, 4) == 0) && (memcmp(webp_signature_end, datap + 8, 7) == 0)) { return decode_webp(data, mode); } - TORCH_CHECK(false, err_msg); + STD_TORCH_CHECK(false, err_msg); } } // namespace image diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp index 052b98e1be9..8163ace3307 100644 --- a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp +++ b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp @@ -11,7 +11,7 @@ torch::Tensor decode_jpeg( const torch::Tensor& data, ImageReadMode mode, bool apply_exif_orientation) { - TORCH_CHECK( + STD_TORCH_CHECK( false, "decode_jpeg: torchvision not compiled with libjpeg support"); } #else @@ -151,7 +151,7 @@ torch::Tensor decode_jpeg( * We need to clean up the JPEG object. */ jpeg_destroy_decompress(&cinfo); - TORCH_CHECK(false, jerr.jpegLastErrorMsg); + STD_TORCH_CHECK(false, jerr.jpegLastErrorMsg); } jpeg_create_decompress(&cinfo); @@ -192,7 +192,8 @@ torch::Tensor decode_jpeg( */ default: jpeg_destroy_decompress(&cinfo); - TORCH_CHECK(false, "The provided mode is not supported for JPEG files"); + STD_TORCH_CHECK( + false, "The provided mode is not supported for JPEG files"); } jpeg_calc_output_dimensions(&cinfo); diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp index 5ea6f073975..67c788455c4 100644 --- a/torchvision/csrc/io/image/cpu/decode_png.cpp +++ b/torchvision/csrc/io/image/cpu/decode_png.cpp @@ -13,7 +13,7 @@ torch::Tensor decode_png( const torch::Tensor& data, ImageReadMode mode, bool apply_exif_orientation) { - TORCH_CHECK( + STD_TORCH_CHECK( false, "decode_png: torchvision not compiled with libPNG support"); } #else @@ -33,12 +33,12 @@ torch::Tensor decode_png( auto png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); - TORCH_CHECK(png_ptr, "libpng read structure allocation failed!") + STD_TORCH_CHECK(png_ptr, "libpng read structure allocation failed!") auto info_ptr = png_create_info_struct(png_ptr); if (!info_ptr) { png_destroy_read_struct(&png_ptr, nullptr, nullptr); // Seems redundant with the if statement. done here to avoid leaking memory. - TORCH_CHECK(info_ptr, "libpng info structure allocation failed!") + STD_TORCH_CHECK(info_ptr, "libpng info structure allocation failed!") } auto accessor = data.accessor(); @@ -47,11 +47,11 @@ torch::Tensor decode_png( if (setjmp(png_jmpbuf(png_ptr)) != 0) { png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - TORCH_CHECK(false, "Internal error."); + STD_TORCH_CHECK(false, "Internal error."); } - TORCH_CHECK(datap_len >= 8, "Content is too small for png!") + STD_TORCH_CHECK(datap_len >= 8, "Content is too small for png!") auto is_png = !png_sig_cmp(datap, 0, 8); - TORCH_CHECK(is_png, "Content is not png!") + STD_TORCH_CHECK(is_png, "Content is not png!") struct Reader { png_const_bytep ptr; @@ -64,7 +64,7 @@ torch::Tensor decode_png( png_bytep output, png_size_t bytes) { auto reader = static_cast(png_get_io_ptr(png_ptr)); - TORCH_CHECK( + STD_TORCH_CHECK( reader->count >= bytes, "Out of bound read in decode_png. Probably, the input image is corrupted"); std::copy(reader->ptr, reader->ptr + bytes, output); @@ -91,12 +91,12 @@ torch::Tensor decode_png( if (retval != 1) { png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - TORCH_CHECK(retval == 1, "Could read image metadata from content.") + STD_TORCH_CHECK(retval == 1, "Could read image metadata from content.") } if (bit_depth > 8 && bit_depth != 16) { png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - TORCH_CHECK( + STD_TORCH_CHECK( false, "bit depth of png image is " + std::to_string(bit_depth) + ". Only <=8 and 16 are supported.") @@ -188,7 +188,8 @@ torch::Tensor decode_png( break; default: png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - TORCH_CHECK(false, "The provided mode is not supported for PNG files"); + STD_TORCH_CHECK( + false, "The provided mode is not supported for PNG files"); } png_read_update_info(png_ptr, info_ptr); diff --git a/torchvision/csrc/io/image/cpu/decode_webp.cpp b/torchvision/csrc/io/image/cpu/decode_webp.cpp index 80fe68862fb..d5c949e77eb 100644 --- a/torchvision/csrc/io/image/cpu/decode_webp.cpp +++ b/torchvision/csrc/io/image/cpu/decode_webp.cpp @@ -13,7 +13,7 @@ namespace image { torch::Tensor decode_webp( const torch::Tensor& encoded_data, ImageReadMode mode) { - TORCH_CHECK( + STD_TORCH_CHECK( false, "decode_webp: torchvision not compiled with libwebp support"); } #else @@ -28,9 +28,9 @@ torch::Tensor decode_webp( WebPBitstreamFeatures features; auto res = WebPGetFeatures(encoded_data_p, encoded_data_size, &features); - TORCH_CHECK( + STD_TORCH_CHECK( res == VP8_STATUS_OK, "WebPGetFeatures failed with error code ", res); - TORCH_CHECK( + STD_TORCH_CHECK( !features.has_animation, "Animated webp files are not supported."); if (mode == IMAGE_READ_MODE_GRAY || mode == IMAGE_READ_MODE_GRAY_ALPHA) { @@ -52,7 +52,7 @@ torch::Tensor decode_webp( auto decoded_data = decoding_func(encoded_data_p, encoded_data_size, &width, &height); - TORCH_CHECK(decoded_data != nullptr, "WebPDecodeRGB[A] failed."); + STD_TORCH_CHECK(decoded_data != nullptr, "WebPDecodeRGB[A] failed."); auto deleter = [decoded_data](void*) { WebPFree(decoded_data); }; auto out = torch::from_blob( diff --git a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp index d2ed73071a2..99b0b6097db 100644 --- a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp +++ b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp @@ -1,5 +1,7 @@ #include "encode_jpeg.h" +#include + #include "common_jpeg.h" namespace vision { @@ -8,7 +10,7 @@ namespace image { #if !JPEG_FOUND torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { - TORCH_CHECK( + STD_TORCH_CHECK( false, "encode_jpeg: torchvision not compiled with libjpeg support"); } @@ -48,17 +50,20 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { free(jpegBuf); } - TORCH_CHECK(false, (const char*)jerr.jpegLastErrorMsg); + STD_TORCH_CHECK(false, (const char*)jerr.jpegLastErrorMsg); } // Check that the input tensor is on CPU - TORCH_CHECK(data.device() == torch::kCPU, "Input tensor should be on CPU"); + STD_TORCH_CHECK( + data.device() == torch::kCPU, "Input tensor should be on CPU"); // Check that the input tensor dtype is uint8 - TORCH_CHECK(data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + STD_TORCH_CHECK( + data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); // Check that the input tensor is 3-dimensional - TORCH_CHECK(data.dim() == 3, "Input data should be a 3-dimensional tensor"); + STD_TORCH_CHECK( + data.dim() == 3, "Input data should be a 3-dimensional tensor"); // Get image info int channels = data.size(0); @@ -66,7 +71,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { int width = data.size(2); auto input = data.permute({1, 2, 0}).contiguous(); - TORCH_CHECK( + STD_TORCH_CHECK( channels == 1 || channels == 3, "The number of channels should be 1 or 3, got: ", channels); diff --git a/torchvision/csrc/io/image/cpu/encode_png.cpp b/torchvision/csrc/io/image/cpu/encode_png.cpp index d55a0ed3ff6..d015f44cb39 100644 --- a/torchvision/csrc/io/image/cpu/encode_png.cpp +++ b/torchvision/csrc/io/image/cpu/encode_png.cpp @@ -1,5 +1,7 @@ #include "encode_jpeg.h" +#include + #include "common_png.h" namespace vision { @@ -8,7 +10,7 @@ namespace image { #if !PNG_FOUND torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { - TORCH_CHECK( + STD_TORCH_CHECK( false, "encode_png: torchvision not compiled with libpng support"); } @@ -93,22 +95,25 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { free(buf_info.buffer); } - TORCH_CHECK(false, err_ptr.pngLastErrorMsg); + STD_TORCH_CHECK(false, err_ptr.pngLastErrorMsg); } // Check that the compression level is between 0 and 9 - TORCH_CHECK( + STD_TORCH_CHECK( compression_level >= 0 && compression_level <= 9, "Compression level should be between 0 and 9"); // Check that the input tensor is on CPU - TORCH_CHECK(data.device() == torch::kCPU, "Input tensor should be on CPU"); + STD_TORCH_CHECK( + data.device() == torch::kCPU, "Input tensor should be on CPU"); // Check that the input tensor dtype is uint8 - TORCH_CHECK(data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + STD_TORCH_CHECK( + data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); // Check that the input tensor is 3-dimensional - TORCH_CHECK(data.dim() == 3, "Input data should be a 3-dimensional tensor"); + STD_TORCH_CHECK( + data.dim() == 3, "Input data should be a 3-dimensional tensor"); // Get image info int channels = data.size(0); @@ -116,7 +121,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { int width = data.size(2); auto input = data.permute({1, 2, 0}).contiguous(); - TORCH_CHECK( + STD_TORCH_CHECK( channels == 1 || channels == 3, "The number of channels should be 1 or 3, got: ", channels); diff --git a/torchvision/csrc/io/image/cpu/exif.h b/torchvision/csrc/io/image/cpu/exif.h index 7680737f8c0..e55a800b220 100644 --- a/torchvision/csrc/io/image/cpu/exif.h +++ b/torchvision/csrc/io/image/cpu/exif.h @@ -58,6 +58,7 @@ direct, #include #endif +#include #include namespace vision { @@ -78,7 +79,7 @@ class ExifDataReader { return _size; } const unsigned char& operator[](size_t index) const { - TORCH_CHECK(index >= 0 && index < _size); + STD_TORCH_CHECK(index >= 0 && index < _size); return _ptr[index]; } diff --git a/torchvision/csrc/io/image/cpu/read_write_file.cpp b/torchvision/csrc/io/image/cpu/read_write_file.cpp index 06de72a5053..b97b4c2284a 100644 --- a/torchvision/csrc/io/image/cpu/read_write_file.cpp +++ b/torchvision/csrc/io/image/cpu/read_write_file.cpp @@ -1,5 +1,7 @@ #include "read_write_file.h" +#include + #include #ifdef _WIN32 @@ -18,7 +20,7 @@ std::wstring utf8_decode(const std::string& str) { } int size_needed = MultiByteToWideChar( CP_UTF8, 0, str.c_str(), static_cast(str.size()), nullptr, 0); - TORCH_CHECK(size_needed > 0, "Error converting the content to Unicode"); + STD_TORCH_CHECK(size_needed > 0, "Error converting the content to Unicode"); std::wstring wstrTo(size_needed, 0); MultiByteToWideChar( CP_UTF8, @@ -47,12 +49,12 @@ torch::Tensor read_file(const std::string& filename) { int rc = stat(filename.c_str(), &stat_buf); #endif // errno is a variable defined in errno.h - TORCH_CHECK( + STD_TORCH_CHECK( rc == 0, "[Errno ", errno, "] ", strerror(errno), ": '", filename, "'"); int64_t size = stat_buf.st_size; - TORCH_CHECK(size > 0, "Expected a non empty file"); + STD_TORCH_CHECK(size > 0, "Expected a non empty file"); #ifdef _WIN32 // TODO: Once torch::from_file handles UTF-8 paths correctly, we should move @@ -62,7 +64,7 @@ torch::Tensor read_file(const std::string& filename) { // torch::kU8).clone() FILE* infile = _wfopen(fileW.c_str(), L"rb"); - TORCH_CHECK(infile != nullptr, "Error opening input file"); + STD_TORCH_CHECK(infile != nullptr, "Error opening input file"); auto data = torch::empty({size}, torch::kU8); auto dataBytes = data.data_ptr(); @@ -81,13 +83,16 @@ void write_file(const std::string& filename, torch::Tensor& data) { C10_LOG_API_USAGE_ONCE( "torchvision.csrc.io.image.cpu.read_write_file.write_file"); // Check that the input tensor is on CPU - TORCH_CHECK(data.device() == torch::kCPU, "Input tensor should be on CPU"); + STD_TORCH_CHECK( + data.device() == torch::kCPU, "Input tensor should be on CPU"); // Check that the input tensor dtype is uint8 - TORCH_CHECK(data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + STD_TORCH_CHECK( + data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); // Check that the input tensor is 3-dimensional - TORCH_CHECK(data.dim() == 1, "Input data should be a 1-dimensional tensor"); + STD_TORCH_CHECK( + data.dim() == 1, "Input data should be a 1-dimensional tensor"); auto fileBytes = data.data_ptr(); auto fileCStr = filename.c_str(); @@ -98,7 +103,7 @@ void write_file(const std::string& filename, torch::Tensor& data) { FILE* outfile = fopen(fileCStr, "wb"); #endif - TORCH_CHECK(outfile != nullptr, "Error opening output file"); + STD_TORCH_CHECK(outfile != nullptr, "Error opening output file"); fwrite(fileBytes, sizeof(uint8_t), data.numel(), outfile); fclose(outfile); diff --git a/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp b/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp index 85aa6c760c1..c082485a2a7 100644 --- a/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp +++ b/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp @@ -6,7 +6,7 @@ std::vector decode_jpegs_cuda( const std::vector& encoded_images, vision::image::ImageReadMode mode, torch::Device device) { - TORCH_CHECK( + STD_TORCH_CHECK( false, "decode_jpegs_cuda: torchvision not compiled with nvJPEG support"); } } // namespace image @@ -42,18 +42,18 @@ std::vector decode_jpegs_cuda( std::vector contig_images; contig_images.reserve(encoded_images.size()); - TORCH_CHECK( + STD_TORCH_CHECK( device.is_cuda(), "Expected the device parameter to be a cuda device"); for (auto& encoded_image : encoded_images) { - TORCH_CHECK( + STD_TORCH_CHECK( encoded_image.dtype() == torch::kU8, "Expected a torch.uint8 tensor"); - TORCH_CHECK( + STD_TORCH_CHECK( !encoded_image.is_cuda(), "The input tensor must be on CPU when decoding with nvjpeg") - TORCH_CHECK( + STD_TORCH_CHECK( encoded_image.dim() == 1 && encoded_image.numel() > 0, "Expected a non empty 1-dimensional tensor"); @@ -72,11 +72,11 @@ std::vector decode_jpegs_cuda( nvjpegStatus_t get_minor_property_status = nvjpegGetProperty(MINOR_VERSION, &minor_version); - TORCH_CHECK( + STD_TORCH_CHECK( get_major_property_status == NVJPEG_STATUS_SUCCESS, "nvjpegGetProperty failed: ", get_major_property_status); - TORCH_CHECK( + STD_TORCH_CHECK( get_minor_property_status == NVJPEG_STATUS_SUCCESS, "nvjpegGetProperty failed: ", get_minor_property_status); @@ -114,7 +114,7 @@ std::vector decode_jpegs_cuda( output_format = NVJPEG_OUTPUT_RGB; break; default: - TORCH_CHECK( + STD_TORCH_CHECK( false, "The provided mode is not supported for JPEG decoding on GPU"); } @@ -130,7 +130,7 @@ std::vector decode_jpegs_cuda( return result; } catch (const std::exception& e) { if (typeid(e) != typeid(std::runtime_error)) { - TORCH_CHECK(false, "Error while decoding JPEG images: ", e.what()); + STD_TORCH_CHECK(false, "Error while decoding JPEG images: ", e.what()); } else { throw; } @@ -160,70 +160,70 @@ CUDAJpegDecoder::CUDAJpegDecoder(const torch::Device& target_device) NULL, NVJPEG_FLAGS_DEFAULT, &nvjpeg_handle); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to initialize nvjpeg with default backend: ", status); hw_decode_available = false; } else { - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to initialize nvjpeg with hardware backend: ", status); } status = nvjpegJpegStateCreate(nvjpeg_handle, &nvjpeg_state); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create nvjpeg state: ", status); status = nvjpegDecoderCreate( nvjpeg_handle, NVJPEG_BACKEND_DEFAULT, &nvjpeg_decoder); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create nvjpeg decoder: ", status); status = nvjpegDecoderStateCreate( nvjpeg_handle, nvjpeg_decoder, &nvjpeg_decoupled_state); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create nvjpeg decoder state: ", status); status = nvjpegBufferPinnedCreate(nvjpeg_handle, NULL, &pinned_buffers[0]); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create pinned buffer: ", status); status = nvjpegBufferPinnedCreate(nvjpeg_handle, NULL, &pinned_buffers[1]); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create pinned buffer: ", status); status = nvjpegBufferDeviceCreate(nvjpeg_handle, NULL, &device_buffer); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create device buffer: ", status); status = nvjpegJpegStreamCreate(nvjpeg_handle, &jpeg_streams[0]); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create jpeg stream: ", status); status = nvjpegJpegStreamCreate(nvjpeg_handle, &jpeg_streams[1]); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create jpeg stream: ", status); status = nvjpegDecodeParamsCreate(nvjpeg_handle, &nvjpeg_decode_params); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create decode params: ", status); @@ -243,61 +243,61 @@ CUDAJpegDecoder::~CUDAJpegDecoder() { // nvjpegStatus_t status; // status = nvjpegDecodeParamsDestroy(nvjpeg_decode_params); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy nvjpeg decode params: ", // status); // status = nvjpegJpegStreamDestroy(jpeg_streams[0]); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy jpeg stream: ", // status); // status = nvjpegJpegStreamDestroy(jpeg_streams[1]); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy jpeg stream: ", // status); // status = nvjpegBufferPinnedDestroy(pinned_buffers[0]); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy pinned buffer[0]: ", // status); // status = nvjpegBufferPinnedDestroy(pinned_buffers[1]); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy pinned buffer[1]: ", // status); // status = nvjpegBufferDeviceDestroy(device_buffer); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy device buffer: ", // status); // status = nvjpegJpegStateDestroy(nvjpeg_decoupled_state); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy nvjpeg decoupled state: ", // status); // status = nvjpegDecoderDestroy(nvjpeg_decoder); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy nvjpeg decoder: ", // status); // status = nvjpegJpegStateDestroy(nvjpeg_state); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy nvjpeg state: ", // status); // status = nvjpegDestroy(nvjpeg_handle); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, "nvjpegDestroy failed: ", status); } @@ -350,10 +350,10 @@ CUDAJpegDecoder::prepare_buffers( &subsampling, width, height); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to get image info: ", status); - TORCH_CHECK( + STD_TORCH_CHECK( subsampling != NVJPEG_CSS_UNKNOWN, "Unknown chroma subsampling"); // output channels may be different from the actual number of channels in @@ -420,7 +420,7 @@ std::vector CUDAJpegDecoder::decode_images( cudaError_t cudaStatus; cudaStatus = cudaStreamSynchronize(stream); - TORCH_CHECK( + STD_TORCH_CHECK( cudaStatus == cudaSuccess, "Failed to synchronize CUDA stream: ", cudaStatus); @@ -479,7 +479,7 @@ std::vector CUDAJpegDecoder::decode_images( 1, output_format == NVJPEG_OUTPUT_UNCHANGED ? NVJPEG_OUTPUT_RGB : output_format); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to initialize batch decoding: ", status); @@ -491,14 +491,14 @@ std::vector CUDAJpegDecoder::decode_images( hw_input_buffer_size.data(), hw_output_buffer.data(), stream); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to decode batch: ", status); } if (sw_input_buffer.size() > 0) { status = nvjpegStateAttachDeviceBuffer(nvjpeg_decoupled_state, device_buffer); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to attach device buffer: ", status); @@ -508,7 +508,7 @@ std::vector CUDAJpegDecoder::decode_images( nvjpeg_decode_params, output_format == NVJPEG_OUTPUT_UNCHANGED ? NVJPEG_OUTPUT_RGB : output_format); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to set output format: ", status); @@ -521,14 +521,14 @@ std::vector CUDAJpegDecoder::decode_images( 0, 0, jpeg_streams[buffer_index]); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to parse jpeg stream: ", status); status = nvjpegStateAttachPinnedBuffer( nvjpeg_decoupled_state, pinned_buffers[buffer_index]); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to attach pinned buffer: ", status); @@ -539,13 +539,13 @@ std::vector CUDAJpegDecoder::decode_images( nvjpeg_decoupled_state, nvjpeg_decode_params, jpeg_streams[buffer_index]); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to decode jpeg stream: ", status); cudaStatus = cudaStreamSynchronize(stream); - TORCH_CHECK( + STD_TORCH_CHECK( cudaStatus == cudaSuccess, "Failed to synchronize CUDA stream: ", cudaStatus); @@ -556,7 +556,7 @@ std::vector CUDAJpegDecoder::decode_images( nvjpeg_decoupled_state, jpeg_streams[buffer_index], stream); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to transfer jpeg to device: ", status); @@ -570,7 +570,7 @@ std::vector CUDAJpegDecoder::decode_images( nvjpeg_decoupled_state, &sw_output_buffer[i], stream); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to decode jpeg stream: ", status); @@ -578,7 +578,7 @@ std::vector CUDAJpegDecoder::decode_images( } cudaStatus = cudaStreamSynchronize(stream); - TORCH_CHECK( + STD_TORCH_CHECK( cudaStatus == cudaSuccess, "Failed to synchronize CUDA stream: ", cudaStatus); diff --git a/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp b/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp index 80accc1a241..c5cf4b63d75 100644 --- a/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp +++ b/torchvision/csrc/io/image/cuda/encode_jpegs_cuda.cpp @@ -1,11 +1,13 @@ #include "encode_jpegs_cuda.h" + +#include #if !NVJPEG_FOUND namespace vision { namespace image { std::vector encode_jpegs_cuda( const std::vector& decoded_images, const int64_t quality) { - TORCH_CHECK( + STD_TORCH_CHECK( false, "encode_jpegs_cuda: torchvision not compiled with nvJPEG support"); } } // namespace image @@ -37,7 +39,7 @@ std::vector encode_jpegs_cuda( // threaded for now. In the future this may be an opportunity to unlock // further speedups std::lock_guard lock(encoderMutex); - TORCH_CHECK(decoded_images.size() > 0, "Empty input tensor list"); + STD_TORCH_CHECK(decoded_images.size() > 0, "Empty input tensor list"); torch::Device device = decoded_images[0].device(); at::cuda::CUDAGuard device_guard(device); @@ -66,18 +68,18 @@ std::vector encode_jpegs_cuda( std::vector contig_images; contig_images.reserve(decoded_images.size()); for (const auto& image : decoded_images) { - TORCH_CHECK( + STD_TORCH_CHECK( image.dtype() == torch::kU8, "Input tensor dtype should be uint8"); - TORCH_CHECK( + STD_TORCH_CHECK( image.device() == device, "All input tensors must be on the same CUDA device when encoding with nvjpeg") - TORCH_CHECK( + STD_TORCH_CHECK( image.dim() == 3 && image.numel() > 0, "Input data should be a 3-dimensional tensor"); - TORCH_CHECK( + STD_TORCH_CHECK( image.size(0) == 3, "The number of channels should be 3, got: ", image.size(0)); @@ -123,19 +125,19 @@ CUDAJpegEncoder::CUDAJpegEncoder(const torch::Device& target_device) : at::cuda::getCurrentCUDAStream()} { nvjpegStatus_t status; status = nvjpegCreateSimple(&nvjpeg_handle); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create nvjpeg handle: ", status); status = nvjpegEncoderStateCreate(nvjpeg_handle, &nv_enc_state, stream); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create nvjpeg encoder state: ", status); status = nvjpegEncoderParamsCreate(nvjpeg_handle, &nv_enc_params, stream); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to create nvjpeg encoder params: ", status); @@ -166,13 +168,13 @@ CUDAJpegEncoder::~CUDAJpegEncoder() { // nvjpegStatus_t status; // status = nvjpegEncoderParamsDestroy(nv_enc_params); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy nvjpeg encoder params: ", // status); // status = nvjpegEncoderStateDestroy(nv_enc_state); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, // "Failed to destroy nvjpeg encoder state: ", // status); @@ -180,7 +182,7 @@ CUDAJpegEncoder::~CUDAJpegEncoder() { // cudaStreamSynchronize(stream); // status = nvjpegDestroy(nvjpeg_handle); - // TORCH_CHECK( + // STD_TORCH_CHECK( // status == NVJPEG_STATUS_SUCCESS, "nvjpegDestroy failed: ", status); } @@ -190,7 +192,7 @@ torch::Tensor CUDAJpegEncoder::encode_jpeg(const torch::Tensor& src_image) { // Ensure that the incoming src_image is safe to use cudaStatus = cudaStreamSynchronize(current_stream); - TORCH_CHECK(cudaStatus == cudaSuccess, "CUDA ERROR: ", cudaStatus); + STD_TORCH_CHECK(cudaStatus == cudaSuccess, "CUDA ERROR: ", cudaStatus); int channels = src_image.size(0); int height = src_image.size(1); @@ -198,7 +200,7 @@ torch::Tensor CUDAJpegEncoder::encode_jpeg(const torch::Tensor& src_image) { status = nvjpegEncoderParamsSetSamplingFactors( nv_enc_params, NVJPEG_CSS_444, stream); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to set nvjpeg encoder params sampling factors: ", status); @@ -224,20 +226,20 @@ torch::Tensor CUDAJpegEncoder::encode_jpeg(const torch::Tensor& src_image) { height, stream); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "image encoding failed: ", status); // Retrieve length of the encoded image size_t length; status = nvjpegEncodeRetrieveBitstreamDevice( nvjpeg_handle, nv_enc_state, NULL, &length, stream); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to retrieve encoded image stream state: ", status); // Synchronize the stream to ensure that the encoded image is ready cudaStatus = cudaStreamSynchronize(stream); - TORCH_CHECK(cudaStatus == cudaSuccess, "CUDA ERROR: ", cudaStatus); + STD_TORCH_CHECK(cudaStatus == cudaSuccess, "CUDA ERROR: ", cudaStatus); // Reserve buffer for the encoded image torch::Tensor encoded_image = torch::empty( @@ -248,7 +250,7 @@ torch::Tensor CUDAJpegEncoder::encode_jpeg(const torch::Tensor& src_image) { .device(target_device) .requires_grad(false)); cudaStatus = cudaStreamSynchronize(stream); - TORCH_CHECK(cudaStatus == cudaSuccess, "CUDA ERROR: ", cudaStatus); + STD_TORCH_CHECK(cudaStatus == cudaSuccess, "CUDA ERROR: ", cudaStatus); // Retrieve the encoded image status = nvjpegEncodeRetrieveBitstreamDevice( nvjpeg_handle, @@ -256,7 +258,7 @@ torch::Tensor CUDAJpegEncoder::encode_jpeg(const torch::Tensor& src_image) { encoded_image.data_ptr(), &length, stream); - TORCH_CHECK( + STD_TORCH_CHECK( status == NVJPEG_STATUS_SUCCESS, "Failed to retrieve encoded image: ", status); @@ -266,7 +268,7 @@ torch::Tensor CUDAJpegEncoder::encode_jpeg(const torch::Tensor& src_image) { void CUDAJpegEncoder::set_quality(const int64_t quality) { nvjpegStatus_t paramsQualityStatus = nvjpegEncoderParamsSetQuality(nv_enc_params, quality, stream); - TORCH_CHECK( + STD_TORCH_CHECK( paramsQualityStatus == NVJPEG_STATUS_SUCCESS, "Failed to set nvjpeg encoder params quality: ", paramsQualityStatus); From 9342bfcaeb008818e0be12aa458762a02474c258 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 26 Feb 2026 13:52:23 +0000 Subject: [PATCH 2/5] Port png decoder and encoder to stable ABI --- .../csrc/io/image/cpu/decode_image.cpp | 4 ++- torchvision/csrc/io/image/cpu/decode_png.cpp | 33 +++++++++---------- torchvision/csrc/io/image/cpu/decode_png.h | 6 ++-- torchvision/csrc/io/image/cpu/encode_png.cpp | 28 +++++++++------- torchvision/csrc/io/image/cpu/encode_png.h | 6 ++-- torchvision/csrc/io/image/cpu/exif.h | 28 ++++++++++++++++ torchvision/csrc/io/image/image.cpp | 17 ++++++++-- 7 files changed, 83 insertions(+), 39 deletions(-) diff --git a/torchvision/csrc/io/image/cpu/decode_image.cpp b/torchvision/csrc/io/image/cpu/decode_image.cpp index a1674993bfc..3d0d8aed4ef 100644 --- a/torchvision/csrc/io/image/cpu/decode_image.cpp +++ b/torchvision/csrc/io/image/cpu/decode_image.cpp @@ -35,7 +35,9 @@ torch::Tensor decode_image( const uint8_t png_signature[4] = {137, 80, 78, 71}; // == "\211PNG" STD_TORCH_CHECK(data.numel() >= 4, err_msg); if (memcmp(png_signature, datap, 4) == 0) { - return decode_png(data, mode, apply_exif_orientation); + auto stable_data = vision::toStableTensor(data); + auto stable_result = decode_png(stable_data, mode, apply_exif_orientation); + return vision::fromStableTensor(stable_result); } const uint8_t gif_signature_1[6] = { diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp index 67c788455c4..e30849b091f 100644 --- a/torchvision/csrc/io/image/cpu/decode_png.cpp +++ b/torchvision/csrc/io/image/cpu/decode_png.cpp @@ -9,8 +9,8 @@ namespace image { using namespace exif_private; #if !PNG_FOUND -torch::Tensor decode_png( - const torch::Tensor& data, +torch::stable::Tensor decode_png( + const torch::stable::Tensor& data, ImageReadMode mode, bool apply_exif_orientation) { STD_TORCH_CHECK( @@ -23,13 +23,11 @@ bool is_little_endian() { return *(uint8_t*)&x; } -torch::Tensor decode_png( - const torch::Tensor& data, +torch::stable::Tensor decode_png( + const torch::stable::Tensor& data, ImageReadMode mode, bool apply_exif_orientation) { - C10_LOG_API_USAGE_ONCE("torchvision.csrc.io.image.cpu.decode_png.decode_png"); - - validate_encoded_data(data); + validate_encoded_data_stable(data); auto png_ptr = png_create_read_struct(PNG_LIBPNG_VER_STRING, nullptr, nullptr, nullptr); @@ -41,9 +39,8 @@ torch::Tensor decode_png( STD_TORCH_CHECK(info_ptr, "libpng info structure allocation failed!") } - auto accessor = data.accessor(); - auto datap = accessor.data(); - auto datap_len = accessor.size(0); + auto datap = data.const_data_ptr(); + auto datap_len = data.size(0); if (setjmp(png_jmpbuf(png_ptr)) != 0) { png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); @@ -197,19 +194,21 @@ torch::Tensor decode_png( auto num_pixels_per_row = width * channels; auto is_16_bits = bit_depth == 16; - auto tensor = torch::empty( - {int64_t(height), int64_t(width), channels}, - is_16_bits ? at::kUInt16 : torch::kU8); + int64_t tensor_sizes[] = {int64_t(height), int64_t(width), channels}; + auto tensor = torch::stable::empty( + {tensor_sizes, 3}, + is_16_bits ? torch::headeronly::ScalarType::UInt16 + : torch::headeronly::ScalarType::Byte); if (is_little_endian()) { png_set_swap(png_ptr); } - auto t_ptr = (uint8_t*)tensor.data_ptr(); + auto t_ptr = static_cast(tensor.mutable_data_ptr()); for (int pass = 0; pass < number_of_passes; pass++) { for (png_uint_32 i = 0; i < height; ++i) { png_read_row(png_ptr, t_ptr, nullptr); t_ptr += num_pixels_per_row * (is_16_bits ? 2 : 1); } - t_ptr = (uint8_t*)tensor.data_ptr(); + t_ptr = static_cast(tensor.mutable_data_ptr()); } int exif_orientation = -1; @@ -219,9 +218,9 @@ torch::Tensor decode_png( png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - auto output = tensor.permute({2, 0, 1}); + auto output = stablePermute(tensor, {2, 0, 1}); if (apply_exif_orientation) { - return exif_orientation_transform(output, exif_orientation); + return exif_orientation_transform_stable(output, exif_orientation); } return output; } diff --git a/torchvision/csrc/io/image/cpu/decode_png.h b/torchvision/csrc/io/image/cpu/decode_png.h index faaffa7ae49..45443a0582b 100644 --- a/torchvision/csrc/io/image/cpu/decode_png.h +++ b/torchvision/csrc/io/image/cpu/decode_png.h @@ -1,13 +1,13 @@ #pragma once -#include +#include "../../../stable_abi_compat.h" #include "../common.h" namespace vision { namespace image { -C10_EXPORT torch::Tensor decode_png( - const torch::Tensor& data, +torch::stable::Tensor decode_png( + const torch::stable::Tensor& data, ImageReadMode mode = IMAGE_READ_MODE_UNCHANGED, bool apply_exif_orientation = false); diff --git a/torchvision/csrc/io/image/cpu/encode_png.cpp b/torchvision/csrc/io/image/cpu/encode_png.cpp index d015f44cb39..31475c099b2 100644 --- a/torchvision/csrc/io/image/cpu/encode_png.cpp +++ b/torchvision/csrc/io/image/cpu/encode_png.cpp @@ -1,4 +1,4 @@ -#include "encode_jpeg.h" +#include "encode_png.h" #include @@ -9,7 +9,9 @@ namespace image { #if !PNG_FOUND -torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { +torch::stable::Tensor encode_png( + const torch::stable::Tensor& data, + int64_t compression_level) { STD_TORCH_CHECK( false, "encode_png: torchvision not compiled with libpng support"); } @@ -66,8 +68,9 @@ void torch_png_write_data( } // namespace -torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { - C10_LOG_API_USAGE_ONCE("torchvision.csrc.io.image.cpu.encode_png.encode_png"); +torch::stable::Tensor encode_png( + const torch::stable::Tensor& data, + int64_t compression_level) { // Define compression structures and error handling png_structp png_write; png_infop info_ptr; @@ -104,12 +107,12 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { "Compression level should be between 0 and 9"); // Check that the input tensor is on CPU - STD_TORCH_CHECK( - data.device() == torch::kCPU, "Input tensor should be on CPU"); + STD_TORCH_CHECK(data.is_cpu(), "Input tensor should be on CPU"); // Check that the input tensor dtype is uint8 STD_TORCH_CHECK( - data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); + data.scalar_type() == torch::headeronly::ScalarType::Byte, + "Input tensor dtype should be uint8"); // Check that the input tensor is 3-dimensional STD_TORCH_CHECK( @@ -119,7 +122,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { int channels = data.size(0); int height = data.size(1); int width = data.size(2); - auto input = data.permute({1, 2, 0}).contiguous(); + auto input = torch::stable::contiguous(stablePermute(data, {1, 2, 0})); STD_TORCH_CHECK( channels == 1 || channels == 3, @@ -155,7 +158,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { png_write_info(png_write, info_ptr); auto stride = width * channels; - auto ptr = input.data_ptr(); + auto ptr = input.const_data_ptr(); // Encode PNG file for (int y = 0; y < height; ++y) { @@ -169,12 +172,13 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { // Destroy structures png_destroy_write_struct(&png_write, &info_ptr); - torch::TensorOptions options = torch::TensorOptions{torch::kU8}; - auto outTensor = torch::empty({(long)buf_info.size}, options); + int64_t out_size = static_cast(buf_info.size); + auto outTensor = + torch::stable::empty({&out_size, 1}, torch::headeronly::ScalarType::Byte); // Copy memory from png buffer, since torch cannot get ownership of it via // `from_blob` - auto outPtr = outTensor.data_ptr(); + auto outPtr = static_cast(outTensor.mutable_data_ptr()); std::memcpy(outPtr, buf_info.buffer, sizeof(uint8_t) * outTensor.numel()); free(buf_info.buffer); diff --git a/torchvision/csrc/io/image/cpu/encode_png.h b/torchvision/csrc/io/image/cpu/encode_png.h index 86a67c8706e..68955fead16 100644 --- a/torchvision/csrc/io/image/cpu/encode_png.h +++ b/torchvision/csrc/io/image/cpu/encode_png.h @@ -1,12 +1,12 @@ #pragma once -#include +#include "../../../stable_abi_compat.h" namespace vision { namespace image { -C10_EXPORT torch::Tensor encode_png( - const torch::Tensor& data, +torch::stable::Tensor encode_png( + const torch::stable::Tensor& data, int64_t compression_level); } // namespace image diff --git a/torchvision/csrc/io/image/cpu/exif.h b/torchvision/csrc/io/image/cpu/exif.h index e55a800b220..2730ef89cac 100644 --- a/torchvision/csrc/io/image/cpu/exif.h +++ b/torchvision/csrc/io/image/cpu/exif.h @@ -60,6 +60,7 @@ direct, #include #include +#include "../../../stable_abi_compat.h" namespace vision { namespace image { @@ -253,6 +254,33 @@ inline torch::Tensor exif_orientation_transform( return image; } +// Stable ABI version of exif_orientation_transform +inline torch::stable::Tensor exif_orientation_transform_stable( + const torch::stable::Tensor& image, + int orientation) { + if (orientation == IMAGE_ORIENTATION_TL) { + return image; + } else if (orientation == IMAGE_ORIENTATION_TR) { + return vision::stableFlip(image, {-1}); + } else if (orientation == IMAGE_ORIENTATION_BR) { + // needs 180 rotation equivalent to + // flip both horizontally and vertically + return vision::stableFlip(image, {-2, -1}); + } else if (orientation == IMAGE_ORIENTATION_BL) { + return vision::stableFlip(image, {-2}); + } else if (orientation == IMAGE_ORIENTATION_LT) { + return torch::stable::transpose(image, -1, -2); + } else if (orientation == IMAGE_ORIENTATION_RT) { + return vision::stableFlip(torch::stable::transpose(image, -1, -2), {-1}); + } else if (orientation == IMAGE_ORIENTATION_RB) { + return vision::stableFlip( + torch::stable::transpose(image, -1, -2), {-2, -1}); + } else if (orientation == IMAGE_ORIENTATION_LB) { + return vision::stableFlip(torch::stable::transpose(image, -1, -2), {-2}); + } + return image; +} + } // namespace exif_private } // namespace image } // namespace vision diff --git a/torchvision/csrc/io/image/image.cpp b/torchvision/csrc/io/image/image.cpp index b4a4ed54a67..45e77b36db0 100644 --- a/torchvision/csrc/io/image/image.cpp +++ b/torchvision/csrc/io/image/image.cpp @@ -1,16 +1,15 @@ #include "image.h" #include +#include namespace vision { namespace image { +// Legacy registration for non-PNG ops static auto registry = torch::RegisterOperators() .op("image::decode_gif", &decode_gif) - .op("image::decode_png(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor", - &decode_png) - .op("image::encode_png", &encode_png) .op("image::decode_jpeg(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor", &decode_jpeg) .op("image::decode_webp(Tensor encoded_data, int mode) -> Tensor", @@ -25,5 +24,17 @@ static auto registry = .op("image::_jpeg_version", &_jpeg_version) .op("image::_is_compiled_against_turbo", &_is_compiled_against_turbo); +// Stable ABI registration for PNG ops +STABLE_TORCH_LIBRARY_FRAGMENT(image, m) { + m.def( + "decode_png(Tensor data, int mode, bool apply_exif_orientation=False) -> Tensor"); + m.def("encode_png(Tensor data, int compression_level) -> Tensor"); +} + +STABLE_TORCH_LIBRARY_IMPL(image, CPU, m) { + m.impl("decode_png", TORCH_BOX(&decode_png)); + m.impl("encode_png", TORCH_BOX(&encode_png)); +} + } // namespace image } // namespace vision From 45bbce5ee1544edbafc07e79d2fba064da04e37d Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 26 Feb 2026 13:56:38 +0000 Subject: [PATCH 3/5] fix? --- torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp b/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp index c082485a2a7..0f5cf01548d 100644 --- a/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp +++ b/torchvision/csrc/io/image/cuda/decode_jpegs_cuda.cpp @@ -129,11 +129,7 @@ std::vector decode_jpegs_cuda( event.block(current_stream); return result; } catch (const std::exception& e) { - if (typeid(e) != typeid(std::runtime_error)) { - STD_TORCH_CHECK(false, "Error while decoding JPEG images: ", e.what()); - } else { - throw; - } + STD_TORCH_CHECK(false, "Error while decoding JPEG images: ", e.what()); } } From 02f7bb1cc6f1abea7c45c36afbaf3b58594c1037 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 26 Feb 2026 14:17:22 +0000 Subject: [PATCH 4/5] Use accessors, add missing file --- torchvision/csrc/io/image/cpu/decode_png.cpp | 5 +- torchvision/csrc/stable_abi_compat.h | 89 ++++++++++++++++++++ 2 files changed, 92 insertions(+), 2 deletions(-) create mode 100644 torchvision/csrc/stable_abi_compat.h diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp index e30849b091f..714cb4fd69e 100644 --- a/torchvision/csrc/io/image/cpu/decode_png.cpp +++ b/torchvision/csrc/io/image/cpu/decode_png.cpp @@ -39,8 +39,9 @@ torch::stable::Tensor decode_png( STD_TORCH_CHECK(info_ptr, "libpng info structure allocation failed!") } - auto datap = data.const_data_ptr(); - auto datap_len = data.size(0); + auto accessor = constAccessor(data); + auto datap = accessor.data(); + auto datap_len = accessor.size(0); if (setjmp(png_jmpbuf(png_ptr)) != 0) { png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); diff --git a/torchvision/csrc/stable_abi_compat.h b/torchvision/csrc/stable_abi_compat.h new file mode 100644 index 00000000000..e640b7e4414 --- /dev/null +++ b/torchvision/csrc/stable_abi_compat.h @@ -0,0 +1,89 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +// Conversion helpers between at::Tensor and torch::stable::Tensor. +// These are used at migration boundaries where some code is on the old API +// and some is on the stable ABI. +#include + +namespace vision { + +inline torch::stable::Tensor toStableTensor(at::Tensor t) { + return torch::stable::Tensor( + reinterpret_cast(new at::Tensor(std::move(t)))); +} + +inline at::Tensor fromStableTensor(const torch::stable::Tensor& t) { + return *reinterpret_cast(t.get()); +} + +// Dispatcher-based helpers for ops not yet in the stable ABI. +inline torch::stable::Tensor stablePermute( + const torch::stable::Tensor& self, + std::vector dims) { + const auto num_args = 2; + std::array stack{ + torch::stable::detail::from(self), torch::stable::detail::from(dims)}; + TORCH_ERROR_CODE_CHECK(torch_call_dispatcher( + "aten::permute", "", stack.data(), TORCH_ABI_VERSION)); + return torch::stable::detail::to(stack[0]); +} + +inline torch::stable::Tensor stableFlip( + const torch::stable::Tensor& self, + std::vector dims) { + const auto num_args = 2; + std::array stack{ + torch::stable::detail::from(self), torch::stable::detail::from(dims)}; + TORCH_ERROR_CODE_CHECK( + torch_call_dispatcher("aten::flip", "", stack.data(), TORCH_ABI_VERSION)); + return torch::stable::detail::to(stack[0]); +} + +// Accessor helpers for torch::stable::Tensor, modeled after torchcodec's +// StableABICompat.h. These construct a HeaderOnlyTensorAccessor from the +// stable tensor's raw pointer, sizes, and strides. +template +torch::headeronly::HeaderOnlyTensorAccessor mutableAccessor( + torch::stable::Tensor& tensor) { + return torch::headeronly::HeaderOnlyTensorAccessor( + tensor.mutable_data_ptr(), + tensor.sizes().data(), + tensor.strides().data()); +} + +template +torch::headeronly::HeaderOnlyTensorAccessor constAccessor( + const torch::stable::Tensor& tensor) { + return torch::headeronly::HeaderOnlyTensorAccessor( + tensor.const_data_ptr(), + tensor.sizes().data(), + tensor.strides().data()); +} + +// Stable ABI version of validate_encoded_data. +inline void validate_encoded_data_stable( + const torch::stable::Tensor& encoded_data) { + STD_TORCH_CHECK( + encoded_data.is_contiguous(), "Input tensor must be contiguous."); + STD_TORCH_CHECK( + encoded_data.scalar_type() == torch::headeronly::ScalarType::Byte, + "Input tensor must have uint8 data type."); + STD_TORCH_CHECK( + encoded_data.dim() == 1 && encoded_data.numel() > 0, + "Input tensor must be 1-dimensional and non-empty."); +} + +} // namespace vision From ecdcdaec843d567016d166a85c225cddbef792e8 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Thu, 26 Feb 2026 17:16:07 +0000 Subject: [PATCH 5/5] fix --- torchvision/csrc/io/image/cpu/encode_png.cpp | 9 --------- 1 file changed, 9 deletions(-) diff --git a/torchvision/csrc/io/image/cpu/encode_png.cpp b/torchvision/csrc/io/image/cpu/encode_png.cpp index 9db22d7efef..a276278b243 100644 --- a/torchvision/csrc/io/image/cpu/encode_png.cpp +++ b/torchvision/csrc/io/image/cpu/encode_png.cpp @@ -109,21 +109,12 @@ torch::stable::Tensor encode_png( "Compression level should be between 0 and 9"); // Check that the input tensor is on CPU -<<<<<<< HEAD STD_TORCH_CHECK(data.is_cpu(), "Input tensor should be on CPU"); // Check that the input tensor dtype is uint8 STD_TORCH_CHECK( data.scalar_type() == torch::headeronly::ScalarType::Byte, "Input tensor dtype should be uint8"); -======= - STD_TORCH_CHECK( - data.device() == torch::kCPU, "Input tensor should be on CPU"); - - // Check that the input tensor dtype is uint8 - STD_TORCH_CHECK( - data.dtype() == torch::kU8, "Input tensor dtype should be uint8"); ->>>>>>> c0331c5e2933c621db9a44623f4f3981fe2342e0 // Check that the input tensor is 3-dimensional STD_TORCH_CHECK(