From 2bb040b8513251bb8e70ce5cedb83a810150af73 Mon Sep 17 00:00:00 2001 From: MPSFuzz <2286770808@qq.com> Date: Fri, 6 Mar 2026 05:05:07 +0000 Subject: [PATCH 1/3] Fix CPU decode_jpeg error-path leak on malformed JPEGs (setjmp/longjmp) --- torchvision/csrc/io/image/cpu/decode_jpeg.cpp | 26 ++++++++++++++----- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp index 8163ace3307..dc6ca184d71 100644 --- a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp +++ b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp @@ -3,6 +3,8 @@ #include "common_jpeg.h" #include "exif.h" +#include + namespace vision { namespace image { @@ -141,12 +143,22 @@ torch::Tensor decode_jpeg( struct jpeg_decompress_struct cinfo; struct torch_jpeg_error_mgr jerr; + // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not + // unwind C++ stack frames, so destructors of objects created after setjmp + // won't run. Declare tensors before setjmp and reset them on the error path. + c10::optional tensor_opt; + c10::optional cmyk_line_opt; + auto datap = data.data_ptr(); // Setup decompression structure cinfo.err = jpeg_std_error(&jerr.pub); jerr.pub.error_exit = torch_jpeg_error_exit; /* Establish the setjmp return context for my_error_exit to use. */ if (setjmp(jerr.setjmp_buffer)) { + // Release any tensors that may have been allocated after setjmp. + cmyk_line_opt.reset(); + tensor_opt.reset(); + /* If we get here, the JPEG code has signaled an error. * We need to clean up the JPEG object. */ @@ -210,12 +222,12 @@ torch::Tensor decode_jpeg( int width = cinfo.output_width; int stride = width * channels; - auto tensor = + tensor_opt = torch::empty({int64_t(height), int64_t(width), channels}, torch::kU8); - auto ptr = tensor.data_ptr(); - torch::Tensor cmyk_line_tensor; + auto ptr = tensor_opt->data_ptr(); + if (cmyk_to_rgb_or_gray) { - cmyk_line_tensor = torch::empty({int64_t(width), 4}, torch::kU8); + cmyk_line_opt = torch::empty({int64_t(width), 4}, torch::kU8); } while (cinfo.output_scanline < cinfo.output_height) { @@ -224,7 +236,7 @@ torch::Tensor decode_jpeg( * more than one scanline at a time if that's more convenient. */ if (cmyk_to_rgb_or_gray) { - auto cmyk_line_ptr = cmyk_line_tensor.data_ptr(); + auto cmyk_line_ptr = cmyk_line_opt->data_ptr(); jpeg_read_scanlines(&cinfo, &cmyk_line_ptr, 1); if (channels == 3) { @@ -240,7 +252,7 @@ torch::Tensor decode_jpeg( jpeg_finish_decompress(&cinfo); jpeg_destroy_decompress(&cinfo); - auto output = tensor.permute({2, 0, 1}); + auto output = tensor_opt->permute({2, 0, 1}); if (apply_exif_orientation) { return exif_orientation_transform(output, exif_orientation); @@ -266,4 +278,4 @@ bool _is_compiled_against_turbo() { } } // namespace image -} // namespace vision +} // namespace vision \ No newline at end of file From bf539b43ad25eafdb7c5475ef85b8f84ef4a0752 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 6 Mar 2026 12:57:51 +0000 Subject: [PATCH 2/3] Fix lint, use std::optional --- torchvision/csrc/io/image/cpu/decode_jpeg.cpp | 25 ++++++++++--------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp index dc6ca184d71..8cf8ea28019 100644 --- a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp +++ b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp @@ -3,7 +3,7 @@ #include "common_jpeg.h" #include "exif.h" -#include +#include namespace vision { namespace image { @@ -145,9 +145,10 @@ torch::Tensor decode_jpeg( // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not // unwind C++ stack frames, so destructors of objects created after setjmp - // won't run. Declare tensors before setjmp and reset them on the error path. - c10::optional tensor_opt; - c10::optional cmyk_line_opt; + // won't run. We use std::optional to declare tensors before setjmp while + // deferring construction, and explicitly reset them on the error path. + std::optional tensor; + std::optional cmyk_line_tensor; auto datap = data.data_ptr(); // Setup decompression structure @@ -156,8 +157,8 @@ torch::Tensor decode_jpeg( /* Establish the setjmp return context for my_error_exit to use. */ if (setjmp(jerr.setjmp_buffer)) { // Release any tensors that may have been allocated after setjmp. - cmyk_line_opt.reset(); - tensor_opt.reset(); + cmyk_line_tensor.reset(); + tensor.reset(); /* If we get here, the JPEG code has signaled an error. * We need to clean up the JPEG object. @@ -222,12 +223,12 @@ torch::Tensor decode_jpeg( int width = cinfo.output_width; int stride = width * channels; - tensor_opt = + tensor = torch::empty({int64_t(height), int64_t(width), channels}, torch::kU8); - auto ptr = tensor_opt->data_ptr(); + auto ptr = tensor->data_ptr(); if (cmyk_to_rgb_or_gray) { - cmyk_line_opt = torch::empty({int64_t(width), 4}, torch::kU8); + cmyk_line_tensor = torch::empty({int64_t(width), 4}, torch::kU8); } while (cinfo.output_scanline < cinfo.output_height) { @@ -236,7 +237,7 @@ torch::Tensor decode_jpeg( * more than one scanline at a time if that's more convenient. */ if (cmyk_to_rgb_or_gray) { - auto cmyk_line_ptr = cmyk_line_opt->data_ptr(); + auto cmyk_line_ptr = cmyk_line_tensor->data_ptr(); jpeg_read_scanlines(&cinfo, &cmyk_line_ptr, 1); if (channels == 3) { @@ -252,7 +253,7 @@ torch::Tensor decode_jpeg( jpeg_finish_decompress(&cinfo); jpeg_destroy_decompress(&cinfo); - auto output = tensor_opt->permute({2, 0, 1}); + auto output = tensor->permute({2, 0, 1}); if (apply_exif_orientation) { return exif_orientation_transform(output, exif_orientation); @@ -278,4 +279,4 @@ bool _is_compiled_against_turbo() { } } // namespace image -} // namespace vision \ No newline at end of file +} // namespace vision From 5458cc850a834d6b63800daee45cf64a9a5e5a11 Mon Sep 17 00:00:00 2001 From: Nicolas Hug Date: Fri, 6 Mar 2026 13:02:32 +0000 Subject: [PATCH 3/3] Apply same fix to jpeg encoder and png --- torchvision/csrc/io/image/cpu/decode_png.cpp | 17 +++++++++++++---- torchvision/csrc/io/image/cpu/encode_jpeg.cpp | 13 +++++++++++-- torchvision/csrc/io/image/cpu/encode_png.cpp | 14 ++++++++++++-- 3 files changed, 36 insertions(+), 8 deletions(-) diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp index 67c788455c4..9f20041ed4c 100644 --- a/torchvision/csrc/io/image/cpu/decode_png.cpp +++ b/torchvision/csrc/io/image/cpu/decode_png.cpp @@ -3,6 +3,8 @@ #include "common_png.h" #include "exif.h" +#include + namespace vision { namespace image { @@ -45,7 +47,14 @@ torch::Tensor decode_png( auto datap = accessor.data(); auto datap_len = accessor.size(0); + // NOTE: libpng uses setjmp/longjmp for error handling. longjmp does not + // unwind C++ stack frames, so destructors of objects created after setjmp + // won't run. We use std::optional to declare tensors before setjmp while + // deferring construction, and explicitly reset them on the error path. + std::optional tensor; + if (setjmp(png_jmpbuf(png_ptr)) != 0) { + tensor.reset(); png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); STD_TORCH_CHECK(false, "Internal error."); } @@ -197,19 +206,19 @@ torch::Tensor decode_png( auto num_pixels_per_row = width * channels; auto is_16_bits = bit_depth == 16; - auto tensor = torch::empty( + tensor = torch::empty( {int64_t(height), int64_t(width), channels}, is_16_bits ? at::kUInt16 : torch::kU8); if (is_little_endian()) { png_set_swap(png_ptr); } - auto t_ptr = (uint8_t*)tensor.data_ptr(); + auto t_ptr = (uint8_t*)tensor->data_ptr(); for (int pass = 0; pass < number_of_passes; pass++) { for (png_uint_32 i = 0; i < height; ++i) { png_read_row(png_ptr, t_ptr, nullptr); t_ptr += num_pixels_per_row * (is_16_bits ? 2 : 1); } - t_ptr = (uint8_t*)tensor.data_ptr(); + t_ptr = (uint8_t*)tensor->data_ptr(); } int exif_orientation = -1; @@ -219,7 +228,7 @@ torch::Tensor decode_png( png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - auto output = tensor.permute({2, 0, 1}); + auto output = tensor->permute({2, 0, 1}); if (apply_exif_orientation) { return exif_orientation_transform(output, exif_orientation); } diff --git a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp index 99b0b6097db..d29f3f4f481 100644 --- a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp +++ b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp @@ -2,6 +2,8 @@ #include +#include + #include "common_jpeg.h" namespace vision { @@ -37,6 +39,12 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { JpegSizeType jpegSize = 0; uint8_t* jpegBuf = nullptr; + // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not + // unwind C++ stack frames, so destructors of objects created after setjmp + // won't run. We use std::optional to declare tensors before setjmp while + // deferring construction, and explicitly reset them on the error path. + std::optional input; + cinfo.err = jpeg_std_error(&jerr.pub); jerr.pub.error_exit = torch_jpeg_error_exit; @@ -45,6 +53,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { /* If we get here, the JPEG code has signaled an error. * We need to clean up the JPEG object and the buffer. */ + input.reset(); jpeg_destroy_compress(&cinfo); if (jpegBuf != nullptr) { free(jpegBuf); @@ -69,7 +78,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { int channels = data.size(0); int height = data.size(1); int width = data.size(2); - auto input = data.permute({1, 2, 0}).contiguous(); + input = data.permute({1, 2, 0}).contiguous(); STD_TORCH_CHECK( channels == 1 || channels == 3, @@ -95,7 +104,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { jpeg_start_compress(&cinfo, TRUE); auto stride = width * channels; - auto ptr = input.data_ptr(); + auto ptr = input->data_ptr(); // Encode JPEG file while (cinfo.next_scanline < cinfo.image_height) { diff --git a/torchvision/csrc/io/image/cpu/encode_png.cpp b/torchvision/csrc/io/image/cpu/encode_png.cpp index d015f44cb39..bd0391acf00 100644 --- a/torchvision/csrc/io/image/cpu/encode_png.cpp +++ b/torchvision/csrc/io/image/cpu/encode_png.cpp @@ -2,6 +2,8 @@ #include +#include + #include "common_png.h" namespace vision { @@ -78,11 +80,19 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { buf_info.buffer = nullptr; buf_info.size = 0; + // NOTE: libpng uses setjmp/longjmp for error handling. longjmp does not + // unwind C++ stack frames, so destructors of objects created after setjmp + // won't run. We use std::optional to declare tensors before setjmp while + // deferring construction, and explicitly reset them on the error path. + std::optional input; + /* Establish the setjmp return context for my_error_exit to use. */ if (setjmp(err_ptr.setjmp_buffer)) { /* If we get here, the PNG code has signaled an error. * We need to clean up the PNG object and the buffer. */ + input.reset(); + if (info_ptr != nullptr) { png_destroy_info_struct(png_write, &info_ptr); } @@ -119,7 +129,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { int channels = data.size(0); int height = data.size(1); int width = data.size(2); - auto input = data.permute({1, 2, 0}).contiguous(); + input = data.permute({1, 2, 0}).contiguous(); STD_TORCH_CHECK( channels == 1 || channels == 3, @@ -155,7 +165,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { png_write_info(png_write, info_ptr); auto stride = width * channels; - auto ptr = input.data_ptr(); + auto ptr = input->data_ptr(); // Encode PNG file for (int y = 0; y < height; ++y) {