From c9ffdfec025381f6a459b3af1e2dfa0a970217e3 Mon Sep 17 00:00:00 2001 From: MPSFuzz <2022326245015@stu.scu.edu.cn> Date: Tue, 10 Mar 2026 18:49:04 +0800 Subject: [PATCH] Fix CPU decode_jpeg error-path leak on malformed JPEGs (setjmp/longjmp) (#9423) Co-authored-by: MPSFuzz <2286770808@qq.com> Co-authored-by: Nicolas Hug Co-authored-by: Nicolas Hug --- torchvision/csrc/io/image/cpu/decode_jpeg.cpp | 23 +++++++++++++++---- torchvision/csrc/io/image/cpu/decode_png.cpp | 17 ++++++++++---- torchvision/csrc/io/image/cpu/encode_jpeg.cpp | 12 ++++++++-- torchvision/csrc/io/image/cpu/encode_png.cpp | 14 +++++++++-- 4 files changed, 53 insertions(+), 13 deletions(-) diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp index 052b98e1be9..55885d76a59 100644 --- a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp +++ b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp @@ -3,6 +3,8 @@ #include "common_jpeg.h" #include "exif.h" +#include <optional> + namespace vision { namespace image { @@ -141,12 +143,23 @@ torch::Tensor decode_jpeg( struct jpeg_decompress_struct cinfo; struct torch_jpeg_error_mgr jerr; + // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not + // unwind C++ stack frames, so destructors of objects created after setjmp + // won't run. We use std::optional to declare tensors before setjmp while + // deferring construction, and explicitly reset them on the error path. + std::optional<torch::Tensor> tensor; + std::optional<torch::Tensor> cmyk_line_tensor; + auto datap = data.data_ptr<uint8_t>(); // Setup decompression structure cinfo.err = jpeg_std_error(&jerr.pub); jerr.pub.error_exit = torch_jpeg_error_exit; /* Establish the setjmp return context for my_error_exit to use. */ if (setjmp(jerr.setjmp_buffer)) { + // Release any tensors that may have been allocated after setjmp. + cmyk_line_tensor.reset(); + tensor.reset(); + /* If we get here, the JPEG code has signaled an error. * We need to clean up the JPEG object.
*/ @@ -209,10 +222,10 @@ torch::Tensor decode_jpeg( int width = cinfo.output_width; int stride = width * channels; - auto tensor = + tensor = torch::empty({int64_t(height), int64_t(width), channels}, torch::kU8); - auto ptr = tensor.data_ptr<uint8_t>(); - torch::Tensor cmyk_line_tensor; + auto ptr = tensor->data_ptr<uint8_t>(); + if (cmyk_to_rgb_or_gray) { cmyk_line_tensor = torch::empty({int64_t(width), 4}, torch::kU8); } @@ -223,7 +236,7 @@ torch::Tensor decode_jpeg( * more than one scanline at a time if that's more convenient. */ if (cmyk_to_rgb_or_gray) { - auto cmyk_line_ptr = cmyk_line_tensor.data_ptr<uint8_t>(); + auto cmyk_line_ptr = cmyk_line_tensor->data_ptr<uint8_t>(); jpeg_read_scanlines(&cinfo, &cmyk_line_ptr, 1); if (channels == 3) { @@ -239,7 +252,7 @@ torch::Tensor decode_jpeg( jpeg_finish_decompress(&cinfo); jpeg_destroy_decompress(&cinfo); - auto output = tensor.permute({2, 0, 1}); + auto output = tensor->permute({2, 0, 1}); if (apply_exif_orientation) { return exif_orientation_transform(output, exif_orientation); diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp index 5ea6f073975..41d32b205ee 100644 --- a/torchvision/csrc/io/image/cpu/decode_png.cpp +++ b/torchvision/csrc/io/image/cpu/decode_png.cpp @@ -3,6 +3,8 @@ #include "common_png.h" #include "exif.h" +#include <optional> + namespace vision { namespace image { @@ -45,7 +47,14 @@ torch::Tensor decode_png( auto datap = accessor.data(); auto datap_len = accessor.size(0); + // NOTE: libpng uses setjmp/longjmp for error handling. longjmp does not + // unwind C++ stack frames, so destructors of objects created after setjmp + // won't run. We use std::optional to declare tensors before setjmp while + // deferring construction, and explicitly reset them on the error path.
+ std::optional<torch::Tensor> tensor; + if (setjmp(png_jmpbuf(png_ptr)) != 0) { + tensor.reset(); png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); TORCH_CHECK(false, "Internal error."); } @@ -196,19 +205,19 @@ torch::Tensor decode_png( auto num_pixels_per_row = width * channels; auto is_16_bits = bit_depth == 16; - auto tensor = torch::empty( + tensor = torch::empty( {int64_t(height), int64_t(width), channels}, is_16_bits ? at::kUInt16 : torch::kU8); if (is_little_endian()) { png_set_swap(png_ptr); } - auto t_ptr = (uint8_t*)tensor.data_ptr(); + auto t_ptr = (uint8_t*)tensor->data_ptr(); for (int pass = 0; pass < number_of_passes; pass++) { for (png_uint_32 i = 0; i < height; ++i) { png_read_row(png_ptr, t_ptr, nullptr); t_ptr += num_pixels_per_row * (is_16_bits ? 2 : 1); } - t_ptr = (uint8_t*)tensor.data_ptr(); + t_ptr = (uint8_t*)tensor->data_ptr(); } int exif_orientation = -1; @@ -218,7 +227,7 @@ torch::Tensor decode_png( png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); - auto output = tensor.permute({2, 0, 1}); + auto output = tensor->permute({2, 0, 1}); if (apply_exif_orientation) { return exif_orientation_transform(output, exif_orientation); } diff --git a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp index d2ed73071a2..def265841f9 100644 --- a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp +++ b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp @@ -1,5 +1,6 @@ #include "encode_jpeg.h" +#include <optional> #include "common_jpeg.h" namespace vision { @@ -35,6 +36,12 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { JpegSizeType jpegSize = 0; uint8_t* jpegBuf = nullptr; + // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not + // unwind C++ stack frames, so destructors of objects created after setjmp + // won't run. We use std::optional to declare tensors before setjmp while + // deferring construction, and explicitly reset them on the error path.
+ std::optional<torch::Tensor> input; + cinfo.err = jpeg_std_error(&jerr.pub); jerr.pub.error_exit = torch_jpeg_error_exit; @@ -43,6 +50,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { /* If we get here, the JPEG code has signaled an error. * We need to clean up the JPEG object and the buffer. */ + input.reset(); jpeg_destroy_compress(&cinfo); if (jpegBuf != nullptr) { free(jpegBuf); @@ -64,7 +72,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { int channels = data.size(0); int height = data.size(1); int width = data.size(2); - auto input = data.permute({1, 2, 0}).contiguous(); + input = data.permute({1, 2, 0}).contiguous(); TORCH_CHECK( channels == 1 || channels == 3, @@ -90,7 +98,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) { jpeg_start_compress(&cinfo, TRUE); auto stride = width * channels; - auto ptr = input.data_ptr<uint8_t>(); + auto ptr = input->data_ptr<uint8_t>(); // Encode JPEG file while (cinfo.next_scanline < cinfo.image_height) { diff --git a/torchvision/csrc/io/image/cpu/encode_png.cpp b/torchvision/csrc/io/image/cpu/encode_png.cpp index d55a0ed3ff6..def2c805750 100644 --- a/torchvision/csrc/io/image/cpu/encode_png.cpp +++ b/torchvision/csrc/io/image/cpu/encode_png.cpp @@ -1,5 +1,7 @@ #include "encode_jpeg.h" +#include <optional> + #include "common_png.h" namespace vision { @@ -76,11 +78,19 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { buf_info.buffer = nullptr; buf_info.size = 0; + // NOTE: libpng uses setjmp/longjmp for error handling. longjmp does not + // unwind C++ stack frames, so destructors of objects created after setjmp + // won't run. We use std::optional to declare tensors before setjmp while + // deferring construction, and explicitly reset them on the error path. + std::optional<torch::Tensor> input; + /* Establish the setjmp return context for my_error_exit to use. */ if (setjmp(err_ptr.setjmp_buffer)) { /* If we get here, the PNG code has signaled an error.
* We need to clean up the PNG object and the buffer. */ + input.reset(); + if (info_ptr != nullptr) { png_destroy_info_struct(png_write, &info_ptr); } @@ -114,7 +124,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { int channels = data.size(0); int height = data.size(1); int width = data.size(2); - auto input = data.permute({1, 2, 0}).contiguous(); + input = data.permute({1, 2, 0}).contiguous(); TORCH_CHECK( channels == 1 || channels == 3, @@ -150,7 +160,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) { png_write_info(png_write, info_ptr); auto stride = width * channels; - auto ptr = input.data_ptr<uint8_t>(); + auto ptr = input->data_ptr<uint8_t>(); // Encode PNG file for (int y = 0; y < height; ++y) {