From 2bb040b8513251bb8e70ce5cedb83a810150af73 Mon Sep 17 00:00:00 2001
From: MPSFuzz <2286770808@qq.com>
Date: Fri, 6 Mar 2026 05:05:07 +0000
Subject: [PATCH 1/3] Fix CPU decode_jpeg error-path leak on malformed JPEGs
 (setjmp/longjmp)

---
 torchvision/csrc/io/image/cpu/decode_jpeg.cpp | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)
diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
index 8163ace3307..dc6ca184d71 100644
--- a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
+++ b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
@@ -3,6 +3,8 @@
 #include "common_jpeg.h"
 #include "exif.h"
 
+#include <c10/util/Optional.h>
+
 namespace vision {
 namespace image {
 
@@ -141,12 +143,22 @@ torch::Tensor decode_jpeg(
   struct jpeg_decompress_struct cinfo;
   struct torch_jpeg_error_mgr jerr;
 
+  // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not
+  // unwind C++ stack frames, so destructors of objects created after setjmp
+  // won't run. Declare tensors before setjmp and reset them on the error path.
+  c10::optional<torch::Tensor> tensor_opt;
+  c10::optional<torch::Tensor> cmyk_line_opt;
+
   auto datap = data.data_ptr<uint8_t>();
   // Setup decompression structure
   cinfo.err = jpeg_std_error(&jerr.pub);
   jerr.pub.error_exit = torch_jpeg_error_exit;
   /* Establish the setjmp return context for my_error_exit to use. */
   if (setjmp(jerr.setjmp_buffer)) {
+    // Release any tensors that may have been allocated after setjmp.
+    cmyk_line_opt.reset();
+    tensor_opt.reset();
+
     /* If we get here, the JPEG code has signaled an error.
      * We need to clean up the JPEG object.
      */
@@ -210,12 +222,12 @@ torch::Tensor decode_jpeg(
   int width = cinfo.output_width;
 
   int stride = width * channels;
-  auto tensor =
+  tensor_opt =
       torch::empty({int64_t(height), int64_t(width), channels}, torch::kU8);
-  auto ptr = tensor.data_ptr<uint8_t>();
-  torch::Tensor cmyk_line_tensor;
+  auto ptr = tensor_opt->data_ptr<uint8_t>();
+
   if (cmyk_to_rgb_or_gray) {
-    cmyk_line_tensor = torch::empty({int64_t(width), 4}, torch::kU8);
+    cmyk_line_opt = torch::empty({int64_t(width), 4}, torch::kU8);
   }
 
   while (cinfo.output_scanline < cinfo.output_height) {
@@ -224,7 +236,7 @@ torch::Tensor decode_jpeg(
      * more than one scanline at a time if that's more convenient.
      */
     if (cmyk_to_rgb_or_gray) {
-      auto cmyk_line_ptr = cmyk_line_tensor.data_ptr<uint8_t>();
+      auto cmyk_line_ptr = cmyk_line_opt->data_ptr<uint8_t>();
       jpeg_read_scanlines(&cinfo, &cmyk_line_ptr, 1);
 
       if (channels == 3) {
@@ -240,7 +252,7 @@ torch::Tensor decode_jpeg(
 
   jpeg_finish_decompress(&cinfo);
   jpeg_destroy_decompress(&cinfo);
-  auto output = tensor.permute({2, 0, 1});
+  auto output = tensor_opt->permute({2, 0, 1});
 
   if (apply_exif_orientation) {
     return exif_orientation_transform(output, exif_orientation);
@@ -266,4 +278,4 @@ bool _is_compiled_against_turbo() {
 }
 
 } // namespace image
-} // namespace vision
+} // namespace vision
\ No newline at end of file

From bf539b43ad25eafdb7c5475ef85b8f84ef4a0752 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Fri, 6 Mar 2026 12:57:51 +0000
Subject: [PATCH 2/3] Fix lint, use std::optional

---
 torchvision/csrc/io/image/cpu/decode_jpeg.cpp | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
index dc6ca184d71..8cf8ea28019 100644
--- a/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
+++ b/torchvision/csrc/io/image/cpu/decode_jpeg.cpp
@@ -3,7 +3,7 @@
 #include "common_jpeg.h"
 #include "exif.h"
 
-#include <c10/util/Optional.h>
+#include <optional>
 
 namespace vision {
 namespace image {
@@ -145,9 +145,10 @@ torch::Tensor decode_jpeg(
 
   // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not
   // unwind C++ stack frames, so destructors of objects created after setjmp
-  // won't run. Declare tensors before setjmp and reset them on the error path.
-  c10::optional<torch::Tensor> tensor_opt;
-  c10::optional<torch::Tensor> cmyk_line_opt;
+  // won't run. We use std::optional to declare tensors before setjmp while
+  // deferring construction, and explicitly reset them on the error path.
+  std::optional<torch::Tensor> tensor;
+  std::optional<torch::Tensor> cmyk_line_tensor;
 
   auto datap = data.data_ptr<uint8_t>();
   // Setup decompression structure
@@ -156,8 +157,8 @@ torch::Tensor decode_jpeg(
   /* Establish the setjmp return context for my_error_exit to use. */
   if (setjmp(jerr.setjmp_buffer)) {
     // Release any tensors that may have been allocated after setjmp.
-    cmyk_line_opt.reset();
-    tensor_opt.reset();
+    cmyk_line_tensor.reset();
+    tensor.reset();
 
     /* If we get here, the JPEG code has signaled an error.
      * We need to clean up the JPEG object.
@@ -222,12 +223,12 @@ torch::Tensor decode_jpeg(
   int width = cinfo.output_width;
 
   int stride = width * channels;
-  tensor_opt =
+  tensor =
       torch::empty({int64_t(height), int64_t(width), channels}, torch::kU8);
-  auto ptr = tensor_opt->data_ptr<uint8_t>();
+  auto ptr = tensor->data_ptr<uint8_t>();
 
   if (cmyk_to_rgb_or_gray) {
-    cmyk_line_opt = torch::empty({int64_t(width), 4}, torch::kU8);
+    cmyk_line_tensor = torch::empty({int64_t(width), 4}, torch::kU8);
   }
 
   while (cinfo.output_scanline < cinfo.output_height) {
@@ -236,7 +237,7 @@ torch::Tensor decode_jpeg(
      * more than one scanline at a time if that's more convenient.
      */
     if (cmyk_to_rgb_or_gray) {
-      auto cmyk_line_ptr = cmyk_line_opt->data_ptr<uint8_t>();
+      auto cmyk_line_ptr = cmyk_line_tensor->data_ptr<uint8_t>();
       jpeg_read_scanlines(&cinfo, &cmyk_line_ptr, 1);
 
       if (channels == 3) {
@@ -252,7 +253,7 @@ torch::Tensor decode_jpeg(
 
   jpeg_finish_decompress(&cinfo);
   jpeg_destroy_decompress(&cinfo);
-  auto output = tensor_opt->permute({2, 0, 1});
+  auto output = tensor->permute({2, 0, 1});
 
   if (apply_exif_orientation) {
     return exif_orientation_transform(output, exif_orientation);
@@ -278,4 +279,4 @@ bool _is_compiled_against_turbo() {
 }
 
 } // namespace image
-} // namespace vision
\ No newline at end of file
+} // namespace vision

From 5458cc850a834d6b63800daee45cf64a9a5e5a11 Mon Sep 17 00:00:00 2001
From: Nicolas Hug <contact@nicolas-hug.com>
Date: Fri, 6 Mar 2026 13:02:32 +0000
Subject: [PATCH 3/3] Apply same fix to JPEG encoder and PNG decoder/encoder

---
 torchvision/csrc/io/image/cpu/decode_png.cpp  | 17 +++++++++++++----
 torchvision/csrc/io/image/cpu/encode_jpeg.cpp | 13 +++++++++++--
 torchvision/csrc/io/image/cpu/encode_png.cpp  | 14 ++++++++++++--
 3 files changed, 36 insertions(+), 8 deletions(-)

diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp
index 67c788455c4..9f20041ed4c 100644
--- a/torchvision/csrc/io/image/cpu/decode_png.cpp
+++ b/torchvision/csrc/io/image/cpu/decode_png.cpp
@@ -3,6 +3,8 @@
 #include "common_png.h"
 #include "exif.h"
 
+#include <optional>
+
 namespace vision {
 namespace image {
 
@@ -45,7 +47,14 @@ torch::Tensor decode_png(
   auto datap = accessor.data();
   auto datap_len = accessor.size(0);
 
+  // NOTE: libpng uses setjmp/longjmp for error handling. longjmp does not
+  // unwind C++ stack frames, so destructors of objects created after setjmp
+  // won't run. We use std::optional to declare tensors before setjmp while
+  // deferring construction, and explicitly reset them on the error path.
+  std::optional<torch::Tensor> tensor;
+
   if (setjmp(png_jmpbuf(png_ptr)) != 0) {
+    tensor.reset();
     png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
     STD_TORCH_CHECK(false, "Internal error.");
   }
@@ -197,19 +206,19 @@ torch::Tensor decode_png(
 
   auto num_pixels_per_row = width * channels;
   auto is_16_bits = bit_depth == 16;
-  auto tensor = torch::empty(
+  tensor = torch::empty(
       {int64_t(height), int64_t(width), channels},
       is_16_bits ? at::kUInt16 : torch::kU8);
   if (is_little_endian()) {
     png_set_swap(png_ptr);
   }
-  auto t_ptr = (uint8_t*)tensor.data_ptr();
+  auto t_ptr = (uint8_t*)tensor->data_ptr();
   for (int pass = 0; pass < number_of_passes; pass++) {
     for (png_uint_32 i = 0; i < height; ++i) {
       png_read_row(png_ptr, t_ptr, nullptr);
       t_ptr += num_pixels_per_row * (is_16_bits ? 2 : 1);
     }
-    t_ptr = (uint8_t*)tensor.data_ptr();
+    t_ptr = (uint8_t*)tensor->data_ptr();
   }
 
   int exif_orientation = -1;
@@ -219,7 +228,7 @@ torch::Tensor decode_png(
 
   png_destroy_read_struct(&png_ptr, &info_ptr, nullptr);
 
-  auto output = tensor.permute({2, 0, 1});
+  auto output = tensor->permute({2, 0, 1});
   if (apply_exif_orientation) {
     return exif_orientation_transform(output, exif_orientation);
   }
diff --git a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp
index 99b0b6097db..d29f3f4f481 100644
--- a/torchvision/csrc/io/image/cpu/encode_jpeg.cpp
+++ b/torchvision/csrc/io/image/cpu/encode_jpeg.cpp
@@ -2,6 +2,8 @@
 
 #include <torch/headeronly/util/Exception.h>
 
+#include <optional>
+
 #include "common_jpeg.h"
 
 namespace vision {
@@ -37,6 +39,12 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) {
   JpegSizeType jpegSize = 0;
   uint8_t* jpegBuf = nullptr;
 
+  // NOTE: libjpeg uses setjmp/longjmp for error handling. longjmp does not
+  // unwind C++ stack frames, so destructors of objects created after setjmp
+  // won't run. We use std::optional to declare tensors before setjmp while
+  // deferring construction, and explicitly reset them on the error path.
+  std::optional<torch::Tensor> input;
+
   cinfo.err = jpeg_std_error(&jerr.pub);
   jerr.pub.error_exit = torch_jpeg_error_exit;
 
@@ -45,6 +53,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) {
     /* If we get here, the JPEG code has signaled an error.
      * We need to clean up the JPEG object and the buffer.
      */
+    input.reset();
     jpeg_destroy_compress(&cinfo);
     if (jpegBuf != nullptr) {
       free(jpegBuf);
@@ -69,7 +78,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) {
   int channels = data.size(0);
   int height = data.size(1);
   int width = data.size(2);
-  auto input = data.permute({1, 2, 0}).contiguous();
+  input = data.permute({1, 2, 0}).contiguous();
 
   STD_TORCH_CHECK(
       channels == 1 || channels == 3,
@@ -95,7 +104,7 @@ torch::Tensor encode_jpeg(const torch::Tensor& data, int64_t quality) {
   jpeg_start_compress(&cinfo, TRUE);
 
   auto stride = width * channels;
-  auto ptr = input.data_ptr<uint8_t>();
+  auto ptr = input->data_ptr<uint8_t>();
 
   // Encode JPEG file
   while (cinfo.next_scanline < cinfo.image_height) {
diff --git a/torchvision/csrc/io/image/cpu/encode_png.cpp b/torchvision/csrc/io/image/cpu/encode_png.cpp
index d015f44cb39..bd0391acf00 100644
--- a/torchvision/csrc/io/image/cpu/encode_png.cpp
+++ b/torchvision/csrc/io/image/cpu/encode_png.cpp
@@ -2,6 +2,8 @@
 
 #include <torch/headeronly/util/Exception.h>
 
+#include <optional>
+
 #include "common_png.h"
 
 namespace vision {
@@ -78,11 +80,19 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) {
   buf_info.buffer = nullptr;
   buf_info.size = 0;
 
+  // NOTE: libpng uses setjmp/longjmp for error handling. longjmp does not
+  // unwind C++ stack frames, so destructors of objects created after setjmp
+  // won't run. We use std::optional to declare tensors before setjmp while
+  // deferring construction, and explicitly reset them on the error path.
+  std::optional<torch::Tensor> input;
+
   /* Establish the setjmp return context for my_error_exit to use. */
   if (setjmp(err_ptr.setjmp_buffer)) {
     /* If we get here, the PNG code has signaled an error.
      * We need to clean up the PNG object and the buffer.
      */
+    input.reset();
+
     if (info_ptr != nullptr) {
       png_destroy_info_struct(png_write, &info_ptr);
     }
@@ -119,7 +129,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) {
   int channels = data.size(0);
   int height = data.size(1);
   int width = data.size(2);
-  auto input = data.permute({1, 2, 0}).contiguous();
+  input = data.permute({1, 2, 0}).contiguous();
 
   STD_TORCH_CHECK(
       channels == 1 || channels == 3,
@@ -155,7 +165,7 @@ torch::Tensor encode_png(const torch::Tensor& data, int64_t compression_level) {
   png_write_info(png_write, info_ptr);
 
   auto stride = width * channels;
-  auto ptr = input.data_ptr<uint8_t>();
+  auto ptr = input->data_ptr<uint8_t>();
 
   // Encode PNG file
   for (int y = 0; y < height; ++y) {