From a1335926db9d49b2a5274f291c7fbfd8785bb696 Mon Sep 17 00:00:00 2001 From: Advaitgaur004 Date: Tue, 19 Aug 2025 22:22:25 +0530 Subject: [PATCH 1/2] style: Apply clang-format to entire codebase --- include/common/vector.h | 2 +- include/cten.h | 52 +- src/basic.c | 73 +-- src/common/vector.c | 2 +- src/nn.c | 141 ++--- src/operator.c | 217 ++++--- src/optimizer/adagrad.c | 25 +- src/optimizer/adam.c | 30 +- src/optimizer/rmsprop.c | 33 +- src/optimizer/sgd.c | 29 +- src/utils.c | 281 ++++----- tests/Backward/test_abs_backward.c | 88 ++- tests/Backward/test_add_backward.c | 210 +++++-- tests/Backward/test_div_backward.c | 55 +- tests/Backward/test_linear_backward.c | 179 +++--- tests/Backward/test_matmul_backward.c | 59 +- tests/Backward/test_max_backward.c | 58 +- tests/Backward/test_mean_backward.c | 256 ++++++--- tests/Backward/test_min_backward.c | 60 +- tests/Backward/test_mul_backward.c | 96 ++-- tests/Backward/test_pow_backward.c | 57 +- tests/Backward/test_relu_backward.c | 171 ++++-- tests/Backward/test_softmax_backward.c | 753 +++++++++++++------------ tests/Backward/test_sub_backward.c | 177 ++++-- tests/Backward/test_sum_backward.c | 220 +++++--- tests/Operator/test_abs.c | 8 +- tests/Operator/test_add.c | 373 ++++++++---- tests/Operator/test_div.c | 119 +++- tests/Operator/test_matmul.c | 204 ++++--- tests/Operator/test_max.c | 44 +- tests/Operator/test_mean.c | 162 ++++-- tests/Operator/test_min.c | 38 +- tests/Operator/test_mul.c | 722 +++++++++++++++--------- tests/Operator/test_mulf.c | 41 +- tests/Operator/test_pow.c | 125 ++-- tests/Operator/test_reciprocal.c | 166 +++--- tests/Operator/test_softmax.c | 412 +++++++------- tests/Operator/test_square.c | 73 ++- tests/Operator/test_sub.c | 348 +++++++++--- tests/Operator/test_sum.c | 143 +++-- tests/csv_reporter.c | 134 +++-- tests/csv_reporter.h | 14 +- tests/cten_tests.c | 38 +- tests/test_config.h | 8 +- tests/test_utils.c | 177 ++++-- tests/test_utils.h | 7 +- 46 files changed, 4177 insertions(+), 2503 deletions(-) diff --git a/include/common/vector.h b/include/common/vector.h index 951b489..fcb42a3 100644 --- a/include/common/vector.h +++ b/include/common/vector.h @@ -60,7 +60,7 @@ void c11_vector__swap(c11_vector* self, c11_vector* other); #define c11_vector__erase(T, self, index) \ do { \ T* p = (T*)(self)->data + (index); \ - memmove(p, p + 1, ((self)->length - (index)-1) * sizeof(T)); \ + memmove(p, p + 1, ((self)->length - (index) - 1) * sizeof(T)); \ (self)->length--; \ } while(0) diff --git a/include/cten.h b/include/cten.h index 3a84459..f1b1861 100644 --- a/include/cten.h +++ b/include/cten.h @@ -8,12 +8,12 @@ #include #define _CTEN_PICK_REDUCE(_1, _2, NAME, ...) NAME -#define Tensor_max(...) _CTEN_PICK_REDUCE(__VA_ARGS__, Tensor_max_dim, Tensor_max_all)(__VA_ARGS__) -#define Tensor_min(...) _CTEN_PICK_REDUCE(__VA_ARGS__, Tensor_min_dim, Tensor_min_all)(__VA_ARGS__) +#define Tensor_max(...) _CTEN_PICK_REDUCE(__VA_ARGS__, Tensor_max_dim, Tensor_max_all)(__VA_ARGS__) +#define Tensor_min(...) _CTEN_PICK_REDUCE(__VA_ARGS__, Tensor_min_dim, Tensor_min_all)(__VA_ARGS__) -#define _CTEN_PICK(_1,_2,NAME,...) NAME +#define _CTEN_PICK(_1, _2, NAME, ...) NAME #define Tensor_mean(...) _CTEN_PICK(__VA_ARGS__, Tensor_mean_dim, Tensor_mean_all)(__VA_ARGS__) -#define Tensor_sum(...) _CTEN_PICK(__VA_ARGS__, Tensor_sum_dim, Tensor_sum_all )(__VA_ARGS__) +#define Tensor_sum(...) 
_CTEN_PICK(__VA_ARGS__, Tensor_sum_dim, Tensor_sum_all)(__VA_ARGS__) typedef int TensorShape[4]; typedef struct GradNode GradNode; @@ -88,8 +88,8 @@ Tensor Tensor_reciprocal(Tensor self); /* Helper functions that the macros dispatch to */ Tensor Tensor_mean_all(Tensor self); Tensor Tensor_mean_dim(Tensor self, int dim); -Tensor Tensor_sum_all (Tensor self); -Tensor Tensor_sum_dim (Tensor self, int dim); +Tensor Tensor_sum_all(Tensor self); +Tensor Tensor_sum_dim(Tensor self, int dim); Tensor Tensor_max_all(Tensor self); TensorMaxMinResult Tensor_max_dim(Tensor self, int dim); @@ -133,24 +133,32 @@ typedef struct optim_adagrad optim_adagrad; typedef struct optim_rmsprop optim_rmsprop; typedef struct optim_adam optim_adam; -//SGD +// SGD optim_sgd* optim_sgd_new(int n_params, Tensor* params, float weight_decay); void optim_sgd_config(optim_sgd* self, float lr, float momentum); void optim_sgd_zerograd(optim_sgd* self); void optim_sgd_step(optim_sgd* self); -//AdaGrad -optim_adagrad* optim_adagrad_new(int n_params, Tensor* params, float lr, float ε,float weight_decay); +// AdaGrad +optim_adagrad* + optim_adagrad_new(int n_params, Tensor* params, float lr, float ε, float weight_decay); void optim_adagrad_zerograd(optim_adagrad* self); void optim_adagrad_step(optim_adagrad* self); -//RMSProp -optim_rmsprop* optim_rmsprop_new(int n_params, Tensor* params, float lr, float β, float ε,float weight_decay); +// RMSProp +optim_rmsprop* + optim_rmsprop_new(int n_params, Tensor* params, float lr, float β, float ε, float weight_decay); void optim_rmsprop_zerograd(optim_rmsprop* self); void optim_rmsprop_step(optim_rmsprop* self); -//Adam -optim_adam* optim_adam_new(int n_params, Tensor* params, float lr, float β1, float β2, float ε,float weight_decay); +// Adam +optim_adam* optim_adam_new(int n_params, + Tensor* params, + float lr, + float β1, + float β2, + float ε, + float weight_decay); void optim_adam_zerograd(optim_adam* self); void optim_adam_step(optim_adam* self); @@ -168,13 +176,25 @@ void cten_end_eval(); bool va_arg_is_present(va_list args); /* Utils */ -void Tensor_normalize_dataset(const float (*X)[4], float (*X_norm)[4], int n_samples, int n_train_samples, int n_features);Tensor Tensor_detach(Tensor self); -void Tensor_shuffle_dataset(const float (*X)[4], const int *y,float (*X_shuffled)[4], int *y_shuffled, int n_samples, int n_features); +void Tensor_normalize_dataset(const float (*X)[4], + float (*X_norm)[4], + int n_samples, + int n_train_samples, + int n_features); +Tensor Tensor_detach(Tensor self); +void Tensor_shuffle_dataset(const float (*X)[4], + const int* y, + float (*X_shuffled)[4], + int* y_shuffled, + int n_samples, + int n_features); void cten_assert(bool cond, const char* fmt, ...); void cten_assert_shape(const char* title, TensorShape a, TensorShape b); void cten_assert_dim(const char* title, int a, int b); bool cten_elemwise_broadcast(Tensor* a, Tensor* b); int load_iris_dataset(const float (**X)[4], const int** y); Tensor Tensor_reduce_dim(Tensor self, int dim, const char* operation); -Tensor reduce_gradient_for_broadcasting(Tensor grad, TensorShape original_shape, TensorShape broadcasted_shape); +Tensor reduce_gradient_for_broadcasting(Tensor grad, + TensorShape original_shape, + TensorShape broadcasted_shape); Tensor Tensor_unsqueeze(Tensor self, int dim); \ No newline at end of file diff --git a/src/basic.c b/src/basic.c index 04f7a76..d9f8224 100644 --- a/src/basic.c +++ b/src/basic.c @@ -38,19 +38,19 @@ int TensorShape_tostring(TensorShape shape, char* buf, int size) 
{ Tensor Tensor_new(TensorShape shape, bool requires_grad) { Tensor self; memset(self.shape, 0, sizeof(TensorShape)); - int ndims = TensorShape_dim(shape); + int ndims = TensorShape_dim(shape); memcpy(self.shape, shape, ndims * sizeof(int)); int numel = TensorShape_numel(self.shape); self.data = _cten_malloc(sizeof(FloatBuffer) + sizeof(float) * numel); self.data->numel = numel; - - //Initialize tensor with random values + + // Initialize tensor with random values float* data_ptr = self.data->flex; - for (int i = 0; i < numel; i++) { + for(int i = 0; i < numel; i++) { data_ptr[i] = ((float)rand() / RAND_MAX) * 2.0f - 1.0f; } - + if(requires_grad) { self.node = _cten_malloc(sizeof(GradNode)); memset(self.node, 0, sizeof(GradNode)); @@ -76,9 +76,7 @@ Tensor Tensor_ones(TensorShape shape, bool requires_grad) { Tensor Tensor_transpose(Tensor self) { int dim = TensorShape_dim(self.shape); - if(dim < 2){ - return self; - } + if(dim < 2) { return self; } TensorShape new_shape; new_shape[0] = self.shape[1]; new_shape[1] = self.shape[0]; @@ -121,17 +119,15 @@ Tensor Tensor_detach(Tensor self) { } void Tensor_backward(Tensor self, Tensor grad) { - if(self.node == NULL) { - return; - } - + if(self.node == NULL) { return; } + if(grad.data == NULL) { assert(self.data->numel == 1); grad = Tensor_ones((TensorShape){1, 0, 0, 0}, false); } - + assert(grad.node == NULL); - + // Accumulate gradient if(self.node->grad.data == NULL) { self.node->grad = grad; @@ -141,24 +137,26 @@ void Tensor_backward(Tensor self, Tensor grad) { for(int i = 0; i < self.node->n_inputs; i++) { Tensor input_tensor = self.node->inputs[i]; - if (input_tensor.node == NULL) { - continue; - } - - // Step 1: Get the local gradient (the partial derivative). --> For z = f(x, y), this would be dz/dx or dz/dy. + if(input_tensor.node == NULL) { continue; } + + // Step 1: Get the local gradient (the partial derivative). --> For z = f(x, y), this would + // be dz/dx or dz/dy. Tensor input_grad = self.node->grad_fn(self, i); - + // This is the gradient flowing from the output, which we need to propagate backwards. Tensor grad = self.node->grad; int input_ndim = TensorShape_dim(input_tensor.shape); int grad_ndim = TensorShape_dim(grad.shape); - - if ((strcmp(self.node->name, "Sum") == 0 || strcmp(self.node->name, "Mean") == 0 || strcmp(self.node->name, "MaxDim") == 0 || strcmp(self.node->name, "MinDim") == 0) && input_ndim > grad_ndim) { - // Find the dimension that was reduced. We assume the non-reduced dimensions match in size. + + if((strcmp(self.node->name, "Sum") == 0 || strcmp(self.node->name, "Mean") == 0 || + strcmp(self.node->name, "MaxDim") == 0 || strcmp(self.node->name, "MinDim") == 0) && + input_ndim > grad_ndim) { + // Find the dimension that was reduced. We assume the non-reduced dimensions match in + // size. int unsqueeze_dim = -1; int grad_idx = 0; - for (int dim_idx = 0; dim_idx < input_ndim; ++dim_idx) { - if (grad_idx >= grad_ndim || input_tensor.shape[dim_idx] != grad.shape[grad_idx]) { + for(int dim_idx = 0; dim_idx < input_ndim; ++dim_idx) { + if(grad_idx >= grad_ndim || input_tensor.shape[dim_idx] != grad.shape[grad_idx]) { // Yes, this is the dimension that was removed. 
unsqueeze_dim = dim_idx; break; @@ -166,19 +164,19 @@ void Tensor_backward(Tensor self, Tensor grad) { grad_idx++; } - if (unsqueeze_dim != -1) { + if(unsqueeze_dim != -1) { grad = Tensor_unsqueeze(grad, unsqueeze_dim); } else { cten_assert(false, "Could not deduce unsqueeze dimension."); } } - + // Step 2: Apply the chain rule (upstream_grad * local_grad) Tensor combined_grad; - if (strcmp(self.node->name, "Softmax") == 0) { + if(strcmp(self.node->name, "Softmax") == 0) { combined_grad = input_grad; } else if(strcmp(self.node->name, "Matmul") == 0) { - if (i == 0) { + if(i == 0) { combined_grad = Tensor_matmul(grad, input_grad); } else { combined_grad = Tensor_matmul(input_grad, grad); @@ -186,18 +184,21 @@ void Tensor_backward(Tensor self, Tensor grad) { } else { combined_grad = Tensor_mul(grad, input_grad); } - - // Step 3: Handle broadcasting. --> If the original input was broadcasted, the resulting gradient will have the broadcasted shape, it must be reduced back down to the original input's shape. + + // Step 3: Handle broadcasting. --> If the original input was broadcasted, the resulting + // gradient will have the broadcasted shape, it must be reduced back down to the original + // input's shape. bool needs_reduction = false; - for (int dim = 0; dim < 4; dim++) { - if (combined_grad.shape[dim] != input_tensor.shape[dim]) { + for(int dim = 0; dim < 4; dim++) { + if(combined_grad.shape[dim] != input_tensor.shape[dim]) { needs_reduction = true; break; } } - - if (needs_reduction) { - combined_grad = reduce_gradient_for_broadcasting(combined_grad, input_tensor.shape, self.shape); + + if(needs_reduction) { + combined_grad = + reduce_gradient_for_broadcasting(combined_grad, input_tensor.shape, self.shape); } Tensor_backward(input_tensor, combined_grad); } diff --git a/src/common/vector.c b/src/common/vector.c index 9abe192..aa6dfda 100644 --- a/src/common/vector.c +++ b/src/common/vector.c @@ -61,7 +61,7 @@ void* c11_vector__submit(c11_vector* self, int* length) { return retval; } -void c11_vector__swap(c11_vector *self, c11_vector *other){ +void c11_vector__swap(c11_vector* self, c11_vector* other) { c11_vector tmp = *self; *self = *other; *other = tmp; diff --git a/src/nn.c b/src/nn.c index a341c79..5487df7 100644 --- a/src/nn.c +++ b/src/nn.c @@ -38,7 +38,6 @@ Tensor nn_relu(Tensor self) { res.node->inputs[0] = self; res.node->n_inputs = 1; res.node->name = "Relu"; - } return res; } @@ -67,9 +66,7 @@ Tensor nn_log(Tensor self) { return res; } -static Tensor GradFn_exp(Tensor self, int i) { - return self; -} +static Tensor GradFn_exp(Tensor self, int i) { return self; } Tensor nn_exp(Tensor self) { bool requires_grad = !cten_is_eval() && self.node != NULL; @@ -139,7 +136,7 @@ static Tensor GradFn_tan(Tensor self, int i) { Tensor res = Tensor_new(self.shape, false); for(int j = 0; j < self.data->numel; j++) { float y = self.data->flex[j]; - res.data->flex[j] = 1.0f + y*y; + res.data->flex[j] = 1.0f + y * y; } return res; } @@ -189,7 +186,7 @@ static Tensor GradFn_tanh(Tensor self, int i) { Tensor res = Tensor_new(self.shape, false); for(int j = 0; j < self.data->numel; j++) { float y = self.data->flex[j]; - res.data->flex[j] = 1.0f - y*y; + res.data->flex[j] = 1.0f - y * y; } return res; } @@ -215,7 +212,7 @@ static Tensor GradFn_elu(Tensor self, int i) { Tensor grad = Tensor_new(input.shape, false); for(int j = 0; j < input.data->numel; j++) { float x = input.data->flex[j]; - if (x > 0) { + if(x > 0) { grad.data->flex[j] = 1.0f; } else { // derivative is alpha * e^x = alpha * (e^x - 1) 
+ alpha = y + alpha @@ -231,7 +228,7 @@ Tensor nn_elu(Tensor self, float alpha) { Tensor res = Tensor_new(self.shape, requires_grad); for(int i = 0; i < self.data->numel; i++) { float x = self.data->flex[i]; - if (x > 0) { + if(x > 0) { res.data->flex[i] = x; } else { res.data->flex[i] = alpha * (expf(x) - 1.0f); @@ -253,7 +250,7 @@ static Tensor GradFn_selu(Tensor self, int i) { const float lambda = 1.05070098f; for(int j = 0; j < input.data->numel; j++) { float x = input.data->flex[j]; - if (x > 0) { + if(x > 0) { grad.data->flex[j] = lambda; } else { // derivative is lambda * alpha * e^x = y + lambda*alpha @@ -270,7 +267,7 @@ Tensor nn_selu(Tensor self) { const float lambda = 1.05070098f; for(int i = 0; i < self.data->numel; i++) { float x = self.data->flex[i]; - if (x > 0) { + if(x > 0) { res.data->flex[i] = lambda * x; } else { res.data->flex[i] = lambda * alpha * (expf(x) - 1); @@ -290,9 +287,9 @@ Tensor Glorot_init(TensorShape shape, bool requires_grad) { int fan_in = shape[0]; int fan_out = shape[1]; float scale = sqrtf(6.0f / (fan_in + fan_out)); - + for(int i = 0; i < res.data->numel; i++) { - float r = (float)rand() / RAND_MAX * 2.0f - 1.0f; + float r = (float)rand() / RAND_MAX * 2.0f - 1.0f; res.data->flex[i] = r * scale; } return res; @@ -301,10 +298,10 @@ Tensor Glorot_init(TensorShape shape, bool requires_grad) { static Tensor GradFn_softmax(Tensor self, int i) { Tensor input = self.node->inputs[i]; Tensor grad = Tensor_new(input.shape, false); - + int dim = self.node->params[0]; int input_ndim = TensorShape_dim(input.shape); - + int dim_size = self.shape[dim]; int outer_size = 1; for(int j = 0; j < dim; j++) { @@ -315,21 +312,22 @@ static Tensor GradFn_softmax(Tensor self, int i) { inner_size *= self.shape[j]; } - float* s_data = self.data->flex; // Softmax output data (s) - float* upstream_grad_data = self.node->grad.data->flex; // Upstream grad (dL/ds) - float* input_grad_data = grad.data->flex; // Resulting grad (dL/dz) - for (int outer = 0; outer < outer_size; outer++) { - for (int inner = 0; inner < inner_size; inner++) { + float* s_data = self.data->flex; // Softmax output data (s) + float* upstream_grad_data = self.node->grad.data->flex; // Upstream grad (dL/ds) + float* input_grad_data = grad.data->flex; // Resulting grad (dL/dz) + for(int outer = 0; outer < outer_size; outer++) { + for(int inner = 0; inner < inner_size; inner++) { int slice_offset = outer * dim_size * inner_size + inner; // Step 1. Calculate the dot product for the current slice: sum_k(dL/ds_k * s_k) float dot_product = 0.0f; - for (int k = 0; k < dim_size; k++) { + for(int k = 0; k < dim_size; k++) { int index = slice_offset + k * inner_size; dot_product += upstream_grad_data[index] * s_data[index]; } - - // Step 2. Calculate the final gradient using the formula: dL/dz_j = s_j * (dL/ds_j - dot_product) - for (int k = 0; k < dim_size; k++) { + + // Step 2. 
Calculate the final gradient using the formula: dL/dz_j = s_j * (dL/ds_j - + // dot_product) + for(int k = 0; k < dim_size; k++) { int index = slice_offset + k * inner_size; input_grad_data[index] = s_data[index] * (upstream_grad_data[index] - dot_product); } @@ -352,7 +350,7 @@ Tensor nn_softmax(Tensor self, int dim) { for(int i = dim + 1; i < self_dim; i++) { inner_size *= self.shape[i]; } - + for(int outer = 0; outer < outer_size; outer++) { for(int inner = 0; inner < inner_size; inner++) { int slice_offset = outer * dim_size * inner_size + inner; @@ -378,27 +376,27 @@ Tensor nn_softmax(Tensor self, int dim) { if(requires_grad) { res.node->grad_fn = GradFn_softmax; res.node->inputs[0] = self; - res.node->n_inputs = 1; - res.node->name = "Softmax"; + res.node->n_inputs = 1; + res.node->name = "Softmax"; res.node->params[0] = dim; } return res; } static Tensor GradFn_crossentropy(Tensor self, int i) { - if (i == 1) { // Gradient w.r.t. y_pred + if(i == 1) { // Gradient w.r.t. y_pred Tensor y_true = self.node->inputs[0]; Tensor y_pred = self.node->inputs[1]; int n_samples = y_true.shape[0]; int n_classes = y_true.shape[1]; - + Tensor grad = Tensor_new(y_pred.shape, false); - - for (int i = 0; i < n_samples; i++) { - for (int j = 0; j < n_classes; j++) { + + for(int i = 0; i < n_samples; i++) { + for(int j = 0; j < n_classes; j++) { float y_true_val = y_true.data->flex[i * n_classes + j]; float y_pred_val = y_pred.data->flex[i * n_classes + j]; - if (y_true_val == 0) { + if(y_true_val == 0) { grad.data->flex[i * n_classes + j] = 0; } else { grad.data->flex[i * n_classes + j] = -y_true_val / y_pred_val; @@ -421,9 +419,12 @@ Tensor nn_crossentropy(Tensor y_true, Tensor y_pred) { assert(n_samples == y_pred.shape[0]); assert(n_classes == y_pred.shape[1]); - bool requires_grad = !cten_is_eval() && (y_true.node != NULL || y_pred.node != NULL); //No eval but rather training so requires grad is True + bool requires_grad = + !cten_is_eval() && + (y_true.node != NULL || + y_pred.node != NULL); // No eval but rather training so requires grad is True Tensor res = Tensor_zeros((TensorShape){1}, requires_grad); - + // Calculate cross-entropy loss float total_loss = 0.0f; for(int i = 0; i < n_samples; i++) { @@ -431,32 +432,32 @@ Tensor nn_crossentropy(Tensor y_true, Tensor y_pred) { for(int j = 0; j < n_classes; j++) { float true_val = y_true.data->flex[i * n_classes + j]; float pred_val = y_pred.data->flex[i * n_classes + j]; - float epsilon = 1e-8f; // avoid log(0) so we add a small epsilon - if (true_val > 0) { // one-hot encoding + float epsilon = 1e-8f; // avoid log(0) so we add a small epsilon + if(true_val > 0) { // one-hot encoding sample_loss -= true_val * logf(pred_val + epsilon); } } total_loss += sample_loss; } - + res.data->flex[0] = total_loss / n_samples; - + if(requires_grad) { res.node->grad_fn = GradFn_crossentropy; res.node->inputs[0] = y_true; res.node->inputs[1] = y_pred; res.node->n_inputs = 2; - res.node->name = "Cross-entropy"; + res.node->name = "Cross-entropy"; } return res; } static Tensor GradFn_softmax_crossentropy(Tensor self, int i) { - if (i == 1) { + if(i == 1) { Tensor y_true = self.node->inputs[0]; Tensor logits = self.node->inputs[1]; - + Tensor y_pred = Tensor_new(logits.shape, false); int self_dim = TensorShape_dim(logits.shape); int last_dim_size = logits.shape[self_dim - 1]; @@ -482,18 +483,18 @@ static Tensor GradFn_softmax_crossentropy(Tensor self, int i) { y_pred.data->flex[index] /= sum; } } - + Tensor grad = Tensor_new(y_pred.shape, false); int n_samples = 
y_pred.shape[0]; int n_classes = y_pred.shape[1]; - - for (int i = 0; i < n_samples; i++) { - for (int j = 0; j < n_classes; j++) { - grad.data->flex[i * n_classes + j] = + + for(int i = 0; i < n_samples; i++) { + for(int j = 0; j < n_classes; j++) { + grad.data->flex[i * n_classes + j] = y_pred.data->flex[i * n_classes + j] - y_true.data->flex[i * n_classes + j]; } } - + return grad; } return Tensor_zeros((TensorShape){1}, false); @@ -501,7 +502,7 @@ static Tensor GradFn_softmax_crossentropy(Tensor self, int i) { Tensor nn_softmax_crossentropy(Tensor y_true, Tensor logits) { bool requires_grad = !cten_is_eval() && logits.node != NULL; - //disable gradient computation + // disable gradient computation cten_begin_eval(); int last_dim_logits = TensorShape_dim(logits.shape) - 1; Tensor y_pred = nn_softmax(logits, last_dim_logits); @@ -509,26 +510,26 @@ Tensor nn_softmax_crossentropy(Tensor y_true, Tensor logits) { cten_end_eval(); Tensor res = Tensor_zeros((TensorShape){1}, requires_grad); res.data->flex[0] = loss.data->flex[0]; - + if(requires_grad) { res.node->grad_fn = GradFn_softmax_crossentropy; res.node->inputs[0] = y_true; res.node->inputs[1] = logits; res.node->n_inputs = 2; - res.node->name = "SoftmaxCrossEntropy"; + res.node->name = "SoftmaxCrossEntropy"; } - + return res; } static Tensor GradFn_mse_loss(Tensor self, int i) { - if (i == 1) { // Gradient w.r.t y_pred + if(i == 1) { // Gradient w.r.t y_pred Tensor y_true = self.node->inputs[0]; Tensor y_pred = self.node->inputs[1]; int n = y_pred.data->numel; Tensor grad = Tensor_new(y_pred.shape, false); - for (int j = 0; j < n; j++) { + for(int j = 0; j < n; j++) { grad.data->flex[j] = 2.0f * (y_pred.data->flex[j] - y_true.data->flex[j]) / n; } return grad; @@ -548,7 +549,7 @@ Tensor nn_mse_loss(Tensor y_true, Tensor y_pred) { Tensor res = Tensor_new((TensorShape){1}, requires_grad); res.data->flex[0] = loss.data->flex[0]; - if (requires_grad) { + if(requires_grad) { res.node->grad_fn = GradFn_mse_loss; res.node->inputs[0] = y_true; res.node->inputs[1] = y_pred; @@ -559,17 +560,17 @@ Tensor nn_mse_loss(Tensor y_true, Tensor y_pred) { } static Tensor GradFn_mae_loss(Tensor self, int i) { - if (i == 1) { // Gradient w.r.t y_pred + if(i == 1) { // Gradient w.r.t y_pred Tensor y_true = self.node->inputs[0]; Tensor y_pred = self.node->inputs[1]; int n = y_pred.data->numel; Tensor grad = Tensor_new(y_pred.shape, false); - for (int j = 0; j < n; j++) { + for(int j = 0; j < n; j++) { float error = y_pred.data->flex[j] - y_true.data->flex[j]; - if (error > 0) { + if(error > 0) { grad.data->flex[j] = 1.0f / n; - } else if (error < 0) { + } else if(error < 0) { grad.data->flex[j] = -1.0f / n; } else { grad.data->flex[j] = 0.0f; @@ -592,7 +593,7 @@ Tensor nn_mae_loss(Tensor y_true, Tensor y_pred) { Tensor res = Tensor_new((TensorShape){1}, requires_grad); res.data->flex[0] = loss.data->flex[0]; - if (requires_grad) { + if(requires_grad) { res.node->grad_fn = GradFn_mae_loss; res.node->inputs[0] = y_true; res.node->inputs[1] = y_pred; @@ -603,7 +604,7 @@ Tensor nn_mae_loss(Tensor y_true, Tensor y_pred) { } static Tensor GradFn_huber_loss(Tensor self, int i) { - if (i == 1) { // Gradient w.r.t y_pred + if(i == 1) { // Gradient w.r.t y_pred Tensor y_true = self.node->inputs[0]; Tensor y_pred = self.node->inputs[1]; float delta = huber_delta_value; @@ -612,12 +613,12 @@ static Tensor GradFn_huber_loss(Tensor self, int i) { Tensor grad = Tensor_new(y_pred.shape, false); // Gradient of Huber loss is (error / n) for small errors, // and (delta * 
sign(error) / n) for large errors. - for (int j = 0; j < n; j++) { + for(int j = 0; j < n; j++) { float error = y_pred.data->flex[j] - y_true.data->flex[j]; - if (fabsf(error) <= delta) { + if(fabsf(error) <= delta) { grad.data->flex[j] = error / n; } else { - if (error > 0) { + if(error > 0) { grad.data->flex[j] = delta / n; } else { grad.data->flex[j] = -delta / n; @@ -630,25 +631,25 @@ static Tensor GradFn_huber_loss(Tensor self, int i) { } Tensor nn_huber_loss(Tensor y_true, Tensor y_pred, float delta) { - huber_delta_value = delta; // Store delta for the backward pass + huber_delta_value = delta; // Store delta for the backward pass bool requires_grad = !cten_is_eval() && y_pred.node != NULL; int n = y_pred.data->numel; float total_loss = 0.0f; - for (int i = 0; i < n; i++) { + for(int i = 0; i < n; i++) { float error = y_pred.data->flex[i] - y_true.data->flex[i]; float abs_error = fabsf(error); - if (abs_error <= delta) { - total_loss += 0.5f * error * error; // MSE part + if(abs_error <= delta) { + total_loss += 0.5f * error * error; // MSE part } else { - total_loss += delta * (abs_error - 0.5f * delta); // MAE part + total_loss += delta * (abs_error - 0.5f * delta); // MAE part } } Tensor res = Tensor_new((TensorShape){1}, requires_grad); - res.data->flex[0] = total_loss / n; // Mean Huber Loss + res.data->flex[0] = total_loss / n; // Mean Huber Loss - if (requires_grad) { + if(requires_grad) { res.node->grad_fn = GradFn_huber_loss; res.node->inputs[0] = y_true; res.node->inputs[1] = y_pred; diff --git a/src/operator.c b/src/operator.c index 63c4f88..d4f9e63 100644 --- a/src/operator.c +++ b/src/operator.c @@ -35,18 +35,18 @@ static Tensor GradFn_mul(Tensor self, int i) { Tensor Tensor_add(Tensor self, Tensor other) { Tensor orig_self = self; Tensor orig_other = other; - + if(!cten_elemwise_broadcast(&self, &other)) { cten_assert_shape("Tensor_add() cannot broadcast", orig_self.shape, orig_other.shape); } - + bool requires_grad = !cten_is_eval() && (orig_self.node != NULL || orig_other.node != NULL); Tensor res = Tensor_new(self.shape, requires_grad); - + for(int i = 0; i < self.data->numel; i++) { res.data->flex[i] = self.data->flex[i] + other.data->flex[i]; } - + if(requires_grad) { res.node->grad_fn = GradFn_add; res.node->inputs[0] = orig_self; @@ -60,18 +60,18 @@ Tensor Tensor_add(Tensor self, Tensor other) { Tensor Tensor_mul(Tensor self, Tensor other) { Tensor orig_self = self; Tensor orig_other = other; - + if(!cten_elemwise_broadcast(&self, &other)) { cten_assert_shape("Tensor_mul() cannot broadcast", orig_self.shape, orig_other.shape); } - + bool requires_grad = !cten_is_eval() && (orig_self.node != NULL || orig_other.node != NULL); Tensor res = Tensor_new(self.shape, requires_grad); - + for(int i = 0; i < self.data->numel; i++) { res.data->flex[i] = self.data->flex[i] * other.data->flex[i]; } - + if(requires_grad) { res.node->grad_fn = GradFn_mul; res.node->inputs[0] = orig_self; @@ -112,16 +112,16 @@ void Tensor_argmax(Tensor self, int* out) { Tensor GradFn_mean(Tensor self, int i) { Tensor input_tensor = self.node->inputs[i]; int divisor; - - if (TensorShape_numel(self.shape) == 1 && TensorShape_numel(input_tensor.shape) > 1) { + + if(TensorShape_numel(self.shape) == 1 && TensorShape_numel(input_tensor.shape) > 1) { divisor = TensorShape_numel(input_tensor.shape); } else { int input_ndim = TensorShape_dim(input_tensor.shape); int output_ndim = TensorShape_dim(self.shape); - if (input_ndim > output_ndim) { + if(input_ndim > output_ndim) { int out_idx = 0; int 
reduced_dim_size = 1; - for(int d=0; d < input_ndim; ++d) { + for(int d = 0; d < input_ndim; ++d) { if(out_idx >= output_ndim || input_tensor.shape[d] != self.shape[out_idx]) { reduced_dim_size = input_tensor.shape[d]; break; @@ -137,30 +137,27 @@ Tensor GradFn_mean(Tensor self, int i) { // gradient ==> SAME SHAPE as the ORIGINAL INPUT. Tensor res = Tensor_new(input_tensor.shape, false); - + // gradient value is 1 divided by the number of elements that were averaged. float grad_val = 1.0f / divisor; - + for(int j = 0; j < res.data->numel; j++) { res.data->flex[j] = grad_val; - } + } return res; } Tensor Tensor_mean(Tensor self, ...) { int ndim = TensorShape_dim(self.shape); - int dim = INT_MIN; // Default value to trigger the "else" block - + int dim = INT_MIN; // Default value to trigger the "else" block + va_list args; va_start(args, self); - - if (va_arg_is_present(args)) { - dim = va_arg(args, int); - } + + if(va_arg_is_present(args)) { dim = va_arg(args, int); } va_end(args); - - if (dim != INT_MIN) { + if(dim != INT_MIN) { Tensor res = Tensor_reduce_dim(self, dim, "mean"); if(res.node != NULL) { res.node->grad_fn = GradFn_mean; @@ -193,18 +190,15 @@ Tensor GradFn_sum(Tensor self, int i) { Tensor Tensor_sum(Tensor self, ...) { int ndim = TensorShape_dim(self.shape); - int dim = INT_MIN; // Default value to trigger the "else" block - + int dim = INT_MIN; // Default value to trigger the "else" block + va_list args; va_start(args, self); - - if (va_arg_is_present(args)) { - dim = va_arg(args, int); - } + + if(va_arg_is_present(args)) { dim = va_arg(args, int); } va_end(args); - - if (dim != INT_MIN) { + if(dim != INT_MIN) { Tensor res = Tensor_reduce_dim(self, dim, "sum"); if(res.node != NULL) { res.node->grad_fn = GradFn_sum; @@ -231,7 +225,8 @@ Tensor Tensor_sum(Tensor self, ...) { } static Tensor GradFn_matmul(Tensor self, int i) { - return Tensor_transpose(Tensor_detach(self.node->inputs[1-i]));; + return Tensor_transpose(Tensor_detach(self.node->inputs[1 - i])); + ; } Tensor Tensor_matmul(Tensor self, Tensor other) { @@ -249,7 +244,10 @@ Tensor Tensor_matmul(Tensor self, Tensor other) { TensorShape res_shape; memcpy(res_shape, self.shape, sizeof(TensorShape)); res_shape[self_dim - 1] = p; - Tensor res = Tensor_new(res_shape, self.node != NULL || other.node != NULL); //here weight/bias have .node != NULL, so res have GradNode + Tensor res = Tensor_new( + res_shape, + self.node != NULL || + other.node != NULL); // here weight/bias have .node != NULL, so res have GradNode for(int i = 0; i < m; i++) { for(int j = 0; j < p; j++) { @@ -276,9 +274,7 @@ static Tensor GradFn_sub(Tensor self, int i) { // f(x, y) = x - y; f'(x) = 1; f'(y) = -1 Tensor input = self.node->inputs[i]; Tensor res = Tensor_ones(input.shape, false); - if(i == 1) { - res = Tensor_mulf(res, -1); - } + if(i == 1) { res = Tensor_mulf(res, -1); } return res; } @@ -287,12 +283,12 @@ static Tensor GradFn_div(Tensor self, int i) { Tensor x = self.node->inputs[0]; Tensor y = self.node->inputs[1]; - if (i == 0) { // Gradient w.r.t. x: 1/y - for (int j = 0; j < res.data->numel; j++) { + if(i == 0) { // Gradient w.r.t. x: 1/y + for(int j = 0; j < res.data->numel; j++) { res.data->flex[j] = 1.0f / y.data->flex[j % y.data->numel]; } - } else { // Gradient w.r.t. y: -x/y² - for (int j = 0; j < res.data->numel; j++) { + } else { // Gradient w.r.t. 
y: -x/y² + for(int j = 0; j < res.data->numel; j++) { float x_val = x.data->flex[j % x.data->numel]; float y_val = y.data->flex[j % y.data->numel]; res.data->flex[j] = -x_val / (y_val * y_val); @@ -305,15 +301,15 @@ Tensor Tensor_div(Tensor self, Tensor other) { Tensor orig_self = self; Tensor orig_other = other; - if (!cten_elemwise_broadcast(&self, &other)) { + if(!cten_elemwise_broadcast(&self, &other)) { cten_assert_shape("Tensor_div() cannot broadcast", orig_self.shape, orig_other.shape); } bool requires_grad = !cten_is_eval() && (orig_self.node != NULL || orig_other.node != NULL); Tensor res = Tensor_new(self.shape, requires_grad); - for (int i = 0; i < self.data->numel; i++) { + for(int i = 0; i < self.data->numel; i++) { res.data->flex[i] = self.data->flex[i] / other.data->flex[i]; } - if (requires_grad) { + if(requires_grad) { res.node->grad_fn = GradFn_div; res.node->inputs[0] = orig_self; res.node->inputs[1] = orig_other; @@ -327,7 +323,7 @@ static Tensor GradFn_square(Tensor self, int i) { // f(x) = x²; f'(x) = 2x Tensor input = self.node->inputs[i]; Tensor res = Tensor_new(input.shape, false); - for (int j = 0; j < res.data->numel; j++) { + for(int j = 0; j < res.data->numel; j++) { res.data->flex[j] = 2.0f * input.data->flex[j]; } return res; @@ -336,11 +332,11 @@ static Tensor GradFn_square(Tensor self, int i) { Tensor Tensor_square(Tensor self) { bool requires_grad = !cten_is_eval() && (self.node != NULL); Tensor res = Tensor_new(self.shape, requires_grad); - for (int i = 0; i < self.data->numel; i++) { + for(int i = 0; i < self.data->numel; i++) { float val = self.data->flex[i]; res.data->flex[i] = val * val; } - if (requires_grad) { + if(requires_grad) { res.node->grad_fn = GradFn_square; res.node->inputs[0] = self; res.node->n_inputs = 1; @@ -353,7 +349,7 @@ static Tensor GradFn_reciprocal(Tensor self, int i) { // f(x) = 1/x; f'(x) = -1/x^2 Tensor input = self.node->inputs[i]; Tensor res = Tensor_new(input.shape, false); - for (int j = 0; j < res.data->numel; j++) { + for(int j = 0; j < res.data->numel; j++) { float x_val = input.data->flex[j]; res.data->flex[j] = -1.0f / (x_val * x_val); } @@ -363,10 +359,10 @@ static Tensor GradFn_reciprocal(Tensor self, int i) { Tensor Tensor_reciprocal(Tensor self) { bool requires_grad = !cten_is_eval() && (self.node != NULL); Tensor res = Tensor_new(self.shape, requires_grad); - for (int i = 0; i < self.data->numel; i++) { + for(int i = 0; i < self.data->numel; i++) { res.data->flex[i] = 1.0f / self.data->flex[i]; } - if (requires_grad) { + if(requires_grad) { res.node->grad_fn = GradFn_reciprocal; res.node->inputs[0] = self; res.node->n_inputs = 1; @@ -380,13 +376,13 @@ static Tensor GradFn_pow(Tensor self, int i) { Tensor res = Tensor_new(self.shape, false); Tensor x = self.node->inputs[0]; Tensor y = self.node->inputs[1]; - - if (i == 0) { + + if(i == 0) { // Gradient w.r.t. x: y*x^(y-1) - for (int j = 0; j < res.data->numel; j++) { + for(int j = 0; j < res.data->numel; j++) { float x_val = x.data->flex[j % x.data->numel]; float y_val = y.data->flex[j % y.data->numel]; - if (x_val == 0.0f && y_val > 1.0f) { + if(x_val == 0.0f && y_val > 1.0f) { res.data->flex[j] = 0.0f; } else { res.data->flex[j] = y_val * powf(x_val, y_val - 1.0f); @@ -394,18 +390,18 @@ static Tensor GradFn_pow(Tensor self, int i) { } } else { // Gradient w.r.t. 
y: x^y * ln(x) - for (int j = 0; j < res.data->numel; j++) { + for(int j = 0; j < res.data->numel; j++) { float x_val = x.data->flex[j % x.data->numel]; float self_val = self.data->flex[j]; - if (x_val <= 0.0f) { + if(x_val <= 0.0f) { // Gradient of x^y w.r.t y is undefined or complex for x <= 0. - // Returning 0 for simplicity, but this might need specific handling depending on use case. - // For example, if x can be negative and y is an integer, the behavior is different. - // If x is 0, and y > 0, derivative is 0. If x is 0 and y <= 0, it's undefined. - // logf(negative) is NaN. powf(negative, non-integer) is complex. - // We assume positive x for logf(x) to be real. - // A robust solution might involve checking domain or returning NaN. - res.data->flex[j] = 0.0f; + // Returning 0 for simplicity, but this might need specific handling depending on + // use case. For example, if x can be negative and y is an integer, the behavior is + // different. If x is 0, and y > 0, derivative is 0. If x is 0 and y <= 0, it's + // undefined. logf(negative) is NaN. powf(negative, non-integer) is complex. We + // assume positive x for logf(x) to be real. A robust solution might involve + // checking domain or returning NaN. + res.data->flex[j] = 0.0f; } else { res.data->flex[j] = self_val * logf(x_val); } @@ -417,15 +413,15 @@ static Tensor GradFn_pow(Tensor self, int i) { Tensor Tensor_pow(Tensor self, Tensor other) { Tensor orig_self = self; Tensor orig_other = other; - if (!cten_elemwise_broadcast(&self, &other)) { + if(!cten_elemwise_broadcast(&self, &other)) { cten_assert_shape("Tensor_pow() cannot broadcast", orig_self.shape, orig_other.shape); } bool requires_grad = !cten_is_eval() && (orig_self.node != NULL || orig_other.node != NULL); Tensor res = Tensor_new(self.shape, requires_grad); - for (int i = 0; i < self.data->numel; i++) { + for(int i = 0; i < self.data->numel; i++) { res.data->flex[i] = powf(self.data->flex[i], other.data->flex[i]); } - if (requires_grad) { + if(requires_grad) { res.node->grad_fn = GradFn_pow; res.node->inputs[0] = orig_self; res.node->inputs[1] = orig_other; @@ -438,15 +434,15 @@ Tensor Tensor_pow(Tensor self, Tensor other) { Tensor Tensor_sub(Tensor self, Tensor other) { Tensor orig_self = self; Tensor orig_other = other; - if (!cten_elemwise_broadcast(&self, &other)) { + if(!cten_elemwise_broadcast(&self, &other)) { cten_assert_shape("Tensor_sub() cannot broadcast", orig_self.shape, orig_other.shape); } bool requires_grad = !cten_is_eval() && (orig_self.node != NULL || orig_other.node != NULL); Tensor res = Tensor_new(self.shape, requires_grad); - for (int i = 0; i < self.data->numel; i++) { + for(int i = 0; i < self.data->numel; i++) { res.data->flex[i] = self.data->flex[i] - other.data->flex[i]; } - if (requires_grad) { + if(requires_grad) { res.node->grad_fn = GradFn_sub; res.node->inputs[0] = orig_self; res.node->inputs[1] = orig_other; @@ -465,22 +461,23 @@ Tensor GradFn_reduce_dim(Tensor self, int i) { int ndim = TensorShape_dim(input.shape); int reduced_dim = -1; - for(int d = 0, out_d = 0; d < ndim; d++){ - if(out_d >= TensorShape_dim(self.shape) || input.shape[d] != self.shape[out_d]){ + for(int d = 0, out_d = 0; d < ndim; d++) { + if(out_d >= TensorShape_dim(self.shape) || input.shape[d] != self.shape[out_d]) { reduced_dim = d; break; } out_d++; } cten_assert(reduced_dim != -1, "Could not determine reduced dimension in gradient calculation"); - - for (int j = 0; j < out_numel; j++) { + + for(int j = 0; j < out_numel; j++) { int index_along_dim = 
(int)indices_tensor.data->flex[j]; - - int linear_idx = 0, stride = 1, out_j_rem = j, out_shape_idx = TensorShape_dim(self.shape) - 1; - for (int k = ndim - 1; k >= 0; --k) { + + int linear_idx = 0, stride = 1, out_j_rem = j, + out_shape_idx = TensorShape_dim(self.shape) - 1; + for(int k = ndim - 1; k >= 0; --k) { int current_dim_idx; - if (k == reduced_dim) { + if(k == reduced_dim) { current_dim_idx = index_along_dim; } else { int dim_k = self.shape[out_shape_idx--]; @@ -499,42 +496,38 @@ Tensor GradFn_max_all(Tensor self, int i) { Tensor input = self.node->inputs[i]; Tensor res = Tensor_zeros(input.shape, false); float max_val = self.data->flex[0]; - + int max_count = 0; - for (int j = 0; j < input.data->numel; j++) { - if (input.data->flex[j] == max_val) max_count++; + for(int j = 0; j < input.data->numel; j++) { + if(input.data->flex[j] == max_val) max_count++; } - + float grad_value = (max_count > 0) ? 1.0f / max_count : 0.0f; - for (int j = 0; j < input.data->numel; j++) { - if (input.data->flex[j] == max_val) res.data->flex[j] = grad_value; + for(int j = 0; j < input.data->numel; j++) { + if(input.data->flex[j] == max_val) res.data->flex[j] = grad_value; } return res; } Tensor Tensor_max(Tensor self) { - if (self.data->numel == 0){ - cten_assert(false, "Error: max() on an empty tensor."); - } + if(self.data->numel == 0) { cten_assert(false, "Error: max() on an empty tensor."); } bool requires_grad = !cten_is_eval() && (self.node != NULL); Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad); - + float max_val = self.data->flex[0]; - for (int i = 1; i < self.data->numel; i++) { - if (self.data->flex[i] > max_val) { - max_val = self.data->flex[i]; - } + for(int i = 1; i < self.data->numel; i++) { + if(self.data->flex[i] > max_val) { max_val = self.data->flex[i]; } } - + res.data->flex[0] = max_val; - - if (requires_grad) { + + if(requires_grad) { res.node->grad_fn = GradFn_max_all; res.node->inputs[0] = self; res.node->n_inputs = 1; res.node->name = "MaxAll"; } - + return res; } @@ -542,43 +535,39 @@ Tensor GradFn_min_all(Tensor self, int i) { Tensor input = self.node->inputs[i]; Tensor res = Tensor_zeros(input.shape, false); float min_val = self.data->flex[0]; - + int min_count = 0; - for (int j = 0; j < input.data->numel; j++) { - if (input.data->flex[j] == min_val) min_count++; + for(int j = 0; j < input.data->numel; j++) { + if(input.data->flex[j] == min_val) min_count++; } - + float grad_value = (min_count > 0) ? 
1.0f / min_count : 0.0f; - for (int j = 0; j < input.data->numel; j++) { - if (input.data->flex[j] == min_val) res.data->flex[j] = grad_value; + for(int j = 0; j < input.data->numel; j++) { + if(input.data->flex[j] == min_val) res.data->flex[j] = grad_value; } return res; } Tensor Tensor_min(Tensor self) { - if (self.data->numel == 0){ - cten_assert(false, "Error: min() on an empty tensor."); - } + if(self.data->numel == 0) { cten_assert(false, "Error: min() on an empty tensor."); } bool requires_grad = !cten_is_eval() && (self.node != NULL); Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad); - + // Find minimum value float min_val = self.data->flex[0]; - for (int i = 1; i < self.data->numel; i++) { - if (self.data->flex[i] < min_val) { - min_val = self.data->flex[i]; - } + for(int i = 1; i < self.data->numel; i++) { + if(self.data->flex[i] < min_val) { min_val = self.data->flex[i]; } } - + res.data->flex[0] = min_val; - - if (requires_grad) { + + if(requires_grad) { res.node->grad_fn = GradFn_min_all; res.node->inputs[0] = self; res.node->n_inputs = 1; res.node->name = "MinAll"; } - + return res; } @@ -587,9 +576,9 @@ static Tensor GradFn_abs(Tensor self, int i) { Tensor res = Tensor_new(input.shape, false); for(int j = 0; j < input.data->numel; j++) { float val = input.data->flex[j]; - if (val > 0) { + if(val > 0) { res.data->flex[j] = 1.0f; - } else if (val < 0) { + } else if(val < 0) { res.data->flex[j] = -1.0f; } else { res.data->flex[j] = 0.0f; diff --git a/src/optimizer/adagrad.c b/src/optimizer/adagrad.c index 7c242fe..990eef0 100644 --- a/src/optimizer/adagrad.c +++ b/src/optimizer/adagrad.c @@ -14,14 +14,17 @@ typedef struct optim_adagrad { float weight_decay; } optim_adagrad; -optim_adagrad* optim_adagrad_new(int n_params, Tensor* params, float lr, float ε, float weight_decay) { +optim_adagrad* + optim_adagrad_new(int n_params, Tensor* params, float lr, float ε, float weight_decay) { cten_assert(n_params >= 0, "AdaGrad: n_params cannot be negative, but got %d.", n_params); - if (n_params > 0) { + if(n_params > 0) { cten_assert(params != NULL, "AdaGrad: params array cannot be NULL when n_params > 0."); } cten_assert(lr >= 0.0f, "AdaGrad: learning rate must be non-negative, but got %f.", lr); cten_assert(ε >= 0.0f, "AdaGrad: epsilon must be non-negative, but got %f.", ε); - cten_assert(weight_decay >= 0.0f, "AdaGrad: weight decay must be non-negative, but got %f.", weight_decay); + cten_assert(weight_decay >= 0.0f, + "AdaGrad: weight decay must be non-negative, but got %f.", + weight_decay); optim_adagrad* self = _cten_malloc(sizeof(optim_adagrad)); self->n_params = n_params; @@ -30,29 +33,25 @@ optim_adagrad* optim_adagrad_new(int n_params, Tensor* params, float lr, float self->ε = ε; self->sum_sq_grad = _cten_malloc(sizeof(Tensor) * n_params); self->weight_decay = weight_decay; - for (int i = 0; i < n_params; i++) { + for(int i = 0; i < n_params; i++) { self->sum_sq_grad[i] = Tensor_zeros(params[i].shape, false); } return self; } -void optim_adagrad_zerograd(optim_adagrad* self) { - _cten_zero_grad(self->params, self->n_params); -} +void optim_adagrad_zerograd(optim_adagrad* self) { _cten_zero_grad(self->params, self->n_params); } void optim_adagrad_step(optim_adagrad* self) { - for (int i = 0; i < self->n_params; i++) { + for(int i = 0; i < self->n_params; i++) { Tensor t = self->params[i]; - if (t.node == NULL || t.node->grad.data == NULL) continue; + if(t.node == NULL || t.node->grad.data == NULL) continue; Tensor grad = t.node->grad; Tensor* sum_sq = 
&self->sum_sq_grad[i]; - for (int j = 0; j < t.data->numel; j++) { + for(int j = 0; j < t.data->numel; j++) { float g = grad.data->flex[j]; - if (self->weight_decay > 0.0f) { - g += self->weight_decay * t.data->flex[j]; - } + if(self->weight_decay > 0.0f) { g += self->weight_decay * t.data->flex[j]; } sum_sq->data->flex[j] += g * g; t.data->flex[j] -= self->lr * g / (sqrtf(sum_sq->data->flex[j]) + self->ε); } diff --git a/src/optimizer/adam.c b/src/optimizer/adam.c index a97455e..4455f9a 100644 --- a/src/optimizer/adam.c +++ b/src/optimizer/adam.c @@ -18,16 +18,24 @@ typedef struct optim_adam { float weight_decay; } optim_adam; -optim_adam* optim_adam_new(int n_params, Tensor* params, float lr, float β1, float β2, float ε, float weight_decay) { +optim_adam* optim_adam_new(int n_params, + Tensor* params, + float lr, + float β1, + float β2, + float ε, + float weight_decay) { cten_assert(n_params >= 0, "Adam: n_params cannot be negative, but got %d.", n_params); - if (n_params > 0) { + if(n_params > 0) { cten_assert(params != NULL, "Adam: params array cannot be NULL when n_params > 0."); } cten_assert(lr >= 0.0f, "Adam: learning rate must be non-negative, but got %f.", lr); cten_assert(β1 >= 0.0f && β1 < 1.0f, "Adam: beta1 must be in [0, 1), but got %f.", β1); cten_assert(β2 >= 0.0f && β2 < 1.0f, "Adam: beta2 must be in [0, 1), but got %f.", β2); cten_assert(ε >= 0.0f, "Adam: epsilon must be non-negative, but got %f.", ε); - cten_assert(weight_decay >= 0.0f, "Adam: weight decay must be non-negative, but got %f.", weight_decay); + cten_assert(weight_decay >= 0.0f, + "Adam: weight decay must be non-negative, but got %f.", + weight_decay); optim_adam* self = _cten_malloc(sizeof(optim_adam)); self->n_params = n_params; @@ -40,32 +48,28 @@ optim_adam* optim_adam_new(int n_params, Tensor* params, float lr, float β1, fl self->m = _cten_malloc(sizeof(Tensor) * n_params); self->v = _cten_malloc(sizeof(Tensor) * n_params); - for (int i = 0; i < n_params; i++) { + for(int i = 0; i < n_params; i++) { self->m[i] = Tensor_zeros(params[i].shape, false); self->v[i] = Tensor_zeros(params[i].shape, false); } return self; } -void optim_adam_zerograd(optim_adam* self) { - _cten_zero_grad(self->params, self->n_params); -} +void optim_adam_zerograd(optim_adam* self) { _cten_zero_grad(self->params, self->n_params); } void optim_adam_step(optim_adam* self) { self->t++; - for (int i = 0; i < self->n_params; i++) { + for(int i = 0; i < self->n_params; i++) { Tensor p = self->params[i]; - if (p.node == NULL || p.node->grad.data == NULL) continue; + if(p.node == NULL || p.node->grad.data == NULL) continue; Tensor grad = p.node->grad; Tensor* m = &self->m[i]; Tensor* v = &self->v[i]; - for (int j = 0; j < p.data->numel; j++) { + for(int j = 0; j < p.data->numel; j++) { float g = grad.data->flex[j]; - if (self->weight_decay > 0.0f) { - g += self->weight_decay * p.data->flex[j]; - } + if(self->weight_decay > 0.0f) { g += self->weight_decay * p.data->flex[j]; } m->data->flex[j] = self->β1 * m->data->flex[j] + (1 - self->β1) * g; v->data->flex[j] = self->β2 * v->data->flex[j] + (1 - self->β2) * g * g; float m_hat = m->data->flex[j] / (1 - powf(self->β1, self->t)); diff --git a/src/optimizer/rmsprop.c b/src/optimizer/rmsprop.c index 20a30e5..397bf6c 100644 --- a/src/optimizer/rmsprop.c +++ b/src/optimizer/rmsprop.c @@ -15,15 +15,24 @@ typedef struct optim_rmsprop { float weight_decay; } optim_rmsprop; -optim_rmsprop* optim_rmsprop_new(int n_params, Tensor* params, float lr, float β, float ε, float weight_decay) { 
+optim_rmsprop* optim_rmsprop_new(int n_params, + Tensor* params, + float lr, + float β, + float ε, + float weight_decay) { cten_assert(n_params >= 0, "RMSProp: n_params cannot be negative, but got %d.", n_params); - if (n_params > 0) { + if(n_params > 0) { cten_assert(params != NULL, "RMSProp: params array cannot be NULL when n_params > 0."); } cten_assert(lr >= 0.0f, "RMSProp: learning rate must be non-negative, but got %f.", lr); - cten_assert(β >= 0.0f && β < 1.0f, "RMSProp: beta (decay rate) must be in [0, 1), but got %f.", β); + cten_assert(β >= 0.0f && β < 1.0f, + "RMSProp: beta (decay rate) must be in [0, 1), but got %f.", + β); cten_assert(ε >= 0.0f, "RMSProp: epsilon must be non-negative, but got %f.", ε); - cten_assert(weight_decay >= 0.0f, "RMSProp: weight decay must be non-negative, but got %f.", weight_decay); + cten_assert(weight_decay >= 0.0f, + "RMSProp: weight decay must be non-negative, but got %f.", + weight_decay); optim_rmsprop* self = _cten_malloc(sizeof(optim_rmsprop)); self->n_params = n_params; @@ -34,29 +43,25 @@ optim_rmsprop* optim_rmsprop_new(int n_params, Tensor* params, float lr, float self->weight_decay = weight_decay; self->squared_avg = _cten_malloc(sizeof(Tensor) * n_params); - for (int i = 0; i < n_params; i++) { + for(int i = 0; i < n_params; i++) { self->squared_avg[i] = Tensor_zeros(params[i].shape, false); } return self; } -void optim_rmsprop_zerograd(optim_rmsprop* self) { - _cten_zero_grad(self->params, self->n_params); -} +void optim_rmsprop_zerograd(optim_rmsprop* self) { _cten_zero_grad(self->params, self->n_params); } void optim_rmsprop_step(optim_rmsprop* self) { - for (int i = 0; i < self->n_params; i++) { + for(int i = 0; i < self->n_params; i++) { Tensor t = self->params[i]; - if (t.node == NULL || t.node->grad.data == NULL) continue; + if(t.node == NULL || t.node->grad.data == NULL) continue; Tensor grad = t.node->grad; Tensor* sq_avg = &self->squared_avg[i]; - for (int j = 0; j < t.data->numel; j++) { + for(int j = 0; j < t.data->numel; j++) { float g = grad.data->flex[j]; - if (self->weight_decay > 0.0f) { - g += self->weight_decay * t.data->flex[j]; - } + if(self->weight_decay > 0.0f) { g += self->weight_decay * t.data->flex[j]; } sq_avg->data->flex[j] = self->β * sq_avg->data->flex[j] + (1 - self->β) * g * g; t.data->flex[j] -= self->lr * g / (sqrtf(sq_avg->data->flex[j]) + self->ε); } diff --git a/src/optimizer/sgd.c b/src/optimizer/sgd.c index 855eae4..eb10183 100644 --- a/src/optimizer/sgd.c +++ b/src/optimizer/sgd.c @@ -16,7 +16,7 @@ typedef struct optim_sgd { optim_sgd* optim_sgd_new(int n_params, Tensor* params, float weight_decay) { cten_assert(n_params >= 0, "n_params cannot be negative, but got %d.", n_params); - if (n_params > 0) { + if(n_params > 0) { cten_assert(params != NULL, "params array cannot be NULL when n_params is greater than 0."); } @@ -35,38 +35,33 @@ void optim_sgd_config(optim_sgd* self, float lr, float momentum) { self->lr = lr; self->momentum = momentum; - if (self->velocity == NULL && self->momentum > 0.0f) { + if(self->velocity == NULL && self->momentum > 0.0f) { self->velocity = _cten_malloc(sizeof(Tensor) * self->n_params); - for (int i = 0; i < self->n_params; i++) { + for(int i = 0; i < self->n_params; i++) { self->velocity[i] = Tensor_zeros(self->params[i].shape, false); } } } -void optim_sgd_zerograd(optim_sgd* self) { - _cten_zero_grad(self->params, self->n_params); -} +void optim_sgd_zerograd(optim_sgd* self) { _cten_zero_grad(self->params, self->n_params); } void optim_sgd_step(optim_sgd* self) { 
for(int i = 0; i < self->n_params; i++) { Tensor t = self->params[i]; - if(t.node == NULL || t.node->grad.data == NULL) { - continue; - } + if(t.node == NULL || t.node->grad.data == NULL) { continue; } float* param_data = t.data->flex; float* grad_data = t.node->grad.data->flex; - if (self->momentum > 0.0f) { + if(self->momentum > 0.0f) { // v = momentum * v + grad // p = p - lr * v - cten_assert(self->velocity != NULL, "Velocity buffer is NULL. Did you configure momentum?"); + cten_assert(self->velocity != NULL, + "Velocity buffer is NULL. Did you configure momentum?"); float* velocity_data = self->velocity[i].data->flex; - for (int j = 0; j < t.data->numel; j++) { + for(int j = 0; j < t.data->numel; j++) { float grad_val = grad_data[j]; - if (self->weight_decay > 0.0f) { - grad_val += self->weight_decay * param_data[j]; - } + if(self->weight_decay > 0.0f) { grad_val += self->weight_decay * param_data[j]; } velocity_data[j] = self->momentum * velocity_data[j] + grad_val; param_data[j] -= self->lr * velocity_data[j]; } @@ -74,9 +69,7 @@ void optim_sgd_step(optim_sgd* self) { // p = p - lr * grad for(int j = 0; j < t.data->numel; j++) { float grad_val = grad_data[j]; - if (self->weight_decay > 0.0f) { - grad_val += self->weight_decay * param_data[j]; - } + if(self->weight_decay > 0.0f) { grad_val += self->weight_decay * param_data[j]; } param_data[j] -= self->lr * grad_val; } } diff --git a/src/utils.c b/src/utils.c index 0ea552f..e9cfeb4 100644 --- a/src/utils.c +++ b/src/utils.c @@ -5,8 +5,8 @@ #include #include #include -#include -#include +#include +#include #include bool va_arg_is_present(va_list args) { @@ -22,8 +22,9 @@ Tensor GradFn_reduce_dim(Tensor self, int i); Tensor Tensor_mean_all(Tensor self) { float total = 0.0f; - for(int i = 0; i < self.data->numel; i++) total += self.data->flex[i]; - Tensor res = Tensor_new((TensorShape){1,0,0,0}, self.node != NULL); + for(int i = 0; i < self.data->numel; i++) + total += self.data->flex[i]; + Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, self.node != NULL); res.data->flex[0] = total / self.data->numel; if(res.node != NULL) { res.node->grad_fn = GradFn_mean; @@ -47,8 +48,9 @@ Tensor Tensor_mean_dim(Tensor self, int dim) { Tensor Tensor_sum_all(Tensor self) { float total = 0.0f; - for(int i = 0; i < self.data->numel; i++) total += self.data->flex[i]; - Tensor res = Tensor_new((TensorShape){1,0,0,0}, self.node != NULL); + for(int i = 0; i < self.data->numel; i++) + total += self.data->flex[i]; + Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, self.node != NULL); res.data->flex[0] = total; if(res.node != NULL) { res.node->grad_fn = GradFn_sum; @@ -73,17 +75,15 @@ Tensor Tensor_sum_dim(Tensor self, int dim) { Tensor Tensor_max_all(Tensor self) { bool requires_grad = !cten_is_eval() && (self.node != NULL); Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad); - - if (self.data->numel == 0) cten_assert(false, "max on empty tensor"); + + if(self.data->numel == 0) cten_assert(false, "max on empty tensor"); float max_val = self.data->flex[0]; - for (int i = 1; i < self.data->numel; i++) { - if (self.data->flex[i] > max_val) { - max_val = self.data->flex[i]; - } + for(int i = 1; i < self.data->numel; i++) { + if(self.data->flex[i] > max_val) { max_val = self.data->flex[i]; } } res.data->flex[0] = max_val; - - if (requires_grad) { + + if(requires_grad) { res.node->grad_fn = GradFn_max_all; res.node->inputs[0] = self; res.node->n_inputs = 1; @@ -98,24 +98,24 @@ TensorMaxMinResult Tensor_max_dim(Tensor self, int dim) { TensorShape 
out_shape = {0}; int out_shape_len = 0; - for (int i = 0; i < ndim; i++) { - if (i != dim) out_shape[out_shape_len++] = self.shape[i]; + for(int i = 0; i < ndim; i++) { + if(i != dim) out_shape[out_shape_len++] = self.shape[i]; } - + bool requires_grad = !cten_is_eval() && (self.node != NULL); Tensor values = Tensor_new(out_shape, requires_grad); Tensor indices = Tensor_new(out_shape, false); int dim_size = self.shape[dim]; - for (int i = 0; i < values.data->numel; ++i) { + for(int i = 0; i < values.data->numel; ++i) { float best_val = -INFINITY; int best_idx = -1; - for (int j = 0; j < dim_size; ++j) { + for(int j = 0; j < dim_size; ++j) { int in_linear_idx = 0, stride = 1, out_i_rem = i, out_idx_tracker = out_shape_len - 1; - for (int k = ndim - 1; k >= 0; --k) { + for(int k = ndim - 1; k >= 0; --k) { int current_dim_idx; - if (k == dim) { + if(k == dim) { current_dim_idx = j; } else { int dim_k = out_shape[out_idx_tracker--]; @@ -126,20 +126,23 @@ TensorMaxMinResult Tensor_max_dim(Tensor self, int dim) { stride *= self.shape[k]; } float current_val = self.data->flex[in_linear_idx]; - if (current_val > best_val) { best_val = current_val; best_idx = j; } + if(current_val > best_val) { + best_val = current_val; + best_idx = j; + } } values.data->flex[i] = best_val; indices.data->flex[i] = (float)best_idx; } - if (requires_grad) { + if(requires_grad) { values.node->grad_fn = GradFn_reduce_dim; values.node->inputs[0] = self; values.node->inputs[1] = indices; values.node->n_inputs = 2; values.node->name = "MaxDim"; } - + TensorMaxMinResult result = {values, indices}; return result; } @@ -148,16 +151,14 @@ Tensor Tensor_min_all(Tensor self) { bool requires_grad = !cten_is_eval() && (self.node != NULL); Tensor res = Tensor_new((TensorShape){1, 0, 0, 0}, requires_grad); - if (self.data->numel == 0) cten_assert(false, "min on empty tensor"); + if(self.data->numel == 0) cten_assert(false, "min on empty tensor"); float min_val = self.data->flex[0]; - for (int i = 1; i < self.data->numel; i++) { - if (self.data->flex[i] < min_val) { - min_val = self.data->flex[i]; - } + for(int i = 1; i < self.data->numel; i++) { + if(self.data->flex[i] < min_val) { min_val = self.data->flex[i]; } } res.data->flex[0] = min_val; - if (requires_grad) { + if(requires_grad) { res.node->grad_fn = GradFn_min_all; res.node->inputs[0] = self; res.node->n_inputs = 1; @@ -172,24 +173,24 @@ TensorMaxMinResult Tensor_min_dim(Tensor self, int dim) { TensorShape out_shape = {0}; int out_shape_len = 0; - for (int i = 0; i < ndim; i++) { - if (i != dim) out_shape[out_shape_len++] = self.shape[i]; + for(int i = 0; i < ndim; i++) { + if(i != dim) out_shape[out_shape_len++] = self.shape[i]; } - + bool requires_grad = !cten_is_eval() && (self.node != NULL); Tensor values = Tensor_new(out_shape, requires_grad); Tensor indices = Tensor_new(out_shape, false); int dim_size = self.shape[dim]; - for (int i = 0; i < values.data->numel; ++i) { + for(int i = 0; i < values.data->numel; ++i) { float best_val = INFINITY; int best_idx = -1; - for (int j = 0; j < dim_size; ++j) { + for(int j = 0; j < dim_size; ++j) { int in_linear_idx = 0, stride = 1, out_i_rem = i, out_idx_tracker = out_shape_len - 1; - for (int k = ndim - 1; k >= 0; --k) { + for(int k = ndim - 1; k >= 0; --k) { int current_dim_idx; - if (k == dim) { + if(k == dim) { current_dim_idx = j; } else { int dim_k = out_shape[out_idx_tracker--]; @@ -200,25 +201,27 @@ TensorMaxMinResult Tensor_min_dim(Tensor self, int dim) { stride *= self.shape[k]; } float current_val = 
self.data->flex[in_linear_idx]; - if (current_val < best_val) { best_val = current_val; best_idx = j; } + if(current_val < best_val) { + best_val = current_val; + best_idx = j; + } } values.data->flex[i] = best_val; indices.data->flex[i] = (float)best_idx; } - - if (requires_grad) { + + if(requires_grad) { values.node->grad_fn = GradFn_reduce_dim; values.node->inputs[0] = self; values.node->inputs[1] = indices; values.node->n_inputs = 2; values.node->name = "MinDim"; } - + TensorMaxMinResult result = {values, indices}; return result; } - void cten_assert(bool cond, const char* fmt, ...) { if(!cond) { va_list args; @@ -253,16 +256,16 @@ bool cten_elemwise_broadcast(Tensor* a, Tensor* b) { int b_ndims = TensorShape_dim(orig_b.shape); int max_ndims = (a_ndims > b_ndims) ? a_ndims : b_ndims; - if (max_ndims > 4) return false; + if(max_ndims > 4) return false; memset(result_shape, 0, sizeof(TensorShape)); - for (int i = 0; i < max_ndims; i++) { + for(int i = 0; i < max_ndims; i++) { int a_idx = a_ndims - 1 - i; int b_idx = b_ndims - 1 - i; int result_idx = max_ndims - 1 - i; int a_dim = (a_idx >= 0) ? orig_a.shape[a_idx] : 1; int b_dim = (b_idx >= 0) ? orig_b.shape[b_idx] : 1; - if (a_dim == b_dim || a_dim == 1 || b_dim == 1) { + if(a_dim == b_dim || a_dim == 1 || b_dim == 1) { result_shape[result_idx] = (a_dim > b_dim) ? a_dim : b_dim; } else { return false; @@ -270,23 +273,23 @@ bool cten_elemwise_broadcast(Tensor* a, Tensor* b) { } // 2. Check if tensor 'a' needs to be expanded - if (memcmp(orig_a.shape, result_shape, sizeof(TensorShape)) != 0) { + if(memcmp(orig_a.shape, result_shape, sizeof(TensorShape)) != 0) { Tensor new_a = Tensor_new(result_shape, orig_a.node != NULL); - for (int i = 0; i < new_a.data->numel; i++) { + for(int i = 0; i < new_a.data->numel; i++) { int rem = i; int idx[4] = {0}; - for (int d = max_ndims - 1; d >= 0; d--) { + for(int d = max_ndims - 1; d >= 0; d--) { idx[d] = rem % result_shape[d]; rem /= result_shape[d]; } int source_idx = 0; int stride = 1; - //iterating backwards over the original tensor's dimensions - for (int d = a_ndims - 1; d >= 0; d--) { + // iterating backwards over the original tensor's dimensions + for(int d = a_ndims - 1; d >= 0; d--) { int original_dim_size = orig_a.shape[d]; int result_dim_coord = idx[max_ndims - a_ndims + d]; - //if original dimension was 1, it's broadcast; its index is 0. + // if original dimension was 1, it's broadcast; its index is 0. int dim_idx = (original_dim_size == 1) ? 0 : result_dim_coord; source_idx += dim_idx * stride; stride *= original_dim_size; @@ -297,19 +300,19 @@ bool cten_elemwise_broadcast(Tensor* a, Tensor* b) { } // 3. Check if tensor 'b' needs to be expanded - if (memcmp(orig_b.shape, result_shape, sizeof(TensorShape)) != 0) { + if(memcmp(orig_b.shape, result_shape, sizeof(TensorShape)) != 0) { Tensor new_b = Tensor_new(result_shape, orig_b.node != NULL); - for (int i = 0; i < new_b.data->numel; i++) { + for(int i = 0; i < new_b.data->numel; i++) { int rem = i; int idx[4] = {0}; - for (int d = max_ndims - 1; d >= 0; d--) { + for(int d = max_ndims - 1; d >= 0; d--) { idx[d] = rem % result_shape[d]; rem /= result_shape[d]; } int source_idx = 0; int stride = 1; - for (int d = b_ndims - 1; d >= 0; d--) { + for(int d = b_ndims - 1; d >= 0; d--) { int original_dim_size = orig_b.shape[d]; int result_dim_coord = idx[max_ndims - b_ndims + d]; int dim_idx = (original_dim_size == 1) ? 
0 : result_dim_coord; @@ -323,176 +326,186 @@ bool cten_elemwise_broadcast(Tensor* a, Tensor* b) { return true; } -Tensor reduce_gradient_for_broadcasting(Tensor grad, TensorShape original_shape, TensorShape broadcasted_shape) { +Tensor reduce_gradient_for_broadcasting(Tensor grad, + TensorShape original_shape, + TensorShape broadcasted_shape) { Tensor result = grad; - - for (int dim = 3; dim >= 0; dim--) { + + for(int dim = 3; dim >= 0; dim--) { int orig_size = original_shape[dim]; int broad_size = broadcasted_shape[dim]; int grad_size = result.shape[dim]; - + // Case 1: dim was broadcasted from size 1 to size N - if (orig_size == 1 && broad_size > 1 && grad_size == broad_size) { - Tensor summed = Tensor_sum(result, dim); - TensorShape new_shape = {result.shape[0], result.shape[1], result.shape[2], result.shape[3]}; - new_shape[dim] = 1; + if(orig_size == 1 && broad_size > 1 && grad_size == broad_size) { + Tensor summed = Tensor_sum(result, dim); + TensorShape new_shape = {result.shape[0], + result.shape[1], + result.shape[2], + result.shape[3]}; + new_shape[dim] = 1; result = Tensor_new(new_shape, false); - - if (summed.data->numel == 1) { - for (int i = 0; i < result.data->numel; i++) { + + if(summed.data->numel == 1) { + for(int i = 0; i < result.data->numel; i++) { result.data->flex[i] = summed.data->flex[0]; } } else { - for (int i = 0; i < result.data->numel && i < summed.data->numel; i++) { + for(int i = 0; i < result.data->numel && i < summed.data->numel; i++) { result.data->flex[i] = summed.data->flex[i]; } } } - // Case 2: dim was added (original was 0, broadcasted > 0) - else if (orig_size == 0 && broad_size > 0 && grad_size == broad_size) { + // Case 2: dim was added (original was 0, broadcasted > 0) + else if(orig_size == 0 && broad_size > 0 && grad_size == broad_size) { Tensor summed = Tensor_sum(result, dim); - TensorShape new_shape = {result.shape[0], result.shape[1], result.shape[2], result.shape[3]}; + TensorShape new_shape = {result.shape[0], + result.shape[1], + result.shape[2], + result.shape[3]}; new_shape[dim] = 0; - for (int d = dim; d < 3; d++) { - if (d + 1 < 4) { - new_shape[d] = new_shape[d + 1]; - } + for(int d = dim; d < 3; d++) { + if(d + 1 < 4) { new_shape[d] = new_shape[d + 1]; } } - new_shape[3] = 0; //clearing last dim + new_shape[3] = 0; // clearing last dim result = Tensor_new(new_shape, false); - for (int i = 0; i < result.data->numel && i < summed.data->numel; i++) { + for(int i = 0; i < result.data->numel && i < summed.data->numel; i++) { result.data->flex[i] = summed.data->flex[i]; } } - // Case 3: no broadcasting on this dim - else if (orig_size == broad_size && grad_size == broad_size) { - //do nothing - } - else { - //have to think about this + // Case 3: no broadcasting on this dim + else if(orig_size == broad_size && grad_size == broad_size) { + // do nothing + } else { + // have to think about this cten_assert(false, "reduce_gradient_for_broadcasting: unexpected broadcasting pattern"); } } return result; } -void Tensor_normalize_dataset(const float (*X)[4], float (*X_norm)[4], int n_samples, int n_train_samples, int n_features) { +void Tensor_normalize_dataset(const float (*X)[4], + float (*X_norm)[4], + int n_samples, + int n_train_samples, + int n_features) { float mean[4] = {0}, std[4] = {0}; - - for (int i = 0; i < n_train_samples; i++) { - for (int j = 0; j < n_features; j++) { + + for(int i = 0; i < n_train_samples; i++) { + for(int j = 0; j < n_features; j++) { mean[j] += X[i][j]; } } - for (int j = 0; j < n_features; j++) { + for(int 
j = 0; j < n_features; j++) { mean[j] /= n_train_samples; } - - for (int i = 0; i < n_train_samples; i++) { - for (int j = 0; j < n_features; j++) { + + for(int i = 0; i < n_train_samples; i++) { + for(int j = 0; j < n_features; j++) { std[j] += (X[i][j] - mean[j]) * (X[i][j] - mean[j]); } } - for (int j = 0; j < n_features; j++) { + for(int j = 0; j < n_features; j++) { std[j] = sqrtf(std[j] / n_train_samples); // Avoid division by zero - if (std[j] == 0) std[j] = 1.0f; + if(std[j] == 0) std[j] = 1.0f; } - for (int i = 0; i < n_samples; i++) { - for (int j = 0; j < n_features; j++) { + for(int i = 0; i < n_samples; i++) { + for(int j = 0; j < n_features; j++) { X_norm[i][j] = (X[i][j] - mean[j]) / std[j]; } } } -void Tensor_shuffle_dataset(const float (*X)[4], const int *y,float (*X_shuffled)[4], int *y_shuffled, int n_samples, int n_features) { +void Tensor_shuffle_dataset(const float (*X)[4], + const int* y, + float (*X_shuffled)[4], + int* y_shuffled, + int n_samples, + int n_features) { int* indices = malloc(n_samples * sizeof(int)); - for (int i = 0; i < n_samples; i++) { + for(int i = 0; i < n_samples; i++) { indices[i] = i; } - + // Fisher-Yates shuffle srand((unsigned)time(NULL)); - for (int i = n_samples - 1; i > 0; i--) { + for(int i = n_samples - 1; i > 0; i--) { int j = rand() % (i + 1); int tmp = indices[i]; indices[i] = indices[j]; indices[j] = tmp; } - for (int i = 0; i < n_samples; i++) { + for(int i = 0; i < n_samples; i++) { int idx = indices[i]; - for (int j = 0; j < n_features; j++) { + for(int j = 0; j < n_features; j++) { X_shuffled[i][j] = X[idx][j]; } y_shuffled[i] = y[idx]; } - + free(indices); } Tensor Tensor_reduce_dim(Tensor self, int dim, const char* operation) { int ndim = TensorShape_dim(self.shape); - if (dim < 0){ - if (dim < -ndim) { + if(dim < 0) { + if(dim < -ndim) { printf("dim %d out of range", dim); exit(-1); } dim += ndim; } - if (dim >= ndim) { + if(dim >= ndim) { printf("dim %d out of range", dim); exit(-1); } - + TensorShape out_shape = {0, 0, 0, 0}; int out_idx = 0; - for (int i = 0; i < ndim; i++) { - if (i != dim) { - out_shape[out_idx++] = self.shape[i]; - } + for(int i = 0; i < ndim; i++) { + if(i != dim) { out_shape[out_idx++] = self.shape[i]; } } - + int dim_size = self.shape[dim]; Tensor res = Tensor_zeros(out_shape, self.node != NULL); - + int total_out_elements = res.data->numel; - - for (int out_i = 0; out_i < total_out_elements; out_i++) { + + for(int out_i = 0; out_i < total_out_elements; out_i++) { int out_indices[4] = {0}; int remaining = out_i; - for (int j = out_idx - 1; j >= 0; j--) { + for(int j = out_idx - 1; j >= 0; j--) { out_indices[j] = remaining % out_shape[j]; remaining /= out_shape[j]; } - - for (int d = 0; d < dim_size; d++) { + + for(int d = 0; d < dim_size; d++) { int in_indices[4] = {0}; int out_pos = 0; - for (int j = 0; j < ndim; j++) { - if (j == dim) { + for(int j = 0; j < ndim; j++) { + if(j == dim) { in_indices[j] = d; } else { in_indices[j] = out_indices[out_pos++]; } } - + int in_linear = 0; int stride = 1; - for (int j = ndim - 1; j >= 0; j--) { + for(int j = ndim - 1; j >= 0; j--) { in_linear += in_indices[j] * stride; stride *= self.shape[j]; } - + res.data->flex[out_i] += self.data->flex[in_linear]; } - - if (strcmp(operation, "mean") == 0) { - res.data->flex[out_i] /= dim_size; - } + + if(strcmp(operation, "mean") == 0) { res.data->flex[out_i] /= dim_size; } } - + return res; } @@ -503,19 +516,17 @@ Tensor Tensor_unsqueeze(Tensor self, int dim) { TensorShape new_shape = {0}; int old_idx = 0; // insert a 
'1' at the 'dim' position in the new shape. - for (int i = 0; i < old_ndim + 1 && i < 4; i++) { - if (i == dim) { + for(int i = 0; i < old_ndim + 1 && i < 4; i++) { + if(i == dim) { new_shape[i] = 1; } else { - if(old_idx < 4) { - new_shape[i] = self.shape[old_idx++]; - } + if(old_idx < 4) { new_shape[i] = self.shape[old_idx++]; } } } Tensor res = self; memcpy(res.shape, new_shape, sizeof(TensorShape)); - + return res; } @@ -552,7 +563,7 @@ void cten_clip_grad_value_range(Tensor* params, int n_params, float min_value, f if(min_value > max_value) { cten_assert(false, "min_value must be less than or equal to max_value"); } - if(n_params <= 0 || params == NULL) { return; } //safety check + if(n_params <= 0 || params == NULL) { return; } // safety check int clipped_count = 0; int total_count = 0; for(int i = 0; i < n_params; i++) { @@ -575,7 +586,7 @@ void cten_clip_grad_value_range(Tensor* params, int n_params, float min_value, f } void cten_clip_grad_positive(Tensor* params, int n_params, float max_value) { - if(n_params <= 0 || params == NULL) { return; } //safety check + if(n_params <= 0 || params == NULL) { return; } // safety check int clipped_count = 0; int total_count = 0; @@ -597,7 +608,7 @@ void cten_clip_grad_positive(Tensor* params, int n_params, float max_value) { } void cten_clip_grad_negative(Tensor* params, int n_params, float min_value) { - if(n_params <= 0 || params == NULL) { return; } //safety check + if(n_params <= 0 || params == NULL) { return; } // safety check int clipped_count = 0; int total_count = 0; diff --git a/tests/Backward/test_abs_backward.c b/tests/Backward/test_abs_backward.c index 554af87..2b2b138 100644 --- a/tests/Backward/test_abs_backward.c +++ b/tests/Backward/test_abs_backward.c @@ -6,7 +6,7 @@ void test_abs_backward() { const char* op_name = "abs_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Simple backward @@ -17,14 +17,19 @@ void test_abs_backward() { TensorShape s_shape = {1}; float d1[] = {-5.0f}; float exp_grad[] = {-1.0f}; - + Tensor t1 = create_test_tensor(s_shape, d1, true); Tensor z = Tensor_abs(t1); - + Tensor_backward(z, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(s_shape, exp_grad, false); - compare_tensors(&t1.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Vector with mixed values @@ -32,15 +37,20 @@ void test_abs_backward() { TensorShape v_shape = {5}; float d1[] = {10.0f, -2.0f, 0.0f, 5.5f, -0.1f}; float exp_grad[] = {1.0f, -1.0f, 0.0f, 1.0f, -1.0f}; - + Tensor t1 = create_test_tensor(v_shape, d1, true); Tensor z = Tensor_abs(t1); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); - compare_tensors(&t1.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Sub-test 3: Matrix backward @@ -48,15 +58,20 @@ void test_abs_backward() { TensorShape m_shape = {2, 2}; float d1[] = {1.0f, -2.0f, 0.0f, -4.0f}; float exp_grad[] = {1.0f, -1.0f, 0.0f, -1.0f}; - + Tensor t1 = create_test_tensor(m_shape, d1, true); Tensor z = Tensor_abs(t1); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t1.node->grad, &expected_grad, op_name, 
tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } } @@ -76,9 +91,8 @@ void test_abs_backward() { compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - // Test Case 3: Chained and Complex Graphs - { + { const char* tc_name = "Chained_and_Complex_Graphs_backward"; // Sub-test 1: z = abs(x) * w { @@ -91,21 +105,31 @@ void test_abs_backward() { // dl/dw = (dl/dz) * (dz/dw) = {1, 1} * abs(x) = {2, 3} float exp_grad_x[] = {-5.0f, 10.0f}; float exp_grad_w[] = {2.0f, 3.0f}; - + Tensor x = create_test_tensor(shape, x_data, true); Tensor w = create_test_tensor(shape, w_data, true); - + Tensor abs_x = Tensor_abs(x); Tensor prod = Tensor_mul(abs_x, w); Tensor l = Tensor_sum(prod); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad_x = create_test_tensor(shape, exp_grad_x, false); Tensor expected_grad_w = create_test_tensor(shape, exp_grad_w, false); - compare_tensors(&x.node->grad, &expected_grad_x, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&w.node->grad, &expected_grad_w, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&x.node->grad, + &expected_grad_x, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&w.node->grad, + &expected_grad_w, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: z = abs(x * w) @@ -120,21 +144,31 @@ void test_abs_backward() { // dl/dw = (dl/dz)(dz/dy)(dy/dw) = {1,1} * sign(y) * x = {-1,-1} * {-2,3} = {2, -3} float exp_grad_x[] = {-5.0f, 1.0f}; float exp_grad_w[] = {2.0f, -3.0f}; - + Tensor x = create_test_tensor(shape, x_data, true); Tensor w = create_test_tensor(shape, w_data, true); - + Tensor prod = Tensor_mul(x, w); Tensor abs_prod = Tensor_abs(prod); Tensor l = Tensor_sum(abs_prod); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad_x = create_test_tensor(shape, exp_grad_x, false); Tensor expected_grad_w = create_test_tensor(shape, exp_grad_w, false); - compare_tensors(&x.node->grad, &expected_grad_x, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); - compare_tensors(&w.node->grad, &expected_grad_w, op_name, tc_name, 4, TEST_FLOAT_TOLERANCE); + compare_tensors(&x.node->grad, + &expected_grad_x, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); + compare_tensors(&w.node->grad, + &expected_grad_w, + op_name, + tc_name, + 4, + TEST_FLOAT_TOLERANCE); } } diff --git a/tests/Backward/test_add_backward.c b/tests/Backward/test_add_backward.c index 8606a16..5ee5b5e 100644 --- a/tests/Backward/test_add_backward.c +++ b/tests/Backward/test_add_backward.c @@ -6,7 +6,7 @@ void test_add_backward() { const char* op_name = "add_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Simple backward (1x1 tensors) @@ -19,18 +19,28 @@ void test_add_backward() { float d2[] = {3.0f}; float exp_grad1[] = {1.0f}; // dz/dx = 1 float exp_grad2[] = {1.0f}; // dz/dy = 1 - + Tensor t1 = create_test_tensor(s_shape, d1, true); Tensor t2 = create_test_tensor(s_shape, d2, true); Tensor z = Tensor_add(t1, t2); // z = 5.0 - + Tensor_backward(z, (Tensor){0}); - + Tensor expected_grad1 = create_test_tensor(s_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(s_shape, exp_grad2, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad2, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + 
op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad2, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Vector sum backward @@ -39,18 +49,28 @@ void test_add_backward() { float d1[] = {1.0f, 2.0f, 3.0f}; float d2[] = {4.0f, 5.0f, 6.0f}; float exp_grad[] = {1.0f, 1.0f, 1.0f}; - + Tensor t1 = create_test_tensor(v_shape, d1, true); Tensor t2 = create_test_tensor(v_shape, d2, true); Tensor z = Tensor_add(t1, t2); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); - compare_tensors(&t1.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Sub-test 3: Matrix sum backward @@ -59,18 +79,28 @@ void test_add_backward() { float d1[] = {1.0f, 2.0f, 3.0f, 4.0f}; float d2[] = {5.0f, 6.0f, 7.0f, 8.0f}; float exp_grad[] = {1.0f, 1.0f, 1.0f, 1.0f}; - + Tensor t1 = create_test_tensor(m_shape, d1, true); Tensor t2 = create_test_tensor(m_shape, d2, true); Tensor z = Tensor_add(t1, t2); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t1.node->grad, &expected_grad, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } } @@ -85,19 +115,29 @@ void test_add_backward() { float scalar_data[] = {3.0f}; float exp_grad_vec[] = {1.0f, 1.0f}; float exp_grad_scalar[] = {2.0f}; - + Tensor t_vec = create_test_tensor(vec_shape, vec_data, true); Tensor t_scalar = create_test_tensor(scalar_shape, scalar_data, true); Tensor z = Tensor_add(t_vec, t_scalar); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad_vec = create_test_tensor(vec_shape, exp_grad_vec, false); Tensor expected_grad_scalar = create_test_tensor(scalar_shape, exp_grad_scalar, false); - compare_tensors(&t_vec.node->grad, &expected_grad_vec, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_scalar.node->grad, &expected_grad_scalar, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_vec.node->grad, + &expected_grad_vec, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_scalar.node->grad, + &expected_grad_scalar, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Matrix + Row Vector @@ -119,8 +159,18 @@ void test_add_backward() { Tensor expected_grad_mat = create_test_tensor(mat_shape, exp_grad_mat, false); Tensor expected_grad_row = create_test_tensor(row_shape, exp_grad_row, false); - compare_tensors(&t_mat.node->grad, &expected_grad_mat, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_row.node->grad, &expected_grad_row, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_mat.node->grad, + &expected_grad_mat, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_row.node->grad, + &expected_grad_row, + op_name, + tc_name, + 2, + 
TEST_FLOAT_TOLERANCE); } // Sub-test 3: Matrix + Column Vector @@ -142,8 +192,18 @@ void test_add_backward() { Tensor expected_grad_mat = create_test_tensor(mat_shape, exp_grad_mat, false); Tensor expected_grad_col = create_test_tensor(col_shape, exp_grad_col, false); - compare_tensors(&t_mat.node->grad, &expected_grad_mat, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_col.node->grad, &expected_grad_col, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_mat.node->grad, + &expected_grad_mat, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_col.node->grad, + &expected_grad_col, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } // Sub-test 4: 3D + 2D Tensor @@ -165,8 +225,18 @@ void test_add_backward() { Tensor expected_grad_3d = create_test_tensor(tensor3d_shape, exp_grad_3d, false); Tensor expected_grad_2d = create_test_tensor(tensor2d_shape, exp_grad_2d, false); - compare_tensors(&t_3d.node->grad, &expected_grad_3d, op_name, tc_name, 4, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_2d.node->grad, &expected_grad_2d, op_name, tc_name, 4, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_3d.node->grad, + &expected_grad_3d, + op_name, + tc_name, + 4, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_2d.node->grad, + &expected_grad_2d, + op_name, + tc_name, + 4, + TEST_FLOAT_TOLERANCE); } // Sub-test 5: Multi-dim broadcast @@ -174,10 +244,12 @@ void test_add_backward() { TensorShape large_shape = {2, 3, 4}; TensorShape small_shape = {1, 1, 4}; float large_data[24]; - for(int i = 0; i < 24; i++) large_data[i] = (float)(i + 1); + for(int i = 0; i < 24; i++) + large_data[i] = (float)(i + 1); float small_data[] = {0.1f, 0.2f, 0.3f, 0.4f}; float exp_grad_large[24]; - for(int i = 0; i < 24; i++) exp_grad_large[i] = 1.0f; + for(int i = 0; i < 24; i++) + exp_grad_large[i] = 1.0f; float exp_grad_small[] = {6.0f, 6.0f, 6.0f, 6.0f}; Tensor t_large = create_test_tensor(large_shape, large_data, true); @@ -190,13 +262,23 @@ void test_add_backward() { Tensor expected_grad_large = create_test_tensor(large_shape, exp_grad_large, false); Tensor expected_grad_small = create_test_tensor(small_shape, exp_grad_small, false); - compare_tensors(&t_large.node->grad, &expected_grad_large, op_name, tc_name, 5, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_small.node->grad, &expected_grad_small, op_name, tc_name, 5, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_large.node->grad, + &expected_grad_large, + op_name, + tc_name, + 5, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_small.node->grad, + &expected_grad_small, + op_name, + tc_name, + 5, + TEST_FLOAT_TOLERANCE); } } // Test Case 3: Chained and Complex Graphs - { + { const char* tc_name = "Chained_and_Complex_Graphs_backward"; // Sub-test 1: Complex computation graph (x+y)*w { @@ -208,24 +290,39 @@ void test_add_backward() { float exp_grad_x[] = {2.0f, 3.0f}; float exp_grad_y[] = {5.0f}; float exp_grad_w[] = {4.0f, 5.0f}; - + Tensor x = create_test_tensor(v_shape, x_data, true); Tensor y = create_test_tensor(s_shape, y_data, true); Tensor w = create_test_tensor(v_shape, w_data, true); - + Tensor sum_xy = Tensor_add(x, y); Tensor prod = Tensor_mul(sum_xy, w); Tensor l = Tensor_sum(prod); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad_x = create_test_tensor(v_shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(s_shape, exp_grad_y, false); Tensor expected_grad_w = create_test_tensor(v_shape, exp_grad_w, false); - compare_tensors(&x.node->grad, &expected_grad_x, op_name, tc_name, 1, 
TEST_FLOAT_TOLERANCE); - compare_tensors(&y.node->grad, &expected_grad_y, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&w.node->grad, &expected_grad_w, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&x.node->grad, + &expected_grad_x, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&y.node->grad, + &expected_grad_y, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&w.node->grad, + &expected_grad_w, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Chain of broadcasting operations (mat + row) + col @@ -254,9 +351,24 @@ void test_add_backward() { Tensor expected_grad_row = create_test_tensor(row_shape, exp_grad_row, false); Tensor expected_grad_col = create_test_tensor(col_shape, exp_grad_col, false); - compare_tensors(&t_mat.node->grad, &expected_grad_mat, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_row.node->grad, &expected_grad_row, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_col.node->grad, &expected_grad_col, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_mat.node->grad, + &expected_grad_mat, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_row.node->grad, + &expected_grad_row, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_col.node->grad, + &expected_grad_col, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -281,8 +393,18 @@ void test_add_backward() { Tensor expected_grad_mat = create_test_tensor(mat_shape, exp_grad_mat, false); Tensor expected_grad_scalar = create_test_tensor(scalar_shape, exp_grad_scalar, false); - compare_tensors(&t_mat.node->grad, &expected_grad_mat, "mul_backward", tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_scalar.node->grad, &expected_grad_scalar, "mul_backward", tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_mat.node->grad, + &expected_grad_mat, + "mul_backward", + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_scalar.node->grad, + &expected_grad_scalar, + "mul_backward", + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } cten_free(pool_id); diff --git a/tests/Backward/test_div_backward.c b/tests/Backward/test_div_backward.c index 1b5e4ab..69f9bcc 100644 --- a/tests/Backward/test_div_backward.c +++ b/tests/Backward/test_div_backward.c @@ -6,7 +6,7 @@ void test_div_backward() { const char* op_name = "div_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Simple element-wise vector division @@ -20,16 +20,16 @@ void test_div_backward() { // loss = sum(z) = 6.8407 float exp_grad_x[] = {0.218881f, 0.372065f, -0.533447f}; float exp_grad_y[] = {-0.323614f, -0.481095f, 2.170725f}; - + Tensor x = create_test_tensor(shape, x_data, true); Tensor y = create_test_tensor(shape, y_data, true); - + Tensor z = Tensor_div(x, y); Tensor loss = Tensor_sum(z); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad_x = create_test_tensor(shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(shape, exp_grad_y, false); @@ -55,10 +55,10 @@ void test_div_backward() { Tensor z = Tensor_div(x, y); Tensor loss = Tensor_sum(z); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad_x = create_test_tensor(x_shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(y_shape, exp_grad_y, false); @@ -84,10 +84,10 @@ void test_div_backward() { Tensor z = Tensor_div(x, y); Tensor loss = Tensor_sum(z); - + Tensor 
grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad_x = create_test_tensor(x_shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(y_shape, exp_grad_y, false); @@ -109,13 +109,13 @@ void test_div_backward() { Tensor x = create_test_tensor(shape, x_data, true); Tensor y = create_test_tensor(shape, y_data, true); - + Tensor z = Tensor_div(x, y); Tensor loss = Tensor_sum(z); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad_x = create_test_tensor(shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(shape, exp_grad_y, false); @@ -130,7 +130,7 @@ void test_div_backward() { float a_data[] = {3.0511f}; float b_data[] = {1.3192f}; float c_data[] = {1.404f}; - + // Let d = a / b. Then z = d * c. // Forward: d = 3.0511/1.3192 = 2.3129. z = 2.3129 * 1.404 = 3.2472 // Backward pass: @@ -142,24 +142,39 @@ void test_div_backward() { float exp_grad_a[] = {1.064281f}; // dz/db = (dz/dd) * (dd/db) = c * (-a/b²) = 1.404 * (-3.0511/(1.3192*1.3192)) = -2.461514 float exp_grad_b[] = {-2.461514f}; - + Tensor a = create_test_tensor(shape, a_data, true); Tensor b = create_test_tensor(shape, b_data, true); Tensor c = create_test_tensor(shape, c_data, true); - + Tensor d = Tensor_div(a, b); Tensor z = Tensor_mul(d, c); - + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad_a_tensor = create_test_tensor(shape, exp_grad_a, false); Tensor expected_grad_b_tensor = create_test_tensor(shape, exp_grad_b, false); Tensor expected_grad_c_tensor = create_test_tensor(shape, exp_grad_c, false); - compare_tensors(&a.node->grad, &expected_grad_a_tensor, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&b.node->grad, &expected_grad_b_tensor, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&c.node->grad, &expected_grad_c_tensor, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&a.node->grad, + &expected_grad_a_tensor, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&b.node->grad, + &expected_grad_b_tensor, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&c.node->grad, + &expected_grad_c_tensor, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } cten_free(pool_id); diff --git a/tests/Backward/test_linear_backward.c b/tests/Backward/test_linear_backward.c index 0efca66..e2ff31c 100644 --- a/tests/Backward/test_linear_backward.c +++ b/tests/Backward/test_linear_backward.c @@ -6,7 +6,7 @@ void test_linear_backward() { const char* op_name = "linear_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Simple linear backward @@ -14,39 +14,55 @@ void test_linear_backward() { const char* tc_name = "Simple_linear_backward"; // Sub-test 1: Basic linear layer { - TensorShape input_shape = {1, 3}; // batch_size=1, input_features=3 + TensorShape input_shape = {1, 3}; // batch_size=1, input_features=3 TensorShape weight_shape = {3, 2}; // input_features=3, output_features=2 TensorShape bias_shape = {1, 2}; // output_features=2 - + float input_data[] = {1.0f, 2.0f, 3.0f}; float weight_data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f}; float bias_data[] = {0.1f, 0.2f}; - + // Expected gradients float exp_grad_input[] = {0.3f, 0.7f, 1.1f}; // input_grad = weight.T @ grad_output - float exp_grad_weight[] = {1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f}; // weight_grad = input.T @ grad_output - float exp_grad_bias[] = {1.0f, 1.0f}; // bias_grad = sum(grad_output, dim=0) - + float exp_grad_weight[] = + 
{1.0f, 2.0f, 3.0f, 1.0f, 2.0f, 3.0f}; // weight_grad = input.T @ grad_output + float exp_grad_bias[] = {1.0f, 1.0f}; // bias_grad = sum(grad_output, dim=0) + Tensor input = create_test_tensor(input_shape, input_data, true); Tensor weight = create_test_tensor(weight_shape, weight_data, true); Tensor bias = create_test_tensor(bias_shape, bias_data, true); - + Tensor output = nn_linear(input, weight, bias); - + // Create a gradient for the output TensorShape grad_shape = {1, 2}; // Same as output shape float grad_data[] = {1.0f, 1.0f}; Tensor grad_output = create_test_tensor(grad_shape, grad_data, false); - + Tensor_backward(output, grad_output); - + Tensor expected_grad_input = create_test_tensor(input_shape, exp_grad_input, false); Tensor expected_grad_weight = create_test_tensor(weight_shape, exp_grad_weight, false); Tensor expected_grad_bias = create_test_tensor(bias_shape, exp_grad_bias, false); - compare_tensors(&input.node->grad, &expected_grad_input, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&weight.node->grad, &expected_grad_weight, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&bias.node->grad, &expected_grad_bias, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&input.node->grad, + &expected_grad_input, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&weight.node->grad, + &expected_grad_weight, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&bias.node->grad, + &expected_grad_bias, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } } @@ -55,39 +71,54 @@ void test_linear_backward() { const char* tc_name = "Batch_linear_backward"; // Sub-test 1: Batch size > 1 { - TensorShape input_shape = {2, 3}; // batch_size=2, input_features=3 + TensorShape input_shape = {2, 3}; // batch_size=2, input_features=3 TensorShape weight_shape = {3, 2}; // input_features=3, output_features=2 TensorShape bias_shape = {1, 2}; // output_features=2 - + float input_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float weight_data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f}; float bias_data[] = {0.1f, 0.2f}; - + // Expected gradients float exp_grad_input[] = {0.3f, 0.7f, 1.1f, 0.3f, 0.7f, 1.1f}; float exp_grad_weight[] = {5.0f, 7.0f, 9.0f, 5.0f, 7.0f, 9.0f}; float exp_grad_bias[] = {2.0f, 2.0f}; // Sum over batch dimension - + Tensor input = create_test_tensor(input_shape, input_data, true); Tensor weight = create_test_tensor(weight_shape, weight_data, true); Tensor bias = create_test_tensor(bias_shape, bias_data, true); - + Tensor output = nn_linear(input, weight, bias); - + // Create a gradient for the output TensorShape grad_shape = {2, 2}; // Same as output shape float grad_data[] = {1.0f, 1.0f, 1.0f, 1.0f}; Tensor grad_output = create_test_tensor(grad_shape, grad_data, false); - + Tensor_backward(output, grad_output); - + Tensor expected_grad_input = create_test_tensor(input_shape, exp_grad_input, false); Tensor expected_grad_weight = create_test_tensor(weight_shape, exp_grad_weight, false); Tensor expected_grad_bias = create_test_tensor(bias_shape, exp_grad_bias, false); - compare_tensors(&input.node->grad, &expected_grad_input, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&weight.node->grad, &expected_grad_weight, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&bias.node->grad, &expected_grad_bias, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&input.node->grad, + &expected_grad_input, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + 
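/*
 * The expected values in these linear-layer tests follow the standard
 * closed forms for y = x @ W + b with upstream gradient G:
 *     dL/dx = G @ W^T,   dL/dW = x^T @ G,   dL/db = column sums of G.
 * (The exp_grad_weight arrays appear flattened in the transposed,
 * output-major order, i.e. they equal G^T @ x laid out row by row.)
 * Below is a minimal sketch of the two layout-independent pieces;
 * linear_backward_ref is a hypothetical reference helper for checking
 * the numbers by hand, not part of the cten API:
 */
void linear_backward_ref(const float* W, const float* G,
                         float* dx, float* db,
                         int batch, int in, int out) {
    /* dL/dx = G @ W^T : dx[b][i] = sum_j G[b][j] * W[i][j] */
    for(int b = 0; b < batch; b++)
        for(int i = 0; i < in; i++) {
            float acc = 0.0f;
            for(int j = 0; j < out; j++)
                acc += G[b * out + j] * W[i * out + j];
            dx[b * in + i] = acc;
        }
    /* dL/db : sum the upstream gradient over the batch dimension */
    for(int j = 0; j < out; j++) {
        float acc = 0.0f;
        for(int b = 0; b < batch; b++)
            acc += G[b * out + j];
        db[j] = acc;
    }
}
/* For Sub-test 1 (x = {1,2,3}, G = {1,1}) this yields dx = {0.3f, 0.7f,
 * 1.1f} and db = {1.0f, 1.0f}, matching exp_grad_input and exp_grad_bias
 * above. */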
compare_tensors(&weight.node->grad, + &expected_grad_weight, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&bias.node->grad, + &expected_grad_bias, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } } @@ -96,70 +127,78 @@ void test_linear_backward() { const char* tc_name = "Random_input_linear_backward"; // Sub-test 1: Random input values { - TensorShape input_shape = {2, 4}; // batch_size=2, input_features=4 + TensorShape input_shape = {2, 4}; // batch_size=2, input_features=4 TensorShape weight_shape = {4, 3}; // input_features=4, output_features=3 TensorShape bias_shape = {1, 3}; // output_features=3 - + float input_data[] = {0.5f, 1.3f, 2.7f, 0.8f, 1.9f, 0.4f, 1.2f, 3.1f}; - float weight_data[] = {0.2f, 0.1f, 0.3f, 0.5f, 0.4f, 0.2f, 0.1f, 0.7f, 0.6f, 0.3f, 0.2f, 0.8f}; + float weight_data[] = + {0.2f, 0.1f, 0.3f, 0.5f, 0.4f, 0.2f, 0.1f, 0.7f, 0.6f, 0.3f, 0.2f, 0.8f}; float bias_data[] = {0.5f, 0.3f, 0.2f}; - + // Expected gradients for a gradient of ones at the output float exp_grad_bias[] = {2.0f, 2.0f, 2.0f}; // Sum over batch dimension - + Tensor input = create_test_tensor(input_shape, input_data, true); Tensor weight = create_test_tensor(weight_shape, weight_data, true); Tensor bias = create_test_tensor(bias_shape, bias_data, true); - + Tensor output = nn_linear(input, weight, bias); - + // Create a gradient for the output TensorShape grad_shape = {2, 3}; // Same as output shape float grad_data[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; Tensor grad_output = create_test_tensor(grad_shape, grad_data, false); - + Tensor_backward(output, grad_output); - + Tensor expected_grad_bias = create_test_tensor(bias_shape, exp_grad_bias, false); // Focus on bias gradient - compare_tensors(&bias.node->grad, &expected_grad_bias, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&bias.node->grad, + &expected_grad_bias, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Different gradient values { - TensorShape input_shape = {3, 2}; // batch_size=3, input_features=2 + TensorShape input_shape = {3, 2}; // batch_size=3, input_features=2 TensorShape weight_shape = {2, 4}; // input_features=2, output_features=4 TensorShape bias_shape = {1, 4}; // output_features=4 - + float input_data[] = {1.5f, 2.3f, 0.7f, 1.8f, 3.2f, 0.9f}; float weight_data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f}; float bias_data[] = {0.1f, 0.2f, 0.3f, 0.4f}; - + // Create a non-uniform gradient for the output TensorShape grad_shape = {3, 4}; // Same as output shape - float grad_data[] = { - 0.5f, 1.0f, 1.5f, 2.0f, - 0.1f, 0.2f, 0.3f, 0.4f, - 1.0f, 0.8f, 0.6f, 0.4f - }; - + float grad_data[] = + {0.5f, 1.0f, 1.5f, 2.0f, 0.1f, 0.2f, 0.3f, 0.4f, 1.0f, 0.8f, 0.6f, 0.4f}; + // Expected bias gradient is the sum of the output gradient across the batch dimension float exp_grad_bias[] = {1.6f, 2.0f, 2.4f, 2.8f}; - + Tensor input = create_test_tensor(input_shape, input_data, true); Tensor weight = create_test_tensor(weight_shape, weight_data, true); Tensor bias = create_test_tensor(bias_shape, bias_data, true); - + Tensor output = nn_linear(input, weight, bias); Tensor grad_output = create_test_tensor(grad_shape, grad_data, false); - + Tensor_backward(output, grad_output); - + Tensor expected_grad_bias = create_test_tensor(bias_shape, exp_grad_bias, false); // Focus on bias gradient - compare_tensors(&bias.node->grad, &expected_grad_bias, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&bias.node->grad, + &expected_grad_bias, + op_name, + tc_name, + 2, + 
TEST_FLOAT_TOLERANCE); } } @@ -168,58 +207,68 @@ void test_linear_backward() { const char* tc_name = "Chained_operations_with_linear"; // Sub-test 1: Linear followed by sum { - TensorShape input_shape = {2, 3}; // batch_size=2, input_features=3 + TensorShape input_shape = {2, 3}; // batch_size=2, input_features=3 TensorShape weight_shape = {3, 2}; // input_features=3, output_features=2 TensorShape bias_shape = {1, 2}; // output_features=2 - + float input_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float weight_data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f}; float bias_data[] = {0.1f, 0.2f}; - + // Expected gradients float exp_grad_bias[] = {2.0f, 2.0f}; // For sum reduction - + Tensor input = create_test_tensor(input_shape, input_data, true); Tensor weight = create_test_tensor(weight_shape, weight_data, true); Tensor bias = create_test_tensor(bias_shape, bias_data, true); - + Tensor output = nn_linear(input, weight, bias); Tensor sum_output = Tensor_sum(output); - + Tensor_backward(sum_output, (Tensor){0}); - + Tensor expected_grad_bias = create_test_tensor(bias_shape, exp_grad_bias, false); // Focus on bias gradient - compare_tensors(&bias.node->grad, &expected_grad_bias, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&bias.node->grad, + &expected_grad_bias, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Linear followed by mean { - TensorShape input_shape = {2, 3}; // batch_size=2, input_features=3 + TensorShape input_shape = {2, 3}; // batch_size=2, input_features=3 TensorShape weight_shape = {3, 2}; // input_features=3, output_features=2 TensorShape bias_shape = {1, 2}; // output_features=2 - + float input_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float weight_data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f}; float bias_data[] = {0.1f, 0.2f}; - + // Expected gradients float exp_grad_bias[] = {0.5f, 0.5f}; // For mean reduction (1/2) - + Tensor input = create_test_tensor(input_shape, input_data, true); Tensor weight = create_test_tensor(weight_shape, weight_data, true); Tensor bias = create_test_tensor(bias_shape, bias_data, true); - + Tensor output = nn_linear(input, weight, bias); Tensor mean_output = Tensor_mean(output); - + Tensor_backward(mean_output, (Tensor){0}); - + Tensor expected_grad_bias = create_test_tensor(bias_shape, exp_grad_bias, false); // Focus on bias gradient - compare_tensors(&bias.node->grad, &expected_grad_bias, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&bias.node->grad, + &expected_grad_bias, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } diff --git a/tests/Backward/test_matmul_backward.c b/tests/Backward/test_matmul_backward.c index dff1c04..1af06d0 100644 --- a/tests/Backward/test_matmul_backward.c +++ b/tests/Backward/test_matmul_backward.c @@ -6,7 +6,7 @@ void test_matmul_backward() { const char* op_name = "matmul_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Basic matrix multiplication backward (sum to scalar) @@ -14,27 +14,27 @@ void test_matmul_backward() { const char* tc_name = "matmul_basic_sum_backward"; TensorShape a_shape = {2, 3}; // 2x3 matrix TensorShape b_shape = {3, 2}; // 3x2 matrix - + // A = [[1, 2, 3], [4, 5, 6]] float a_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; // B = [[1, 2], [3, 4], [5, 6]] float b_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - + // Expected gradients (computed manually): // For sum(A @ B), gradients are sum of all partial derivatives // dC/dA = ones(2,2) @ B^T = [[1,1],[1,1]] @ 
[[1,3,5],[2,4,6]] = [[3,7,11],[3,7,11]] float exp_grad_a[] = {3.0f, 7.0f, 11.0f, 3.0f, 7.0f, 11.0f}; // dC/dB = A^T @ ones(2,2) = [[1,4],[2,5],[3,6]] @ [[1,1],[1,1]] = [[5,5],[7,7],[9,9]] float exp_grad_b[] = {5.0f, 5.0f, 7.0f, 7.0f, 9.0f, 9.0f}; - + Tensor A = create_test_tensor(a_shape, a_data, true); Tensor B = create_test_tensor(b_shape, b_data, true); Tensor C = Tensor_matmul(A, B); // C = A @ B (2x2 result) - Tensor C_sum = Tensor_sum(C); // sum to scalar for backward - + Tensor C_sum = Tensor_sum(C); // sum to scalar for backward + Tensor grad_dummy = {0}; Tensor_backward(C_sum, grad_dummy); - + Tensor expected_grad_a = create_test_tensor(a_shape, exp_grad_a, false); Tensor expected_grad_b = create_test_tensor(b_shape, exp_grad_b, false); @@ -46,26 +46,26 @@ void test_matmul_backward() { { const char* tc_name = "matmul_square_sum_backward"; TensorShape shape = {2, 2}; // 2x2 matrices - + // A = [[1, 2], [3, 4]] float a_data[] = {1.0f, 2.0f, 3.0f, 4.0f}; // B = [[2, 0], [1, 3]] float b_data[] = {2.0f, 0.0f, 1.0f, 3.0f}; - + // Expected gradients for sum(A @ B): // dC/dA = ones(2,2) @ B^T = [[1,1],[1,1]] @ [[2,1],[0,3]] = [[2,4],[2,4]] float exp_grad_a[] = {2.0f, 4.0f, 2.0f, 4.0f}; // dC/dB = A^T @ ones(2,2) = [[1,3],[2,4]] @ [[1,1],[1,1]] = [[4,4],[6,6]] float exp_grad_b[] = {4.0f, 4.0f, 6.0f, 6.0f}; - + Tensor A = create_test_tensor(shape, a_data, true); Tensor B = create_test_tensor(shape, b_data, true); Tensor C = Tensor_matmul(A, B); Tensor C_sum = Tensor_sum(C); // sum to scalar for backward - + Tensor grad_dummy = {0}; Tensor_backward(C_sum, grad_dummy); - + Tensor expected_grad_a = create_test_tensor(shape, exp_grad_a, false); Tensor expected_grad_b = create_test_tensor(shape, exp_grad_b, false); @@ -78,26 +78,27 @@ void test_matmul_backward() { const char* tc_name = "matmul_rectangular_sum_backward"; TensorShape a_shape = {3, 2}; // 3x2 matrix TensorShape b_shape = {2, 4}; // 2x4 matrix - + // A = [[1, 2], [3, 4], [5, 6]] float a_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; // B = [[1, 2, 3, 4], [5, 6, 7, 8]] float b_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; - + // Expected gradients for sum(A @ B): - // dC/dA = ones(3,4) @ B^T = ones(3,4) @ [[1,5],[2,6],[3,7],[4,8]] = [[10,26],[10,26],[10,26]] + // dC/dA = ones(3,4) @ B^T = ones(3,4) @ [[1,5],[2,6],[3,7],[4,8]] = + // [[10,26],[10,26],[10,26]] float exp_grad_a[] = {10.0f, 26.0f, 10.0f, 26.0f, 10.0f, 26.0f}; // dC/dB = A^T @ ones(3,4) = [[1,3,5],[2,4,6]] @ ones(3,4) = [[9,9,9,9],[12,12,12,12]] float exp_grad_b[] = {9.0f, 9.0f, 9.0f, 9.0f, 12.0f, 12.0f, 12.0f, 12.0f}; - + Tensor A = create_test_tensor(a_shape, a_data, true); Tensor B = create_test_tensor(b_shape, b_data, true); Tensor C = Tensor_matmul(A, B); // C = A @ B (3x4 result) - Tensor C_sum = Tensor_sum(C); // sum to scalar for backward - + Tensor C_sum = Tensor_sum(C); // sum to scalar for backward + Tensor grad_dummy = {0}; Tensor_backward(C_sum, grad_dummy); - + Tensor expected_grad_a = create_test_tensor(a_shape, exp_grad_a, false); Tensor expected_grad_b = create_test_tensor(b_shape, exp_grad_b, false); @@ -111,30 +112,30 @@ void test_matmul_backward() { TensorShape a_shape = {2, 2}; TensorShape b_shape = {2, 2}; TensorShape w_shape = {2, 2}; - + // A = [[1, 2], [3, 4]] float a_data[] = {1.0f, 2.0f, 3.0f, 4.0f}; // B = [[1, 0], [0, 1]] (identity) float b_data[] = {1.0f, 0.0f, 0.0f, 1.0f}; // W = [[2, 1], [1, 2]] (weights) float w_data[] = {2.0f, 1.0f, 1.0f, 2.0f}; - + // Expected gradients for z = sum((A @ B) * W) = sum(A * W) since B is 
identity - float exp_grad_a[] = {2.0f, 1.0f, 1.0f, 2.0f}; // dz/dA = W - float exp_grad_b[] = {6.0f, 6.0f, 10.0f, 10.0f}; // dz/dB = A^T @ W - float exp_grad_w[] = {1.0f, 2.0f, 3.0f, 4.0f}; // dz/dW = A @ B = A - + float exp_grad_a[] = {2.0f, 1.0f, 1.0f, 2.0f}; // dz/dA = W + float exp_grad_b[] = {6.0f, 6.0f, 10.0f, 10.0f}; // dz/dB = A^T @ W + float exp_grad_w[] = {1.0f, 2.0f, 3.0f, 4.0f}; // dz/dW = A @ B = A + Tensor A = create_test_tensor(a_shape, a_data, true); Tensor B = create_test_tensor(b_shape, b_data, true); Tensor W = create_test_tensor(w_shape, w_data, true); - + Tensor AB = Tensor_matmul(A, B); // AB = A (since B is identity) Tensor prod = Tensor_mul(AB, W); // prod = A * W - Tensor z = Tensor_sum(prod); // z = sum(A * W) (scalar) - + Tensor z = Tensor_sum(prod); // z = sum(A * W) (scalar) + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad_a = create_test_tensor(a_shape, exp_grad_a, false); Tensor expected_grad_b = create_test_tensor(b_shape, exp_grad_b, false); Tensor expected_grad_w = create_test_tensor(w_shape, exp_grad_w, false); diff --git a/tests/Backward/test_max_backward.c b/tests/Backward/test_max_backward.c index ec8c7c0..77b8722 100644 --- a/tests/Backward/test_max_backward.c +++ b/tests/Backward/test_max_backward.c @@ -6,7 +6,7 @@ void test_max_backward() { const char* op_name = "max_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Vector with a unique maximum value @@ -15,13 +15,13 @@ void test_max_backward() { TensorShape v_shape = {3}; float data[] = {2.0f, 8.0f, 5.0f}; float exp_grad[] = {0.0f, 1.0f, 0.0f}; - + Tensor t = create_test_tensor(v_shape, data, true); Tensor z = Tensor_max(t); - + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } @@ -32,13 +32,13 @@ void test_max_backward() { TensorShape v_shape = {4}; float data[] = {9.0f, 3.0f, 9.0f, 1.0f}; float exp_grad[] = {0.5f, 0.0f, 0.5f, 0.0f}; - + Tensor t = create_test_tensor(v_shape, data, true); Tensor z = Tensor_max(t); - + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } @@ -49,13 +49,13 @@ void test_max_backward() { TensorShape m_shape = {2, 2}; float data[] = {1.0f, 2.0f, 10.0f, 4.0f}; float exp_grad[] = {0.0f, 0.0f, 1.0f, 0.0f}; - + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_max(t); - + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } @@ -67,7 +67,7 @@ void test_max_backward() { TensorShape s_shape = {1}; float x_data[] = {1.0f, 5.0f, 2.0f}; float y_data[] = {4.0f}; - + // Let m = max(x). z = m * y. 
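/*
 * General convention behind all of these expectations: max() routes the
 * upstream gradient only to the position(s) holding the maximum value,
 * and when the maximum is attained at several positions the gradient is
 * split equally among them (hence the 0.5f/0.5f case above). A minimal
 * sketch of that rule; max_all_backward_ref is a hypothetical helper,
 * not the cten implementation:
 */
void max_all_backward_ref(const float* x, int n, float upstream, float* dx) {
    float m = x[0];
    for(int i = 1; i < n; i++)
        if(x[i] > m) m = x[i];
    /* exact float equality is intentional: ties share the max value */
    int ties = 0;
    for(int i = 0; i < n; i++)
        if(x[i] == m) ties++;
    for(int i = 0; i < n; i++)
        dx[i] = (x[i] == m) ? upstream / (float)ties : 0.0f;
}
/* For x = {1, 5, 2} with upstream dz/dm = y = 4 this yields {0, 4, 0},
 * which is exactly the derivation continued below. */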
// dz/dx = dz/dm * dm/dx // dz/dm = y = 4.0 @@ -76,21 +76,31 @@ void test_max_backward() { float exp_grad_x[] = {0.0f, 4.0f, 0.0f}; // dz/dy = m = 5.0 float exp_grad_y[] = {5.0f}; - + Tensor x = create_test_tensor(v_shape, x_data, true); Tensor y = create_test_tensor(s_shape, y_data, true); - - Tensor m = Tensor_max(x); // m = 5.0 - Tensor z = Tensor_mul(m, y); // z = 20.0 - + + Tensor m = Tensor_max(x); // m = 5.0 + Tensor z = Tensor_mul(m, y); // z = 20.0 + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad_x_tensor = create_test_tensor(v_shape, exp_grad_x, false); Tensor expected_grad_y_tensor = create_test_tensor(s_shape, exp_grad_y, false); - compare_tensors(&x.node->grad, &expected_grad_x_tensor, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&y.node->grad, &expected_grad_y_tensor, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&x.node->grad, + &expected_grad_x_tensor, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&y.node->grad, + &expected_grad_y_tensor, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Test Case 5: Gradient of max over a dimension (dim=1) @@ -103,10 +113,10 @@ void test_max_backward() { Tensor t = create_test_tensor(m_shape, data, true); TensorMaxMinResult max_res = Tensor_max(t, 1); Tensor loss = Tensor_sum(max_res.values); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } @@ -121,10 +131,10 @@ void test_max_backward() { Tensor t = create_test_tensor(m_shape, data, true); TensorMaxMinResult max_res = Tensor_max(t, 0); Tensor loss = Tensor_sum(max_res.values); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } @@ -147,7 +157,7 @@ void test_max_backward() { Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } diff --git a/tests/Backward/test_mean_backward.c b/tests/Backward/test_mean_backward.c index 5c53efa..1803376 100644 --- a/tests/Backward/test_mean_backward.c +++ b/tests/Backward/test_mean_backward.c @@ -6,7 +6,7 @@ void test_mean_backward() { const char* op_name = "mean_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Mean all elements backward @@ -16,48 +16,71 @@ void test_mean_backward() { { TensorShape v_shape = {3}; float data[] = {1.0f, 2.0f, 3.0f}; - float exp_grad[] = {1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f}; - + float exp_grad[] = {1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f}; + Tensor t = create_test_tensor(v_shape, data, true); Tensor z = Tensor_mean(t); // mean of all elements - + Tensor_backward(z, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Matrix mean all { TensorShape m_shape = {2, 3}; float data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - float exp_grad[] = {1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f, 1.0f/6.0f}; - + float exp_grad[] 
= + {1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f, 1.0f / 6.0f}; + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_mean(t); // mean of all elements - + Tensor_backward(z, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Sub-test 3: 3D tensor mean all { TensorShape tensor3d_shape = {2, 2, 2}; float data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; - float exp_grad[] = {1.0f/8.0f, 1.0f/8.0f, 1.0f/8.0f, 1.0f/8.0f, 1.0f/8.0f, 1.0f/8.0f, 1.0f/8.0f, 1.0f/8.0f}; - + float exp_grad[] = {1.0f / 8.0f, + 1.0f / 8.0f, + 1.0f / 8.0f, + 1.0f / 8.0f, + 1.0f / 8.0f, + 1.0f / 8.0f, + 1.0f / 8.0f, + 1.0f / 8.0f}; + Tensor t = create_test_tensor(tensor3d_shape, data, true); Tensor z = Tensor_mean(t); // mean of all elements - + Tensor_backward(z, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(tensor3d_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } } @@ -69,33 +92,45 @@ void test_mean_backward() { TensorShape m_shape = {2, 3}; float data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float exp_grad[] = {0.5f, 0.5f, 0.5f, 0.5f, 0.5f, 0.5f}; // 1/2 for each element - + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_mean(t, 0); // mean along dim 0 Tensor l = Tensor_sum(z); Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: 3D tensor mean along dim 0 { TensorShape tensor3d_shape = {2, 3, 4}; float data[24]; - for (int i = 0; i < 24; i++) data[i] = (float)(i + 1); + for(int i = 0; i < 24; i++) + data[i] = (float)(i + 1); float exp_grad[24]; - for (int i = 0; i < 24; i++) exp_grad[i] = 0.5f; // 1/2 for each element - + for(int i = 0; i < 24; i++) + exp_grad[i] = 0.5f; // 1/2 for each element + Tensor t = create_test_tensor(tensor3d_shape, data, true); Tensor z = Tensor_mean(t, 0); // mean along dim 0 Tensor l = Tensor_sum(z); Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(tensor3d_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -106,34 +141,51 @@ void test_mean_backward() { { TensorShape m_shape = {2, 3}; float data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - float exp_grad[] = {1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f}; // 1/3 for each element - + float exp_grad[] = {1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f}; // 1/3 for each element + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_mean(t, 1); // mean along dim 1 Tensor l = Tensor_sum(z); Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, 
+ &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: 3D tensor mean along dim 1 { TensorShape tensor3d_shape = {2, 3, 4}; float data[24]; - for (int i = 0; i < 24; i++) data[i] = (float)(i + 1); + for(int i = 0; i < 24; i++) + data[i] = (float)(i + 1); float exp_grad[24]; - for (int i = 0; i < 24; i++) exp_grad[i] = 1.0f/3.0f; // 1/3 for each element - + for(int i = 0; i < 24; i++) + exp_grad[i] = 1.0f / 3.0f; // 1/3 for each element + Tensor t = create_test_tensor(tensor3d_shape, data, true); Tensor z = Tensor_mean(t, 1); // mean along dim 1 Tensor l = Tensor_sum(z); Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(tensor3d_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -144,18 +196,25 @@ void test_mean_backward() { { TensorShape tensor3d_shape = {2, 3, 4}; float data[24]; - for (int i = 0; i < 24; i++) data[i] = (float)(i + 1); + for(int i = 0; i < 24; i++) + data[i] = (float)(i + 1); float exp_grad[24]; - for (int i = 0; i < 24; i++) exp_grad[i] = 0.25f; // 1/4 for each element - + for(int i = 0; i < 24; i++) + exp_grad[i] = 0.25f; // 1/4 for each element + Tensor t = create_test_tensor(tensor3d_shape, data, true); Tensor z = Tensor_mean(t, 2); // mean along dim 2 Tensor l = Tensor_sum(z); Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(tensor3d_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } } @@ -165,49 +224,65 @@ void test_mean_backward() { // Sub-test 1: Random matrix mean along dim 0 { TensorShape m_shape = {3, 4}; - float data[] = { - 2.5f, 1.3f, 4.8f, 3.2f, - 0.7f, 5.1f, 2.9f, 6.4f, - 3.6f, 1.8f, 4.2f, 0.9f - }; - float exp_grad[] = { - 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, - 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, - 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f - }; - + float data[] = {2.5f, 1.3f, 4.8f, 3.2f, 0.7f, 5.1f, 2.9f, 6.4f, 3.6f, 1.8f, 4.2f, 0.9f}; + float exp_grad[] = {1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f, + 1.0f / 3.0f}; + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_mean(t, 0); // mean along dim 0 Tensor l = Tensor_sum(z); Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Random matrix mean along dim 1 { TensorShape m_shape = {3, 4}; - float data[] = { - 2.5f, 1.3f, 4.8f, 3.2f, - 0.7f, 5.1f, 2.9f, 6.4f, - 3.6f, 1.8f, 4.2f, 0.9f - }; - float exp_grad[] = { - 0.25f, 0.25f, 0.25f, 0.25f, - 0.25f, 0.25f, 0.25f, 0.25f, - 0.25f, 0.25f, 0.25f, 0.25f - }; - + float data[] = {2.5f, 1.3f, 4.8f, 3.2f, 0.7f, 5.1f, 2.9f, 6.4f, 3.6f, 1.8f, 4.2f, 0.9f}; + float exp_grad[] = {0.25f, + 0.25f, + 0.25f, + 0.25f, + 0.25f, + 0.25f, + 0.25f, + 0.25f, + 0.25f, + 0.25f, + 0.25f, + 0.25f}; + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_mean(t, 1); // mean along dim 1 Tensor l = Tensor_sum(z); 
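/*
 * All of these mean-dim cases reduce to one identity: a mean over a
 * dimension of size k followed by sum() sends a gradient of 1/k to
 * every input element, regardless of the data values. A minimal sketch
 * of how the expected-gradient arrays are constructed;
 * mean_dim_backward_expected is a hypothetical helper used only to
 * illustrate the tests' arithmetic:
 */
void mean_dim_backward_expected(float* dx, int numel, int k) {
    for(int i = 0; i < numel; i++)
        dx[i] = 1.0f / (float)k; /* d(sum(mean_k(x))) / dx_i */
}
/* Here dim 1 of a {3, 4} matrix has k = 4, so each entry expects 0.25f;
 * the dim-0 case above uses k = 3, i.e. 1/3 per entry. */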
Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -219,22 +294,34 @@ void test_mean_backward() { TensorShape m_shape = {2, 3}; float a_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float b_data[] = {0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f}; - float exp_grad_a[] = {0.5f/2.0f, 1.5f/2.0f, 2.5f/2.0f, 3.5f/2.0f, 4.5f/2.0f, 5.5f/2.0f}; - float exp_grad_b[] = {1.0f/2.0f, 2.0f/2.0f, 3.0f/2.0f, 4.0f/2.0f, 5.0f/2.0f, 6.0f/2.0f}; - + float exp_grad_a[] = + {0.5f / 2.0f, 1.5f / 2.0f, 2.5f / 2.0f, 3.5f / 2.0f, 4.5f / 2.0f, 5.5f / 2.0f}; + float exp_grad_b[] = + {1.0f / 2.0f, 2.0f / 2.0f, 3.0f / 2.0f, 4.0f / 2.0f, 5.0f / 2.0f, 6.0f / 2.0f}; + Tensor a = create_test_tensor(m_shape, a_data, true); Tensor b = create_test_tensor(m_shape, b_data, true); Tensor prod = Tensor_mul(a, b); Tensor z = Tensor_mean(prod, 0); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad_a = create_test_tensor(m_shape, exp_grad_a, false); Tensor expected_grad_b = create_test_tensor(m_shape, exp_grad_b, false); - compare_tensors(&a.node->grad, &expected_grad_a, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&b.node->grad, &expected_grad_b, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&a.node->grad, + &expected_grad_a, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&b.node->grad, + &expected_grad_b, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Mean(a+b, dim=1) @@ -242,24 +329,35 @@ void test_mean_backward() { TensorShape m_shape = {2, 3}; float a_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float b_data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f}; - float exp_grad_a[] = {1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f}; - float exp_grad_b[] = {1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f}; - + float exp_grad_a[] = + {1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f}; + float exp_grad_b[] = + {1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f, 1.0f / 3.0f}; + Tensor a = create_test_tensor(m_shape, a_data, true); Tensor b = create_test_tensor(m_shape, b_data, true); Tensor sum_ab = Tensor_add(a, b); Tensor z = Tensor_mean(sum_ab, 1); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad_a = create_test_tensor(m_shape, exp_grad_a, false); Tensor expected_grad_b = create_test_tensor(m_shape, exp_grad_b, false); - compare_tensors(&a.node->grad, &expected_grad_a, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&b.node->grad, &expected_grad_b, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&a.node->grad, + &expected_grad_a, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&b.node->grad, + &expected_grad_b, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } - } cten_free(pool_id); } \ No newline at end of file diff --git a/tests/Backward/test_min_backward.c b/tests/Backward/test_min_backward.c index a5dad23..5a6d8b1 100644 --- a/tests/Backward/test_min_backward.c +++ b/tests/Backward/test_min_backward.c @@ -6,7 +6,7 @@ void test_min_backward() { const char* op_name = "min_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Vector with a unique minimum value @@ -15,13 
+15,13 @@ void test_min_backward() { TensorShape v_shape = {3}; float data[] = {8.0f, 2.0f, 5.0f}; float exp_grad[] = {0.0f, 1.0f, 0.0f}; - + Tensor t = create_test_tensor(v_shape, data, true); Tensor z = Tensor_min(t); - + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } @@ -32,13 +32,13 @@ void test_min_backward() { TensorShape v_shape = {4}; float data[] = {9.0f, 1.0f, 5.0f, 1.0f}; float exp_grad[] = {0.0f, 0.5f, 0.0f, 0.5f}; - + Tensor t = create_test_tensor(v_shape, data, true); Tensor z = Tensor_min(t); - + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } @@ -49,13 +49,13 @@ void test_min_backward() { TensorShape m_shape = {2, 2}; float data[] = {10.0f, 2.0f, 8.0f, 4.0f}; float exp_grad[] = {0.0f, 1.0f, 0.0f, 0.0f}; - + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_min(t); - + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } @@ -67,7 +67,7 @@ void test_min_backward() { TensorShape s_shape = {1}; float x_data[] = {8.0f, 3.0f, 9.0f}; float y_data[] = {10.0f}; - + // Let m = min(x). z = m + y. // dz/dx = dz/dm * dm/dx // dz/dm = 1.0 (from add op) @@ -76,21 +76,31 @@ void test_min_backward() { float exp_grad_x[] = {0.0f, 1.0f, 0.0f}; // dz/dy = 1.0 float exp_grad_y[] = {1.0f}; - + Tensor x = create_test_tensor(v_shape, x_data, true); Tensor y = create_test_tensor(s_shape, y_data, true); - - Tensor m = Tensor_min(x); // m = 3.0 - Tensor z = Tensor_add(m, y); // z = 13.0 - + + Tensor m = Tensor_min(x); // m = 3.0 + Tensor z = Tensor_add(m, y); // z = 13.0 + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad_x_tensor = create_test_tensor(v_shape, exp_grad_x, false); Tensor expected_grad_y_tensor = create_test_tensor(s_shape, exp_grad_y, false); - compare_tensors(&x.node->grad, &expected_grad_x_tensor, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&y.node->grad, &expected_grad_y_tensor, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&x.node->grad, + &expected_grad_x_tensor, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&y.node->grad, + &expected_grad_y_tensor, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Test Case 5: Gradient of min over a dimension (dim=1) @@ -103,10 +113,10 @@ void test_min_backward() { Tensor t = create_test_tensor(m_shape, data, true); TensorMaxMinResult min_res = Tensor_min(t, 1); Tensor loss = Tensor_sum(min_res.values); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } @@ -121,10 +131,10 @@ void test_min_backward() { Tensor t = create_test_tensor(m_shape, data, true); TensorMaxMinResult min_res = Tensor_min(t, 0); Tensor loss = Tensor_sum(min_res.values); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, 
TEST_FLOAT_TOLERANCE); } @@ -147,10 +157,10 @@ void test_min_backward() { Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + cten_free(pool_id); } \ No newline at end of file diff --git a/tests/Backward/test_mul_backward.c b/tests/Backward/test_mul_backward.c index 89739e0..59da200 100644 --- a/tests/Backward/test_mul_backward.c +++ b/tests/Backward/test_mul_backward.c @@ -6,7 +6,7 @@ void test_mul_backward() { const char* op_name = "mul_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Scalar backward (1x1 tensors) @@ -20,41 +20,61 @@ void test_mul_backward() { float d2[] = {3.0f}; float exp_grad1[] = {3.0f}; // dz/dx = y = 3.0 float exp_grad2[] = {2.0f}; // dz/dy = x = 2.0 - + Tensor t1 = create_test_tensor(s_shape, d1, true); Tensor t2 = create_test_tensor(s_shape, d2, true); Tensor z = Tensor_mul(t1, t2); // z = 6.0 - + // Scalar backward Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad1 = create_test_tensor(s_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(s_shape, exp_grad2, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad2, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad2, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Different scalar values { - float d1[] = {4.0f}; + float d1[] = {4.0f}; float d2[] = {5.0f}; float exp_grad1[] = {5.0f}; // dz/dx = y = 5.0 float exp_grad2[] = {4.0f}; // dz/dy = x = 4.0 - + Tensor t1 = create_test_tensor(s_shape, d1, true); Tensor t2 = create_test_tensor(s_shape, d2, true); Tensor z = Tensor_mul(t1, t2); // z = 20.0 - + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad1 = create_test_tensor(s_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(s_shape, exp_grad2, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad2, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad2, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -66,15 +86,15 @@ void test_mul_backward() { float d2[] = {4.0f, 5.0f}; float exp_grad1[] = {4.0f, 5.0f}; // dz/dx = y = [4, 5] float exp_grad2[] = {2.0f, 3.0f}; // dz/dy = x = [2, 3] - + Tensor t1 = create_test_tensor(v_shape, d1, true); Tensor t2 = create_test_tensor(v_shape, d2, true); Tensor z = Tensor_mul(t1, t2); // z = [8, 15] - Tensor z_sum = Tensor_sum(z); // sum to scalar for backward - + Tensor z_sum = Tensor_sum(z); // sum to scalar for backward + Tensor grad_dummy = {0}; Tensor_backward(z_sum, grad_dummy); - + Tensor expected_grad1 = create_test_tensor(v_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(v_shape, exp_grad2, false); @@ -90,15 +110,15 @@ void test_mul_backward() { float d2[] = {5.0f, 6.0f, 7.0f, 8.0f}; float exp_grad1[] = {5.0f, 6.0f, 7.0f, 8.0f}; // dz/dx = y float exp_grad2[] = {1.0f, 2.0f, 3.0f, 4.0f}; // dz/dy = x - + Tensor t1 = 
create_test_tensor(m_shape, d1, true); Tensor t2 = create_test_tensor(m_shape, d2, true); Tensor z = Tensor_mul(t1, t2); // z = [[5, 12], [21, 32]] - Tensor z_sum = Tensor_sum(z); // sum to scalar for backward - + Tensor z_sum = Tensor_sum(z); // sum to scalar for backward + Tensor grad_dummy = {0}; Tensor_backward(z_sum, grad_dummy); - + Tensor expected_grad1 = create_test_tensor(m_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(m_shape, exp_grad2, false); @@ -115,20 +135,30 @@ void test_mul_backward() { float scalar_data[] = {4.0f}; float exp_grad_vec[] = {4.0f, 4.0f}; // dz/dx = scalar broadcasted = [4, 4] float exp_grad_scalar[] = {5.0f}; // dz/dy = sum(vec) = 2 + 3 = 5 - + Tensor t_vec = create_test_tensor(vec_shape, vec_data, true); Tensor t_scalar = create_test_tensor(scalar_shape, scalar_data, true); Tensor z = Tensor_mul(t_vec, t_scalar); // z = [8, 12] - Tensor z_sum = Tensor_sum(z); // sum to scalar for backward - + Tensor z_sum = Tensor_sum(z); // sum to scalar for backward + Tensor grad_dummy = {0}; Tensor_backward(z_sum, grad_dummy); - + Tensor expected_grad_vec = create_test_tensor(vec_shape, exp_grad_vec, false); Tensor expected_grad_scalar = create_test_tensor(scalar_shape, exp_grad_scalar, false); - compare_tensors(&t_vec.node->grad, &expected_grad_vec, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_scalar.node->grad, &expected_grad_scalar, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_vec.node->grad, + &expected_grad_vec, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_scalar.node->grad, + &expected_grad_scalar, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Test Case 5: Complex computation graph (chained operations) @@ -139,23 +169,23 @@ void test_mul_backward() { float x_data[] = {1.0f, 2.0f}; float y_data[] = {3.0f}; float w_data[] = {2.0f, 3.0f}; - + // Expected gradients for z = sum((x + y) * w) float exp_grad_x[] = {2.0f, 3.0f}; // dz/dx = w = [2, 3] float exp_grad_y[] = {5.0f}; // dz/dy = sum(w) = 2 + 3 = 5 float exp_grad_w[] = {4.0f, 5.0f}; // dz/dw = (x + y) = [4, 5] - + Tensor x = create_test_tensor(v_shape, x_data, true); Tensor y = create_test_tensor(s_shape, y_data, true); Tensor w = create_test_tensor(v_shape, w_data, true); - - Tensor sum = Tensor_add(x, y); // sum = [4, 5] + + Tensor sum = Tensor_add(x, y); // sum = [4, 5] Tensor prod = Tensor_mul(sum, w); // prod = [8, 15] - Tensor z = Tensor_sum(prod); // z = 23 (scalar) - + Tensor z = Tensor_sum(prod); // z = 23 (scalar) + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad_x = create_test_tensor(v_shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(s_shape, exp_grad_y, false); Tensor expected_grad_w = create_test_tensor(v_shape, exp_grad_w, false); diff --git a/tests/Backward/test_pow_backward.c b/tests/Backward/test_pow_backward.c index 39737e2..a9e7e28 100644 --- a/tests/Backward/test_pow_backward.c +++ b/tests/Backward/test_pow_backward.c @@ -7,7 +7,7 @@ void test_pow_backward() { const char* op_name = "pow_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Simple element-wise vector power @@ -21,16 +21,16 @@ void test_pow_backward() { // loss = sum(z) = 7.4621 float exp_grad_x[] = {2.568312f, 1.116224f}; float exp_grad_y[] = {7.218272f, -0.261589f}; - + Tensor x = create_test_tensor(shape, x_data, true); Tensor y = create_test_tensor(shape, y_data, true); - + Tensor z = Tensor_pow(x, y); Tensor loss = 
Tensor_sum(z); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad_x = create_test_tensor(shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(shape, exp_grad_y, false); @@ -56,10 +56,10 @@ void test_pow_backward() { Tensor z = Tensor_pow(x, y); Tensor loss = Tensor_sum(z); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad_x = create_test_tensor(x_shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(y_shape, exp_grad_y, false); @@ -85,10 +85,10 @@ void test_pow_backward() { Tensor z = Tensor_pow(x, y); Tensor loss = Tensor_sum(z); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad_x = create_test_tensor(x_shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(y_shape, exp_grad_y, false); @@ -101,7 +101,7 @@ void test_pow_backward() { const char* tc_name = "pow_edge_cases_backward"; TensorShape shape = {2}; float x_data[] = {0.0f, -2.0f}; - float y_data[] = {2.0f, 3.0f}; // Exponents are integers + float y_data[] = {2.0f, 3.0f}; // Exponents are integers // z = [0^2, (-2)^3] = [0.0, -8.0] // loss = sum(z) = -8.0 @@ -116,13 +116,13 @@ void test_pow_backward() { Tensor x = create_test_tensor(shape, x_data, true); Tensor y = create_test_tensor(shape, y_data, true); - + Tensor z = Tensor_pow(x, y); Tensor loss = Tensor_sum(z); - + Tensor grad_dummy = {0}; Tensor_backward(loss, grad_dummy); - + Tensor expected_grad_x = create_test_tensor(shape, exp_grad_x, false); Tensor expected_grad_y = create_test_tensor(shape, exp_grad_y, false); @@ -137,7 +137,7 @@ void test_pow_backward() { float a_data[] = {1.4839f}; float b_data[] = {2.2687f}; float c_data[] = {0.6194f}; - + // Let d = a^b. Then z = d * c. // Forward: d = 1.4839^2.2687 = 2.4483. 
z = 2.4483 * 0.6194 = 1.5164 // Backward pass (upstream grad for d is c=0.6194): @@ -147,24 +147,39 @@ void test_pow_backward() { float exp_grad_b[] = {0.598515f}; // dz/dc = d = a^b = 2.4483 float exp_grad_c[] = {2.448306f}; - + Tensor a = create_test_tensor(shape, a_data, true); Tensor b = create_test_tensor(shape, b_data, true); Tensor c = create_test_tensor(shape, c_data, true); - + Tensor d = Tensor_pow(a, b); Tensor z = Tensor_mul(d, c); - + Tensor grad_dummy = {0}; Tensor_backward(z, grad_dummy); - + Tensor expected_grad_a_tensor = create_test_tensor(shape, exp_grad_a, false); Tensor expected_grad_b_tensor = create_test_tensor(shape, exp_grad_b, false); Tensor expected_grad_c_tensor = create_test_tensor(shape, exp_grad_c, false); - compare_tensors(&a.node->grad, &expected_grad_a_tensor, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&b.node->grad, &expected_grad_b_tensor, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&c.node->grad, &expected_grad_c_tensor, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&a.node->grad, + &expected_grad_a_tensor, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&b.node->grad, + &expected_grad_b_tensor, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&c.node->grad, + &expected_grad_c_tensor, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } cten_free(pool_id); diff --git a/tests/Backward/test_relu_backward.c b/tests/Backward/test_relu_backward.c index ce4f8d5..3db5883 100644 --- a/tests/Backward/test_relu_backward.c +++ b/tests/Backward/test_relu_backward.c @@ -6,7 +6,7 @@ void test_relu_backward() { const char* op_name = "relu_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Simple ReLU backward @@ -15,35 +15,45 @@ void test_relu_backward() { // Sub-test 1: Scalar ReLU { TensorShape s_shape = {1}; - float d1[] = {2.0f}; // Positive value + float d1[] = {2.0f}; // Positive value float exp_grad1[] = {1.0f}; // Gradient passes through for positive values - + Tensor t1 = create_test_tensor(s_shape, d1, true); Tensor z = nn_relu(t1); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad1 = create_test_tensor(s_shape, exp_grad1, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Scalar ReLU with negative input { TensorShape s_shape = {1}; - float d1[] = {-2.0f}; // Negative value + float d1[] = {-2.0f}; // Negative value float exp_grad1[] = {0.0f}; // Gradient is zero for negative values - + Tensor t1 = create_test_tensor(s_shape, d1, true); Tensor z = nn_relu(t1); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad1 = create_test_tensor(s_shape, exp_grad1, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Sub-test 3: Vector ReLU @@ -51,16 +61,21 @@ void test_relu_backward() { TensorShape v_shape = {4}; float d1[] = {-1.0f, 0.0f, 1.0f, 2.0f}; float exp_grad1[] = {0.0f, 0.0f, 1.0f, 1.0f}; // Gradient is 0 for x <= 0, 1 for x > 0 - + Tensor t1 = create_test_tensor(v_shape, d1, true); Tensor z = nn_relu(t1); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad1 = 
create_test_tensor(v_shape, exp_grad1, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } } @@ -72,16 +87,21 @@ void test_relu_backward() { TensorShape m_shape = {2, 3}; float data[] = {-1.0f, 0.0f, 1.0f, 2.0f, -3.0f, 4.0f}; float exp_grad[] = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f}; - + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = nn_relu(t); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } } @@ -93,15 +113,20 @@ void test_relu_backward() { TensorShape v_shape = {6}; float data[] = {-2.5f, 1.3f, 0.0f, -0.7f, 3.2f, -1.8f}; float exp_grad[] = {0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f}; - + Tensor t = create_test_tensor(v_shape, data, true); Tensor z = nn_relu(t); - Tensor l = Tensor_sum(z); + Tensor l = Tensor_sum(z); Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: 3D tensor with random values @@ -109,15 +134,20 @@ void test_relu_backward() { TensorShape tensor3d_shape = {2, 2, 2}; float data[] = {-1.5f, 2.7f, 0.0f, -3.1f, 4.2f, -0.8f, 1.9f, 0.0f}; float exp_grad[] = {0.0f, 1.0f, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f, 0.0f}; - + Tensor t = create_test_tensor(tensor3d_shape, data, true); Tensor z = nn_relu(t); - Tensor l = Tensor_sum(z); + Tensor l = Tensor_sum(z); Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(tensor3d_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -129,19 +159,28 @@ void test_relu_backward() { TensorShape v_shape = {4}; float data[] = {-1.0f, 0.0f, 1.0f, 2.0f}; float grad_data[] = {0.5f, 1.0f, 1.5f, 2.0f}; - float exp_grad[] = {0.0f, 0.0f, 1.5f, 2.0f}; // Element-wise product of input gradient and ReLU derivative - + float exp_grad[] = { + 0.0f, + 0.0f, + 1.5f, + 2.0f}; // Element-wise product of input gradient and ReLU derivative + Tensor t = create_test_tensor(v_shape, data, true); Tensor z = nn_relu(t); - Tensor l = Tensor_sum(z); + Tensor l = Tensor_sum(z); TensorShape grad_shape = {4}; Tensor grad = create_test_tensor(grad_shape, grad_data, false); - + Tensor_backward(l, grad); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } } @@ -153,57 +192,77 @@ void test_relu_backward() { TensorShape input_shape = {2, 3}; TensorShape weight_shape = {3, 4}; TensorShape bias_shape = {1, 4}; - + float input_data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - float weight_data[] = {0.1f, -0.2f, 0.3f, 0.4f, 0.5f, -0.6f, 0.7f, -0.8f, 0.9f, 1.0f, -1.1f, 1.2f}; + float weight_data[] = + {0.1f, -0.2f, 0.3f, 0.4f, 0.5f, -0.6f, 0.7f, -0.8f, 0.9f, 1.0f, -1.1f, 1.2f}; 
float bias_data[] = {0.1f, -0.1f, 0.2f, -0.2f}; - - float exp_grad_input[] = { - 0.3f, -0.9f, 3.1f, - 0.3f, -0.9f, 3.1f - }; - float exp_grad_weight[] = { - 5.0f, 5.0f, 0.0f, 5.0f, - 7.0f, 7.0f, 0.0f, 7.0f, - 9.0f, 9.0f, 0.0f, 9.0f - }; + + float exp_grad_input[] = {0.3f, -0.9f, 3.1f, 0.3f, -0.9f, 3.1f}; + float exp_grad_weight[] = + {5.0f, 5.0f, 0.0f, 5.0f, 7.0f, 7.0f, 0.0f, 7.0f, 9.0f, 9.0f, 0.0f, 9.0f}; float exp_grad_bias[] = {2.0f, 2.0f, 0.0f, 2.0f}; Tensor input = create_test_tensor(input_shape, input_data, true); Tensor weight = create_test_tensor(weight_shape, weight_data, true); Tensor bias = create_test_tensor(bias_shape, bias_data, true); - + Tensor linear_output = nn_linear(input, weight, bias); Tensor relu_output = nn_relu(linear_output); Tensor sum_output = Tensor_sum(relu_output); - + Tensor_backward(sum_output, (Tensor){0}); - + Tensor expected_grad_input = create_test_tensor(input_shape, exp_grad_input, false); Tensor expected_grad_weight = create_test_tensor(weight_shape, exp_grad_weight, false); Tensor expected_grad_bias = create_test_tensor(bias_shape, exp_grad_bias, false); - compare_tensors(&input.node->grad, &expected_grad_input, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&weight.node->grad, &expected_grad_weight, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&bias.node->grad, &expected_grad_bias, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&input.node->grad, + &expected_grad_input, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&weight.node->grad, + &expected_grad_weight, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&bias.node->grad, + &expected_grad_bias, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } // Sub-test 4: ReLU -> Mean with mixed values { TensorShape m_shape = {2, 3}; float data[] = {-1.0f, 0.0f, 1.0f, 2.0f, -3.0f, 4.0f}; - float exp_grad[] = {0.0f, 0.0f, 1.0f/6.0f, 1.0f/6.0f, 0.0f, 1.0f/6.0f}; // 1/6 for positive values, 0 for negative - + float exp_grad[] = {0.0f, + 0.0f, + 1.0f / 6.0f, + 1.0f / 6.0f, + 0.0f, + 1.0f / 6.0f}; // 1/6 for positive values, 0 for negative + Tensor t = create_test_tensor(m_shape, data, true); Tensor relu_output = nn_relu(t); Tensor mean_output = Tensor_mean(relu_output); Tensor l = Tensor_sum(mean_output); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 4, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 4, + TEST_FLOAT_TOLERANCE); } } diff --git a/tests/Backward/test_softmax_backward.c b/tests/Backward/test_softmax_backward.c index 1c599fb..c4dfaa6 100644 --- a/tests/Backward/test_softmax_backward.c +++ b/tests/Backward/test_softmax_backward.c @@ -6,54 +6,53 @@ void test_softmax_backward() { const char* op_name = "softmax_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1 { const char* tc_name = "1d_shape_6_dim_0"; - TensorShape shape_1 = { 6, 0, 0, 0 }; + TensorShape shape_1 = {6, 0, 0, 0}; int dim_1 = 0; - float input_data_1[] = { - -0.600663f, 0.102573f, 2.162825f, -0.152183f, 0.226799f, -1.075410f - }; - float upstream_grad_data_1[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; - float expected_grad_1[] = { - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; - + float input_data_1[] = + {-0.600663f, 0.102573f, 2.162825f, -0.152183f, 0.226799f, 
-1.075410f}; + float upstream_grad_data_1[] = + {1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}; + float expected_grad_1[] = + {0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}; + Tensor t_input_1 = create_test_tensor(shape_1, input_data_1, true); Tensor t_upstream_grad_1 = create_test_tensor(shape_1, upstream_grad_data_1, false); Tensor t_expected_grad_1 = create_test_tensor(shape_1, expected_grad_1, false); Tensor t_output_1 = nn_softmax(t_input_1, dim_1); Tensor_backward(t_output_1, t_upstream_grad_1); - - compare_tensors(&t_input_1.node->grad, &t_expected_grad_1, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + + compare_tensors(&t_input_1.node->grad, + &t_expected_grad_1, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Test Case 2 { const char* tc_name = "2d_shape_4_5_dim_0"; - TensorShape shape_2 = { 4, 5, 0, 0 }; + TensorShape shape_2 = {4, 5, 0, 0}; int dim_2 = 0; - float input_data_2[] = { - -0.931344f, -0.136981f, 1.507272f, 0.304199f, 0.385996f, -0.380736f, -0.107786f, 0.401818f, - -1.267200f, 0.553252f, 0.288499f, -0.659821f, 0.094152f, 0.391888f, -1.306159f, 0.130279f, - -1.271632f, 1.041832f, 0.636306f, -0.635559f - }; - float upstream_grad_data_2[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; - float expected_grad_2[] = { - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; + float input_data_2[] = {-0.931344f, -0.136981f, 1.507272f, 0.304199f, 0.385996f, + -0.380736f, -0.107786f, 0.401818f, -1.267200f, 0.553252f, + 0.288499f, -0.659821f, 0.094152f, 0.391888f, -1.306159f, + 0.130279f, -1.271632f, 1.041832f, 0.636306f, -0.635559f}; + float upstream_grad_data_2[] = {1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}; + float expected_grad_2[] = {0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}; Tensor t_input_2 = create_test_tensor(shape_2, input_data_2, true); Tensor t_upstream_grad_2 = create_test_tensor(shape_2, upstream_grad_data_2, false); @@ -61,445 +60,495 @@ void test_softmax_backward() { Tensor t_output_2 = nn_softmax(t_input_2, dim_2); Tensor_backward(t_output_2, t_upstream_grad_2); - - compare_tensors(&t_input_2.node->grad, &t_expected_grad_2, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + + compare_tensors(&t_input_2.node->grad, + &t_expected_grad_2, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Test Case 3 { const char* tc_name = "2d_shape_4_5_dim_1"; - TensorShape shape_3 = { 4, 5, 0, 0 }; + TensorShape shape_3 = {4, 5, 0, 0}; int dim_3 = 1; - float input_data_3[] = { - 0.422733f, 0.827574f, -0.601589f, 0.578967f, 0.538468f, 0.151745f, 0.134477f, 3.022047f, - -0.902687f, -2.489207f, 0.618169f, 0.077335f, 0.069497f, -3.296276f, -0.235217f, -1.696956f, - -1.026716f, -2.963197f, 0.455785f, 0.433459f - }; - float upstream_grad_data_3[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 
1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; - float expected_grad_3[] = { - -0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; + float input_data_3[] = {0.422733f, 0.827574f, -0.601589f, 0.578967f, 0.538468f, + 0.151745f, 0.134477f, 3.022047f, -0.902687f, -2.489207f, + 0.618169f, 0.077335f, 0.069497f, -3.296276f, -0.235217f, + -1.696956f, -1.026716f, -2.963197f, 0.455785f, 0.433459f}; + float upstream_grad_data_3[] = {1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}; + float expected_grad_3[] = {-0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f}; Tensor t_input_3 = create_test_tensor(shape_3, input_data_3, true); Tensor t_upstream_grad_3 = create_test_tensor(shape_3, upstream_grad_data_3, false); Tensor t_expected_grad_3 = create_test_tensor(shape_3, expected_grad_3, false); - + Tensor t_output_3 = nn_softmax(t_input_3, dim_3); Tensor_backward(t_output_3, t_upstream_grad_3); - compare_tensors(&t_input_3.node->grad, &t_expected_grad_3, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_input_3.node->grad, + &t_expected_grad_3, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Test Case 4 { const char* tc_name = "3d_shape_3_4_5_dim_0"; - TensorShape shape_4 = { 3, 4, 5, 0 }; + TensorShape shape_4 = {3, 4, 5, 0}; int dim_4 = 0; float input_data_4[] = { - -0.937705f, 0.436715f, -0.442776f, 0.581859f, 0.959965f, 0.070535f, 0.813318f, -0.907422f, - 0.670672f, 1.866049f, -0.428506f, -1.389786f, -0.316659f, -0.013470f, -1.913688f, 0.412920f, - -0.280394f, 1.159683f, -1.445249f, 2.092504f, 1.650078f, 1.463567f, -1.784309f, 2.217201f, - 0.591906f, 0.573348f, 0.598191f, -2.007727f, -0.169107f, 1.320956f, -1.388490f, -1.082718f, - -0.444077f, 0.271516f, 0.602838f, -1.012676f, -0.133792f, -1.590335f, -0.770685f, -0.203343f, - -0.415268f, 0.273560f, -1.768764f, -0.250524f, 1.047747f, -0.093305f, -0.367960f, -1.050634f, - 1.427392f, -0.935025f, 0.674888f, -0.942771f, -0.600548f, 0.430824f, -1.490048f, 0.312752f, - 0.518365f, 0.536293f, -0.770158f, 1.417278f - }; + -0.937705f, 0.436715f, -0.442776f, 0.581859f, 0.959965f, 0.070535f, 0.813318f, + -0.907422f, 0.670672f, 1.866049f, -0.428506f, -1.389786f, -0.316659f, -0.013470f, + -1.913688f, 0.412920f, -0.280394f, 1.159683f, -1.445249f, 2.092504f, 1.650078f, + 1.463567f, -1.784309f, 2.217201f, 0.591906f, 0.573348f, 0.598191f, -2.007727f, + -0.169107f, 1.320956f, -1.388490f, -1.082718f, -0.444077f, 0.271516f, 0.602838f, + -1.012676f, -0.133792f, -1.590335f, -0.770685f, -0.203343f, -0.415268f, 0.273560f, + -1.768764f, -0.250524f, 1.047747f, -0.093305f, -0.367960f, -1.050634f, 1.427392f, + -0.935025f, 0.674888f, -0.942771f, -0.600548f, 0.430824f, -1.490048f, 0.312752f, + 0.518365f, 0.536293f, -0.770158f, 1.417278f}; float upstream_grad_data_4[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 
1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f}; float expected_grad_4[] = { - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f}; Tensor t_input_4 = create_test_tensor(shape_4, input_data_4, true); Tensor t_upstream_grad_4 = create_test_tensor(shape_4, upstream_grad_data_4, false); Tensor t_expected_grad_4 = create_test_tensor(shape_4, expected_grad_4, false); - + Tensor t_output_4 = nn_softmax(t_input_4, dim_4); Tensor_backward(t_output_4, t_upstream_grad_4); - compare_tensors(&t_input_4.node->grad, &t_expected_grad_4, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_input_4.node->grad, + &t_expected_grad_4, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Test Case 5 { const char* tc_name = "3d_shape_3_4_5_dim_1"; - TensorShape shape_5 = { 3, 4, 5, 0 }; + TensorShape shape_5 = {3, 4, 5, 0}; int dim_5 = 1; float input_data_5[] = { - -0.682165f, 0.708709f, -0.939215f, -0.572041f, -0.143642f, -1.416953f, -1.060691f, 1.419186f, - -0.077845f, 1.187653f, -0.821140f, 0.985510f, -0.250138f, 1.570574f, -1.418109f, 0.249006f, - 0.265434f, 1.384456f, 
0.865765f, 0.086445f, 1.558527f, -0.820017f, 0.425729f, -0.006672f, - 0.967204f, -1.171084f, -1.465802f, 1.057096f, -0.162689f, 0.113171f, -0.030453f, -0.651431f, - -2.409602f, -1.468620f, -0.613512f, -0.988311f, -0.923162f, -1.431130f, -0.911353f, 0.249657f, - -1.524619f, 0.671706f, -0.057979f, -0.216199f, -0.358070f, 1.211571f, 0.170172f, -0.038723f, - -0.953913f, 0.093318f, -0.682872f, 0.254221f, 1.941541f, 1.017158f, -0.576129f, -0.601990f, - 1.321309f, -0.083949f, -1.165818f, -0.889365f - }; + -0.682165f, 0.708709f, -0.939215f, -0.572041f, -0.143642f, -1.416953f, -1.060691f, + 1.419186f, -0.077845f, 1.187653f, -0.821140f, 0.985510f, -0.250138f, 1.570574f, + -1.418109f, 0.249006f, 0.265434f, 1.384456f, 0.865765f, 0.086445f, 1.558527f, + -0.820017f, 0.425729f, -0.006672f, 0.967204f, -1.171084f, -1.465802f, 1.057096f, + -0.162689f, 0.113171f, -0.030453f, -0.651431f, -2.409602f, -1.468620f, -0.613512f, + -0.988311f, -0.923162f, -1.431130f, -0.911353f, 0.249657f, -1.524619f, 0.671706f, + -0.057979f, -0.216199f, -0.358070f, 1.211571f, 0.170172f, -0.038723f, -0.953913f, + 0.093318f, -0.682872f, 0.254221f, 1.941541f, 1.017158f, -0.576129f, -0.601990f, + 1.321309f, -0.083949f, -1.165818f, -0.889365f}; float upstream_grad_data_5[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f}; float expected_grad_5[] = { - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, -0.000000f - }; + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, + 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, -0.000000f}; Tensor t_input_5 = create_test_tensor(shape_5, input_data_5, true); Tensor t_upstream_grad_5 = create_test_tensor(shape_5, upstream_grad_data_5, false); Tensor t_expected_grad_5 = create_test_tensor(shape_5, expected_grad_5, false); - + Tensor t_output_5 = nn_softmax(t_input_5, dim_5); Tensor_backward(t_output_5, t_upstream_grad_5); - compare_tensors(&t_input_5.node->grad, &t_expected_grad_5, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_input_5.node->grad, + &t_expected_grad_5, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Test Case 6 { const char* tc_name = "3d_shape_3_4_5_dim_2"; - TensorShape shape_6 = { 3, 4, 5, 0 }; + TensorShape shape_6 = {3, 4, 5, 0}; int dim_6 = 2; float input_data_6[] = { - 2.022276f, 0.775352f, -0.713080f, -0.876044f, 0.573575f, -1.185992f, -2.174689f, 0.058702f, - -1.239167f, -1.175520f, 1.434582f, 0.434983f, 1.004002f, 0.471956f, -0.667871f, 0.400037f, - -0.211023f, -1.595166f, -0.071106f, -0.729924f, -0.854266f, -0.024481f, -0.311798f, 0.061049f, - -2.024275f, -0.420349f, -0.375486f, 1.340385f, 0.692551f, -0.692819f, 0.690899f, 0.813157f, - 1.401130f, 1.050018f, -1.347116f, -0.798472f, 0.679221f, 1.154209f, -0.514578f, -1.520411f, - 0.044336f, 0.217133f, 1.322336f, -0.533918f, -0.274530f, -0.461149f, -1.520405f, 0.046148f, - 0.347391f, -0.668243f, -0.895559f, -0.570172f, 0.487236f, -1.055238f, -1.243746f, -0.292935f, - 1.089170f, 1.661516f, 1.185564f, 2.789485f - }; + 2.022276f, 0.775352f, -0.713080f, -0.876044f, 0.573575f, -1.185992f, -2.174689f, + 0.058702f, -1.239167f, -1.175520f, 1.434582f, 0.434983f, 1.004002f, 0.471956f, + -0.667871f, 0.400037f, -0.211023f, -1.595166f, -0.071106f, -0.729924f, -0.854266f, + -0.024481f, -0.311798f, 0.061049f, -2.024275f, -0.420349f, -0.375486f, 1.340385f, + 0.692551f, -0.692819f, 0.690899f, 0.813157f, 1.401130f, 1.050018f, -1.347116f, + -0.798472f, 0.679221f, 1.154209f, -0.514578f, -1.520411f, 0.044336f, 0.217133f, + 1.322336f, -0.533918f, -0.274530f, -0.461149f, -1.520405f, 0.046148f, 0.347391f, + -0.668243f, -0.895559f, -0.570172f, 0.487236f, -1.055238f, -1.243746f, -0.292935f, + 1.089170f, 1.661516f, 1.185564f, 2.789485f}; float upstream_grad_data_6[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f}; float expected_grad_6[] = { - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; - + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, -0.000000f, -0.000000f, + -0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f}; + Tensor t_input_6 = create_test_tensor(shape_6, input_data_6, true); Tensor t_upstream_grad_6 = create_test_tensor(shape_6, upstream_grad_data_6, false); Tensor t_expected_grad_6 = create_test_tensor(shape_6, expected_grad_6, false); - + Tensor t_output_6 = nn_softmax(t_input_6, dim_6); Tensor_backward(t_output_6, t_upstream_grad_6); - compare_tensors(&t_input_6.node->grad, &t_expected_grad_6, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_input_6.node->grad, + &t_expected_grad_6, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } - + // Test Case 7 { const char* tc_name = "4d_shape_2_3_4_5_dim_0"; - TensorShape shape_7 = { 2, 3, 4, 5 }; + TensorShape shape_7 = {2, 3, 4, 5}; int dim_7 = 0; float input_data_7[] = { - 1.474789f, -0.519416f, 0.772987f, 0.513793f, 0.358928f, -1.248166f, 1.215024f, -0.499042f, - -0.712523f, 0.093402f, -1.435768f, 2.114161f, -0.970802f, -0.293139f, -0.310645f, 1.072718f, - -0.732339f, 0.848594f, 2.230250f, 0.788688f, 0.818710f, 0.110189f, 0.596008f, -1.353641f, - -1.533842f, 0.222465f, -0.505462f, 0.735581f, 0.294267f, -2.310454f, -1.823522f, -0.632836f, - -0.853936f, -0.440113f, 0.261468f, 0.628521f, 1.073663f, -1.206878f, 0.195572f, 0.067860f, - 1.201953f, 0.672570f, 0.162885f, -0.318752f, -1.069017f, 0.150168f, -1.795954f, -0.631950f, - -0.794543f, 0.239983f, -0.962714f, -0.631215f, -0.126794f, -0.652402f, -1.027763f, 0.239582f, - 0.813199f, 0.763215f, -0.191387f, 1.045356f, 0.035553f, 0.423691f, -0.921079f, -0.465691f, - 0.733018f, -0.995488f, -1.165929f, -0.038222f, 1.495182f, -1.220365f, 0.781768f, 0.533956f, - 0.570570f, 1.327856f, -1.325767f, 0.026772f, -0.304380f, -0.893432f, -0.487218f, -0.308606f, - 
0.575754f, -1.920340f, -0.568058f, -1.235566f, 0.559014f, 1.422567f, 1.470937f, 1.470600f, - 0.836620f, -0.128337f, 0.964306f, 1.091873f, 2.125473f, 1.657596f, 0.850262f, 0.829998f, - -1.011373f, 2.523616f, -0.630927f, 0.489706f, 0.230201f, 2.202367f, -0.355164f, -0.264000f, - -1.112660f, 0.016458f, -0.148696f, 0.256844f, 0.498291f, -2.765549f, 1.320153f, 0.056797f, - -0.598021f, -1.354019f, -0.255769f, -0.762125f, -0.353912f, -0.369006f, -0.153019f, -1.089665f - }; + 1.474789f, -0.519416f, 0.772987f, 0.513793f, 0.358928f, -1.248166f, 1.215024f, + -0.499042f, -0.712523f, 0.093402f, -1.435768f, 2.114161f, -0.970802f, -0.293139f, + -0.310645f, 1.072718f, -0.732339f, 0.848594f, 2.230250f, 0.788688f, 0.818710f, + 0.110189f, 0.596008f, -1.353641f, -1.533842f, 0.222465f, -0.505462f, 0.735581f, + 0.294267f, -2.310454f, -1.823522f, -0.632836f, -0.853936f, -0.440113f, 0.261468f, + 0.628521f, 1.073663f, -1.206878f, 0.195572f, 0.067860f, 1.201953f, 0.672570f, + 0.162885f, -0.318752f, -1.069017f, 0.150168f, -1.795954f, -0.631950f, -0.794543f, + 0.239983f, -0.962714f, -0.631215f, -0.126794f, -0.652402f, -1.027763f, 0.239582f, + 0.813199f, 0.763215f, -0.191387f, 1.045356f, 0.035553f, 0.423691f, -0.921079f, + -0.465691f, 0.733018f, -0.995488f, -1.165929f, -0.038222f, 1.495182f, -1.220365f, + 0.781768f, 0.533956f, 0.570570f, 1.327856f, -1.325767f, 0.026772f, -0.304380f, + -0.893432f, -0.487218f, -0.308606f, 0.575754f, -1.920340f, -0.568058f, -1.235566f, + 0.559014f, 1.422567f, 1.470937f, 1.470600f, 0.836620f, -0.128337f, 0.964306f, + 1.091873f, 2.125473f, 1.657596f, 0.850262f, 0.829998f, -1.011373f, 2.523616f, + -0.630927f, 0.489706f, 0.230201f, 2.202367f, -0.355164f, -0.264000f, -1.112660f, + 0.016458f, -0.148696f, 0.256844f, 0.498291f, -2.765549f, 1.320153f, 0.056797f, + -0.598021f, -1.354019f, -0.255769f, -0.762125f, -0.353912f, -0.369006f, -0.153019f, + -1.089665f}; float upstream_grad_data_7[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 
1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}; float expected_grad_7[] = { - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f}; Tensor t_input_7 = create_test_tensor(shape_7, input_data_7, true); Tensor t_upstream_grad_7 = create_test_tensor(shape_7, upstream_grad_data_7, false); Tensor t_expected_grad_7 = create_test_tensor(shape_7, expected_grad_7, false); - + Tensor t_output_7 = nn_softmax(t_input_7, dim_7); Tensor_backward(t_output_7, t_upstream_grad_7); - compare_tensors(&t_input_7.node->grad, &t_expected_grad_7, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_input_7.node->grad, + &t_expected_grad_7, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } - + // Test Case 8 { const char* tc_name = "4d_shape_2_3_4_5_dim_1"; - TensorShape shape_8 = { 2, 3, 4, 5 }; + TensorShape shape_8 = {2, 3, 4, 5}; int dim_8 = 1; float input_data_8[] = { - 0.561681f, 0.386991f, 0.700992f, -0.175400f, -1.221304f, 1.315252f, 0.262052f, -1.296483f, - -1.000376f, -0.049059f, -0.231204f, -0.923470f, 1.755458f, -0.090067f, -0.658477f, -1.624743f, - 0.779242f, 1.246339f, 0.841233f, -0.168954f, -2.188574f, -1.119449f, 0.713464f, 0.836752f, - 0.427087f, -0.089142f, 0.602505f, 1.380736f, 1.809501f, 1.535842f, 0.977647f, 0.036867f, - -0.597753f, -0.329029f, -0.593759f, 0.445857f, -1.010847f, -0.540897f, 1.135262f, -0.741583f, - -0.598109f, 0.714068f, 1.211951f, 1.256436f, 1.870781f, -0.347804f, -0.187317f, 0.268083f, - 0.752065f, -0.409261f, 0.783845f, -1.448883f, 0.429867f, -0.708923f, 0.299181f, 0.294794f, - 0.746353f, 0.700991f, 3.085500f, -0.020110f, 0.719284f, -1.626763f, 1.026756f, -2.271972f, - -0.224668f, 0.515021f, -1.488521f, 1.192641f, 1.940856f, -0.390310f, -0.573369f, 0.458933f, - 0.996491f, 1.104303f, 0.708012f, 0.022833f, -1.197958f, -0.650611f, 0.735700f, -1.355728f, - -0.851106f, 0.462304f, 0.136542f, -0.662511f, 0.112475f, -0.230184f, -0.187235f, 0.168426f, - -0.861487f, -0.093247f, -1.005995f, -0.345442f, -1.515225f, -0.738003f, 0.932156f, -0.418522f, - -0.089483f, -0.303355f, 0.491240f, -1.087204f, -2.471883f, 0.178723f, 0.556631f, 1.224770f, - 0.991969f, -0.456523f, 0.178374f, 0.010078f, -0.576381f, -1.770423f, -0.484817f, -0.203203f, - 1.406515f, -0.824538f, -1.138210f, 0.733394f, -0.416151f, 0.167366f, -0.066760f, 1.503202f - }; + 0.561681f, 0.386991f, 0.700992f, -0.175400f, -1.221304f, 1.315252f, 0.262052f, + -1.296483f, -1.000376f, -0.049059f, -0.231204f, -0.923470f, 1.755458f, -0.090067f, + -0.658477f, -1.624743f, 0.779242f, 1.246339f, 0.841233f, -0.168954f, -2.188574f, + -1.119449f, 0.713464f, 0.836752f, 0.427087f, -0.089142f, 0.602505f, 1.380736f, + 1.809501f, 1.535842f, 0.977647f, 0.036867f, -0.597753f, -0.329029f, -0.593759f, + 0.445857f, -1.010847f, -0.540897f, 1.135262f, -0.741583f, -0.598109f, 0.714068f, + 1.211951f, 1.256436f, 1.870781f, -0.347804f, -0.187317f, 0.268083f, 0.752065f, + -0.409261f, 0.783845f, -1.448883f, 0.429867f, -0.708923f, 0.299181f, 0.294794f, + 0.746353f, 0.700991f, 3.085500f, -0.020110f, 0.719284f, -1.626763f, 1.026756f, + -2.271972f, -0.224668f, 0.515021f, -1.488521f, 1.192641f, 1.940856f, -0.390310f, + -0.573369f, 0.458933f, 0.996491f, 1.104303f, 0.708012f, 0.022833f, -1.197958f, + -0.650611f, 0.735700f, -1.355728f, -0.851106f, 0.462304f, 0.136542f, -0.662511f, + 0.112475f, -0.230184f, -0.187235f, 0.168426f, -0.861487f, -0.093247f, -1.005995f, + -0.345442f, -1.515225f, -0.738003f, 0.932156f, 
-0.418522f, -0.089483f, -0.303355f, + 0.491240f, -1.087204f, -2.471883f, 0.178723f, 0.556631f, 1.224770f, 0.991969f, + -0.456523f, 0.178374f, 0.010078f, -0.576381f, -1.770423f, -0.484817f, -0.203203f, + 1.406515f, -0.824538f, -1.138210f, 0.733394f, -0.416151f, 0.167366f, -0.066760f, + 1.503202f}; float upstream_grad_data_8[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}; float expected_grad_8[] = { - -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - -0.000000f, 0.000000f, 0.000000f, 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; - + -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f}; + Tensor t_input_8 = create_test_tensor(shape_8, input_data_8, true); Tensor t_upstream_grad_8 = create_test_tensor(shape_8, upstream_grad_data_8, false); Tensor t_expected_grad_8 = create_test_tensor(shape_8, expected_grad_8, false); - + Tensor t_output_8 = nn_softmax(t_input_8, dim_8); Tensor_backward(t_output_8, t_upstream_grad_8); - compare_tensors(&t_input_8.node->grad, &t_expected_grad_8, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_input_8.node->grad, + &t_expected_grad_8, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } - + // Test Case 9 { const char* tc_name = "4d_shape_2_3_4_5_dim_2"; - TensorShape shape_9 = { 2, 3, 4, 5 }; + TensorShape shape_9 = {2, 3, 4, 5}; int dim_9 = 2; float input_data_9[] = { - 0.010760f, 0.100405f, -1.383691f, -0.769888f, 1.195853f, -1.022066f, -0.012783f, 0.538006f, - -1.055005f, 1.188038f, -0.106974f, -1.330146f, 0.137568f, 0.386421f, -1.093478f, -1.881092f, - 0.732247f, 1.999664f, 1.194850f, -1.598107f, 0.283570f, -0.342071f, 0.727294f, -0.940179f, - 3.080915f, 0.145376f, -1.614777f, 1.112682f, -0.316733f, 0.897962f, 0.459663f, -1.165369f, - -0.732535f, -1.159413f, 2.247406f, -0.050621f, -1.504156f, -0.302434f, -0.513041f, 0.300270f, - -1.572776f, 0.513942f, -0.044417f, 0.164410f, -0.974761f, 0.335461f, -0.087529f, 0.287355f, - 
-1.536843f, 0.486854f, 1.358455f, -1.136863f, -0.877172f, -0.794430f, -1.425114f, -0.909163f, - -0.052022f, 0.158338f, 0.574350f, -0.518316f, -0.351622f, 1.041731f, -1.792563f, 0.337314f, - 0.624813f, -0.787995f, 0.930929f, 0.848330f, 2.423832f, -0.677877f, -1.639693f, 0.373048f, - 0.696541f, -0.621287f, -0.094549f, 0.908195f, 1.737419f, 1.046002f, 0.564060f, -0.292395f, - -1.443398f, 0.007105f, -0.769770f, 0.700754f, 0.373561f, 0.092035f, 0.562334f, 0.468080f, - -0.198245f, -0.450278f, -0.944298f, -0.850880f, -0.276559f, -0.007981f, 0.493513f, -0.993513f, - 2.331804f, 0.003341f, 0.774831f, -0.807105f, -1.219366f, -0.251807f, 0.392564f, 0.217238f, - 0.282720f, -0.171292f, -1.394582f, -0.856184f, 0.623297f, 1.629926f, 0.092632f, 0.936860f, - 0.798362f, -0.503815f, 1.078040f, -0.095208f, -2.700858f, 1.573456f, -0.783068f, -1.859010f - }; + 0.010760f, 0.100405f, -1.383691f, -0.769888f, 1.195853f, -1.022066f, -0.012783f, + 0.538006f, -1.055005f, 1.188038f, -0.106974f, -1.330146f, 0.137568f, 0.386421f, + -1.093478f, -1.881092f, 0.732247f, 1.999664f, 1.194850f, -1.598107f, 0.283570f, + -0.342071f, 0.727294f, -0.940179f, 3.080915f, 0.145376f, -1.614777f, 1.112682f, + -0.316733f, 0.897962f, 0.459663f, -1.165369f, -0.732535f, -1.159413f, 2.247406f, + -0.050621f, -1.504156f, -0.302434f, -0.513041f, 0.300270f, -1.572776f, 0.513942f, + -0.044417f, 0.164410f, -0.974761f, 0.335461f, -0.087529f, 0.287355f, -1.536843f, + 0.486854f, 1.358455f, -1.136863f, -0.877172f, -0.794430f, -1.425114f, -0.909163f, + -0.052022f, 0.158338f, 0.574350f, -0.518316f, -0.351622f, 1.041731f, -1.792563f, + 0.337314f, 0.624813f, -0.787995f, 0.930929f, 0.848330f, 2.423832f, -0.677877f, + -1.639693f, 0.373048f, 0.696541f, -0.621287f, -0.094549f, 0.908195f, 1.737419f, + 1.046002f, 0.564060f, -0.292395f, -1.443398f, 0.007105f, -0.769770f, 0.700754f, + 0.373561f, 0.092035f, 0.562334f, 0.468080f, -0.198245f, -0.450278f, -0.944298f, + -0.850880f, -0.276559f, -0.007981f, 0.493513f, -0.993513f, 2.331804f, 0.003341f, + 0.774831f, -0.807105f, -1.219366f, -0.251807f, 0.392564f, 0.217238f, 0.282720f, + -0.171292f, -1.394582f, -0.856184f, 0.623297f, 1.629926f, 0.092632f, 0.936860f, + 0.798362f, -0.503815f, 1.078040f, -0.095208f, -2.700858f, 1.573456f, -0.783068f, + -1.859010f}; float upstream_grad_data_9[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 
1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}; float expected_grad_9[] = { - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; - + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 
0.000000f, + -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f}; + Tensor t_input_9 = create_test_tensor(shape_9, input_data_9, true); Tensor t_upstream_grad_9 = create_test_tensor(shape_9, upstream_grad_data_9, false); Tensor t_expected_grad_9 = create_test_tensor(shape_9, expected_grad_9, false); - + Tensor t_output_9 = nn_softmax(t_input_9, dim_9); Tensor_backward(t_output_9, t_upstream_grad_9); - compare_tensors(&t_input_9.node->grad, &t_expected_grad_9, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_input_9.node->grad, + &t_expected_grad_9, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } - + // Test Case 10 { const char* tc_name = "4d_shape_2_3_4_5_dim_3"; - TensorShape shape_10 = { 2, 3, 4, 5 }; + TensorShape shape_10 = {2, 3, 4, 5}; int dim_10 = 3; float input_data_10[] = { - -0.533466f, 0.851224f, 0.866751f, 0.687578f, 0.893614f, -0.176522f, 0.259054f, 0.616506f, - -0.329471f, -0.068180f, -2.789774f, -0.929192f, -0.040251f, 0.557579f, -1.587092f, 0.837130f, - -0.396975f, -0.425327f, -1.108546f, -1.497528f, -1.050783f, -0.618608f, 0.302976f, -0.894611f, - 0.740044f, 0.278542f, -0.823290f, -1.026350f, -1.483952f, 1.162551f, 0.517106f, -0.460962f, - -1.371643f, -0.248814f, 0.682069f, 1.154833f, -1.403635f, 1.373101f, -1.902848f, -0.723326f, - 1.271827f, -0.707682f, -0.092860f, -0.806911f, -1.742649f, 0.856783f, 1.581833f, -1.866143f, - -0.065457f, -1.585482f, 0.204017f, 0.444698f, 0.600724f, -0.474215f, -1.305148f, -0.853096f, - 1.269127f, 1.701428f, 0.301171f, 0.078225f, -0.472443f, 1.128119f, 2.771775f, 0.348380f, - 0.125561f, 1.738564f, -0.459702f, 0.147657f, -0.245800f, 0.484814f, -0.245291f, 1.815248f, - 0.401230f, -0.266132f, -0.703580f, 1.460846f, 0.358296f, 0.947361f, -1.496864f, 0.589258f, - 0.044601f, 0.702129f, -0.474669f, 0.388595f, 0.255142f, -0.898959f, 0.023422f, 0.507979f, - -0.316557f, -0.365951f, -0.438184f, 0.291421f, -0.066030f, -0.706016f, -0.022865f, -2.269721f, - 1.083016f, 0.740388f, 0.508656f, 0.053598f, -0.024204f, 0.970268f, 0.723369f, -0.281419f, - -0.666052f, -0.236260f, 0.347583f, 0.163527f, 0.662205f, -0.333970f, -1.201504f, 0.366744f, - 2.164139f, 0.516986f, -0.900627f, 0.271167f, 0.374698f, 0.893020f, 0.744694f, -1.054701f - }; + -0.533466f, 0.851224f, 0.866751f, 0.687578f, 0.893614f, -0.176522f, 0.259054f, + 0.616506f, -0.329471f, -0.068180f, -2.789774f, -0.929192f, -0.040251f, 0.557579f, + -1.587092f, 0.837130f, -0.396975f, -0.425327f, -1.108546f, -1.497528f, -1.050783f, + -0.618608f, 0.302976f, -0.894611f, 0.740044f, 0.278542f, -0.823290f, -1.026350f, + -1.483952f, 1.162551f, 0.517106f, -0.460962f, -1.371643f, -0.248814f, 0.682069f, + 1.154833f, -1.403635f, 1.373101f, -1.902848f, -0.723326f, 1.271827f, -0.707682f, + -0.092860f, -0.806911f, -1.742649f, 0.856783f, 1.581833f, -1.866143f, -0.065457f, + -1.585482f, 0.204017f, 0.444698f, 0.600724f, -0.474215f, -1.305148f, -0.853096f, + 1.269127f, 1.701428f, 0.301171f, 0.078225f, -0.472443f, 
1.128119f, 2.771775f, + 0.348380f, 0.125561f, 1.738564f, -0.459702f, 0.147657f, -0.245800f, 0.484814f, + -0.245291f, 1.815248f, 0.401230f, -0.266132f, -0.703580f, 1.460846f, 0.358296f, + 0.947361f, -1.496864f, 0.589258f, 0.044601f, 0.702129f, -0.474669f, 0.388595f, + 0.255142f, -0.898959f, 0.023422f, 0.507979f, -0.316557f, -0.365951f, -0.438184f, + 0.291421f, -0.066030f, -0.706016f, -0.022865f, -2.269721f, 1.083016f, 0.740388f, + 0.508656f, 0.053598f, -0.024204f, 0.970268f, 0.723369f, -0.281419f, -0.666052f, + -0.236260f, 0.347583f, 0.163527f, 0.662205f, -0.333970f, -1.201504f, 0.366744f, + 2.164139f, 0.516986f, -0.900627f, 0.271167f, 0.374698f, 0.893020f, 0.744694f, + -1.054701f}; float upstream_grad_data_10[] = { - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, - 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f - }; + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, + 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f, 1.000000f}; float expected_grad_10[] = { - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 
0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.0000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, - 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f - }; - + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, -0.000000f, -0.000000f, -0.000000f, + -0.000000f, -0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, -0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.0000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, 0.000000f, + 0.000000f}; + Tensor t_input_10 = create_test_tensor(shape_10, input_data_10, true); Tensor t_upstream_grad_10 = create_test_tensor(shape_10, upstream_grad_data_10, false); Tensor t_expected_grad_10 = create_test_tensor(shape_10, expected_grad_10, false); - + Tensor t_output_10 = nn_softmax(t_input_10, dim_10); Tensor_backward(t_output_10, t_upstream_grad_10); - compare_tensors(&t_input_10.node->grad, &t_expected_grad_10, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_input_10.node->grad, + &t_expected_grad_10, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } cten_free(pool_id); diff --git a/tests/Backward/test_sub_backward.c b/tests/Backward/test_sub_backward.c index d218942..4cdf34c 100644 --- a/tests/Backward/test_sub_backward.c +++ b/tests/Backward/test_sub_backward.c @@ -6,7 +6,7 @@ void test_sub_backward() { const char* op_name = "sub_backward"; - PoolId pool_id = 0; + PoolId pool_id = 
0; cten_begin_malloc(pool_id); // Test Case 1: Simple backward (1x1 tensors) @@ -17,20 +17,30 @@ void test_sub_backward() { TensorShape s_shape = {1}; float d1[] = {5.0f}; float d2[] = {3.0f}; - float exp_grad1[] = {1.0f}; // dz/dx = 1 - float exp_grad2[] = {-1.0f}; // dz/dy = -1 - + float exp_grad1[] = {1.0f}; // dz/dx = 1 + float exp_grad2[] = {-1.0f}; // dz/dy = -1 + Tensor t1 = create_test_tensor(s_shape, d1, true); Tensor t2 = create_test_tensor(s_shape, d2, true); Tensor z = Tensor_sub(t1, t2); // z = 2.0 - + Tensor_backward(z, (Tensor){0}); - + Tensor expected_grad1 = create_test_tensor(s_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(s_shape, exp_grad2, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad2, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad2, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Vector sub backward @@ -40,19 +50,29 @@ void test_sub_backward() { float d2[] = {2.0f, 3.0f, 4.0f}; float exp_grad1[] = {1.0f, 1.0f, 1.0f}; float exp_grad2[] = {-1.0f, -1.0f, -1.0f}; - + Tensor t1 = create_test_tensor(v_shape, d1, true); Tensor t2 = create_test_tensor(v_shape, d2, true); Tensor z = Tensor_sub(t1, t2); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad1 = create_test_tensor(v_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(v_shape, exp_grad2, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad2, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad2, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Sub-test 3: Matrix sub backward @@ -62,19 +82,29 @@ void test_sub_backward() { float d2[] = {1.0f, 2.0f, 3.0f, 4.0f}; float exp_grad1[] = {1.0f, 1.0f, 1.0f, 1.0f}; float exp_grad2[] = {-1.0f, -1.0f, -1.0f, -1.0f}; - + Tensor t1 = create_test_tensor(m_shape, d1, true); Tensor t2 = create_test_tensor(m_shape, d2, true); Tensor z = Tensor_sub(t1, t2); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad1 = create_test_tensor(m_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(m_shape, exp_grad2, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad2, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad2, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } } @@ -89,19 +119,29 @@ void test_sub_backward() { float scalar_data[] = {3.0f}; float exp_grad_vec[] = {1.0f, 1.0f}; float exp_grad_scalar[] = {-2.0f}; - + Tensor t_vec = create_test_tensor(vec_shape, vec_data, true); Tensor t_scalar = create_test_tensor(scalar_shape, scalar_data, true); Tensor z = Tensor_sub(t_vec, t_scalar); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad_vec = create_test_tensor(vec_shape, exp_grad_vec, false); Tensor expected_grad_scalar = create_test_tensor(scalar_shape, 
exp_grad_scalar, false); - compare_tensors(&t_vec.node->grad, &expected_grad_vec, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_scalar.node->grad, &expected_grad_scalar, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_vec.node->grad, + &expected_grad_vec, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_scalar.node->grad, + &expected_grad_scalar, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Matrix - Row Vector @@ -123,8 +163,18 @@ void test_sub_backward() { Tensor expected_grad_mat = create_test_tensor(mat_shape, exp_grad_mat, false); Tensor expected_grad_row = create_test_tensor(row_shape, exp_grad_row, false); - compare_tensors(&t_mat.node->grad, &expected_grad_mat, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_row.node->grad, &expected_grad_row, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_mat.node->grad, + &expected_grad_mat, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_row.node->grad, + &expected_grad_row, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Sub-test 3: Matrix - Column Vector @@ -146,8 +196,18 @@ void test_sub_backward() { Tensor expected_grad_mat = create_test_tensor(mat_shape, exp_grad_mat, false); Tensor expected_grad_col = create_test_tensor(col_shape, exp_grad_col, false); - compare_tensors(&t_mat.node->grad, &expected_grad_mat, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); - compare_tensors(&t_col.node->grad, &expected_grad_col, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&t_mat.node->grad, + &expected_grad_mat, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t_col.node->grad, + &expected_grad_col, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } } @@ -161,19 +221,29 @@ void test_sub_backward() { float d2[] = {1.1f, 2.2f, 0.5f, 3.3f}; float exp_grad1[] = {1.0f, 1.0f, 1.0f, 1.0f}; float exp_grad2[] = {-1.0f, -1.0f, -1.0f, -1.0f}; - + Tensor t1 = create_test_tensor(v_shape, d1, true); Tensor t2 = create_test_tensor(v_shape, d2, true); Tensor z = Tensor_sub(t1, t2); Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad1 = create_test_tensor(v_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(v_shape, exp_grad2, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad2, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad2, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Random 3D tensor subtraction @@ -194,8 +264,18 @@ void test_sub_backward() { Tensor expected_grad1 = create_test_tensor(tensor3d_shape, exp_grad1, false); Tensor expected_grad2 = create_test_tensor(tensor3d_shape, exp_grad2, false); - compare_tensors(&t1.node->grad, &expected_grad1, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&t2.node->grad, &expected_grad2, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t1.node->grad, + &expected_grad1, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&t2.node->grad, + &expected_grad2, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -211,24 +291,39 @@ void test_sub_backward() { float exp_grad_a[] = {2.0f, 3.0f}; float exp_grad_b[] = {-2.0f, -3.0f}; float exp_grad_c[] = {2.0f, 2.0f}; - + Tensor a 
= create_test_tensor(v_shape, a_data, true); Tensor b = create_test_tensor(v_shape, b_data, true); Tensor c = create_test_tensor(v_shape, c_data, true); - + Tensor diff = Tensor_sub(a, b); Tensor prod = Tensor_mul(diff, c); Tensor l = Tensor_sum(prod); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad_a = create_test_tensor(v_shape, exp_grad_a, false); Tensor expected_grad_b = create_test_tensor(v_shape, exp_grad_b, false); Tensor expected_grad_c = create_test_tensor(v_shape, exp_grad_c, false); - compare_tensors(&a.node->grad, &expected_grad_a, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&b.node->grad, &expected_grad_b, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&c.node->grad, &expected_grad_c, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&a.node->grad, + &expected_grad_a, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&b.node->grad, + &expected_grad_b, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&c.node->grad, + &expected_grad_c, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } } diff --git a/tests/Backward/test_sum_backward.c b/tests/Backward/test_sum_backward.c index 742270d..9c72865 100644 --- a/tests/Backward/test_sum_backward.c +++ b/tests/Backward/test_sum_backward.c @@ -6,7 +6,7 @@ void test_sum_backward() { const char* op_name = "sum_backward"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Sum all elements backward @@ -17,15 +17,20 @@ void test_sum_backward() { TensorShape v_shape = {3}; float data[] = {1.0f, 2.0f, 3.0f}; float exp_grad[] = {1.0f, 1.0f, 1.0f}; - + Tensor t = create_test_tensor(v_shape, data, true); Tensor z = Tensor_sum(t); // sum all elements - + Tensor_backward(z, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(v_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Matrix sum all @@ -33,15 +38,20 @@ void test_sum_backward() { TensorShape m_shape = {2, 3}; float data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float exp_grad[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_sum(t); // sum all elements - + Tensor_backward(z, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Sub-test 3: 3D tensor sum all @@ -49,15 +59,20 @@ void test_sum_backward() { TensorShape tensor3d_shape = {2, 2, 2}; float data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; float exp_grad[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - + Tensor t = create_test_tensor(tensor3d_shape, data, true); Tensor z = Tensor_sum(t); // sum all elements - + Tensor_backward(z, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(tensor3d_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 3, + TEST_FLOAT_TOLERANCE); } } @@ -69,35 +84,47 @@ void test_sum_backward() { TensorShape m_shape = {2, 3}; float data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float exp_grad[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 
1.0f}; - + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_sum(t, 0); // sum along dim 0. Shape is {3} - Tensor grad_for_z = Tensor_ones(z.shape, false); + Tensor grad_for_z = Tensor_ones(z.shape, false); Tensor_backward(z, grad_for_z); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: 3D tensor sum along dim 0 { TensorShape tensor3d_shape = {2, 3, 4}; float data[24]; - for (int i = 0; i < 24; i++) data[i] = (float)(i + 1); + for(int i = 0; i < 24; i++) + data[i] = (float)(i + 1); float exp_grad[24]; - for (int i = 0; i < 24; i++) exp_grad[i] = 1.0f; - + for(int i = 0; i < 24; i++) + exp_grad[i] = 1.0f; + Tensor t = create_test_tensor(tensor3d_shape, data, true); Tensor z = Tensor_sum(t, 0); // sum along dim 0. Shape is {3, 4} - + Tensor grad_for_z = Tensor_ones(z.shape, false); Tensor_backward(z, grad_for_z); - + Tensor expected_grad = create_test_tensor(tensor3d_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -109,35 +136,47 @@ void test_sum_backward() { TensorShape m_shape = {2, 3}; float data[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float exp_grad[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_sum(t, 1); // sum along dim 1 -> shape {2} Tensor l = Tensor_sum(z); - + Tensor_backward(l, (Tensor){0}); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: 3D tensor sum along dim 1 { TensorShape tensor3d_shape = {2, 3, 4}; float data[24]; - for (int i = 0; i < 24; i++) data[i] = (float)(i + 1); + for(int i = 0; i < 24; i++) + data[i] = (float)(i + 1); float exp_grad[24]; - for (int i = 0; i < 24; i++) exp_grad[i] = 1.0f; - + for(int i = 0; i < 24; i++) + exp_grad[i] = 1.0f; + Tensor t = create_test_tensor(tensor3d_shape, data, true); Tensor z = Tensor_sum(t, 1); // sum along dim 1 -> shape {2, 4} - + Tensor grad_for_z = Tensor_ones(z.shape, false); Tensor_backward(z, grad_for_z); - + Tensor expected_grad = create_test_tensor(tensor3d_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -148,19 +187,26 @@ void test_sum_backward() { { TensorShape tensor3d_shape = {2, 3, 4}; float data[24]; - for (int i = 0; i < 24; i++) data[i] = (float)(i + 1); + for(int i = 0; i < 24; i++) + data[i] = (float)(i + 1); float exp_grad[24]; - for (int i = 0; i < 24; i++) exp_grad[i] = 1.0f; - + for(int i = 0; i < 24; i++) + exp_grad[i] = 1.0f; + Tensor t = create_test_tensor(tensor3d_shape, data, true); Tensor z = Tensor_sum(t, 2); // sum along dim 2 -> shape {2, 3} - + Tensor grad_for_z = Tensor_ones(z.shape, false); Tensor_backward(z, grad_for_z); - + Tensor expected_grad = create_test_tensor(tensor3d_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, 
TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } } @@ -170,51 +216,47 @@ void test_sum_backward() { // Sub-test 1: Random matrix sum along dim 0 { TensorShape m_shape = {3, 4}; - float data[] = { - 2.5f, 1.3f, 4.8f, 3.2f, - 0.7f, 5.1f, 2.9f, 6.4f, - 3.6f, 1.8f, 4.2f, 0.9f - }; - float exp_grad[] = { - 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f - }; - + float data[] = {2.5f, 1.3f, 4.8f, 3.2f, 0.7f, 5.1f, 2.9f, 6.4f, 3.6f, 1.8f, 4.2f, 0.9f}; + float exp_grad[] = + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_sum(t, 0); // sum along dim 0 -> shape {4} - + Tensor grad_for_z = Tensor_ones(z.shape, false); Tensor_backward(z, grad_for_z); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Random matrix sum along dim 1 { TensorShape m_shape = {3, 4}; - float data[] = { - 2.5f, 1.3f, 4.8f, 3.2f, - 0.7f, 5.1f, 2.9f, 6.4f, - 3.6f, 1.8f, 4.2f, 0.9f - }; - float exp_grad[] = { - 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f, - 1.0f, 1.0f, 1.0f, 1.0f - }; - + float data[] = {2.5f, 1.3f, 4.8f, 3.2f, 0.7f, 5.1f, 2.9f, 6.4f, 3.6f, 1.8f, 4.2f, 0.9f}; + float exp_grad[] = + {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; + Tensor t = create_test_tensor(m_shape, data, true); Tensor z = Tensor_sum(t, 1); // sum along dim 1 -> shape {3} - + Tensor grad_for_z = Tensor_ones(z.shape, false); Tensor_backward(z, grad_for_z); - + Tensor expected_grad = create_test_tensor(m_shape, exp_grad, false); - compare_tensors(&t.node->grad, &expected_grad, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&t.node->grad, + &expected_grad, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } @@ -228,20 +270,30 @@ void test_sum_backward() { float b_data[] = {0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f}; float exp_grad_a[] = {0.5f, 1.5f, 2.5f, 3.5f, 4.5f, 5.5f}; float exp_grad_b[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - + Tensor a = create_test_tensor(m_shape, a_data, true); Tensor b = create_test_tensor(m_shape, b_data, true); Tensor prod = Tensor_mul(a, b); - Tensor z = Tensor_sum(prod, 0); // shape {3} - + Tensor z = Tensor_sum(prod, 0); // shape {3} + Tensor grad_for_z = Tensor_ones(z.shape, false); Tensor_backward(z, grad_for_z); - + Tensor expected_grad_a = create_test_tensor(m_shape, exp_grad_a, false); Tensor expected_grad_b = create_test_tensor(m_shape, exp_grad_b, false); - compare_tensors(&a.node->grad, &expected_grad_a, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&b.node->grad, &expected_grad_b, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&a.node->grad, + &expected_grad_a, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); + compare_tensors(&b.node->grad, + &expected_grad_b, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); } // Sub-test 2: Sum(a+b, dim=1) @@ -251,20 +303,30 @@ void test_sum_backward() { float b_data[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f}; float exp_grad_a[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; float exp_grad_b[] = {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}; - + Tensor a = create_test_tensor(m_shape, a_data, true); Tensor b = create_test_tensor(m_shape, b_data, true); Tensor sum_ab = 
Tensor_add(a, b); - Tensor z = Tensor_sum(sum_ab, 1); // shape {2} - + Tensor z = Tensor_sum(sum_ab, 1); // shape {2} + Tensor grad_for_z = Tensor_ones(z.shape, false); Tensor_backward(z, grad_for_z); - + Tensor expected_grad_a = create_test_tensor(m_shape, exp_grad_a, false); Tensor expected_grad_b = create_test_tensor(m_shape, exp_grad_b, false); - compare_tensors(&a.node->grad, &expected_grad_a, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); - compare_tensors(&b.node->grad, &expected_grad_b, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&a.node->grad, + &expected_grad_a, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); + compare_tensors(&b.node->grad, + &expected_grad_b, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } diff --git a/tests/Operator/test_abs.c b/tests/Operator/test_abs.c index 5896698..199d9a0 100644 --- a/tests/Operator/test_abs.c +++ b/tests/Operator/test_abs.c @@ -7,7 +7,7 @@ void test_abs_operator() { const char* op_name = "abs"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Basic test with mixed positive, negative and zero values @@ -16,7 +16,7 @@ void test_abs_operator() { TensorShape shape = {6}; float d1[] = {-2.5f, -1.0f, 0.0f, 1.0f, 2.5f, -3.0f}; float exp_d[] = {2.5f, 1.0f, 0.0f, 1.0f, 2.5f, 3.0f}; - + Tensor t1 = create_test_tensor(shape, d1, false); Tensor expected_res = create_test_tensor(shape, exp_d, false); Tensor actual_res = Tensor_abs(t1); @@ -58,7 +58,7 @@ void test_abs_operator() { TensorShape m_shape = {2, 3}; float d1[] = {1.0f, -2.0f, 0.0f, -4.0f, 5.0f, -6.0f}; float exp_d[] = {1.0f, 2.0f, 0.0f, 4.0f, 5.0f, 6.0f}; - + Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(m_shape, exp_d, false); Tensor actual_res = Tensor_abs(t1); @@ -72,7 +72,7 @@ void test_abs_operator() { TensorShape shape = {5}; float d1[] = {-1e8f, 1e-8f, 0.0f, 1e8f, -1e-8f}; float exp_d[] = {1e8f, 1e-8f, 0.0f, 1e8f, 1e-8f}; - + Tensor t1 = create_test_tensor(shape, d1, false); Tensor expected_res = create_test_tensor(shape, exp_d, false); Tensor actual_res = Tensor_abs(t1); diff --git a/tests/Operator/test_add.c b/tests/Operator/test_add.c index 176d3a2..51ee08b 100644 --- a/tests/Operator/test_add.c +++ b/tests/Operator/test_add.c @@ -6,7 +6,7 @@ void test_add_operator() { const char* op_name = "add"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Scalar addition (represented as 1x1 tensors) @@ -29,10 +29,10 @@ void test_add_operator() { // Sub-test 2 { - const char* tc_name = "add_scalar"; - float d1[] = {10.0f}; + const char* tc_name = "add_scalar"; + float d1[] = {10.0f}; float d2[] = {5.0f}; - float exp_d[] = {15.0f}; + float exp_d[] = {15.0f}; Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -71,18 +71,21 @@ void test_add_operator() { compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 4: Broadcasting (vector + scalar-like tensor) { const char* tc_name = "add_broadcast_vector_plus_scalar_tensor"; - TensorShape vec_shape = {2}; float vec_data[] = {1.0f, 2.0f}; - TensorShape scalar_shape = {1}; float scalar_data[] = {10.0f}; - TensorShape expected_shape = {2}; float exp_data[] = {11.0f, 12.0f}; + TensorShape vec_shape = {2}; + float vec_data[] = {1.0f, 2.0f}; + TensorShape scalar_shape = {1}; + float scalar_data[] = {10.0f}; + TensorShape expected_shape = 
{2}; + float exp_data[] = {11.0f, 12.0f}; Tensor t_vec = create_test_tensor(vec_shape, vec_data, false); Tensor t_scalar_original = create_test_tensor(scalar_shape, scalar_data, false); - - Tensor actual_res = Tensor_add(t_vec, t_scalar_original); + + Tensor actual_res = Tensor_add(t_vec, t_scalar_original); Tensor expected_res = create_test_tensor(expected_shape, exp_data, false); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); @@ -91,15 +94,26 @@ void test_add_operator() { // Test Case 5: Advanced Broadcasting { const char* tc_name = "add_advanced_broadcasting"; - + // Sub-test 1: Multi-dimensional broadcasting {3,1} + {1,4} -> {3,4} { - TensorShape s1_shape = {3, 1}; float d1[] = {1.0f, 2.0f, 3.0f}; - TensorShape s2_shape = {1, 4}; float d2[] = {10.0f, 20.0f, 30.0f, 40.0f}; - TensorShape exp_shape = {3, 4}; - float exp_d[] = {11.0f, 21.0f, 31.0f, 41.0f, // 1+[10,20,30,40] - 12.0f, 22.0f, 32.0f, 42.0f, // 2+[10,20,30,40] - 13.0f, 23.0f, 33.0f, 43.0f}; // 3+[10,20,30,40] + TensorShape s1_shape = {3, 1}; + float d1[] = {1.0f, 2.0f, 3.0f}; + TensorShape s2_shape = {1, 4}; + float d2[] = {10.0f, 20.0f, 30.0f, 40.0f}; + TensorShape exp_shape = {3, 4}; + float exp_d[] = {11.0f, + 21.0f, + 31.0f, + 41.0f, // 1+[10,20,30,40] + 12.0f, + 22.0f, + 32.0f, + 42.0f, // 2+[10,20,30,40] + 13.0f, + 23.0f, + 33.0f, + 43.0f}; // 3+[10,20,30,40] Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -111,20 +125,38 @@ void test_add_operator() { // Sub-test 2: 3D broadcasting {2,3,1} + {1,1,4} -> {2,3,4} { - TensorShape s1_shape = {2, 3, 1}; + TensorShape s1_shape = {2, 3, 1}; float d1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - TensorShape s2_shape = {1, 1, 4}; + TensorShape s2_shape = {1, 1, 4}; float d2[] = {10.0f, 20.0f, 30.0f, 40.0f}; TensorShape exp_shape = {2, 3, 4}; float exp_d[] = { // First 2x3 slice - 11.0f, 21.0f, 31.0f, 41.0f, // 1+[10,20,30,40] - 12.0f, 22.0f, 32.0f, 42.0f, // 2+[10,20,30,40] - 13.0f, 23.0f, 33.0f, 43.0f, // 3+[10,20,30,40] + 11.0f, + 21.0f, + 31.0f, + 41.0f, // 1+[10,20,30,40] + 12.0f, + 22.0f, + 32.0f, + 42.0f, // 2+[10,20,30,40] + 13.0f, + 23.0f, + 33.0f, + 43.0f, // 3+[10,20,30,40] // Second 2x3 slice - 14.0f, 24.0f, 34.0f, 44.0f, // 4+[10,20,30,40] - 15.0f, 25.0f, 35.0f, 45.0f, // 5+[10,20,30,40] - 16.0f, 26.0f, 36.0f, 46.0f // 6+[10,20,30,40] + 14.0f, + 24.0f, + 34.0f, + 44.0f, // 4+[10,20,30,40] + 15.0f, + 25.0f, + 35.0f, + 45.0f, // 5+[10,20,30,40] + 16.0f, + 26.0f, + 36.0f, + 46.0f // 6+[10,20,30,40] }; Tensor t1 = create_test_tensor(s1_shape, d1, false); @@ -137,14 +169,17 @@ void test_add_operator() { // Sub-test 3: 4D broadcasting with size-1 dimensions {1,1,1,1} + {5,4,3,2} -> {5,4,3,2} { - TensorShape s1_shape = {1, 1, 1, 1}; float d1[] = {5.0f}; - TensorShape s2_shape = {5, 4, 3, 2}; - float d2[120]; // 5*4*3*2 = 120 elements - for(int i = 0; i < 120; i++) d2[i] = (float)(i + 1); - + TensorShape s1_shape = {1, 1, 1, 1}; + float d1[] = {5.0f}; + TensorShape s2_shape = {5, 4, 3, 2}; + float d2[120]; // 5*4*3*2 = 120 elements + for(int i = 0; i < 120; i++) + d2[i] = (float)(i + 1); + TensorShape exp_shape = {5, 4, 3, 2}; float exp_d[120]; - for(int i = 0; i < 120; i++) exp_d[i] = d2[i] + 5.0f; + for(int i = 0; i < 120; i++) + exp_d[i] = d2[i] + 5.0f; Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -156,60 +191,172 @@ void test_add_operator() { // Sub-test 4: Complex broadcasting {1,3,1,5} + {2,1,4,1} -> {2,3,4,5} 
{ - // Input 1: Shape [2,1,4,1] - TensorShape s1_shape = {2, 1, 4, 1}; - float d1[] = { - 0.2343f, 0.6135f, 0.1611f, 0.5962f, // First batch - 0.5681f, 0.5235f, 0.1218f, 0.4864f // Second batch - }; - - // Input 2: Shape [1,3,1,5] - TensorShape s2_shape = {1, 3, 1, 5}; - float d2[] = { - 0.2277f, 0.9322f, 0.7883f, 0.1584f, 0.4751f, // First channel - 0.8497f, 0.6706f, 0.5062f, 0.5672f, 0.8714f, // Second channel - 0.0536f, 0.7535f, 0.6602f, 0.9550f, 0.0743f // Third channel - }; - - // Expected output shape: [2,3,4,5] - TensorShape exp_shape = {2, 3, 4, 5}; - float exp_d[] = { - // Batch 0, Channel 0 - 0.4620f, 1.1665f, 1.0226f, 0.3927f, 0.7094f, - 0.8412f, 1.5457f, 1.4018f, 0.7719f, 1.0886f, - 0.3888f, 1.0933f, 0.9494f, 0.3195f, 0.6362f, - 0.8239f, 1.5284f, 1.3845f, 0.7546f, 1.0713f, - - // Batch 0, Channel 1 - 1.0840f, 0.9049f, 0.7405f, 0.8015f, 1.1057f, - 1.4632f, 1.2841f, 1.1197f, 1.1807f, 1.4849f, - 1.0108f, 0.8317f, 0.6672f, 0.7283f, 1.0325f, - 1.4459f, 1.2668f, 1.1024f, 1.1634f, 1.4676f, - - // Batch 0, Channel 2 - 0.2879f, 0.9878f, 0.8945f, 1.1892f, 0.3086f, - 0.6671f, 1.3670f, 1.2737f, 1.5685f, 0.6878f, - 0.2147f, 0.9146f, 0.8213f, 1.1161f, 0.2354f, - 0.6498f, 1.3497f, 1.2564f, 1.5511f, 0.6705f, - - // Batch 1, Channel 0 - 0.7958f, 1.5003f, 1.3564f, 0.7265f, 1.0432f, - 0.7512f, 1.4557f, 1.3118f, 0.6819f, 0.9986f, - 0.3496f, 1.0540f, 0.9101f, 0.2802f, 0.5969f, - 0.7141f, 1.4186f, 1.2747f, 0.6448f, 0.9615f, - - // Batch 1, Channel 1 - 1.4178f, 1.2387f, 1.0743f, 1.1353f, 1.4395f, - 1.3732f, 1.1941f, 1.0297f, 1.0907f, 1.3949f, - 0.9715f, 0.7924f, 0.6281f, 0.6890f, 0.9932f, - 1.3361f, 1.1570f, 0.9926f, 1.0536f, 1.3578f, - - // Batch 1, Channel 2 - 0.6217f, 1.3216f, 1.2283f, 1.5230f, 0.6424f, - 0.5771f, 1.2770f, 1.1837f, 1.4785f, 0.5978f, - 0.1754f, 0.8753f, 0.7820f, 1.0768f, 0.1961f, - 0.5400f, 1.2399f, 1.1466f, 1.4414f, 0.5607f - }; + // Input 1: Shape [2,1,4,1] + TensorShape s1_shape = {2, 1, 4, 1}; + float d1[] = { + 0.2343f, + 0.6135f, + 0.1611f, + 0.5962f, // First batch + 0.5681f, + 0.5235f, + 0.1218f, + 0.4864f // Second batch + }; + + // Input 2: Shape [1,3,1,5] + TensorShape s2_shape = {1, 3, 1, 5}; + float d2[] = { + 0.2277f, + 0.9322f, + 0.7883f, + 0.1584f, + 0.4751f, // First channel + 0.8497f, + 0.6706f, + 0.5062f, + 0.5672f, + 0.8714f, // Second channel + 0.0536f, + 0.7535f, + 0.6602f, + 0.9550f, + 0.0743f // Third channel + }; + + // Expected output shape: [2,3,4,5] + TensorShape exp_shape = {2, 3, 4, 5}; + float exp_d[] = {// Batch 0, Channel 0 + 0.4620f, + 1.1665f, + 1.0226f, + 0.3927f, + 0.7094f, + 0.8412f, + 1.5457f, + 1.4018f, + 0.7719f, + 1.0886f, + 0.3888f, + 1.0933f, + 0.9494f, + 0.3195f, + 0.6362f, + 0.8239f, + 1.5284f, + 1.3845f, + 0.7546f, + 1.0713f, + + // Batch 0, Channel 1 + 1.0840f, + 0.9049f, + 0.7405f, + 0.8015f, + 1.1057f, + 1.4632f, + 1.2841f, + 1.1197f, + 1.1807f, + 1.4849f, + 1.0108f, + 0.8317f, + 0.6672f, + 0.7283f, + 1.0325f, + 1.4459f, + 1.2668f, + 1.1024f, + 1.1634f, + 1.4676f, + + // Batch 0, Channel 2 + 0.2879f, + 0.9878f, + 0.8945f, + 1.1892f, + 0.3086f, + 0.6671f, + 1.3670f, + 1.2737f, + 1.5685f, + 0.6878f, + 0.2147f, + 0.9146f, + 0.8213f, + 1.1161f, + 0.2354f, + 0.6498f, + 1.3497f, + 1.2564f, + 1.5511f, + 0.6705f, + + // Batch 1, Channel 0 + 0.7958f, + 1.5003f, + 1.3564f, + 0.7265f, + 1.0432f, + 0.7512f, + 1.4557f, + 1.3118f, + 0.6819f, + 0.9986f, + 0.3496f, + 1.0540f, + 0.9101f, + 0.2802f, + 0.5969f, + 0.7141f, + 1.4186f, + 1.2747f, + 0.6448f, + 0.9615f, + + // Batch 1, Channel 1 + 1.4178f, + 1.2387f, + 1.0743f, + 1.1353f, + 1.4395f, + 
1.3732f, + 1.1941f, + 1.0297f, + 1.0907f, + 1.3949f, + 0.9715f, + 0.7924f, + 0.6281f, + 0.6890f, + 0.9932f, + 1.3361f, + 1.1570f, + 0.9926f, + 1.0536f, + 1.3578f, + + // Batch 1, Channel 2 + 0.6217f, + 1.3216f, + 1.2283f, + 1.5230f, + 0.6424f, + 0.5771f, + 1.2770f, + 1.1837f, + 1.4785f, + 0.5978f, + 0.1754f, + 0.8753f, + 0.7820f, + 1.0768f, + 0.1961f, + 0.5400f, + 1.2399f, + 1.1466f, + 1.4414f, + 0.5607f}; Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -223,13 +370,20 @@ void test_add_operator() { // Test Case 5: Higher Dimensional Tensors { const char* tc_name = "add_higher_dimensional_tensors"; - + // Sub-test 1: 3D tensor addition (same shape) { TensorShape shape_3d = {2, 3, 4}; - float d1[] = {0.3745f, 0.9507f, 0.7320f, 0.5987f, 0.1560f, 0.1560f, 0.0581f, 0.8662f, 0.6011f, 0.7081f, 0.0206f, 0.9699f, 0.8324f, 0.2123f, 0.1818f, 0.1834f, 0.3042f, 0.5248f, 0.4319f, 0.2912f, 0.6119f, 0.1395f, 0.2921f, 0.3664f}; - float d2[] = {0.4561f, 0.7852f, 0.1997f, 0.5142f, 0.5924f, 0.0465f, 0.6075f, 0.1705f, 0.0651f, 0.9489f, 0.9656f, 0.8084f, 0.3046f, 0.0977f, 0.6842f, 0.4402f, 0.1220f, 0.4952f, 0.0344f, 0.9093f, 0.2588f, 0.6625f, 0.3117f, 0.5201f}; - float exp_d[] = {0.8306f, 1.7359f, 0.9317f, 1.1129f, 0.7484f, 0.2025f, 0.6656f, 1.0367f, 0.6662f, 1.6570f, 0.9862f, 1.7783f, 1.1370f, 0.3100f, 0.8660f, 0.6236f, 0.4262f, 1.0200f, 0.4663f, 1.2005f, 0.8707f, 0.8020f, 0.6038f, 0.8865f}; + float d1[] = {0.3745f, 0.9507f, 0.7320f, 0.5987f, 0.1560f, 0.1560f, 0.0581f, 0.8662f, + 0.6011f, 0.7081f, 0.0206f, 0.9699f, 0.8324f, 0.2123f, 0.1818f, 0.1834f, + 0.3042f, 0.5248f, 0.4319f, 0.2912f, 0.6119f, 0.1395f, 0.2921f, 0.3664f}; + float d2[] = {0.4561f, 0.7852f, 0.1997f, 0.5142f, 0.5924f, 0.0465f, 0.6075f, 0.1705f, + 0.0651f, 0.9489f, 0.9656f, 0.8084f, 0.3046f, 0.0977f, 0.6842f, 0.4402f, + 0.1220f, 0.4952f, 0.0344f, 0.9093f, 0.2588f, 0.6625f, 0.3117f, 0.5201f}; + float exp_d[] = {0.8306f, 1.7359f, 0.9317f, 1.1129f, 0.7484f, 0.2025f, + 0.6656f, 1.0367f, 0.6662f, 1.6570f, 0.9862f, 1.7783f, + 1.1370f, 0.3100f, 0.8660f, 0.6236f, 0.4262f, 1.0200f, + 0.4663f, 1.2005f, 0.8707f, 0.8020f, 0.6038f, 0.8865f}; Tensor t1 = create_test_tensor(shape_3d, d1, false); Tensor t2 = create_test_tensor(shape_3d, d2, false); @@ -242,9 +396,38 @@ void test_add_operator() { // Sub-test 2: 4D tensor addition (same shape) { TensorShape shape_4d = {2, 3, 4, 5}; - float d1[] = {0.5467f, 0.1849f, 0.9696f, 0.7751f, 0.9395f, 0.8948f, 0.5979f, 0.9219f, 0.0885f, 0.1960f, 0.0452f, 0.3253f, 0.3887f, 0.2713f, 0.8287f, 0.3568f, 0.2809f, 0.5427f, 0.1409f, 0.8022f, 0.0746f, 0.9869f, 0.7722f, 0.1987f, 0.0055f, 0.8155f, 0.7069f, 0.7290f, 0.7713f, 0.0740f, 0.3585f, 0.1159f, 0.8631f, 0.6233f, 0.3309f, 0.0636f, 0.3110f, 0.3252f, 0.7296f, 0.6376f, 0.8872f, 0.4722f, 0.1196f, 0.7132f, 0.7608f, 0.5613f, 0.7710f, 0.4938f, 0.5227f, 0.4275f, 0.0254f, 0.1079f, 0.0314f, 0.6364f, 0.3144f, 0.5086f, 0.9076f, 0.2493f, 0.4104f, 0.7556f, 0.2288f, 0.0770f, 0.2898f, 0.1612f, 0.9297f, 0.8081f, 0.6334f, 0.8715f, 0.8037f, 0.1866f, 0.8926f, 0.5393f, 0.8074f, 0.8961f, 0.3180f, 0.1101f, 0.2279f, 0.4271f, 0.8180f, 0.8607f, 0.0070f, 0.5107f, 0.4174f, 0.2221f, 0.1199f, 0.3376f, 0.9429f, 0.3232f, 0.5188f, 0.7030f, 0.3636f, 0.9718f, 0.9624f, 0.2518f, 0.4972f, 0.3009f, 0.2848f, 0.0369f, 0.6096f, 0.5027f, 0.0515f, 0.2786f, 0.9083f, 0.2396f, 0.1449f, 0.4895f, 0.9857f, 0.2421f, 0.6721f, 0.7616f, 0.2376f, 0.7282f, 0.3678f, 0.6323f, 0.6335f, 0.5358f, 0.0903f, 0.8353f, 0.3208f, 0.1865f}; - float d2[] = {0.0408f, 0.5909f, 0.6776f, 
0.0166f, 0.5121f, 0.2265f, 0.6452f, 0.1744f, 0.6909f, 0.3867f, 0.9367f, 0.1375f, 0.3411f, 0.1135f, 0.9247f, 0.8773f, 0.2579f, 0.6600f, 0.8172f, 0.5552f, 0.5297f, 0.2419f, 0.0931f, 0.8972f, 0.9004f, 0.6331f, 0.3390f, 0.3492f, 0.7260f, 0.8971f, 0.8871f, 0.7799f, 0.6420f, 0.0841f, 0.1616f, 0.8986f, 0.6064f, 0.0092f, 0.1015f, 0.6635f, 0.0051f, 0.1608f, 0.5487f, 0.6919f, 0.6520f, 0.2243f, 0.7122f, 0.2372f, 0.3254f, 0.7465f, 0.6496f, 0.8492f, 0.6576f, 0.5683f, 0.0937f, 0.3677f, 0.2652f, 0.2440f, 0.9730f, 0.3931f, 0.8920f, 0.6311f, 0.7948f, 0.5026f, 0.5769f, 0.4925f, 0.1952f, 0.7225f, 0.2808f, 0.0243f, 0.6455f, 0.1771f, 0.9405f, 0.9539f, 0.9149f, 0.3702f, 0.0155f, 0.9283f, 0.4282f, 0.9667f, 0.9636f, 0.8530f, 0.2944f, 0.3851f, 0.8511f, 0.3169f, 0.1695f, 0.5568f, 0.9362f, 0.6960f, 0.5701f, 0.0972f, 0.6150f, 0.9901f, 0.1401f, 0.5183f, 0.8774f, 0.7408f, 0.6970f, 0.7025f, 0.3595f, 0.2936f, 0.8094f, 0.8101f, 0.8671f, 0.9132f, 0.5113f, 0.5015f, 0.7983f, 0.6500f, 0.7020f, 0.7958f, 0.8900f, 0.3380f, 0.3756f, 0.0940f, 0.5783f, 0.0359f, 0.4656f, 0.5426f}; - float exp_d[] = {0.5875f, 0.7758f, 1.6472f, 0.7917f, 1.4516f, 1.1213f, 1.2431f, 1.0963f, 0.7794f, + float d1[] = {0.5467f, 0.1849f, 0.9696f, 0.7751f, 0.9395f, 0.8948f, 0.5979f, 0.9219f, + 0.0885f, 0.1960f, 0.0452f, 0.3253f, 0.3887f, 0.2713f, 0.8287f, 0.3568f, + 0.2809f, 0.5427f, 0.1409f, 0.8022f, 0.0746f, 0.9869f, 0.7722f, 0.1987f, + 0.0055f, 0.8155f, 0.7069f, 0.7290f, 0.7713f, 0.0740f, 0.3585f, 0.1159f, + 0.8631f, 0.6233f, 0.3309f, 0.0636f, 0.3110f, 0.3252f, 0.7296f, 0.6376f, + 0.8872f, 0.4722f, 0.1196f, 0.7132f, 0.7608f, 0.5613f, 0.7710f, 0.4938f, + 0.5227f, 0.4275f, 0.0254f, 0.1079f, 0.0314f, 0.6364f, 0.3144f, 0.5086f, + 0.9076f, 0.2493f, 0.4104f, 0.7556f, 0.2288f, 0.0770f, 0.2898f, 0.1612f, + 0.9297f, 0.8081f, 0.6334f, 0.8715f, 0.8037f, 0.1866f, 0.8926f, 0.5393f, + 0.8074f, 0.8961f, 0.3180f, 0.1101f, 0.2279f, 0.4271f, 0.8180f, 0.8607f, + 0.0070f, 0.5107f, 0.4174f, 0.2221f, 0.1199f, 0.3376f, 0.9429f, 0.3232f, + 0.5188f, 0.7030f, 0.3636f, 0.9718f, 0.9624f, 0.2518f, 0.4972f, 0.3009f, + 0.2848f, 0.0369f, 0.6096f, 0.5027f, 0.0515f, 0.2786f, 0.9083f, 0.2396f, + 0.1449f, 0.4895f, 0.9857f, 0.2421f, 0.6721f, 0.7616f, 0.2376f, 0.7282f, + 0.3678f, 0.6323f, 0.6335f, 0.5358f, 0.0903f, 0.8353f, 0.3208f, 0.1865f}; + float d2[] = {0.0408f, 0.5909f, 0.6776f, 0.0166f, 0.5121f, 0.2265f, 0.6452f, 0.1744f, + 0.6909f, 0.3867f, 0.9367f, 0.1375f, 0.3411f, 0.1135f, 0.9247f, 0.8773f, + 0.2579f, 0.6600f, 0.8172f, 0.5552f, 0.5297f, 0.2419f, 0.0931f, 0.8972f, + 0.9004f, 0.6331f, 0.3390f, 0.3492f, 0.7260f, 0.8971f, 0.8871f, 0.7799f, + 0.6420f, 0.0841f, 0.1616f, 0.8986f, 0.6064f, 0.0092f, 0.1015f, 0.6635f, + 0.0051f, 0.1608f, 0.5487f, 0.6919f, 0.6520f, 0.2243f, 0.7122f, 0.2372f, + 0.3254f, 0.7465f, 0.6496f, 0.8492f, 0.6576f, 0.5683f, 0.0937f, 0.3677f, + 0.2652f, 0.2440f, 0.9730f, 0.3931f, 0.8920f, 0.6311f, 0.7948f, 0.5026f, + 0.5769f, 0.4925f, 0.1952f, 0.7225f, 0.2808f, 0.0243f, 0.6455f, 0.1771f, + 0.9405f, 0.9539f, 0.9149f, 0.3702f, 0.0155f, 0.9283f, 0.4282f, 0.9667f, + 0.9636f, 0.8530f, 0.2944f, 0.3851f, 0.8511f, 0.3169f, 0.1695f, 0.5568f, + 0.9362f, 0.6960f, 0.5701f, 0.0972f, 0.6150f, 0.9901f, 0.1401f, 0.5183f, + 0.8774f, 0.7408f, 0.6970f, 0.7025f, 0.3595f, 0.2936f, 0.8094f, 0.8101f, + 0.8671f, 0.9132f, 0.5113f, 0.5015f, 0.7983f, 0.6500f, 0.7020f, 0.7958f, + 0.8900f, 0.3380f, 0.3756f, 0.0940f, 0.5783f, 0.0359f, 0.4656f, 0.5426f}; + float exp_d[] = { + 0.5875f, 0.7758f, 1.6472f, 0.7917f, 1.4516f, 1.1213f, 1.2431f, 1.0963f, 0.7794f, 0.5827f, 0.9819f, 0.4628f, 0.7298f, 0.3848f, 
1.7534f, 1.2341f, 0.5388f, 1.2027f, 0.9581f, 1.3574f, 0.6043f, 1.2288f, 0.8653f, 1.0959f, 0.9059f, 1.4486f, 1.0459f, 1.0782f, 1.4973f, 0.9711f, 1.2456f, 0.8958f, 1.5051f, 0.7074f, 0.4925f, 0.9622f, @@ -271,7 +454,7 @@ void test_add_operator() { // Test Case 6: Gradient Propagation { const char* tc_name = "add_gradient_propagation"; - + // Sub-test 1: requires_grad flag propagation { TensorShape shape = {2, 2}; @@ -279,8 +462,8 @@ void test_add_operator() { float d2[] = {5.0f, 6.0f, 7.0f, 8.0f}; float exp_d[] = {6.0f, 8.0f, 10.0f, 12.0f}; - Tensor t1 = create_test_tensor(shape, d1, true); // requires_grad = true - Tensor t2 = create_test_tensor(shape, d2, false); // requires_grad = false + Tensor t1 = create_test_tensor(shape, d1, true); // requires_grad = true + Tensor t2 = create_test_tensor(shape, d2, false); // requires_grad = false Tensor expected_res = create_test_tensor(shape, exp_d, false); Tensor actual_res = Tensor_add(t1, t2); diff --git a/tests/Operator/test_div.c b/tests/Operator/test_div.c index 4614456..c8dbf9b 100644 --- a/tests/Operator/test_div.c +++ b/tests/Operator/test_div.c @@ -7,7 +7,7 @@ void test_div_operator() { const char* op_name = "div"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Scalar division (represented as 1x1 tensors) @@ -19,7 +19,7 @@ void test_div_operator() { { float d1[] = {6.754841f}; float d2[] = {0.612548f}; - float exp_d[] = {11.027441f}; // 6.754841 / 0.612548 = 11.027441 + float exp_d[] = {11.027441f}; // 6.754841 / 0.612548 = 11.027441 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -32,7 +32,7 @@ void test_div_operator() { { float d1[] = {7.628241f}; float d2[] = {3.545148f}; - float exp_d[] = {2.151741f}; // 7.628241 / 3.545148 = 2.151741 + float exp_d[] = {2.151741f}; // 7.628241 / 3.545148 = 2.151741 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -48,7 +48,9 @@ void test_div_operator() { TensorShape v_shape = {3}; float d1[] = {4.370861f, 9.556429f, 7.587945f}; float d2[] = {3.193963f, 1.202084f, 1.201975f}; - float exp_d[] = {1.368476f, 7.949885f, 6.312896f}; // [4.370861/3.193963, 9.556429/1.202084, 7.587945/1.201975] + float exp_d[] = {1.368476f, + 7.949885f, + 6.312896f}; // [4.370861/3.193963, 9.556429/1.202084, 7.587945/1.201975] Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor t2 = create_test_tensor(v_shape, d2, false); Tensor expected_res = create_test_tensor(v_shape, exp_d, false); @@ -63,7 +65,9 @@ void test_div_operator() { TensorShape m_shape = {2, 2}; float d1[] = {1.522753f, 8.795585f, 6.410035f, 7.372653f}; float d2[] = {0.592630f, 4.864594f, 4.245992f, 1.455526f}; - float exp_d[] = {2.569482f, 1.808082f, 1.509667f, 5.065284f}; // [1.522753/0.592630, 8.795585/4.864594, 6.410035/4.245992, 7.372653/1.455526] + float exp_d[] = + {2.569482f, 1.808082f, 1.509667f, 5.065284f}; // [1.522753/0.592630, 8.795585/4.864594, 6.410035/4.245992, + // 7.372653/1.455526] Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor t2 = create_test_tensor(m_shape, d2, false); Tensor expected_res = create_test_tensor(m_shape, exp_d, false); @@ -76,9 +80,32 @@ void test_div_operator() { { const char* tc_name = "div_3d_tensor"; TensorShape t_shape = {2, 2, 2}; - float d1[] = {2.636425f, 2.650641f, 3.738180f, 5.722808f, 4.887505f, 3.621062f, 
6.506676f, 2.255445f}; - float d2[] = {1.814651f, 2.148628f, 2.552315f, 4.033292f, 1.398532f, 2.814055f, 3.165866f, 0.709027f}; - float exp_d[] = {1.452855f, 1.233643f, 1.464623f, 1.418893f, 3.494740f, 1.286777f, 2.055260f, 3.181043f}; // [2.636425/1.814651, 2.650641/2.148628, 3.738180/2.552315, 5.722808/4.033292, 4.887505/1.398532, 3.621062/2.814055, 6.506676/3.165866, 2.255445/0.709027] + float d1[] = {2.636425f, + 2.650641f, + 3.738180f, + 5.722808f, + 4.887505f, + 3.621062f, + 6.506676f, + 2.255445f}; + float d2[] = {1.814651f, + 2.148628f, + 2.552315f, + 4.033292f, + 1.398532f, + 2.814055f, + 3.165866f, + 0.709027f}; + float exp_d[] = { + 1.452855f, + 1.233643f, + 1.464623f, + 1.418893f, + 3.494740f, + 1.286777f, + 2.055260f, + 3.181043f}; // [2.636425/1.814651, 2.650641/2.148628, 3.738180/2.552315, 5.722808/4.033292, + // 4.887505/1.398532, 3.621062/2.814055, 6.506676/3.165866, 2.255445/0.709027] Tensor t1 = create_test_tensor(t_shape, d1, false); Tensor t2 = create_test_tensor(t_shape, d2, false); Tensor expected_res = create_test_tensor(t_shape, exp_d, false); @@ -90,18 +117,21 @@ void test_div_operator() { // Test Case 5: Broadcasting (vector divided by scalar) { const char* tc_name = "div_broadcast_vector_scalar"; - TensorShape vec_shape = {3}; + TensorShape vec_shape = {3}; float vec_data[] = {6.467904f, 2.534717f, 1.585464f}; - TensorShape scalar_shape = {1}; + TensorShape scalar_shape = {1}; float scalar_data[] = {4.514808f}; - + // Expected: broadcast scalar to vector then apply division - TensorShape expected_shape = {3}; - float exp_data[] = {1.432598f, 0.561423f, 0.351170f}; // [6.467904/4.514808, 2.534717/4.514808, 1.585464/4.514808] + TensorShape expected_shape = {3}; + float exp_data[] = { + 1.432598f, + 0.561423f, + 0.351170f}; // [6.467904/4.514808, 2.534717/4.514808, 1.585464/4.514808] Tensor t_vec = create_test_tensor(vec_shape, vec_data, false); Tensor t_scalar = create_test_tensor(scalar_shape, scalar_data, false); - + Tensor actual_res = Tensor_div(t_vec, t_scalar); Tensor expected_res = create_test_tensor(expected_shape, exp_data, false); @@ -119,8 +149,12 @@ void test_div_operator() { Tensor actual_res = Tensor_div(t1, t2); // Check if the result is very large (greater than 1e10 in absolute value) - if (fabs(actual_res.data->flex[0]) < 1e10) { - fprintf(stderr, "Test %s:%d failed: expected a very large number, got %f\n", tc_name, 1, actual_res.data->flex[0]); + if(fabs(actual_res.data->flex[0]) < 1e10) { + fprintf(stderr, + "Test %s:%d failed: expected a very large number, got %f\n", + tc_name, + 1, + actual_res.data->flex[0]); abort(); } } @@ -131,7 +165,7 @@ void test_div_operator() { TensorShape s_shape = {1}; float d1[] = {-21.854305f}; float d2[] = {9.556429f}; - float exp_d[] = {-2.286869f}; // -21.854305 / 9.556429 = -2.286869 + float exp_d[] = {-2.286869f}; // -21.854305 / 9.556429 = -2.286869 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -146,7 +180,7 @@ void test_div_operator() { TensorShape s_shape = {1}; float d1[] = {-37.939727f}; float d2[] = {-6.387926f}; - float exp_d[] = {5.939287f}; // -37.939727 / -6.387926 = 5.939287 + float exp_d[] = {5.939287f}; // -37.939727 / -6.387926 = 5.939287 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -158,18 +192,18 @@ void test_div_operator() { // Test Case 9: 
Broadcasting (matrix divided by vector) { const char* tc_name = "div_broadcast_matrix_vector"; - TensorShape matrix_shape = {2, 3}; // 2x3 matrix + TensorShape matrix_shape = {2, 3}; // 2x3 matrix float matrix_data[] = {2.1854f, 4.7782f, 3.7940f, 3.1940f, 1.2021f, 1.2020f}; - TensorShape vector_shape = {3}; // vector with 3 elements + TensorShape vector_shape = {3}; // vector with 3 elements float vector_data[] = {0.6162f, 2.2324f, 1.7022f}; - + // Expected: broadcast vector to shape [2,3] then divide TensorShape expected_shape = {2, 3}; float exp_data[] = {3.546576f, 2.140387f, 2.228880f, 5.183382f, 0.538479f, 0.706145f}; Tensor t_matrix = create_test_tensor(matrix_shape, matrix_data, false); Tensor t_vector = create_test_tensor(vector_shape, vector_data, false); - + Tensor actual_res = Tensor_div(t_matrix, t_vector); Tensor expected_res = create_test_tensor(expected_shape, exp_data, false); @@ -181,7 +215,10 @@ void test_div_operator() { const char* tc_name = "div_identity"; TensorShape v_shape = {3}; float d[] = {7.340280f, -4.027929f, 1.906585f}; - float exp_d[] = {1.000000f, 1.000000f, 1.000000f}; // [7.340280/7.340280, -4.027929/-4.027929, 1.906585/1.906585] = [1, 1, 1] + float exp_d[] = { + 1.000000f, + 1.000000f, + 1.000000f}; // [7.340280/7.340280, -4.027929/-4.027929, 1.906585/1.906585] = [1, 1, 1] Tensor t = create_test_tensor(v_shape, d, false); Tensor expected_res = create_test_tensor(v_shape, exp_d, false); Tensor actual_res = Tensor_div(t, t); @@ -195,22 +232,48 @@ void test_div_operator() { TensorShape s_shape = {1}; float d1[] = {1.000000e+07f}; float d2[] = {1.000000e-07f}; - float exp_d[] = {1.000000e+14f}; // 1.000000e+07f / 1.000000e-07f = 1.000000e+14f + float exp_d[] = {1.000000e+14f}; // 1.000000e+07f / 1.000000e-07f = 1.000000e+14f Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_div(t1, t2); - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, 1e6f); // Using larger tolerance due to floating point precision + compare_tensors(&actual_res, + &expected_res, + op_name, + tc_name, + 1, + 1e6f); // Using larger tolerance due to floating point precision } // Test Case 12: 4D tensor division { const char* tc_name = "div_4d_tensor"; TensorShape t_shape = {2, 2, 2, 1}; - float d1[] = {64.100351f, 73.726532f, 11.852604f, 97.291887f, 84.919838f, 29.110520f, 26.364247f, 26.506406f}; - float d2[] = {7.476360f, 11.445616f, 9.775010f, 7.242125f, 13.013352f, 4.510889f, 7.258604f, 8.594513f}; - float exp_d[] = {8.573737f, 6.441465f, 1.212541f, 13.434164f, 6.525593f, 6.453388f, 3.632138f, 3.084108f}; // Element-wise division + float d1[] = {64.100351f, + 73.726532f, + 11.852604f, + 97.291887f, + 84.919838f, + 29.110520f, + 26.364247f, + 26.506406f}; + float d2[] = {7.476360f, + 11.445616f, + 9.775010f, + 7.242125f, + 13.013352f, + 4.510889f, + 7.258604f, + 8.594513f}; + float exp_d[] = {8.573737f, + 6.441465f, + 1.212541f, + 13.434164f, + 6.525593f, + 6.453388f, + 3.632138f, + 3.084108f}; // Element-wise division Tensor t1 = create_test_tensor(t_shape, d1, false); Tensor t2 = create_test_tensor(t_shape, d2, false); Tensor expected_res = create_test_tensor(t_shape, exp_d, false); diff --git a/tests/Operator/test_matmul.c b/tests/Operator/test_matmul.c index fe18898..4da838f 100644 --- a/tests/Operator/test_matmul.c +++ b/tests/Operator/test_matmul.c @@ -14,10 +14,13 @@ void test_matmul_operator() { // Test Case 1: Square 
Matrix Multiplication (2x2 * 2x2) { const char* tc_name = "matmul_square_2x2"; - TensorShape s1_shape = {2, 2}; float d1[] = {1.0f, 2.0f, 3.0f, 4.0f}; - TensorShape s2_shape = {2, 2}; float d2[] = {5.0f, 6.0f, 7.0f, 8.0f}; - TensorShape exp_shape = {2, 2}; float exp_d[] = {19.0f, 22.0f, 43.0f, 50.0f}; - + TensorShape s1_shape = {2, 2}; + float d1[] = {1.0f, 2.0f, 3.0f, 4.0f}; + TensorShape s2_shape = {2, 2}; + float d2[] = {5.0f, 6.0f, 7.0f, 8.0f}; + TensorShape exp_shape = {2, 2}; + float exp_d[] = {19.0f, 22.0f, 43.0f, 50.0f}; + Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); @@ -29,9 +32,12 @@ void test_matmul_operator() { // Test Case 2: Rectangular Matrix Multiplication (2x3 * 3x2) { const char* tc_name = "matmul_rect_2x3_3x2"; - TensorShape s1_shape = {2, 3}; float d1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - TensorShape s2_shape = {3, 2}; float d2[] = {7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; - TensorShape exp_shape = {2, 2}; float exp_d[] = {58.0f, 64.0f, 139.0f, 154.0f}; + TensorShape s1_shape = {2, 3}; + float d1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + TensorShape s2_shape = {3, 2}; + float d2[] = {7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f}; + TensorShape exp_shape = {2, 2}; + float exp_d[] = {58.0f, 64.0f, 139.0f, 154.0f}; Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -44,9 +50,12 @@ void test_matmul_operator() { // Test Case 3: Matrix-Vector (2x2 * 2x1) (Vector as column matrix) { const char* tc_name = "matmul_matrix_vector_2x2_2x1"; - TensorShape s_mat_shape = {2, 2}; float d_mat[] = {1.0f, 2.0f, 3.0f, 4.0f}; - TensorShape s_vec_shape = {2, 1}; float d_vec[] = {5.0f, 6.0f}; // Column vector - TensorShape exp_shape = {2, 1}; float exp_d[] = {17.0f, 39.0f}; + TensorShape s_mat_shape = {2, 2}; + float d_mat[] = {1.0f, 2.0f, 3.0f, 4.0f}; + TensorShape s_vec_shape = {2, 1}; + float d_vec[] = {5.0f, 6.0f}; // Column vector + TensorShape exp_shape = {2, 1}; + float exp_d[] = {17.0f, 39.0f}; Tensor t_mat = create_test_tensor(s_mat_shape, d_mat, false); Tensor t_vec = create_test_tensor(s_vec_shape, d_vec, false); @@ -59,9 +68,12 @@ void test_matmul_operator() { // Test Case 4: Vector-Matrix (1x2 * 2x2) (Vector as row matrix) { const char* tc_name = "matmul_vector_matrix_1x2_2x2"; - TensorShape s_vec_shape = {1, 2}; float d_vec[] = {1.0f, 2.0f}; // Row vector - TensorShape s_mat_shape = {2, 2}; float d_mat[] = {3.0f, 4.0f, 5.0f, 6.0f}; - TensorShape exp_shape = {1, 2}; float exp_d[] = {13.0f, 16.0f}; + TensorShape s_vec_shape = {1, 2}; + float d_vec[] = {1.0f, 2.0f}; // Row vector + TensorShape s_mat_shape = {2, 2}; + float d_mat[] = {3.0f, 4.0f, 5.0f, 6.0f}; + TensorShape exp_shape = {1, 2}; + float exp_d[] = {13.0f, 16.0f}; Tensor t_vec = create_test_tensor(s_vec_shape, d_vec, false); Tensor t_mat = create_test_tensor(s_mat_shape, d_mat, false); @@ -74,12 +86,15 @@ void test_matmul_operator() { // Test Case 5: Edge Matrix Sizes { const char* tc_name = "matmul_edge_matrix_sizes"; - + // Sub-test 1: 1x1 matrix multiplication { - TensorShape s1_shape = {1, 1}; float d1[] = {5.0f}; - TensorShape s2_shape = {1, 1}; float d2[] = {3.0f}; - TensorShape exp_shape = {1, 1}; float exp_d[] = {15.0f}; + TensorShape s1_shape = {1, 1}; + float d1[] = {5.0f}; + TensorShape s2_shape = {1, 1}; + float d2[] = {3.0f}; + TensorShape exp_shape = {1, 1}; + float exp_d[] = {15.0f}; Tensor t1 = create_test_tensor(s1_shape, 
d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -91,9 +106,12 @@ void test_matmul_operator() { // Sub-test 2: Single row/column matrices {1,5} * {5,1} -> {1,1} { - TensorShape s1_shape = {1, 5}; float d1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; - TensorShape s2_shape = {5, 1}; float d2[] = {2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - TensorShape exp_shape = {1, 1}; float exp_d[] = {70.0f}; // 1*2+2*3+3*4+4*5+5*6 = 70 + TensorShape s1_shape = {1, 5}; + float d1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + TensorShape s2_shape = {5, 1}; + float d2[] = {2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + TensorShape exp_shape = {1, 1}; + float exp_d[] = {70.0f}; // 1*2+2*3+3*4+4*5+5*6 = 70 Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -105,15 +123,17 @@ void test_matmul_operator() { // Sub-test 3: Single column/row matrices {5,1} * {1,5} -> {5,5} { - TensorShape s1_shape = {5, 1}; float d1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; - TensorShape s2_shape = {1, 5}; float d2[] = {2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - TensorShape exp_shape = {5, 5}; + TensorShape s1_shape = {5, 1}; + float d1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f}; + TensorShape s2_shape = {1, 5}; + float d2[] = {2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + TensorShape exp_shape = {5, 5}; float exp_d[] = { - 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, // 1*[2,3,4,5,6] - 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, // 2*[2,3,4,5,6] - 6.0f, 9.0f, 12.0f, 15.0f, 18.0f, // 3*[2,3,4,5,6] - 8.0f, 12.0f, 16.0f, 20.0f, 24.0f, // 4*[2,3,4,5,6] - 10.0f, 15.0f, 20.0f, 25.0f, 30.0f // 5*[2,3,4,5,6] + 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, // 1*[2,3,4,5,6] + 4.0f, 6.0f, 8.0f, 10.0f, 12.0f, // 2*[2,3,4,5,6] + 6.0f, 9.0f, 12.0f, 15.0f, 18.0f, // 3*[2,3,4,5,6] + 8.0f, 12.0f, 16.0f, 20.0f, 24.0f, // 4*[2,3,4,5,6] + 10.0f, 15.0f, 20.0f, 25.0f, 30.0f // 5*[2,3,4,5,6] }; Tensor t1 = create_test_tensor(s1_shape, d1, false); @@ -128,21 +148,21 @@ void test_matmul_operator() { // Test Case 6: Large Matrix Operations { const char* tc_name = "matmul_large_matrix_operations"; - + // Sub-test 1: Stress test with 10x10 matrices { TensorShape s1_shape = {10, 10}; TensorShape s2_shape = {10, 10}; TensorShape exp_shape = {10, 10}; - + float d1[100], d2[100], exp_d[100]; - + // Initialize matrices with simple patterns for(int i = 0; i < 100; i++) { - d1[i] = (float)(i % 10 + 1); // 1,2,3,...,10,1,2,3,... - d2[i] = (float)(i / 10 + 1); // 1,1,1,...,1,2,2,2,... + d1[i] = (float)(i % 10 + 1); // 1,2,3,...,10,1,2,3,... + d2[i] = (float)(i / 10 + 1); // 1,1,1,...,1,2,2,2,... 
}
-
+
            // Calculate expected result manually for verification
            for(int i = 0; i < 10; i++) {
                for(int j = 0; j < 10; j++) {
@@ -167,16 +187,19 @@ void test_matmul_operator() {
            TensorShape s1_shape = {20, 15};
            TensorShape s2_shape = {15, 25};
            TensorShape exp_shape = {20, 25};
-
+
            // Use stack arrays with reduced size
-            float d1[300]; // 20*15 = 300
-            float d2[375]; // 15*25 = 375
-            float exp_d[500]; // 20*25 = 500
-
+            float d1[300];    // 20*15 = 300
+            float d2[375];    // 15*25 = 375
+            float exp_d[500]; // 20*25 = 500
+
            // Initialize with simple patterns
-            for(int i = 0; i < 300; i++) d1[i] = 1.0f;
-            for(int i = 0; i < 375; i++) d2[i] = 1.0f;
-            for(int i = 0; i < 500; i++) exp_d[i] = 15.0f; // Each element should be 15*1*1 = 15
+            for(int i = 0; i < 300; i++)
+                d1[i] = 1.0f;
+            for(int i = 0; i < 375; i++)
+                d2[i] = 1.0f;
+            for(int i = 0; i < 500; i++)
+                exp_d[i] = 15.0f; // Each element should be 15*1*1 = 15

            Tensor t1 = create_test_tensor(s1_shape, d1, false);
            Tensor t2 = create_test_tensor(s2_shape, d2, false);
@@ -190,15 +213,42 @@ void test_matmul_operator() {
    // Test Case 7: Larger Matrix Multiplication
    {
        const char* tc_name = "matmul_larger_matrices";
-
+
        // Sub-test 1: Larger matrix multiplication (4x3 * 3x5)
        {
            TensorShape s1_shape = {4, 3};
-            float d1[] = {0.4008f, 0.5596f, 0.1552f, 0.1819f, 0.8618f, 0.9461f, 0.3733f, 0.2707f, 0.6440f, 0.4087f, 0.0254f, 0.1562f};
+            float d1[] = {0.4008f,
+                          0.5596f,
+                          0.1552f,
+                          0.1819f,
+                          0.8618f,
+                          0.9461f,
+                          0.3733f,
+                          0.2707f,
+                          0.6440f,
+                          0.4087f,
+                          0.0254f,
+                          0.1562f};
            TensorShape s2_shape = {3, 5};
-            float d2[] = {0.7160f, 0.6589f, 0.0271f, 0.2220f, 0.2311f, 0.6719f, 0.0197f, 0.1041f, 0.7999f, 0.1785f, 0.6527f, 0.2382f, 0.0994f, 0.2432f, 0.7223f};
+            float d2[] = {0.7160f,
+                          0.6589f,
+                          0.0271f,
+                          0.2220f,
+                          0.2311f,
+                          0.6719f,
+                          0.0197f,
+                          0.1041f,
+                          0.7999f,
+                          0.1785f,
+                          0.6527f,
+                          0.2382f,
+                          0.0994f,
+                          0.2432f,
+                          0.7223f};
            TensorShape exp_shape = {4, 5};
-            float exp_d[] = {0.7643f, 0.3121f, 0.0846f, 0.5744f, 0.3047f, 1.3269f, 0.3622f, 0.1887f, 0.9598f, 0.8793f, 0.8696f, 0.4047f, 0.1023f, 0.4560f, 0.5997f, 0.4116f, 0.3070f, 0.0292f, 0.1490f, 0.2118f};
+            float exp_d[] = {0.7643f, 0.3121f, 0.0846f, 0.5744f, 0.3047f, 1.3269f, 0.3622f,
+                             0.1887f, 0.9598f, 0.8793f, 0.8696f, 0.4047f, 0.1023f, 0.4560f,
+                             0.5997f, 0.4116f, 0.3070f, 0.0292f, 0.1490f, 0.2118f};

            Tensor t1 = create_test_tensor(s1_shape, d1, false);
            Tensor t2 = create_test_tensor(s2_shape, d2, false);
@@ -210,38 +260,49 @@ void test_matmul_operator() {
    }

    // TODO: Currently MatMul doesn't support batch matrix multiplication
-    //
+    //
    // // Test Case 8: Batch Matrix Multiplication
    // {
    //     const char* tc_name = "matmul_batch_matrices";

    // // Sub-test 1: Batch matrix multiplication (2x3x4 * 2x4x5)
    // {
    //     TensorShape s1_shape = {2, 3, 4};
-    //     float d1[] = {0.9256f, 0.4219f, 0.3916f, 0.6438f, 0.8790f, 0.0543f, 0.0463f, 0.5632f, 0.7813f, 0.9841f, 0.7979f, 0.8884f, 0.5976f, 0.0739f, 0.8306f, 0.0435f, 0.2653f, 0.7424f, 0.9176f, 0.6326f, 0.2545f, 0.6777f, 0.9430f, 0.4921f};
-    //     TensorShape s2_shape = {2, 4, 5};
-    //     float d2[] = {0.1146f, 0.8401f, 0.0189f, 0.9417f, 0.9551f, 0.3073f, 0.5162f, 0.6919f, 0.3872f, 0.9831f, 0.8261f, 0.6104f, 0.1850f, 0.4844f, 0.0732f, 0.8003f, 0.3244f, 0.6337f, 0.4984f, 0.1917f, 0.5972f, 0.8280f, 0.1163f, 0.1445f, 0.5281f, 0.3753f, 0.7377f, 0.0097f, 0.0460f, 0.8825f, 0.1283f, 0.3434f, 0.9592f, 0.2614f, 0.8935f, 0.9233f, 0.1056f, 0.1819f, 0.9243f, 0.1263f};
-    //     TensorShape exp_shape = {2, 3, 5};
-    //     float exp_d[] = {1.0745f, 1.4433f, 0.7899f, 1.5456f, 1.4509f, 0.6064f,
0.9774f, 0.4197f, 1.1520f, 1.0043f, 1.7620f, 1.9396f, 1.4062f, 1.9461f, 1.9424f, 0.5314f, 0.8391f, 0.8748f, 0.3471f, 1.1284f, 1.1388f, 1.1492f, 1.0333f, 0.8970f, 1.6950f, 0.9817f, 1.0865f, 1.0302f, 0.7693f, 1.6373f}; + // float d1[] = {0.9256f, 0.4219f, 0.3916f, 0.6438f, 0.8790f, 0.0543f, 0.0463f, 0.5632f, + // 0.7813f, 0.9841f, 0.7979f, 0.8884f, 0.5976f, 0.0739f, 0.8306f, 0.0435f, 0.2653f, + // 0.7424f, 0.9176f, 0.6326f, 0.2545f, 0.6777f, 0.9430f, 0.4921f}; TensorShape s2_shape + // = {2, 4, 5}; float d2[] = {0.1146f, 0.8401f, 0.0189f, 0.9417f, 0.9551f, 0.3073f, + // 0.5162f, 0.6919f, 0.3872f, 0.9831f, 0.8261f, 0.6104f, 0.1850f, 0.4844f, 0.0732f, + // 0.8003f, 0.3244f, 0.6337f, 0.4984f, 0.1917f, 0.5972f, 0.8280f, 0.1163f, 0.1445f, + // 0.5281f, 0.3753f, 0.7377f, 0.0097f, 0.0460f, 0.8825f, 0.1283f, 0.3434f, 0.9592f, + // 0.2614f, 0.8935f, 0.9233f, 0.1056f, 0.1819f, 0.9243f, 0.1263f}; TensorShape exp_shape + // = {2, 3, 5}; float exp_d[] = {1.0745f, 1.4433f, 0.7899f, 1.5456f, 1.4509f, 0.6064f, + // 0.9774f, 0.4197f, 1.1520f, 1.0043f, 1.7620f, 1.9396f, 1.4062f, 1.9461f, 1.9424f, + // 0.5314f, 0.8391f, 0.8748f, 0.3471f, 1.1284f, 1.1388f, 1.1492f, 1.0333f, + // 0.8970f, 1.6950f, 0.9817f, 1.0865f, 1.0302f, 0.7693f, 1.6373f}; // Tensor t1 = create_test_tensor(s1_shape, d1, false); // Tensor t2 = create_test_tensor(s2_shape, d2, false); // Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); // Tensor actual_res = Tensor_matmul(t1, t2); - // compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + // compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, + // TEST_FLOAT_TOLERANCE); // } // } // Test Case 9: Special Matrix Content { const char* tc_name = "matmul_special_matrix_content"; - + // Sub-test 1: Matrix with zeros { - TensorShape s1_shape = {2, 2}; float d1[] = {0.0f, 0.0f, 1.0f, 1.0f}; - TensorShape s2_shape = {2, 2}; float d2[] = {1.0f, 2.0f, 3.0f, 4.0f}; - TensorShape exp_shape = {2, 2}; float exp_d[] = {0.0f, 0.0f, 4.0f, 6.0f}; + TensorShape s1_shape = {2, 2}; + float d1[] = {0.0f, 0.0f, 1.0f, 1.0f}; + TensorShape s2_shape = {2, 2}; + float d2[] = {1.0f, 2.0f, 3.0f, 4.0f}; + TensorShape exp_shape = {2, 2}; + float exp_d[] = {0.0f, 0.0f, 4.0f, 6.0f}; Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -252,11 +313,11 @@ void test_matmul_operator() { } } // TODO: Problem in Matmul Broadcasting - + // // Test Case 10: Broadcasting // { // const char* tc_name = "matmul_broadcasting"; - + // // Sub-test 1: Simple matrix multiplication {4,5} @ {5,3} -> {4,3} // { // TensorShape s1_shape = {4, 5}; @@ -266,7 +327,7 @@ void test_matmul_operator() { // 0.0206f, 0.9699f, 0.8324f, 0.2123f, 0.1818f, // Row 2 // 0.1834f, 0.3042f, 0.5248f, 0.4319f, 0.2912f, // Row 3 // }; - + // TensorShape s2_shape = {5, 3}; // float d2[] = { // 0.6119f, 0.1395f, 0.2921f, // Row 0 @@ -275,7 +336,7 @@ void test_matmul_operator() { // 0.0465f, 0.6075f, 0.1705f, // Row 3 // 0.0651f, 0.9489f, 0.9656f, // Row 4 // }; - + // TensorShape exp_shape = {4, 3}; // float exp_d[] = { // 0.7616f, 1.3740f, 1.5423f, // Row 0 @@ -289,7 +350,8 @@ void test_matmul_operator() { // Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); // Tensor actual_res = Tensor_matmul(t1, t2); - // compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + // compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, + // TEST_FLOAT_TOLERANCE); // } // // Sub-test 2: 3D Broadcasting 
{1,3,2} @ {2,2,4} -> {2,3,4} @@ -300,7 +362,7 @@ void test_matmul_operator() { // 0.0977f, 0.6842f, // [0,1,:] // 0.4402f, 0.1220f, // [0,2,:] // }; - + // TensorShape s2_shape = {2, 2, 4}; // float d2[] = { // // Batch 0 @@ -310,7 +372,7 @@ void test_matmul_operator() { // 0.1849f, 0.9696f, 0.7751f, 0.9395f, // [1,0,:] // 0.8948f, 0.5979f, 0.9219f, 0.0885f, // [1,1,:] // }; - + // TensorShape exp_shape = {2, 3, 4}; // float exp_d[] = { // // Batch 0 @@ -327,7 +389,8 @@ void test_matmul_operator() { // Tensor t2 = create_test_tensor(s2_shape, d2, false); // Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); // Tensor actual_res = Tensor_matmul(t1, t2); - // compare_tensors(&actual_res, &expected_res, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + // compare_tensors(&actual_res, &expected_res, op_name, tc_name, 2, + // TEST_FLOAT_TOLERANCE); // } // // Sub-test 3: 4D Broadcasting {2,1,2,3} @ {1,1,3,2} -> {2,1,2,2} @@ -341,14 +404,14 @@ void test_matmul_operator() { // 0.3568f, 0.2809f, 0.5427f, // [1,0,0,:] // 0.1409f, 0.8022f, 0.0746f, // [1,0,1,:] // }; - + // TensorShape s2_shape = {1, 1, 3, 2}; // float d2[] = { // 0.9869f, 0.7722f, // [0,0,0,:] // 0.1987f, 0.0055f, // [0,0,1,:] // 0.8155f, 0.7069f, // [0,0,2,:] // }; - + // TensorShape exp_shape = {2, 1, 2, 2}; // float exp_d[] = { // // Batch 0 @@ -363,8 +426,9 @@ void test_matmul_operator() { // Tensor t2 = create_test_tensor(s2_shape, d2, false); // Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); // Tensor actual_res = Tensor_matmul(t1, t2); - // compare_tensors(&actual_res, &expected_res, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); - // } + // compare_tensors(&actual_res, &expected_res, op_name, tc_name, 3, + // TEST_FLOAT_TOLERANCE); + // } // } cten_free(pool_id); diff --git a/tests/Operator/test_max.c b/tests/Operator/test_max.c index dfddec7..b63dfb4 100644 --- a/tests/Operator/test_max.c +++ b/tests/Operator/test_max.c @@ -7,7 +7,7 @@ void test_max_operator() { const char* op_name = "max"; PoolId pool_id = 0; - + cten_begin_malloc(pool_id); // Test Case 1: Max of a scalar tensor @@ -27,7 +27,7 @@ void test_max_operator() { { const char* tc_name = "max_vector"; TensorShape v_shape = {5}; - float d1[] = {8.7458f, 4.147f, 0.9326f, 7.1226f, 2.5115f}; + float d1[] = {8.7458f, 4.147f, 0.9326f, 7.1226f, 2.5115f}; float exp_d[] = {8.7458f}; TensorShape exp_shape = {1, 0, 0, 0}; Tensor t1 = create_test_tensor(v_shape, d1, false); @@ -41,7 +41,7 @@ void test_max_operator() { { const char* tc_name = "max_matrix"; TensorShape m_shape = {2, 3}; - float d1[] = {7.6507f, -6.481f, 2.9918f, -6.1952f, -9.0693f, 4.4308f}; + float d1[] = {7.6507f, -6.481f, 2.9918f, -6.1952f, -9.0693f, 4.4308f}; float exp_d[] = {7.6507f}; TensorShape exp_shape = {1, 0, 0, 0}; Tensor t1 = create_test_tensor(m_shape, d1, false); @@ -55,7 +55,7 @@ void test_max_operator() { { const char* tc_name = "max_vector_negative"; TensorShape v_shape = {4}; - float d1[] = {-8.687f, -0.9767f, -9.2835f, -6.0498f}; + float d1[] = {-8.687f, -0.9767f, -9.2835f, -6.0498f}; float exp_d[] = {-0.9767f}; TensorShape exp_shape = {1, 0, 0, 0}; Tensor t1 = create_test_tensor(v_shape, d1, false); @@ -69,7 +69,7 @@ void test_max_operator() { { const char* tc_name = "max_duplicate"; TensorShape v_shape = {5}; - float d1[] = {6.1886f, -9.87f, 5.8818f, 5.8818f, 6.1886f}; + float d1[] = {6.1886f, -9.87f, 5.8818f, 5.8818f, 6.1886f}; float exp_d[] = {6.1886f}; TensorShape exp_shape = {1, 0, 0, 0}; Tensor t1 = create_test_tensor(v_shape, d1, false); @@ -83,7 +83,7 
@@ void test_max_operator() { { const char* tc_name = "max_3d_tensor"; TensorShape t_shape = {2, 2, 2}; - float d1[] = {-6.8904f, 9.1443f, -3.2681f, -8.1451f, -8.0657f, 6.9499f, 2.0745f, 6.1426f}; + float d1[] = {-6.8904f, 9.1443f, -3.2681f, -8.1451f, -8.0657f, 6.9499f, 2.0745f, 6.1426f}; float exp_d[] = {9.1443f}; TensorShape exp_shape = {1, 0, 0, 0}; Tensor t1 = create_test_tensor(t_shape, d1, false); @@ -93,7 +93,8 @@ void test_max_operator() { compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - // Test Case 7: Max over a specific dimension of a matrix (dim=0) (here dim=-2 is used to represent dim=0) + // Test Case 7: Max over a specific dimension of a matrix (dim=0) (here dim=-2 is used to + // represent dim=0) { const char* tc_name = "max_matrix_dim_0"; TensorShape m_shape = {2, 3}; @@ -105,11 +106,16 @@ void test_max_operator() { Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false); - + TensorMaxMinResult actual = Tensor_max(t1, -2); compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&actual.indices, + &expected_indices, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Test Case 8: Max over a specific dimension of a matrix (dim=1) @@ -124,11 +130,16 @@ void test_max_operator() { Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false); - + TensorMaxMinResult actual = Tensor_max(t1, 1); compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&actual.indices, + &expected_indices, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Test Case 9: Max over a dimension with duplicate max values (should return first index) @@ -143,11 +154,16 @@ void test_max_operator() { Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false); - + TensorMaxMinResult actual = Tensor_max(t1, -1); - + compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&actual.indices, + &expected_indices, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } cten_free(pool_id); diff --git a/tests/Operator/test_mean.c b/tests/Operator/test_mean.c index 2152557..d9dcb74 100644 --- a/tests/Operator/test_mean.c +++ b/tests/Operator/test_mean.c @@ -6,16 +6,16 @@ void test_mean_operator() { const char* op_name = "mean"; - PoolId pool_id = 3; + PoolId pool_id = 3; cten_begin_malloc(pool_id); - TensorShape exp_shape_scalar = {1}; + TensorShape exp_shape_scalar = {1}; // Test Case 1: Mean of a scalar tensor { const char* tc_name = "mean_scalar"; TensorShape s_shape = {1}; - float d1[] = {5.0f}; + float d1[] = {5.0f}; float exp_d[] = {5.0f}; Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape_scalar, exp_d, false); @@ -28,7 +28,7 @@ void test_mean_operator() { { const char* tc_name 
= "mean_vector_1D"; TensorShape v_shape = {3}; - float d1[] = {1.0f, 2.0f, 3.0f}; // Sum = 6, Count = 3, Mean = 2 + float d1[] = {1.0f, 2.0f, 3.0f}; // Sum = 6, Count = 3, Mean = 2 float exp_d[] = {2.0f}; Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape_scalar, exp_d, false); @@ -41,7 +41,7 @@ void test_mean_operator() { { const char* tc_name = "mean_matrix_2x2"; TensorShape m_shape = {2, 2}; - float d1[] = {1.0f, 2.0f, 3.0f, 4.0f}; // Sum = 10, Count = 4, Mean = 2.5 + float d1[] = {1.0f, 2.0f, 3.0f, 4.0f}; // Sum = 10, Count = 4, Mean = 2.5 float exp_d[] = {2.5f}; Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape_scalar, exp_d, false); @@ -49,12 +49,12 @@ void test_mean_operator() { compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 4: Mean of a matrix with negative numbers { const char* tc_name = "mean_matrix_2x2_negative"; TensorShape m_shape = {2, 2}; - float d1[] = {-1.0f, 2.0f, -3.0f, 4.0f}; // Sum = 2, Count = 4, Mean = 0.5 + float d1[] = {-1.0f, 2.0f, -3.0f, 4.0f}; // Sum = 2, Count = 4, Mean = 0.5 float exp_d[] = {0.5f}; Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape_scalar, exp_d, false); @@ -67,7 +67,7 @@ void test_mean_operator() { { const char* tc_name = "mean_vector_all_zeros"; TensorShape v_shape = {4}; - float d1[] = {0.0f, 0.0f, 0.0f, 0.0f}; // Sum = 0, Count = 4, Mean = 0 + float d1[] = {0.0f, 0.0f, 0.0f, 0.0f}; // Sum = 0, Count = 4, Mean = 0 float exp_d[] = {0.0f}; Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape_scalar, exp_d, false); @@ -79,16 +79,17 @@ void test_mean_operator() { // Test Case 6: Large Tensor Reductions { const char* tc_name = "mean_large_tensor_reductions"; - + // Sub-test 1: Large tensor mean (1,000 elements) { TensorShape large_shape = {1000}; float large_data[1000]; - for(int i = 0; i < 1000; i++) large_data[i] = 1.0f; - - float exp_d[] = {1.0f}; // Mean of 1000 ones = 1.0 + for(int i = 0; i < 1000; i++) + large_data[i] = 1.0f; + + float exp_d[] = {1.0f}; // Mean of 1000 ones = 1.0 TensorShape exp_shape = {1}; - + Tensor t1 = create_test_tensor(large_shape, large_data, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor actual_res = Tensor_mean(t1); @@ -100,11 +101,12 @@ void test_mean_operator() { { TensorShape stress_shape = {5000}; float stress_data[5000]; - for(int i = 0; i < 5000; i++) stress_data[i] = 2.0f; - - float exp_d[] = {2.0f}; // Mean of 5000 twos = 2.0 + for(int i = 0; i < 5000; i++) + stress_data[i] = 2.0f; + + float exp_d[] = {2.0f}; // Mean of 5000 twos = 2.0 TensorShape exp_shape = {1}; - + Tensor t1 = create_test_tensor(stress_shape, stress_data, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor actual_res = Tensor_mean(t1); @@ -113,18 +115,38 @@ void test_mean_operator() { } } - // Test Case 7: Higher Dimensional Tensors { const char* tc_name = "mean_higher_dimensional_tensors"; - + // Sub-test 1: 3D tensor mean along axis 1 (3x4x5 -> 3x5) { TensorShape shape_3d = {3, 4, 5}; - float d1[] = {0.0761f, 0.8512f, 0.4951f, 0.4806f, 0.5924f, 0.8247f, 0.3478f, 0.6780f, 0.5657f, 0.2670f, 0.8786f, 0.7974f, 0.6585f, 0.8506f, 0.8673f, 0.7084f, 0.8370f, 0.6975f, 0.6801f, 0.6186f, 0.7527f, 0.1586f, 0.8809f, 0.8718f, 0.0292f, 0.8258f, 0.1289f, 0.3351f, 0.7435f, 0.1608f, 0.8180f, 0.8321f, 0.5075f, 0.0064f, 
0.2870f, 0.6169f, 0.9812f, 0.6318f, 0.2598f, 0.6340f, 0.5400f, 0.7798f, 0.1070f, 0.7610f, 0.5413f, 0.9630f, 0.3419f, 0.6326f, 0.9320f, 0.1025f, 0.9372f, 0.6879f, 0.0678f, 0.3010f, 0.7082f, 0.0674f, 0.5822f, 0.3459f, 0.6209f, 0.0457f}; + float d1[] = {0.0761f, 0.8512f, 0.4951f, 0.4806f, 0.5924f, 0.8247f, 0.3478f, 0.6780f, + 0.5657f, 0.2670f, 0.8786f, 0.7974f, 0.6585f, 0.8506f, 0.8673f, 0.7084f, + 0.8370f, 0.6975f, 0.6801f, 0.6186f, 0.7527f, 0.1586f, 0.8809f, 0.8718f, + 0.0292f, 0.8258f, 0.1289f, 0.3351f, 0.7435f, 0.1608f, 0.8180f, 0.8321f, + 0.5075f, 0.0064f, 0.2870f, 0.6169f, 0.9812f, 0.6318f, 0.2598f, 0.6340f, + 0.5400f, 0.7798f, 0.1070f, 0.7610f, 0.5413f, 0.9630f, 0.3419f, 0.6326f, + 0.9320f, 0.1025f, 0.9372f, 0.6879f, 0.0678f, 0.3010f, 0.7082f, 0.0674f, + 0.5822f, 0.3459f, 0.6209f, 0.0457f}; TensorShape exp_shape = {3, 5}; - float exp_d[] = {0.6220f, 0.7084f, 0.6323f, 0.6443f, 0.5863f, 0.7534f, 0.5252f, 0.5888f, 0.4704f, 0.2778f, 0.6269f, 0.5979f, 0.2883f, 0.6537f, 0.3494f}; - + float exp_d[] = {0.6220f, + 0.7084f, + 0.6323f, + 0.6443f, + 0.5863f, + 0.7534f, + 0.5252f, + 0.5888f, + 0.4704f, + 0.2778f, + 0.6269f, + 0.5979f, + 0.2883f, + 0.6537f, + 0.3494f}; + Tensor t1 = create_test_tensor(shape_3d, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor actual_res = Tensor_mean(t1, 1); @@ -136,54 +158,70 @@ void test_mean_operator() { // Testing negative index case so 2 becomes -2 { TensorShape shape_4d = {2, 3, 4, 5}; - float d1[] = {0.8715f, 0.9735f, 0.9689f, 0.7497f, 0.1301f, 0.7583f, 0.0246f, 0.0221f, 0.3236f, 0.4886f, 0.7704f, 0.6833f, 0.4459f, 0.2736f, 0.9971f, 0.4262f, 0.4514f, 0.1636f, 0.7948f, 0.6937f, 0.2208f, 0.0824f, 0.6805f, 0.6545f, 0.2733f, 0.9509f, 0.1511f, 0.4323f, 0.9436f, 0.4197f, 0.6385f, 0.3976f, 0.2742f, 0.9840f, 0.4093f, 0.8941f, 0.2300f, 0.2131f, 0.0311f, 0.6517f, 0.3685f, 0.8644f, 0.4732f, 0.9682f, 0.1855f, 0.8686f, 0.7766f, 0.7709f, 0.8448f, 0.7610f, 0.6262f, 0.1312f, 0.0325f, 0.9208f, 0.6167f, 0.7965f, 0.4815f, 0.1173f, 0.1252f, 0.6856f, 0.4303f, 0.2005f, 0.4916f, 0.0642f, 0.5820f, 0.2690f, 0.7976f, 0.3104f, 0.4552f, 0.0116f, 0.0724f, 0.3925f, 0.4799f, 0.6000f, 0.2917f, 0.6950f, 0.8601f, 0.7799f, 0.0396f, 0.4805f, 0.1049f, 0.2420f, 0.9867f, 0.1425f, 0.4989f, 0.6182f, 0.7025f, 0.5596f, 0.0098f, 0.3265f, 0.5177f, 0.0879f, 0.3506f, 0.0332f, 0.0786f, 0.3969f, 0.1327f, 0.5675f, 0.6895f, 0.8006f, 0.2002f, 0.1675f, 0.1046f, 0.6364f, 0.7065f, 0.0316f, 0.9362f, 0.0520f, 0.5413f, 0.7091f, 0.8710f, 0.7141f, 0.8017f, 0.3395f, 0.8148f, 0.0801f, 0.8948f, 0.5476f, 0.8173f, 0.4523f}; + float d1[] = {0.8715f, 0.9735f, 0.9689f, 0.7497f, 0.1301f, 0.7583f, 0.0246f, 0.0221f, + 0.3236f, 0.4886f, 0.7704f, 0.6833f, 0.4459f, 0.2736f, 0.9971f, 0.4262f, + 0.4514f, 0.1636f, 0.7948f, 0.6937f, 0.2208f, 0.0824f, 0.6805f, 0.6545f, + 0.2733f, 0.9509f, 0.1511f, 0.4323f, 0.9436f, 0.4197f, 0.6385f, 0.3976f, + 0.2742f, 0.9840f, 0.4093f, 0.8941f, 0.2300f, 0.2131f, 0.0311f, 0.6517f, + 0.3685f, 0.8644f, 0.4732f, 0.9682f, 0.1855f, 0.8686f, 0.7766f, 0.7709f, + 0.8448f, 0.7610f, 0.6262f, 0.1312f, 0.0325f, 0.9208f, 0.6167f, 0.7965f, + 0.4815f, 0.1173f, 0.1252f, 0.6856f, 0.4303f, 0.2005f, 0.4916f, 0.0642f, + 0.5820f, 0.2690f, 0.7976f, 0.3104f, 0.4552f, 0.0116f, 0.0724f, 0.3925f, + 0.4799f, 0.6000f, 0.2917f, 0.6950f, 0.8601f, 0.7799f, 0.0396f, 0.4805f, + 0.1049f, 0.2420f, 0.9867f, 0.1425f, 0.4989f, 0.6182f, 0.7025f, 0.5596f, + 0.0098f, 0.3265f, 0.5177f, 0.0879f, 0.3506f, 0.0332f, 0.0786f, 0.3969f, + 0.1327f, 0.5675f, 0.6895f, 0.8006f, 0.2002f, 0.1675f, 0.1046f, 0.6364f, + 
0.7065f, 0.0316f, 0.9362f, 0.0520f, 0.5413f, 0.7091f, 0.8710f, 0.7141f,
+                          0.8017f, 0.3395f, 0.8148f, 0.0801f, 0.8948f, 0.5476f, 0.8173f, 0.4523f};
            TensorShape exp_shape = {2, 3, 5};
-            float exp_d[] = {0.7066f, 0.5332f, 0.4001f, 0.5354f, 0.5774f, 0.6761f, 0.2152f, 0.4000f, 0.6533f, 0.4385f, 0.6650f, 0.5634f, 0.3485f, 0.7148f, 0.5622f, 0.3667f, 0.5627f, 0.5154f, 0.2898f, 0.3414f, 0.4094f, 0.2913f, 0.6161f, 0.2187f, 0.4261f, 0.2957f, 0.6781f, 0.3765f, 0.5836f, 0.6707f};
-
+            float exp_d[] = {0.7066f, 0.5332f, 0.4001f, 0.5354f, 0.5774f, 0.6761f, 0.2152f, 0.4000f,
+                             0.6533f, 0.4385f, 0.6650f, 0.5634f, 0.3485f, 0.7148f, 0.5622f, 0.3667f,
+                             0.5627f, 0.5154f, 0.2898f, 0.3414f, 0.4094f, 0.2913f, 0.6161f, 0.2187f,
+                             0.4261f, 0.2957f, 0.6781f, 0.3765f, 0.5836f, 0.6707f};
+
            Tensor t1 = create_test_tensor(shape_4d, d1, false);
            Tensor expected_res = create_test_tensor(exp_shape, exp_d, false);
-            Tensor actual_res = Tensor_mean(t1, -2); // Testing negative index case so 2 becomes -2
+            Tensor actual_res = Tensor_mean(t1, -2); // Testing negative index case so 2 becomes -2

            compare_tensors(&actual_res, &expected_res, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE);
        }

        // Sub-test 3: 4D tensor mean along axis 1 and axis 2 (2x3x4x5 -> 2x5)
-        // Here I want to test the negative index case, Here first we do mean along axis 2 (which is -2)
-        // Now we become (2x3x5) and then we do mean along axis 1 (which is -2) again
-        // Now we become (2x5)
+        // Here we test the negative index case: first we take the mean along axis 2 (passed as
+        // -2), which reduces the shape to (2x3x5); then we take the mean along axis 1 (again
+        // passed as -2), which reduces it to (2x5).
        {
            TensorShape shape_4d = {2, 3, 4, 5};
            float d1[120] = {
-                0.374540f, 0.950714f, 0.731994f, 0.598658f, 0.156019f,
-                0.155995f, 0.058084f, 0.866176f, 0.601115f, 0.708073f,
-                0.020584f, 0.969910f, 0.832443f, 0.212339f, 0.181825f,
-                0.183405f, 0.304242f, 0.524756f, 0.431945f, 0.291229f,
-                0.611853f, 0.139494f, 0.292145f, 0.366362f, 0.456070f,
-                0.785176f, 0.199674f, 0.514234f, 0.592415f, 0.046450f,
-                0.607545f, 0.170524f, 0.065052f, 0.948886f, 0.965632f,
-                0.808397f, 0.304614f, 0.097672f, 0.684233f, 0.440152f,
-                0.122038f, 0.495177f, 0.034389f, 0.909320f, 0.258780f,
-                0.662522f, 0.311711f, 0.520068f, 0.546710f, 0.184854f,
-                0.969585f, 0.775133f, 0.939499f, 0.894827f, 0.597900f,
-                0.921874f, 0.088493f, 0.195983f, 0.045227f, 0.325330f,
-                0.388677f, 0.271349f, 0.828738f, 0.356753f, 0.280935f,
-                0.542696f, 0.140924f, 0.802197f, 0.074551f, 0.986887f,
-                0.772245f, 0.198716f, 0.005522f, 0.815461f, 0.706857f,
-                0.729007f, 0.771270f, 0.074045f, 0.358466f, 0.115869f,
-                0.863103f, 0.623298f, 0.330898f, 0.063558f, 0.310982f,
-                0.325183f, 0.729606f, 0.637557f, 0.887213f, 0.472215f,
-                0.119594f, 0.713245f, 0.760785f, 0.561277f, 0.770967f,
-                0.493796f, 0.522733f, 0.427541f, 0.025419f, 0.107891f,
-                0.031429f, 0.636410f, 0.314356f, 0.508571f, 0.907566f,
-                0.249292f, 0.410383f, 0.755551f, 0.228798f, 0.076980f,
-                0.289751f, 0.161221f, 0.929698f, 0.808120f, 0.633404f,
-                0.871461f, 0.803672f, 0.186570f, 0.892559f, 0.539342f
-            };
+                0.374540f, 0.950714f, 0.731994f, 0.598658f, 0.156019f, 0.155995f, 0.058084f,
+                0.866176f, 0.601115f, 0.708073f, 0.020584f, 0.969910f, 0.832443f, 0.212339f,
+                0.181825f, 0.183405f, 0.304242f, 0.524756f, 0.431945f, 0.291229f, 0.611853f,
+                0.139494f, 0.292145f, 0.366362f, 0.456070f, 0.785176f, 0.199674f, 0.514234f,
+                0.592415f, 0.046450f, 0.607545f, 0.170524f, 0.065052f, 0.948886f, 0.965632f,
+                0.808397f, 0.304614f, 0.097672f, 0.684233f, 0.440152f, 0.122038f, 0.495177f,
0.034389f, 0.909320f, 0.258780f, 0.662522f, 0.311711f, 0.520068f, 0.546710f, + 0.184854f, 0.969585f, 0.775133f, 0.939499f, 0.894827f, 0.597900f, 0.921874f, + 0.088493f, 0.195983f, 0.045227f, 0.325330f, 0.388677f, 0.271349f, 0.828738f, + 0.356753f, 0.280935f, 0.542696f, 0.140924f, 0.802197f, 0.074551f, 0.986887f, + 0.772245f, 0.198716f, 0.005522f, 0.815461f, 0.706857f, 0.729007f, 0.771270f, + 0.074045f, 0.358466f, 0.115869f, 0.863103f, 0.623298f, 0.330898f, 0.063558f, + 0.310982f, 0.325183f, 0.729606f, 0.637557f, 0.887213f, 0.472215f, 0.119594f, + 0.713245f, 0.760785f, 0.561277f, 0.770967f, 0.493796f, 0.522733f, 0.427541f, + 0.025419f, 0.107891f, 0.031429f, 0.636410f, 0.314356f, 0.508571f, 0.907566f, + 0.249292f, 0.410383f, 0.755551f, 0.228798f, 0.076980f, 0.289751f, 0.161221f, + 0.929698f, 0.808120f, 0.633404f, 0.871461f, 0.803672f, 0.186570f, 0.892559f, + 0.539342f}; TensorShape exp_shape = {2, 5}; - float exp_d[10] = { - 0.518626f, 0.397314f, 0.467868f, 0.569336f, 0.384360f, - 0.473020f, 0.498569f, 0.504455f, 0.465062f, 0.492491f - }; - + float exp_d[10] = {0.518626f, + 0.397314f, + 0.467868f, + 0.569336f, + 0.384360f, + 0.473020f, + 0.498569f, + 0.504455f, + 0.465062f, + 0.492491f}; + Tensor t1 = create_test_tensor(shape_4d, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor temp_res = Tensor_mean(t1, -2); @@ -196,13 +234,13 @@ void test_mean_operator() { // Test Case 8: Edge Cases { const char* tc_name = "mean_edge_cases"; - + // Sub-test 1: Single element mean (division by 1) { TensorShape single_shape = {1}; float d1[] = {42.5f}; - float exp_d[] = {42.5f}; // Mean of single element is itself - + float exp_d[] = {42.5f}; // Mean of single element is itself + Tensor t1 = create_test_tensor(single_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape_scalar, exp_d, false); Tensor actual_res = Tensor_mean(t1); diff --git a/tests/Operator/test_min.c b/tests/Operator/test_min.c index f93f85d..2646ac0 100644 --- a/tests/Operator/test_min.c +++ b/tests/Operator/test_min.c @@ -42,7 +42,7 @@ void test_min_operator() { const char* tc_name = "min_matrix"; TensorShape m_shape = {2, 3}; float d1[] = {7.8436f, -8.2612f, -1.5616f, -9.4041f, -5.6272f, 0.1071f}; - float exp_d[] = {-9.4041f}; + float exp_d[] = {-9.4041f}; TensorShape exp_shape = {1, 0, 0, 0}; Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); @@ -70,7 +70,7 @@ void test_min_operator() { const char* tc_name = "min_duplicate"; TensorShape v_shape = {5}; float d1[] = {1.7853f, -9.87f, -2.7956f, -2.7956f, -9.87f}; - float exp_d[] = {-9.87f}; + float exp_d[] = {-9.87f}; TensorShape exp_shape = {1, 0, 0, 0}; Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); @@ -105,11 +105,16 @@ void test_min_operator() { Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false); - + TensorMaxMinResult actual = Tensor_min(t1, 0); compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&actual.indices, + &expected_indices, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Test Case 8: Min over a specific dimension of a matrix (dim=1) @@ -124,32 +129,41 @@ void test_min_operator() { Tensor 
t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false); - + TensorMaxMinResult actual = Tensor_min(t1, 1); compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&actual.indices, + &expected_indices, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } // Test Case 9: Min over a dimension of a 3D tensor (dim=2) { const char* tc_name = "min_3d_tensor_dim_2"; TensorShape t_shape = {2, 2, 3}; - float d1[] = {1.0f, 8.0f, -3.0f, 4.0f, 2.0f, 9.0f, - 7.0f, 0.0f, 5.0f, -4.0f, -1.0f, -2.0f}; + float d1[] = {1.0f, 8.0f, -3.0f, 4.0f, 2.0f, 9.0f, 7.0f, 0.0f, 5.0f, -4.0f, -1.0f, -2.0f}; float exp_d[] = {-3.0f, 2.0f, 0.0f, -4.0f}; float exp_idx[] = {2.0f, 1.0f, 1.0f, 0.0f}; TensorShape exp_shape = {2, 2}; - + Tensor t1 = create_test_tensor(t_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor expected_indices = create_test_tensor(exp_shape, exp_idx, false); - + TensorMaxMinResult actual = Tensor_min(t1, 2); - + compare_tensors(&actual.values, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); - compare_tensors(&actual.indices, &expected_indices, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&actual.indices, + &expected_indices, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } cten_free(pool_id); diff --git a/tests/Operator/test_mul.c b/tests/Operator/test_mul.c index 3cbfacb..a5a7590 100644 --- a/tests/Operator/test_mul.c +++ b/tests/Operator/test_mul.c @@ -14,7 +14,7 @@ void test_mul_operator() { { const char* tc_name = "mul_scalar"; TensorShape s_shape = {1}; - float d1[] = {2.0f}; + float d1[] = {2.0f}; float d2[] = {3.0f}; float exp_d[] = {6.0f}; Tensor t1 = create_test_tensor(s_shape, d1, false); @@ -59,32 +59,46 @@ void test_mul_operator() { // Example: [[1,2],[3,4]] * [2] (shape {1}) -> PyTorch result: [[2,4],[6,8]] { const char* tc_name = "mul_broadcast_matrix_by_scalar_tensor"; - TensorShape mat_shape = {2, 2}; float mat_data[] = {1.0f, 2.0f, 3.0f, 4.0f}; - TensorShape scalar_shape = {1}; float scalar_data[] = {2.0f}; - - TensorShape expected_shape = {2, 2}; float exp_data[] = {2.0f, 4.0f, 6.0f, 8.0f}; + TensorShape mat_shape = {2, 2}; + float mat_data[] = {1.0f, 2.0f, 3.0f, 4.0f}; + TensorShape scalar_shape = {1}; + float scalar_data[] = {2.0f}; + + TensorShape expected_shape = {2, 2}; + float exp_data[] = {2.0f, 4.0f, 6.0f, 8.0f}; Tensor t_mat = create_test_tensor(mat_shape, mat_data, false); Tensor t_scalar_original = create_test_tensor(scalar_shape, scalar_data, false); - - Tensor actual_res = Tensor_mul(t_mat, t_scalar_original); + + Tensor actual_res = Tensor_mul(t_mat, t_scalar_original); Tensor expected_res = create_test_tensor(expected_shape, exp_data, false); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 5: Advanced Broadcasting { const char* tc_name = "mul_advanced_broadcasting"; - + // Sub-test 1: Multi-dimensional broadcasting {3,1} * {1,4} -> {3,4} { - TensorShape s1_shape = {3, 1}; float d1[] = {2.0f, 3.0f, 4.0f}; - TensorShape s2_shape = {1, 4}; float d2[] = {1.0f, 2.0f, 3.0f, 4.0f}; - TensorShape exp_shape = {3, 4}; - float exp_d[] = {2.0f, 4.0f, 6.0f, 8.0f, // 2*[1,2,3,4] - 3.0f, 6.0f, 9.0f, 12.0f, // 3*[1,2,3,4] - 4.0f, 8.0f, 12.0f, 16.0f}; // 4*[1,2,3,4] + TensorShape 
s1_shape = {3, 1}; + float d1[] = {2.0f, 3.0f, 4.0f}; + TensorShape s2_shape = {1, 4}; + float d2[] = {1.0f, 2.0f, 3.0f, 4.0f}; + TensorShape exp_shape = {3, 4}; + float exp_d[] = {2.0f, + 4.0f, + 6.0f, + 8.0f, // 2*[1,2,3,4] + 3.0f, + 6.0f, + 9.0f, + 12.0f, // 3*[1,2,3,4] + 4.0f, + 8.0f, + 12.0f, + 16.0f}; // 4*[1,2,3,4] Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -96,58 +110,141 @@ void test_mul_operator() { // Sub-test 2: 4D broadcasting {1,2,3,4} * {5,1,1,1} -> {5,2,3,4} { - TensorShape s1_shape = {1, 2, 3, 4}; - float d1[] = { - 0.1254f, 0.7612f, 0.3476f, 0.8791f, - 0.2415f, 0.5832f, 0.6903f, 0.9234f, - 0.1327f, 0.4651f, 0.7561f, 0.5872f, - 0.9135f, 0.2783f, 0.3491f, 0.7392f, - 0.5517f, 0.8253f, 0.6023f, 0.1937f, - 0.4936f, 0.2341f, 0.8745f, 0.5291f - }; - - TensorShape s2_shape = {5, 1, 1, 1}; + TensorShape s1_shape = {1, 2, 3, 4}; + float d1[] = {0.1254f, 0.7612f, 0.3476f, 0.8791f, 0.2415f, 0.5832f, 0.6903f, 0.9234f, + 0.1327f, 0.4651f, 0.7561f, 0.5872f, 0.9135f, 0.2783f, 0.3491f, 0.7392f, + 0.5517f, 0.8253f, 0.6023f, 0.1937f, 0.4936f, 0.2341f, 0.8745f, 0.5291f}; + + TensorShape s2_shape = {5, 1, 1, 1}; float d2[] = {0.8365f, 0.2471f, 0.9382f, 0.5713f, 0.1648f}; - + TensorShape exp_shape = {5, 2, 3, 4}; - - float exp_d[] = { - // Batch 0 - 0.1049f, 0.6367f, 0.2908f, 0.7354f, - 0.2020f, 0.4878f, 0.5774f, 0.7724f, - 0.1110f, 0.3891f, 0.6325f, 0.4912f, - 0.7641f, 0.2328f, 0.2920f, 0.6183f, - 0.4615f, 0.6904f, 0.5038f, 0.1620f, - 0.4129f, 0.1958f, 0.7315f, 0.4426f, - // Batch 1 - 0.0310f, 0.1881f, 0.0859f, 0.2172f, - 0.0597f, 0.1441f, 0.1706f, 0.2282f, - 0.0328f, 0.1149f, 0.1868f, 0.1451f, - 0.2257f, 0.0688f, 0.0863f, 0.1827f, - 0.1363f, 0.2039f, 0.1488f, 0.0479f, - 0.1220f, 0.0578f, 0.2161f, 0.1307f, - // Batch 2 - 0.1177f, 0.7142f, 0.3261f, 0.8248f, - 0.2266f, 0.5472f, 0.6476f, 0.8663f, - 0.1245f, 0.4364f, 0.7094f, 0.5509f, - 0.8570f, 0.2611f, 0.3275f, 0.6935f, - 0.5176f, 0.7743f, 0.5651f, 0.1817f, - 0.4631f, 0.2196f, 0.8205f, 0.4964f, - // Batch 3 - 0.0716f, 0.4349f, 0.1986f, 0.5022f, - 0.1380f, 0.3332f, 0.3944f, 0.5275f, - 0.0758f, 0.2657f, 0.4320f, 0.3355f, - 0.5219f, 0.1590f, 0.1994f, 0.4223f, - 0.3152f, 0.4715f, 0.3441f, 0.1107f, - 0.2820f, 0.1337f, 0.4996f, 0.3023f, - // Batch 4 - 0.0207f, 0.1254f, 0.0573f, 0.1449f, - 0.0398f, 0.0961f, 0.1138f, 0.1522f, - 0.0219f, 0.0766f, 0.1246f, 0.0968f, - 0.1505f, 0.0459f, 0.0575f, 0.1218f, - 0.0909f, 0.1360f, 0.0993f, 0.0319f, - 0.0813f, 0.0386f, 0.1441f, 0.0872f - }; + + float exp_d[] = {// Batch 0 + 0.1049f, + 0.6367f, + 0.2908f, + 0.7354f, + 0.2020f, + 0.4878f, + 0.5774f, + 0.7724f, + 0.1110f, + 0.3891f, + 0.6325f, + 0.4912f, + 0.7641f, + 0.2328f, + 0.2920f, + 0.6183f, + 0.4615f, + 0.6904f, + 0.5038f, + 0.1620f, + 0.4129f, + 0.1958f, + 0.7315f, + 0.4426f, + // Batch 1 + 0.0310f, + 0.1881f, + 0.0859f, + 0.2172f, + 0.0597f, + 0.1441f, + 0.1706f, + 0.2282f, + 0.0328f, + 0.1149f, + 0.1868f, + 0.1451f, + 0.2257f, + 0.0688f, + 0.0863f, + 0.1827f, + 0.1363f, + 0.2039f, + 0.1488f, + 0.0479f, + 0.1220f, + 0.0578f, + 0.2161f, + 0.1307f, + // Batch 2 + 0.1177f, + 0.7142f, + 0.3261f, + 0.8248f, + 0.2266f, + 0.5472f, + 0.6476f, + 0.8663f, + 0.1245f, + 0.4364f, + 0.7094f, + 0.5509f, + 0.8570f, + 0.2611f, + 0.3275f, + 0.6935f, + 0.5176f, + 0.7743f, + 0.5651f, + 0.1817f, + 0.4631f, + 0.2196f, + 0.8205f, + 0.4964f, + // Batch 3 + 0.0716f, + 0.4349f, + 0.1986f, + 0.5022f, + 0.1380f, + 0.3332f, + 0.3944f, + 0.5275f, + 0.0758f, + 0.2657f, + 0.4320f, + 0.3355f, + 0.5219f, + 0.1590f, + 0.1994f, 
+ 0.4223f, + 0.3152f, + 0.4715f, + 0.3441f, + 0.1107f, + 0.2820f, + 0.1337f, + 0.4996f, + 0.3023f, + // Batch 4 + 0.0207f, + 0.1254f, + 0.0573f, + 0.1449f, + 0.0398f, + 0.0961f, + 0.1138f, + 0.1522f, + 0.0219f, + 0.0766f, + 0.1246f, + 0.0968f, + 0.1505f, + 0.0459f, + 0.0575f, + 0.1218f, + 0.0909f, + 0.1360f, + 0.0993f, + 0.0319f, + 0.0813f, + 0.0386f, + 0.1441f, + 0.0872f}; Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -161,13 +258,13 @@ void test_mul_operator() { // Test Case 6: Sign Preservation { const char* tc_name = "mul_sign_preservation"; - + // Sub-test 1: Negative number multiplication { TensorShape v_shape = {2}; float d1[] = {-1.0f, 1.0f}; float d2[] = {-2.0f, -3.0f}; - float exp_d[] = {2.0f, -3.0f}; // (-1)*(-2)=2, (1)*(-3)=-3 + float exp_d[] = {2.0f, -3.0f}; // (-1)*(-2)=2, (1)*(-3)=-3 Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor t2 = create_test_tensor(v_shape, d2, false); @@ -181,15 +278,20 @@ void test_mul_operator() { // Test Case 7: Higher Dimensional Tensors { const char* tc_name = "mul_higher_dimensional_tensors"; - + // Sub-test 1: 3D tensor multiplication (same shape) { TensorShape shape_3d = {2, 3, 4}; - float d1[] = {0.1471f, 0.9266f, 0.4921f, 0.2582f, 0.4591f, 0.9800f, 0.4926f, 0.3288f, 0.6334f, 0.2401f, 0.0759f, 0.1289f, 0.1280f, 0.1519f, 0.1388f, 0.6409f, 0.1819f, 0.3457f, 0.8968f, 0.4740f, 0.6676f, 0.1723f, 0.1923f, 0.0409f}; - float d2[] = {0.1689f, 0.2786f, 0.1770f, 0.0887f, 0.1206f, 0.4608f, 0.2063f, 0.3643f, 0.5034f, 0.6904f, 0.0393f, 0.7994f, 0.6279f, 0.0818f, 0.8736f, 0.9209f, 0.0611f, 0.2769f, 0.8062f, 0.7483f, 0.1845f, 0.2093f, 0.3705f, 0.4845f}; - float exp_d[] = {0.0248f, 0.2582f, 0.0871f, 0.0229f, 0.0554f, 0.4516f, 0.1016f, 0.1198f, 0.3189f, - 0.1658f, 0.0030f, 0.1030f, 0.0804f, 0.0124f, 0.1213f, 0.5902f, 0.0111f, 0.0957f, - 0.7230f, 0.3547f, 0.1232f, 0.0361f, 0.0712f, 0.0198f}; + float d1[] = {0.1471f, 0.9266f, 0.4921f, 0.2582f, 0.4591f, 0.9800f, 0.4926f, 0.3288f, + 0.6334f, 0.2401f, 0.0759f, 0.1289f, 0.1280f, 0.1519f, 0.1388f, 0.6409f, + 0.1819f, 0.3457f, 0.8968f, 0.4740f, 0.6676f, 0.1723f, 0.1923f, 0.0409f}; + float d2[] = {0.1689f, 0.2786f, 0.1770f, 0.0887f, 0.1206f, 0.4608f, 0.2063f, 0.3643f, + 0.5034f, 0.6904f, 0.0393f, 0.7994f, 0.6279f, 0.0818f, 0.8736f, 0.9209f, + 0.0611f, 0.2769f, 0.8062f, 0.7483f, 0.1845f, 0.2093f, 0.3705f, 0.4845f}; + float exp_d[] = {0.0248f, 0.2582f, 0.0871f, 0.0229f, 0.0554f, 0.4516f, + 0.1016f, 0.1198f, 0.3189f, 0.1658f, 0.0030f, 0.1030f, + 0.0804f, 0.0124f, 0.1213f, 0.5902f, 0.0111f, 0.0957f, + 0.7230f, 0.3547f, 0.1232f, 0.0361f, 0.0712f, 0.0198f}; Tensor t1 = create_test_tensor(shape_3d, d1, false); Tensor t2 = create_test_tensor(shape_3d, d2, false); @@ -202,32 +304,57 @@ void test_mul_operator() { // Sub-test 2: 4D tensor multiplication (same shape) { TensorShape shape_4d = {2, 3, 4, 5}; - float d1[] = {0.6183f, 0.3689f, 0.4625f, 0.7475f, 0.0367f, 0.2524f, 0.7133f, 0.8952f, 0.5117f, 0.5321f, 0.1072f, 0.4474f, 0.5326f, 0.2425f, 0.2692f, 0.3773f, 0.0201f, 0.3221f, 0.2114f, 0.3275f, 0.1198f, 0.8905f, 0.5936f, 0.6791f, 0.7892f, 0.4984f, 0.0869f, 0.5371f, 0.5868f, 0.7454f, 0.4317f, 0.1276f, 0.2838f, 0.3631f, 0.6459f, 0.5708f, 0.3561f, 0.9865f, 0.6058f, 0.2372f, 0.1018f, 0.1529f, 0.2460f, 0.1607f, 0.1866f, 0.2851f, 0.1734f, 0.8968f, 0.0802f, 0.5245f, 0.4104f, 0.9824f, 0.1120f, 0.3979f, 0.9695f, 0.8655f, 0.8171f, 0.2579f, 0.1709f, 0.6686f, 0.9294f, 0.5568f, 0.5716f, 0.2800f, 0.7695f, 0.1870f, 0.3237f, 0.4254f, 0.5076f, 0.2424f, 0.1148f, 0.6106f, 
0.2886f, 0.5812f, 0.1544f, 0.4811f, 0.5326f, 0.0518f, 0.3366f, 0.1344f, 0.0634f, 0.9900f, 0.3224f, 0.8099f, 0.2546f, 0.6815f, 0.7602f, 0.5956f, 0.4716f, 0.4118f, 0.3489f, 0.9295f, 0.8306f, 0.9650f, 0.1243f, 0.7309f, 0.9383f, 0.1812f, 0.0665f, 0.7411f, 0.5745f, 0.8418f, 0.1398f, 0.7953f, 0.2016f, 0.1637f, 0.1643f, 0.8146f, 0.6652f, 0.5231f, 0.3588f, 0.8772f, 0.3924f, 0.8166f, 0.4391f, 0.3769f, 0.4627f, 0.3014f, 0.7476f, 0.5027f}; - float d2[] = {0.2322f, 0.8996f, 0.3839f, 0.5436f, 0.9065f, 0.6242f, 0.1169f, 0.9398f, 0.6277f, 0.3349f, 0.1393f, 0.7940f, 0.6201f, 0.5335f, 0.8939f, 0.7886f, 0.1517f, 0.3117f, 0.2485f, 0.7439f, 0.0335f, 0.5699f, 0.7625f, 0.8768f, 0.3421f, 0.8213f, 0.1106f, 0.8465f, 0.1275f, 0.3973f, 0.7973f, 0.1499f, 0.2293f, 0.7223f, 0.7200f, 0.6411f, 0.6939f, 0.5427f, 0.2518f, 0.3457f, 0.1816f, 0.9085f, 0.5834f, 0.4009f, 0.4620f, 0.9473f, 0.1534f, 0.5862f, 0.5059f, 0.6115f, 0.0181f, 0.8721f, 0.9321f, 0.5651f, 0.6967f, 0.9225f, 0.7072f, 0.1525f, 0.5763f, 0.6067f, 0.4241f, 0.7364f, 0.9344f, 0.9256f, 0.4508f, 0.1132f, 0.9848f, 0.8389f, 0.1247f, 0.9208f, 0.8699f, 0.5188f, 0.5913f, 0.3990f, 0.0548f, 0.3352f, 0.8029f, 0.0046f, 0.3335f, 0.3982f, 0.5374f, 0.9199f, 0.3463f, 0.3470f, 0.7375f, 0.4522f, 0.2246f, 0.4524f, 0.1409f, 0.1764f, 0.4984f, 0.4189f, 0.9148f, 0.3624f, 0.5806f, 0.6323f, 0.0131f, 0.6635f, 0.1780f, 0.9611f, 0.1487f, 0.4146f, 0.0853f, 0.9969f, 0.5022f, 0.5954f, 0.0671f, 0.7500f, 0.2099f, 0.8981f, 0.2051f, 0.1907f, 0.0365f, 0.4721f, 0.5648f, 0.0657f, 0.7755f, 0.4533f, 0.5244f, 0.4408f}; - float exp_d[] = {0.1435693f, 0.3318624f, 0.1775537f, 0.4063410f, 0.03326855f, - 0.1575481f, 0.08338477f, 0.8413090f, 0.3211941f, 0.1782003f, - 0.01493296f, 0.3552356f, 0.3302653f, 0.1293738f, 0.2406379f, - 0.2975388f, 0.003049170f, 0.1003986f, 0.05253290f, 0.2436272f, - 0.004013300f, 0.5074959f, 0.4526200f, 0.5954348f, 0.2699853f, - 0.4093359f, 0.009611141f, 0.4546551f, 0.07481699f, 0.2961474f, - 0.3441944f, 0.01912724f, 0.06507535f, 0.2622671f, 0.4650480f, - 0.3659399f, 0.2470978f, 0.5353736f, 0.1525404f, 0.08200004f, - 0.01848688f, 0.1389097f, 0.1435164f, 0.06442463f, 0.08620920f, - 0.2700753f, 0.02659956f, 0.5257041f, 0.04057318f, 0.3207318f, - 0.007428240f, 0.8567510f, 0.1043952f, 0.2248533f, 0.6754506f, - 0.7984238f, 0.5778531f, 0.03932975f, 0.09848967f, 0.4056396f, - 0.3941586f, 0.4100275f, 0.5341031f, 0.2591680f, 0.3468906f, - 0.02116840f, 0.3187798f, 0.3568681f, 0.06329772f, 0.2232019f, - 0.09986452f, 0.3167793f, 0.1706492f, 0.2318988f, 0.008461121f, - 0.1612647f, 0.4276245f, 0.0002382800f, 0.1122561f, 0.05351808f, - 0.03407116f, 0.9107010f, 0.1116471f, 0.2810353f, 0.1877675f, - 0.3081743f, 0.1707409f, 0.2694494f, 0.06644844f, 0.07264152f, - 0.1738918f, 0.3893676f, 0.7598329f, 0.3497160f, 0.07216858f, - 0.4621481f, 0.01229173f, 0.1202262f, 0.01183700f, 0.7122712f, - 0.08542816f, 0.3490103f, 0.01192494f, 0.7928346f, 0.1012435f, - 0.09746698f, 0.01102453f, 0.6109500f, 0.1396255f, 0.4697962f, - 0.07358988f, 0.1672820f, 0.01432260f, 0.3855169f, 0.2480037f, - 0.02476233f, 0.3588239f, 0.1366246f, 0.3920414f, 0.2215901f}; + float d1[] = {0.6183f, 0.3689f, 0.4625f, 0.7475f, 0.0367f, 0.2524f, 0.7133f, 0.8952f, + 0.5117f, 0.5321f, 0.1072f, 0.4474f, 0.5326f, 0.2425f, 0.2692f, 0.3773f, + 0.0201f, 0.3221f, 0.2114f, 0.3275f, 0.1198f, 0.8905f, 0.5936f, 0.6791f, + 0.7892f, 0.4984f, 0.0869f, 0.5371f, 0.5868f, 0.7454f, 0.4317f, 0.1276f, + 0.2838f, 0.3631f, 0.6459f, 0.5708f, 0.3561f, 0.9865f, 0.6058f, 0.2372f, + 0.1018f, 0.1529f, 0.2460f, 0.1607f, 0.1866f, 0.2851f, 0.1734f, 0.8968f, + 0.0802f, 
0.5245f, 0.4104f, 0.9824f, 0.1120f, 0.3979f, 0.9695f, 0.8655f, + 0.8171f, 0.2579f, 0.1709f, 0.6686f, 0.9294f, 0.5568f, 0.5716f, 0.2800f, + 0.7695f, 0.1870f, 0.3237f, 0.4254f, 0.5076f, 0.2424f, 0.1148f, 0.6106f, + 0.2886f, 0.5812f, 0.1544f, 0.4811f, 0.5326f, 0.0518f, 0.3366f, 0.1344f, + 0.0634f, 0.9900f, 0.3224f, 0.8099f, 0.2546f, 0.6815f, 0.7602f, 0.5956f, + 0.4716f, 0.4118f, 0.3489f, 0.9295f, 0.8306f, 0.9650f, 0.1243f, 0.7309f, + 0.9383f, 0.1812f, 0.0665f, 0.7411f, 0.5745f, 0.8418f, 0.1398f, 0.7953f, + 0.2016f, 0.1637f, 0.1643f, 0.8146f, 0.6652f, 0.5231f, 0.3588f, 0.8772f, + 0.3924f, 0.8166f, 0.4391f, 0.3769f, 0.4627f, 0.3014f, 0.7476f, 0.5027f}; + float d2[] = {0.2322f, 0.8996f, 0.3839f, 0.5436f, 0.9065f, 0.6242f, 0.1169f, 0.9398f, + 0.6277f, 0.3349f, 0.1393f, 0.7940f, 0.6201f, 0.5335f, 0.8939f, 0.7886f, + 0.1517f, 0.3117f, 0.2485f, 0.7439f, 0.0335f, 0.5699f, 0.7625f, 0.8768f, + 0.3421f, 0.8213f, 0.1106f, 0.8465f, 0.1275f, 0.3973f, 0.7973f, 0.1499f, + 0.2293f, 0.7223f, 0.7200f, 0.6411f, 0.6939f, 0.5427f, 0.2518f, 0.3457f, + 0.1816f, 0.9085f, 0.5834f, 0.4009f, 0.4620f, 0.9473f, 0.1534f, 0.5862f, + 0.5059f, 0.6115f, 0.0181f, 0.8721f, 0.9321f, 0.5651f, 0.6967f, 0.9225f, + 0.7072f, 0.1525f, 0.5763f, 0.6067f, 0.4241f, 0.7364f, 0.9344f, 0.9256f, + 0.4508f, 0.1132f, 0.9848f, 0.8389f, 0.1247f, 0.9208f, 0.8699f, 0.5188f, + 0.5913f, 0.3990f, 0.0548f, 0.3352f, 0.8029f, 0.0046f, 0.3335f, 0.3982f, + 0.5374f, 0.9199f, 0.3463f, 0.3470f, 0.7375f, 0.4522f, 0.2246f, 0.4524f, + 0.1409f, 0.1764f, 0.4984f, 0.4189f, 0.9148f, 0.3624f, 0.5806f, 0.6323f, + 0.0131f, 0.6635f, 0.1780f, 0.9611f, 0.1487f, 0.4146f, 0.0853f, 0.9969f, + 0.5022f, 0.5954f, 0.0671f, 0.7500f, 0.2099f, 0.8981f, 0.2051f, 0.1907f, + 0.0365f, 0.4721f, 0.5648f, 0.0657f, 0.7755f, 0.4533f, 0.5244f, 0.4408f}; + float exp_d[] = { + 0.1435693f, 0.3318624f, 0.1775537f, 0.4063410f, 0.03326855f, 0.1575481f, + 0.08338477f, 0.8413090f, 0.3211941f, 0.1782003f, 0.01493296f, 0.3552356f, + 0.3302653f, 0.1293738f, 0.2406379f, 0.2975388f, 0.003049170f, 0.1003986f, + 0.05253290f, 0.2436272f, 0.004013300f, 0.5074959f, 0.4526200f, 0.5954348f, + 0.2699853f, 0.4093359f, 0.009611141f, 0.4546551f, 0.07481699f, 0.2961474f, + 0.3441944f, 0.01912724f, 0.06507535f, 0.2622671f, 0.4650480f, 0.3659399f, + 0.2470978f, 0.5353736f, 0.1525404f, 0.08200004f, 0.01848688f, 0.1389097f, + 0.1435164f, 0.06442463f, 0.08620920f, 0.2700753f, 0.02659956f, 0.5257041f, + 0.04057318f, 0.3207318f, 0.007428240f, 0.8567510f, 0.1043952f, 0.2248533f, + 0.6754506f, 0.7984238f, 0.5778531f, 0.03932975f, 0.09848967f, 0.4056396f, + 0.3941586f, 0.4100275f, 0.5341031f, 0.2591680f, 0.3468906f, 0.02116840f, + 0.3187798f, 0.3568681f, 0.06329772f, 0.2232019f, 0.09986452f, 0.3167793f, + 0.1706492f, 0.2318988f, 0.008461121f, 0.1612647f, 0.4276245f, 0.0002382800f, + 0.1122561f, 0.05351808f, 0.03407116f, 0.9107010f, 0.1116471f, 0.2810353f, + 0.1877675f, 0.3081743f, 0.1707409f, 0.2694494f, 0.06644844f, 0.07264152f, + 0.1738918f, 0.3893676f, 0.7598329f, 0.3497160f, 0.07216858f, 0.4621481f, + 0.01229173f, 0.1202262f, 0.01183700f, 0.7122712f, 0.08542816f, 0.3490103f, + 0.01192494f, 0.7928346f, 0.1012435f, 0.09746698f, 0.01102453f, 0.6109500f, + 0.1396255f, 0.4697962f, 0.07358988f, 0.1672820f, 0.01432260f, 0.3855169f, + 0.2480037f, 0.02476233f, 0.3588239f, 0.1366246f, 0.3920414f, 0.2215901f}; Tensor t1 = create_test_tensor(shape_4d, d1, false); Tensor t2 = create_test_tensor(shape_4d, d2, false); @@ -237,462 +364,523 @@ void test_mul_operator() { compare_tensors(&actual_res, &expected_res, op_name, tc_name, 2, 
TEST_FLOAT_TOLERANCE); } } - + // Test Case 8: Extended Test Case { - const char* tc_name = "Extended_test_case"; - float arange_data[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, - 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, - 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, 51.0f, - 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, - 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, - 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 96.0f }; - + const char* tc_name = "Extended_test_case"; + float arange_data[] = { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, + 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, + 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, + 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, + 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f, 83.0f, 84.0f, + 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 96.0f}; + // dim1 0*0 { TensorShape shape_0d = {0}; Tensor t1 = create_test_tensor(shape_0d, arange_data, false); Tensor t2 = create_test_tensor(shape_0d, arange_data, false); - float exp_d[] = { 1.0f }; + float exp_d[] = {1.0f}; Tensor expected_res = create_test_tensor(shape_0d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // dim1 0*x { TensorShape shape_1d_x = {4}; TensorShape shape_0d = {0}; Tensor t1 = create_test_tensor(shape_0d, arange_data, false); Tensor t2 = create_test_tensor(shape_1d_x, arange_data, false); - float exp_d[] = { 1.0f, 2.0f, 3.0f, 4.0f }; + float exp_d[] = {1.0f, 2.0f, 3.0f, 4.0f}; Tensor expected_res = create_test_tensor(shape_1d_x, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); } - + // dim1 x*x { TensorShape shape_1d_x = {4}; Tensor t1 = create_test_tensor(shape_1d_x, arange_data, false); Tensor t2 = create_test_tensor(shape_1d_x, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f}; Tensor expected_res = create_test_tensor(shape_1d_x, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); } - + // dim2 x*x { TensorShape shape_2d_xx = {2, 4}; Tensor t1 = create_test_tensor(shape_2d_xx, arange_data, false); Tensor t2 = create_test_tensor(shape_2d_xx, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f}; Tensor expected_res = create_test_tensor(shape_2d_xx, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 4, TEST_FLOAT_TOLERANCE); } - + // dim3 x*x { TensorShape shape_3d_xx = {2, 3, 4}; Tensor t1 = 
create_test_tensor(shape_3d_xx, arange_data, false); Tensor t2 = create_test_tensor(shape_3d_xx, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, 100.0f, 121.0f, 144.0f, 169.0f, - 196.0f, 225.0f, 256.0f, 289.0f, 324.0f, 361.0f, 400.0f, 441.0f, 484.0f, 529.0f, 576.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, + 81.0f, 100.0f, 121.0f, 144.0f, 169.0f, 196.0f, 225.0f, 256.0f, + 289.0f, 324.0f, 361.0f, 400.0f, 441.0f, 484.0f, 529.0f, 576.0f}; Tensor expected_res = create_test_tensor(shape_3d_xx, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 5, TEST_FLOAT_TOLERANCE); } - + // dim4 x*x { TensorShape shape_4d_xx = {2, 3, 4, 4}; Tensor t1 = create_test_tensor(shape_4d_xx, arange_data, false); Tensor t2 = create_test_tensor(shape_4d_xx, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, 100.0f, 121.0f, - 144.0f, 169.0f, 196.0f, 225.0f, 256.0f, 289.0f, 324.0f, 361.0f, 400.0f, 441.0f, 484.0f, - 529.0f, 576.0f, 625.0f, 676.0f, 729.0f, 784.0f, 841.0f, 900.0f, 961.0f, 1024.0f, 1089.0f, - 1156.0f, 1225.0f, 1296.0f, 1369.0f, 1444.0f, 1521.0f, 1600.0f, 1681.0f, 1764.0f, 1849.0f, 1936.0f, - 2025.0f, 2116.0f, 2209.0f, 2304.0f, 2401.0f, 2500.0f, 2601.0f, 2704.0f, 2809.0f, 2916.0f, 3025.0f, - 3136.0f, 3249.0f, 3364.0f, 3481.0f, 3600.0f, 3721.0f, 3844.0f, 3969.0f, 4096.0f, 4225.0f, 4356.0f, - 4489.0f, 4624.0f, 4761.0f, 4900.0f, 5041.0f, 5184.0f, 5329.0f, 5476.0f, 5625.0f, 5776.0f, 5929.0f, - 6084.0f, 6241.0f, 6400.0f, 6561.0f, 6724.0f, 6889.0f, 7056.0f, 7225.0f, 7396.0f, 7569.0f, 7744.0f, - 7921.0f, 8100.0f, 8281.0f, 8464.0f, 8649.0f, 8836.0f, 9025.0f, 9216.0f }; + float exp_d[] = { + 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, + 100.0f, 121.0f, 144.0f, 169.0f, 196.0f, 225.0f, 256.0f, 289.0f, 324.0f, + 361.0f, 400.0f, 441.0f, 484.0f, 529.0f, 576.0f, 625.0f, 676.0f, 729.0f, + 784.0f, 841.0f, 900.0f, 961.0f, 1024.0f, 1089.0f, 1156.0f, 1225.0f, 1296.0f, + 1369.0f, 1444.0f, 1521.0f, 1600.0f, 1681.0f, 1764.0f, 1849.0f, 1936.0f, 2025.0f, + 2116.0f, 2209.0f, 2304.0f, 2401.0f, 2500.0f, 2601.0f, 2704.0f, 2809.0f, 2916.0f, + 3025.0f, 3136.0f, 3249.0f, 3364.0f, 3481.0f, 3600.0f, 3721.0f, 3844.0f, 3969.0f, + 4096.0f, 4225.0f, 4356.0f, 4489.0f, 4624.0f, 4761.0f, 4900.0f, 5041.0f, 5184.0f, + 5329.0f, 5476.0f, 5625.0f, 5776.0f, 5929.0f, 6084.0f, 6241.0f, 6400.0f, 6561.0f, + 6724.0f, 6889.0f, 7056.0f, 7225.0f, 7396.0f, 7569.0f, 7744.0f, 7921.0f, 8100.0f, + 8281.0f, 8464.0f, 8649.0f, 8836.0f, 9025.0f, 9216.0f}; Tensor expected_res = create_test_tensor(shape_4d_xx, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 6, TEST_FLOAT_TOLERANCE); } - + // dim0*dim2 { TensorShape shape_0d = {0}; TensorShape shape_2d = {2, 4}; Tensor t1 = create_test_tensor(shape_0d, arange_data, false); Tensor t2 = create_test_tensor(shape_2d, arange_data, false); - float exp_d[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f }; + float exp_d[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f}; Tensor expected_res = create_test_tensor(shape_2d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 7, TEST_FLOAT_TOLERANCE); } - + // dim0*dim3 { TensorShape shape_0d = {0}; TensorShape shape_3d = {2, 3, 4}; Tensor t1 = create_test_tensor(shape_0d, arange_data, false); Tensor t2 = create_test_tensor(shape_3d, 
arange_data, false); - float exp_d[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, - 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f }; + float exp_d[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, + 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, + 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f}; Tensor expected_res = create_test_tensor(shape_3d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 8, TEST_FLOAT_TOLERANCE); } - + // dim0*dim4 { TensorShape shape_0d = {0}; TensorShape shape_4d = {2, 3, 4, 4}; Tensor t1 = create_test_tensor(shape_0d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, - 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, - 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 49.0f, 50.0f, 51.0f, - 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, - 69.0f, 70.0f, 71.0f, 72.0f, 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f, 83.0f, 84.0f, 85.0f, - 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 96.0f }; + float exp_d[] = { + 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, + 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, + 25.0f, 26.0f, 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, + 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, + 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, + 61.0f, 62.0f, 63.0f, 64.0f, 65.0f, 66.0f, 67.0f, 68.0f, 69.0f, 70.0f, 71.0f, 72.0f, + 73.0f, 74.0f, 75.0f, 76.0f, 77.0f, 78.0f, 79.0f, 80.0f, 81.0f, 82.0f, 83.0f, 84.0f, + 85.0f, 86.0f, 87.0f, 88.0f, 89.0f, 90.0f, 91.0f, 92.0f, 93.0f, 94.0f, 95.0f, 96.0f}; Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 9, TEST_FLOAT_TOLERANCE); } - + // dim1*dim2 { TensorShape shape_1d = {4}; TensorShape shape_2d = {3, 4}; Tensor t1 = create_test_tensor(shape_1d, arange_data, false); Tensor t2 = create_test_tensor(shape_2d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f }; + float exp_d[] = + {1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f}; Tensor expected_res = create_test_tensor(shape_2d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 10, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_1d = {4}; TensorShape shape_2d = {1, 4}; Tensor t1 = create_test_tensor(shape_1d, arange_data, false); Tensor t2 = create_test_tensor(shape_2d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f}; Tensor expected_res = create_test_tensor(shape_2d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 11, TEST_FLOAT_TOLERANCE); } - + // dim1*dim3 { TensorShape shape_1d = {4}; TensorShape shape_3d = {2, 3, 4}; Tensor t1 = 
create_test_tensor(shape_1d, arange_data, false); Tensor t2 = create_test_tensor(shape_3d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, - 28.0f, 45.0f, 64.0f, 17.0f, 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, + 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, 28.0f, 45.0f, 64.0f, + 17.0f, 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f}; Tensor expected_res = create_test_tensor(shape_3d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 12, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_1d = {4}; TensorShape shape_3d = {1, 3, 4}; Tensor t1 = create_test_tensor(shape_1d, arange_data, false); Tensor t2 = create_test_tensor(shape_3d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f }; + float exp_d[] = + {1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f}; Tensor expected_res = create_test_tensor(shape_3d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 13, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_1d = {4}; TensorShape shape_3d = {2, 1, 4}; Tensor t1 = create_test_tensor(shape_1d, arange_data, false); Tensor t2 = create_test_tensor(shape_3d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f}; Tensor expected_res = create_test_tensor(shape_3d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 14, TEST_FLOAT_TOLERANCE); } - + // dim1*dim4 { TensorShape shape_1d = {4}; TensorShape shape_4d = {2, 3, 4, 4}; Tensor t1 = create_test_tensor(shape_1d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, - 28.0f, 45.0f, 64.0f, 17.0f, 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f, 25.0f, 52.0f, - 81.0f, 112.0f, 29.0f, 60.0f, 93.0f, 128.0f, 33.0f, 68.0f, 105.0f, 144.0f, 37.0f, 76.0f, 117.0f, - 160.0f, 41.0f, 84.0f, 129.0f, 176.0f, 45.0f, 92.0f, 141.0f, 192.0f, 49.0f, 100.0f, 153.0f, 208.0f, - 53.0f, 108.0f, 165.0f, 224.0f, 57.0f, 116.0f, 177.0f, 240.0f, 61.0f, 124.0f, 189.0f, 256.0f, 65.0f, - 132.0f, 201.0f, 272.0f, 69.0f, 140.0f, 213.0f, 288.0f, 73.0f, 148.0f, 225.0f, 304.0f, 77.0f, 156.0f, - 237.0f, 320.0f, 81.0f, 164.0f, 249.0f, 336.0f, 85.0f, 172.0f, 261.0f, 352.0f, 89.0f, 180.0f, 273.0f, - 368.0f, 93.0f, 188.0f, 285.0f, 384.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, + 20.0f, 33.0f, 48.0f, 13.0f, 28.0f, 45.0f, 64.0f, 17.0f, 36.0f, + 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f, 25.0f, 52.0f, 81.0f, + 112.0f, 29.0f, 60.0f, 93.0f, 128.0f, 33.0f, 68.0f, 105.0f, 144.0f, + 37.0f, 76.0f, 117.0f, 160.0f, 41.0f, 84.0f, 129.0f, 176.0f, 45.0f, + 92.0f, 141.0f, 192.0f, 49.0f, 100.0f, 153.0f, 208.0f, 53.0f, 108.0f, + 165.0f, 224.0f, 57.0f, 116.0f, 177.0f, 240.0f, 61.0f, 124.0f, 189.0f, + 256.0f, 65.0f, 132.0f, 201.0f, 272.0f, 69.0f, 140.0f, 213.0f, 288.0f, + 73.0f, 148.0f, 225.0f, 304.0f, 77.0f, 156.0f, 237.0f, 320.0f, 81.0f, + 164.0f, 249.0f, 336.0f, 85.0f, 172.0f, 261.0f, 352.0f, 89.0f, 180.0f, + 273.0f, 368.0f, 93.0f, 188.0f, 285.0f, 384.0f}; Tensor expected_res = 
create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 15, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_1d = {4}; TensorShape shape_4d = {1, 3, 4, 4}; Tensor t1 = create_test_tensor(shape_1d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, - 28.0f, 45.0f, 64.0f, 17.0f, 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f, 25.0f, 52.0f, - 81.0f, 112.0f, 29.0f, 60.0f, 93.0f, 128.0f, 33.0f, 68.0f, 105.0f, 144.0f, 37.0f, 76.0f, 117.0f, - 160.0f, 41.0f, 84.0f, 129.0f, 176.0f, 45.0f, 92.0f, 141.0f, 192.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, + 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, 28.0f, 45.0f, 64.0f, + 17.0f, 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f, + 25.0f, 52.0f, 81.0f, 112.0f, 29.0f, 60.0f, 93.0f, 128.0f, + 33.0f, 68.0f, 105.0f, 144.0f, 37.0f, 76.0f, 117.0f, 160.0f, + 41.0f, 84.0f, 129.0f, 176.0f, 45.0f, 92.0f, 141.0f, 192.0f}; Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 16, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_1d = {4}; TensorShape shape_4d = {2, 1, 4, 4}; Tensor t1 = create_test_tensor(shape_1d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, - 28.0f, 45.0f, 64.0f, 17.0f, 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f, 25.0f, 52.0f, - 81.0f, 112.0f, 29.0f, 60.0f, 93.0f, 128.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, + 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, 28.0f, 45.0f, 64.0f, + 17.0f, 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f, + 25.0f, 52.0f, 81.0f, 112.0f, 29.0f, 60.0f, 93.0f, 128.0f}; Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 17, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_1d = {4}; TensorShape shape_4d = {2, 3, 1, 4}; Tensor t1 = create_test_tensor(shape_1d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, 28.0f, 45.0f, 64.0f, 17.0f, - 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, + 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, 28.0f, 45.0f, 64.0f, + 17.0f, 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f}; Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 18, TEST_FLOAT_TOLERANCE); } - + // dim2*dim3 { TensorShape shape_2d = {3, 4}; TensorShape shape_3d = {2, 3, 4}; Tensor t1 = create_test_tensor(shape_2d, arange_data, false); Tensor t2 = create_test_tensor(shape_3d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, 100.0f, 121.0f, 144.0f, 13.0f, - 28.0f, 45.0f, 64.0f, 85.0f, 108.0f, 133.0f, 160.0f, 189.0f, 220.0f, 253.0f, 288.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, + 81.0f, 100.0f, 121.0f, 144.0f, 13.0f, 28.0f, 45.0f, 64.0f, + 85.0f, 108.0f, 133.0f, 160.0f, 189.0f, 220.0f, 
253.0f, 288.0f}; Tensor expected_res = create_test_tensor(shape_3d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 19, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_2d = {3, 4}; TensorShape shape_3d = {1, 3, 4}; Tensor t1 = create_test_tensor(shape_2d, arange_data, false); Tensor t2 = create_test_tensor(shape_3d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, 100.0f, 121.0f, 144.0f }; + float exp_d[] = {1.0f, + 4.0f, + 9.0f, + 16.0f, + 25.0f, + 36.0f, + 49.0f, + 64.0f, + 81.0f, + 100.0f, + 121.0f, + 144.0f}; Tensor expected_res = create_test_tensor(shape_3d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 20, TEST_FLOAT_TOLERANCE); } - + // dim2*dim4 { TensorShape shape_2d = {4, 4}; TensorShape shape_4d = {2, 3, 4, 4}; Tensor t1 = create_test_tensor(shape_2d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, 100.0f, 121.0f, - 144.0f, 169.0f, 196.0f, 225.0f, 256.0f, 17.0f, 36.0f, 57.0f, 80.0f, 105.0f, 132.0f, - 161.0f, 192.0f, 225.0f, 260.0f, 297.0f, 336.0f, 377.0f, 420.0f, 465.0f, 512.0f, 33.0f, - 68.0f, 105.0f, 144.0f, 185.0f, 228.0f, 273.0f, 320.0f, 369.0f, 420.0f, 473.0f, 528.0f, - 585.0f, 644.0f, 705.0f, 768.0f, 49.0f, 100.0f, 153.0f, 208.0f, 265.0f, 324.0f, 385.0f, - 448.0f, 513.0f, 580.0f, 649.0f, 720.0f, 793.0f, 868.0f, 945.0f, 1024.0f, 65.0f, 132.0f, - 201.0f, 272.0f, 345.0f, 420.0f, 497.0f, 576.0f, 657.0f, 740.0f, 825.0f, 912.0f, 1001.0f, - 1092.0f, 1185.0f, 1280.0f, 81.0f, 164.0f, 249.0f, 336.0f, 425.0f, 516.0f, 609.0f, 704.0f, - 801.0f, 900.0f, 1001.0f, 1104.0f, 1209.0f, 1316.0f, 1425.0f, 1536.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, + 81.0f, 100.0f, 121.0f, 144.0f, 169.0f, 196.0f, 225.0f, 256.0f, + 17.0f, 36.0f, 57.0f, 80.0f, 105.0f, 132.0f, 161.0f, 192.0f, + 225.0f, 260.0f, 297.0f, 336.0f, 377.0f, 420.0f, 465.0f, 512.0f, + 33.0f, 68.0f, 105.0f, 144.0f, 185.0f, 228.0f, 273.0f, 320.0f, + 369.0f, 420.0f, 473.0f, 528.0f, 585.0f, 644.0f, 705.0f, 768.0f, + 49.0f, 100.0f, 153.0f, 208.0f, 265.0f, 324.0f, 385.0f, 448.0f, + 513.0f, 580.0f, 649.0f, 720.0f, 793.0f, 868.0f, 945.0f, 1024.0f, + 65.0f, 132.0f, 201.0f, 272.0f, 345.0f, 420.0f, 497.0f, 576.0f, + 657.0f, 740.0f, 825.0f, 912.0f, 1001.0f, 1092.0f, 1185.0f, 1280.0f, + 81.0f, 164.0f, 249.0f, 336.0f, 425.0f, 516.0f, 609.0f, 704.0f, + 801.0f, 900.0f, 1001.0f, 1104.0f, 1209.0f, 1316.0f, 1425.0f, 1536.0f}; Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 21, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_2d = {4, 4}; TensorShape shape_4d = {1, 3, 4, 4}; Tensor t1 = create_test_tensor(shape_2d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, 100.0f, 121.0f, 144.0f, 169.0f, - 196.0f, 225.0f, 256.0f, 17.0f, 36.0f, 57.0f, 80.0f, 105.0f, 132.0f, 161.0f, 192.0f, 225.0f, 260.0f, - 297.0f, 336.0f, 377.0f, 420.0f, 465.0f, 512.0f, 33.0f, 68.0f, 105.0f, 144.0f, 185.0f, 228.0f, 273.0f, - 320.0f, 369.0f, 420.0f, 473.0f, 528.0f, 585.0f, 644.0f, 705.0f, 768.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, + 81.0f, 100.0f, 121.0f, 144.0f, 169.0f, 196.0f, 
225.0f, 256.0f, + 17.0f, 36.0f, 57.0f, 80.0f, 105.0f, 132.0f, 161.0f, 192.0f, + 225.0f, 260.0f, 297.0f, 336.0f, 377.0f, 420.0f, 465.0f, 512.0f, + 33.0f, 68.0f, 105.0f, 144.0f, 185.0f, 228.0f, 273.0f, 320.0f, + 369.0f, 420.0f, 473.0f, 528.0f, 585.0f, 644.0f, 705.0f, 768.0f}; Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 22, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_2d = {4, 4}; TensorShape shape_4d = {2, 1, 4, 4}; Tensor t1 = create_test_tensor(shape_2d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, 100.0f, 121.0f, 144.0f, 169.0f, - 196.0f, 225.0f, 256.0f, 17.0f, 36.0f, 57.0f, 80.0f, 105.0f, 132.0f, 161.0f, 192.0f, 225.0f, 260.0f, - 297.0f, 336.0f, 377.0f, 420.0f, 465.0f, 512.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, + 81.0f, 100.0f, 121.0f, 144.0f, 169.0f, 196.0f, 225.0f, 256.0f, + 17.0f, 36.0f, 57.0f, 80.0f, 105.0f, 132.0f, 161.0f, 192.0f, + 225.0f, 260.0f, 297.0f, 336.0f, 377.0f, 420.0f, 465.0f, 512.0f}; Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 23, TEST_FLOAT_TOLERANCE); } - + // dim3*dim4 { TensorShape shape_3d = {3, 4, 4}; TensorShape shape_4d = {2, 3, 4, 4}; Tensor t1 = create_test_tensor(shape_3d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, 100.0f, 121.0f, - 144.0f, 169.0f, 196.0f, 225.0f, 256.0f, 289.0f, 324.0f, 361.0f, 400.0f, 441.0f, 484.0f, - 529.0f, 576.0f, 625.0f, 676.0f, 729.0f, 784.0f, 841.0f, 900.0f, 961.0f, 1024.0f, 1089.0f, - 1156.0f, 1225.0f, 1296.0f, 1369.0f, 1444.0f, 1521.0f, 1600.0f, 1681.0f, 1764.0f, 1849.0f, 1936.0f, - 2025.0f, 2116.0f, 2209.0f, 2304.0f, 49.0f, 100.0f, 153.0f, 208.0f, 265.0f, 324.0f, 385.0f, - 448.0f, 513.0f, 580.0f, 649.0f, 720.0f, 793.0f, 868.0f, 945.0f, 1024.0f, 1105.0f, 1188.0f, - 1273.0f, 1360.0f, 1449.0f, 1540.0f, 1633.0f, 1728.0f, 1825.0f, 1924.0f, 2025.0f, 2128.0f, 2233.0f, - 2340.0f, 2449.0f, 2560.0f, 2673.0f, 2788.0f, 2905.0f, 3024.0f, 3145.0f, 3268.0f, 3393.0f, 3520.0f, - 3649.0f, 3780.0f, 3913.0f, 4048.0f, 4185.0f, 4324.0f, 4465.0f, 4608.0f }; + float exp_d[] = { + 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, + 100.0f, 121.0f, 144.0f, 169.0f, 196.0f, 225.0f, 256.0f, 289.0f, 324.0f, + 361.0f, 400.0f, 441.0f, 484.0f, 529.0f, 576.0f, 625.0f, 676.0f, 729.0f, + 784.0f, 841.0f, 900.0f, 961.0f, 1024.0f, 1089.0f, 1156.0f, 1225.0f, 1296.0f, + 1369.0f, 1444.0f, 1521.0f, 1600.0f, 1681.0f, 1764.0f, 1849.0f, 1936.0f, 2025.0f, + 2116.0f, 2209.0f, 2304.0f, 49.0f, 100.0f, 153.0f, 208.0f, 265.0f, 324.0f, + 385.0f, 448.0f, 513.0f, 580.0f, 649.0f, 720.0f, 793.0f, 868.0f, 945.0f, + 1024.0f, 1105.0f, 1188.0f, 1273.0f, 1360.0f, 1449.0f, 1540.0f, 1633.0f, 1728.0f, + 1825.0f, 1924.0f, 2025.0f, 2128.0f, 2233.0f, 2340.0f, 2449.0f, 2560.0f, 2673.0f, + 2788.0f, 2905.0f, 3024.0f, 3145.0f, 3268.0f, 3393.0f, 3520.0f, 3649.0f, 3780.0f, + 3913.0f, 4048.0f, 4185.0f, 4324.0f, 4465.0f, 4608.0f}; Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 24, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_3d = {3, 4, 4}; TensorShape 
shape_4d = {1, 3, 4, 4}; Tensor t1 = create_test_tensor(shape_3d, arange_data, false); Tensor t2 = create_test_tensor(shape_4d, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, 64.0f, 81.0f, 100.0f, 121.0f, - 144.0f, 169.0f, 196.0f, 225.0f, 256.0f, 289.0f, 324.0f, 361.0f, 400.0f, 441.0f, 484.0f, - 529.0f, 576.0f, 625.0f, 676.0f, 729.0f, 784.0f, 841.0f, 900.0f, 961.0f, 1024.0f, 1089.0f, - 1156.0f, 1225.0f, 1296.0f, 1369.0f, 1444.0f, 1521.0f, 1600.0f, 1681.0f, 1764.0f, 1849.0f, 1936.0f, - 2025.0f, 2116.0f, 2209.0f, 2304.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f, 49.0f, + 64.0f, 81.0f, 100.0f, 121.0f, 144.0f, 169.0f, 196.0f, + 225.0f, 256.0f, 289.0f, 324.0f, 361.0f, 400.0f, 441.0f, + 484.0f, 529.0f, 576.0f, 625.0f, 676.0f, 729.0f, 784.0f, + 841.0f, 900.0f, 961.0f, 1024.0f, 1089.0f, 1156.0f, 1225.0f, + 1296.0f, 1369.0f, 1444.0f, 1521.0f, 1600.0f, 1681.0f, 1764.0f, + 1849.0f, 1936.0f, 2025.0f, 2116.0f, 2209.0f, 2304.0f}; Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 25, TEST_FLOAT_TOLERANCE); } - + // special dim4*dim4 { TensorShape shape_a = {2, 3, 4, 4}; TensorShape shape_b = {2, 1, 1, 1}; Tensor t1 = create_test_tensor(shape_a, arange_data, false); Tensor t2 = create_test_tensor(shape_b, arange_data, false); - float exp_d[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, - 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, - 27.0f, 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, 37.0f, 38.0f, 39.0f, - 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, 46.0f, 47.0f, 48.0f, 98.0f, 100.0f, 102.0f, 104.0f, - 106.0f, 108.0f, 110.0f, 112.0f, 114.0f, 116.0f, 118.0f, 120.0f, 122.0f, 124.0f, 126.0f, 128.0f, 130.0f, - 132.0f, 134.0f, 136.0f, 138.0f, 140.0f, 142.0f, 144.0f, 146.0f, 148.0f, 150.0f, 152.0f, 154.0f, 156.0f, - 158.0f, 160.0f, 162.0f, 164.0f, 166.0f, 168.0f, 170.0f, 172.0f, 174.0f, 176.0f, 178.0f, 180.0f, 182.0f, - 184.0f, 186.0f, 188.0f, 190.0f, 192.0f }; + float exp_d[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 17.0f, 18.0f, + 19.0f, 20.0f, 21.0f, 22.0f, 23.0f, 24.0f, 25.0f, 26.0f, 27.0f, + 28.0f, 29.0f, 30.0f, 31.0f, 32.0f, 33.0f, 34.0f, 35.0f, 36.0f, + 37.0f, 38.0f, 39.0f, 40.0f, 41.0f, 42.0f, 43.0f, 44.0f, 45.0f, + 46.0f, 47.0f, 48.0f, 98.0f, 100.0f, 102.0f, 104.0f, 106.0f, 108.0f, + 110.0f, 112.0f, 114.0f, 116.0f, 118.0f, 120.0f, 122.0f, 124.0f, 126.0f, + 128.0f, 130.0f, 132.0f, 134.0f, 136.0f, 138.0f, 140.0f, 142.0f, 144.0f, + 146.0f, 148.0f, 150.0f, 152.0f, 154.0f, 156.0f, 158.0f, 160.0f, 162.0f, + 164.0f, 166.0f, 168.0f, 170.0f, 172.0f, 174.0f, 176.0f, 178.0f, 180.0f, + 182.0f, 184.0f, 186.0f, 188.0f, 190.0f, 192.0f}; Tensor expected_res = create_test_tensor(shape_a, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 26, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_a = {2, 3, 4, 4}; TensorShape shape_b = {1, 3, 1, 1}; Tensor t1 = create_test_tensor(shape_a, arange_data, false); Tensor t2 = create_test_tensor(shape_b, arange_data, false); - float exp_d[] = { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, - 14.0f, 15.0f, 16.0f, 34.0f, 36.0f, 38.0f, 40.0f, 42.0f, 44.0f, 46.0f, 48.0f, 50.0f, 52.0f, - 54.0f, 56.0f, 58.0f, 60.0f, 62.0f, 64.0f, 99.0f, 102.0f, 
105.0f, 108.0f, 111.0f, 114.0f, 117.0f, - 120.0f, 123.0f, 126.0f, 129.0f, 132.0f, 135.0f, 138.0f, 141.0f, 144.0f, 49.0f, 50.0f, 51.0f, 52.0f, - 53.0f, 54.0f, 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, 64.0f, 130.0f, - 132.0f, 134.0f, 136.0f, 138.0f, 140.0f, 142.0f, 144.0f, 146.0f, 148.0f, 150.0f, 152.0f, 154.0f, 156.0f, - 158.0f, 160.0f, 243.0f, 246.0f, 249.0f, 252.0f, 255.0f, 258.0f, 261.0f, 264.0f, 267.0f, 270.0f, 273.0f, - 276.0f, 279.0f, 282.0f, 285.0f, 288.0f }; + float exp_d[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, + 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0f, 34.0f, 36.0f, + 38.0f, 40.0f, 42.0f, 44.0f, 46.0f, 48.0f, 50.0f, 52.0f, 54.0f, + 56.0f, 58.0f, 60.0f, 62.0f, 64.0f, 99.0f, 102.0f, 105.0f, 108.0f, + 111.0f, 114.0f, 117.0f, 120.0f, 123.0f, 126.0f, 129.0f, 132.0f, 135.0f, + 138.0f, 141.0f, 144.0f, 49.0f, 50.0f, 51.0f, 52.0f, 53.0f, 54.0f, + 55.0f, 56.0f, 57.0f, 58.0f, 59.0f, 60.0f, 61.0f, 62.0f, 63.0f, + 64.0f, 130.0f, 132.0f, 134.0f, 136.0f, 138.0f, 140.0f, 142.0f, 144.0f, + 146.0f, 148.0f, 150.0f, 152.0f, 154.0f, 156.0f, 158.0f, 160.0f, 243.0f, + 246.0f, 249.0f, 252.0f, 255.0f, 258.0f, 261.0f, 264.0f, 267.0f, 270.0f, + 273.0f, 276.0f, 279.0f, 282.0f, 285.0f, 288.0f}; Tensor expected_res = create_test_tensor(shape_a, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 27, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_a = {2, 3, 4, 4}; TensorShape shape_b = {1, 1, 4, 1}; Tensor t1 = create_test_tensor(shape_a, arange_data, false); Tensor t2 = create_test_tensor(shape_b, arange_data, false); - float exp_d[] = { 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 12.0f, 14.0f, 16.0f, 27.0f, 30.0f, 33.0f, 36.0f, 52.0f, - 56.0f, 60.0f, 64.0f, 17.0f, 18.0f, 19.0f, 20.0f, 42.0f, 44.0f, 46.0f, 48.0f, 75.0f, 78.0f, - 81.0f, 84.0f, 116.0f, 120.0f, 124.0f, 128.0f, 33.0f, 34.0f, 35.0f, 36.0f, 74.0f, 76.0f, 78.0f, - 80.0f, 123.0f, 126.0f, 129.0f, 132.0f, 180.0f, 184.0f, 188.0f, 192.0f, 49.0f, 50.0f, 51.0f, 52.0f, - 106.0f, 108.0f, 110.0f, 112.0f, 171.0f, 174.0f, 177.0f, 180.0f, 244.0f, 248.0f, 252.0f, 256.0f, 65.0f, - 66.0f, 67.0f, 68.0f, 138.0f, 140.0f, 142.0f, 144.0f, 219.0f, 222.0f, 225.0f, 228.0f, 308.0f, 312.0f, - 316.0f, 320.0f, 81.0f, 82.0f, 83.0f, 84.0f, 170.0f, 172.0f, 174.0f, 176.0f, 267.0f, 270.0f, 273.0f, - 276.0f, 372.0f, 376.0f, 380.0f, 384.0f }; + float exp_d[] = {1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 12.0f, 14.0f, 16.0f, 27.0f, + 30.0f, 33.0f, 36.0f, 52.0f, 56.0f, 60.0f, 64.0f, 17.0f, 18.0f, + 19.0f, 20.0f, 42.0f, 44.0f, 46.0f, 48.0f, 75.0f, 78.0f, 81.0f, + 84.0f, 116.0f, 120.0f, 124.0f, 128.0f, 33.0f, 34.0f, 35.0f, 36.0f, + 74.0f, 76.0f, 78.0f, 80.0f, 123.0f, 126.0f, 129.0f, 132.0f, 180.0f, + 184.0f, 188.0f, 192.0f, 49.0f, 50.0f, 51.0f, 52.0f, 106.0f, 108.0f, + 110.0f, 112.0f, 171.0f, 174.0f, 177.0f, 180.0f, 244.0f, 248.0f, 252.0f, + 256.0f, 65.0f, 66.0f, 67.0f, 68.0f, 138.0f, 140.0f, 142.0f, 144.0f, + 219.0f, 222.0f, 225.0f, 228.0f, 308.0f, 312.0f, 316.0f, 320.0f, 81.0f, + 82.0f, 83.0f, 84.0f, 170.0f, 172.0f, 174.0f, 176.0f, 267.0f, 270.0f, + 273.0f, 276.0f, 372.0f, 376.0f, 380.0f, 384.0f}; Tensor expected_res = create_test_tensor(shape_a, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 28, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_a = {2, 3, 4, 4}; TensorShape shape_b = {2, 1, 4, 1}; Tensor t1 = create_test_tensor(shape_a, arange_data, false); Tensor t2 = create_test_tensor(shape_b, arange_data, false); - float 
exp_d[] = { 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 12.0f, 14.0f, 16.0f, 27.0f, 30.0f, 33.0f, 36.0f, 52.0f, - 56.0f, 60.0f, 64.0f, 17.0f, 18.0f, 19.0f, 20.0f, 42.0f, 44.0f, 46.0f, 48.0f, 75.0f, 78.0f, - 81.0f, 84.0f, 116.0f, 120.0f, 124.0f, 128.0f, 33.0f, 34.0f, 35.0f, 36.0f, 74.0f, 76.0f, 78.0f, - 80.0f, 123.0f, 126.0f, 129.0f, 132.0f, 180.0f, 184.0f, 188.0f, 192.0f, 245.0f, 250.0f, 255.0f, 260.0f, - 318.0f, 324.0f, 330.0f, 336.0f, 399.0f, 406.0f, 413.0f, 420.0f, 488.0f, 496.0f, 504.0f, 512.0f, 325.0f, - 330.0f, 335.0f, 340.0f, 414.0f, 420.0f, 426.0f, 432.0f, 511.0f, 518.0f, 525.0f, 532.0f, 616.0f, 624.0f, - 632.0f, 640.0f, 405.0f, 410.0f, 415.0f, 420.0f, 510.0f, 516.0f, 522.0f, 528.0f, 623.0f, 630.0f, 637.0f, - 644.0f, 744.0f, 752.0f, 760.0f, 768.0f }; + float exp_d[] = {1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 12.0f, 14.0f, 16.0f, 27.0f, + 30.0f, 33.0f, 36.0f, 52.0f, 56.0f, 60.0f, 64.0f, 17.0f, 18.0f, + 19.0f, 20.0f, 42.0f, 44.0f, 46.0f, 48.0f, 75.0f, 78.0f, 81.0f, + 84.0f, 116.0f, 120.0f, 124.0f, 128.0f, 33.0f, 34.0f, 35.0f, 36.0f, + 74.0f, 76.0f, 78.0f, 80.0f, 123.0f, 126.0f, 129.0f, 132.0f, 180.0f, + 184.0f, 188.0f, 192.0f, 245.0f, 250.0f, 255.0f, 260.0f, 318.0f, 324.0f, + 330.0f, 336.0f, 399.0f, 406.0f, 413.0f, 420.0f, 488.0f, 496.0f, 504.0f, + 512.0f, 325.0f, 330.0f, 335.0f, 340.0f, 414.0f, 420.0f, 426.0f, 432.0f, + 511.0f, 518.0f, 525.0f, 532.0f, 616.0f, 624.0f, 632.0f, 640.0f, 405.0f, + 410.0f, 415.0f, 420.0f, 510.0f, 516.0f, 522.0f, 528.0f, 623.0f, 630.0f, + 637.0f, 644.0f, 744.0f, 752.0f, 760.0f, 768.0f}; Tensor expected_res = create_test_tensor(shape_a, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 29, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_a = {2, 3, 4, 4}; TensorShape shape_b = {2, 1, 1, 4}; Tensor t1 = create_test_tensor(shape_a, arange_data, false); Tensor t2 = create_test_tensor(shape_b, arange_data, false); - float exp_d[] = { 1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, 20.0f, 33.0f, 48.0f, 13.0f, - 28.0f, 45.0f, 64.0f, 17.0f, 36.0f, 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f, 25.0f, 52.0f, - 81.0f, 112.0f, 29.0f, 60.0f, 93.0f, 128.0f, 33.0f, 68.0f, 105.0f, 144.0f, 37.0f, 76.0f, 117.0f, - 160.0f, 41.0f, 84.0f, 129.0f, 176.0f, 45.0f, 92.0f, 141.0f, 192.0f, 245.0f, 300.0f, 357.0f, 416.0f, - 265.0f, 324.0f, 385.0f, 448.0f, 285.0f, 348.0f, 413.0f, 480.0f, 305.0f, 372.0f, 441.0f, 512.0f, 325.0f, - 396.0f, 469.0f, 544.0f, 345.0f, 420.0f, 497.0f, 576.0f, 365.0f, 444.0f, 525.0f, 608.0f, 385.0f, 468.0f, - 553.0f, 640.0f, 405.0f, 492.0f, 581.0f, 672.0f, 425.0f, 516.0f, 609.0f, 704.0f, 445.0f, 540.0f, 637.0f, - 736.0f, 465.0f, 564.0f, 665.0f, 768.0f }; + float exp_d[] = {1.0f, 4.0f, 9.0f, 16.0f, 5.0f, 12.0f, 21.0f, 32.0f, 9.0f, + 20.0f, 33.0f, 48.0f, 13.0f, 28.0f, 45.0f, 64.0f, 17.0f, 36.0f, + 57.0f, 80.0f, 21.0f, 44.0f, 69.0f, 96.0f, 25.0f, 52.0f, 81.0f, + 112.0f, 29.0f, 60.0f, 93.0f, 128.0f, 33.0f, 68.0f, 105.0f, 144.0f, + 37.0f, 76.0f, 117.0f, 160.0f, 41.0f, 84.0f, 129.0f, 176.0f, 45.0f, + 92.0f, 141.0f, 192.0f, 245.0f, 300.0f, 357.0f, 416.0f, 265.0f, 324.0f, + 385.0f, 448.0f, 285.0f, 348.0f, 413.0f, 480.0f, 305.0f, 372.0f, 441.0f, + 512.0f, 325.0f, 396.0f, 469.0f, 544.0f, 345.0f, 420.0f, 497.0f, 576.0f, + 365.0f, 444.0f, 525.0f, 608.0f, 385.0f, 468.0f, 553.0f, 640.0f, 405.0f, + 492.0f, 581.0f, 672.0f, 425.0f, 516.0f, 609.0f, 704.0f, 445.0f, 540.0f, + 637.0f, 736.0f, 465.0f, 564.0f, 665.0f, 768.0f}; Tensor expected_res = create_test_tensor(shape_a, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); 
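/* A worked sketch of the broadcast rule these sub-tests exercise (assuming
 * NumPy-style alignment, where size-1 axes are stretched to match):
 *   a: {2, 3, 4, 4} * b: {2, 1, 1, 4}  =>  out[i][j][k][l] = a[i][j][k][l] * b[i][0][0][l]
 * With arange data, b's first slice is {1, 2, 3, 4}, so the first output row
 * is {1*1, 2*2, 3*3, 4*4} = {1, 4, 9, 16}; the second batch (i = 1) switches
 * to b's slice {5, 6, 7, 8}, giving 49*5 = 245 and so on, matching exp_d. */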
compare_tensors(&actual_res, &expected_res, op_name, tc_name, 30, TEST_FLOAT_TOLERANCE); } - + { TensorShape shape_a = {2, 3, 4, 4}; TensorShape shape_b = {1, 3, 4, 1}; Tensor t1 = create_test_tensor(shape_a, arange_data, false); Tensor t2 = create_test_tensor(shape_b, arange_data, false); - float exp_d[] = { 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 12.0f, 14.0f, 16.0f, 27.0f, 30.0f, 33.0f, - 36.0f, 52.0f, 56.0f, 60.0f, 64.0f, 85.0f, 90.0f, 95.0f, 100.0f, 126.0f, 132.0f, - 138.0f, 144.0f, 175.0f, 182.0f, 189.0f, 196.0f, 232.0f, 240.0f, 248.0f, 256.0f, 297.0f, - 306.0f, 315.0f, 324.0f, 370.0f, 380.0f, 390.0f, 400.0f, 451.0f, 462.0f, 473.0f, 484.0f, - 540.0f, 552.0f, 564.0f, 576.0f, 49.0f, 50.0f, 51.0f, 52.0f, 106.0f, 108.0f, 110.0f, - 112.0f, 171.0f, 174.0f, 177.0f, 180.0f, 244.0f, 248.0f, 252.0f, 256.0f, 325.0f, 330.0f, - 335.0f, 340.0f, 414.0f, 420.0f, 426.0f, 432.0f, 511.0f, 518.0f, 525.0f, 532.0f, 616.0f, - 624.0f, 632.0f, 640.0f, 729.0f, 738.0f, 747.0f, 756.0f, 850.0f, 860.0f, 870.0f, 880.0f, - 979.0f, 990.0f, 1001.0f, 1012.0f, 1116.0f, 1128.0f, 1140.0f, 1152.0f };
+ float exp_d[] = {
+ 1.0f, 2.0f, 3.0f, 4.0f, 10.0f, 12.0f, 14.0f, 16.0f, 27.0f, 30.0f,
+ 33.0f, 36.0f, 52.0f, 56.0f, 60.0f, 64.0f, 85.0f, 90.0f, 95.0f, 100.0f,
+ 126.0f, 132.0f, 138.0f, 144.0f, 175.0f, 182.0f, 189.0f, 196.0f, 232.0f, 240.0f,
+ 248.0f, 256.0f, 297.0f, 306.0f, 315.0f, 324.0f, 370.0f, 380.0f, 390.0f, 400.0f,
+ 451.0f, 462.0f, 473.0f, 484.0f, 540.0f, 552.0f, 564.0f, 576.0f, 49.0f, 50.0f,
+ 51.0f, 52.0f, 106.0f, 108.0f, 110.0f, 112.0f, 171.0f, 174.0f, 177.0f, 180.0f,
+ 244.0f, 248.0f, 252.0f, 256.0f, 325.0f, 330.0f, 335.0f, 340.0f, 414.0f, 420.0f,
+ 426.0f, 432.0f, 511.0f, 518.0f, 525.0f, 532.0f, 616.0f, 624.0f, 632.0f, 640.0f,
+ 729.0f, 738.0f, 747.0f, 756.0f, 850.0f, 860.0f, 870.0f, 880.0f, 979.0f, 990.0f,
+ 1001.0f, 1012.0f, 1116.0f, 1128.0f, 1140.0f, 1152.0f};
 Tensor expected_res = create_test_tensor(shape_a, exp_d, false); Tensor actual_res = Tensor_mul(t1, t2); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 31, TEST_FLOAT_TOLERANCE); diff --git a/tests/Operator/test_mulf.c b/tests/Operator/test_mulf.c index b68a472..5275ab4 100644 --- a/tests/Operator/test_mulf.c +++ b/tests/Operator/test_mulf.c @@ -69,14 +69,14 @@ void test_mulf_operator() { // Test Case 5: Special Scalar Values { const char* tc_name = "mulf_special_scalar_values"; - + // Sub-test 1: Multiplication by zero { TensorShape m_shape = {2, 3}; float d1[] = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; float scalar_val = 0.0f; float exp_d[] = {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}; - + Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(m_shape, exp_d, false); Tensor actual_res = Tensor_mulf(t1, scalar_val); @@ -90,7 +90,7 @@ void test_mulf_operator() { float d1[] = {1000.0f, 2000.0f, 3000.0f, 4000.0f}; float scalar_val = 1e-6f; float exp_d[] = {1e-3f, 2e-3f, 3e-3f, 4e-3f}; - + Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor expected_res = create_test_tensor(v_shape, exp_d, false); Tensor actual_res = Tensor_mulf(t1, scalar_val); @@ -102,17 +102,19 @@ void test_mulf_operator() { // Test Case 6: Higher Dimensional Tensors { const char* tc_name = "mulf_higher_dimensional_tensors"; - + // Sub-test 1: 3D tensor scalar multiplication (2x3x4) { TensorShape shape_3d = {2, 3, 4}; - float d1[] = {0.6436f, 0.5264f, 0.7316f, 0.0816f, 0.0604f, 0.2471f, 0.1595f, 0.8718f, 0.2192f, 0.9759f, 0.3369f, 0.1821f, 0.7897f, 0.6587f, 0.4982f, 0.5554f, 0.7192f, 0.2285f, 0.9963f, 0.9748f, 0.6503f, 0.1995f, 0.6802f, 
0.0722f}; + float d1[] = {0.6436f, 0.5264f, 0.7316f, 0.0816f, 0.0604f, 0.2471f, 0.1595f, 0.8718f, + 0.2192f, 0.9759f, 0.3369f, 0.1821f, 0.7897f, 0.6587f, 0.4982f, 0.5554f, + 0.7192f, 0.2285f, 0.9963f, 0.9748f, 0.6503f, 0.1995f, 0.6802f, 0.0722f}; float scalar = 2.5f; float exp_d[] = {1.609000f, 1.316000f, 1.829000f, 0.204000f, 0.151000f, 0.617750f, - 0.398750f, 2.179500f, 0.548000f, 2.439750f, 0.842250f, 0.455250f, 1.974250f, - 1.646750f, 1.245500f, 1.388500f, 1.798000f, 0.571250f, 2.490750f, 2.437000f, - 1.625750f, 0.498750f, 1.700500f, 0.180500f}; - + 0.398750f, 2.179500f, 0.548000f, 2.439750f, 0.842250f, 0.455250f, + 1.974250f, 1.646750f, 1.245500f, 1.388500f, 1.798000f, 0.571250f, + 2.490750f, 2.437000f, 1.625750f, 0.498750f, 1.700500f, 0.180500f}; + Tensor t1 = create_test_tensor(shape_3d, d1, false); Tensor expected_res = create_test_tensor(shape_3d, exp_d, false); Tensor actual_res = Tensor_mulf(t1, scalar); @@ -123,9 +125,24 @@ void test_mulf_operator() { // Sub-test 2: 4D tensor scalar multiplication (2x3x4x5) { TensorShape shape_4d = {2, 3, 4, 5}; - float d1[] = {0.0307f, 0.2577f, 0.4626f, 0.8683f, 0.7272f, 0.7427f, 0.4255f, 0.3459f, 0.3710f, 0.9876f, 0.0401f, 0.8670f, 0.5787f, 0.4386f, 0.7253f, 0.4867f, 0.8734f, 0.9007f, 0.4217f, 0.2768f, 0.5924f, 0.9124f, 0.2107f, 0.6230f, 0.6316f, 0.7331f, 0.1316f, 0.7158f, 0.9090f, 0.1797f, 0.2375f, 0.9714f, 0.1810f, 0.8544f, 0.4923f, 0.2472f, 0.8707f, 0.4453f, 0.5148f, 0.3592f, 0.5930f, 0.1635f, 0.3911f, 0.9694f, 0.2581f, 0.6567f, 0.3252f, 0.7735f, 0.1309f, 0.9698f, 0.4538f, 0.2361f, 0.0735f, 0.1698f, 0.5198f, 0.3370f, 0.8289f, 0.4309f, 0.2487f, 0.6171f, 0.7068f, 0.1670f, 0.1676f, 0.0367f, 0.7364f, 0.6638f, 0.4746f, 0.8442f, 0.8057f, 0.5854f, 0.8683f, 0.2058f, 0.1119f, 0.2697f, 0.0571f, 0.5312f, 0.9366f, 0.0393f, 0.1221f, 0.4522f, 0.9339f, 0.3162f, 0.5072f, 0.0416f, 0.1483f, 0.9866f, 0.9651f, 0.0049f, 0.9518f, 0.6391f, 0.8679f, 0.4547f, 0.5156f, 0.4888f, 0.6669f, 0.1397f, 0.0300f, 0.3079f, 0.7047f, 0.2019f, 0.6734f, 0.9699f, 0.0939f, 0.6726f, 0.4438f, 0.8681f, 0.1771f, 0.6926f, 0.8381f, 0.9446f, 0.6832f, 0.4972f, 0.6178f, 0.8689f, 0.5706f, 0.0304f, 0.9309f, 0.6895f, 0.6765f, 0.2157f}; + float d1[] = {0.0307f, 0.2577f, 0.4626f, 0.8683f, 0.7272f, 0.7427f, 0.4255f, 0.3459f, + 0.3710f, 0.9876f, 0.0401f, 0.8670f, 0.5787f, 0.4386f, 0.7253f, 0.4867f, + 0.8734f, 0.9007f, 0.4217f, 0.2768f, 0.5924f, 0.9124f, 0.2107f, 0.6230f, + 0.6316f, 0.7331f, 0.1316f, 0.7158f, 0.9090f, 0.1797f, 0.2375f, 0.9714f, + 0.1810f, 0.8544f, 0.4923f, 0.2472f, 0.8707f, 0.4453f, 0.5148f, 0.3592f, + 0.5930f, 0.1635f, 0.3911f, 0.9694f, 0.2581f, 0.6567f, 0.3252f, 0.7735f, + 0.1309f, 0.9698f, 0.4538f, 0.2361f, 0.0735f, 0.1698f, 0.5198f, 0.3370f, + 0.8289f, 0.4309f, 0.2487f, 0.6171f, 0.7068f, 0.1670f, 0.1676f, 0.0367f, + 0.7364f, 0.6638f, 0.4746f, 0.8442f, 0.8057f, 0.5854f, 0.8683f, 0.2058f, + 0.1119f, 0.2697f, 0.0571f, 0.5312f, 0.9366f, 0.0393f, 0.1221f, 0.4522f, + 0.9339f, 0.3162f, 0.5072f, 0.0416f, 0.1483f, 0.9866f, 0.9651f, 0.0049f, + 0.9518f, 0.6391f, 0.8679f, 0.4547f, 0.5156f, 0.4888f, 0.6669f, 0.1397f, + 0.0300f, 0.3079f, 0.7047f, 0.2019f, 0.6734f, 0.9699f, 0.0939f, 0.6726f, + 0.4438f, 0.8681f, 0.1771f, 0.6926f, 0.8381f, 0.9446f, 0.6832f, 0.4972f, + 0.6178f, 0.8689f, 0.5706f, 0.0304f, 0.9309f, 0.6895f, 0.6765f, 0.2157f}; float scalar_val = 1.5f; - float exp_d[] = {0.0461f, 0.3866f, 0.6939f, 1.3025f, 1.0908f, 1.1140f, 0.6382f, 0.5188f, 0.5565f, + float exp_d[] = { + 0.0461f, 0.3866f, 0.6939f, 1.3025f, 1.0908f, 1.1140f, 0.6382f, 0.5188f, 0.5565f, 1.4814f, 0.0602f, 1.3005f, 
0.8680f, 0.6579f, 1.0879f, 0.7300f, 1.3101f, 1.3510f, 0.6326f, 0.4152f, 0.8886f, 1.3686f, 0.3160f, 0.9345f, 0.9474f, 1.0997f, 0.1974f, 1.0737f, 1.3635f, 0.2695f, 0.3562f, 1.4571f, 0.2715f, 1.2816f, 0.7384f, 0.3708f, @@ -139,7 +156,7 @@ void test_mulf_operator() { 0.3029f, 1.0101f, 1.4548f, 0.1409f, 1.0089f, 0.6657f, 1.3022f, 0.2657f, 1.0389f, 1.2572f, 1.4169f, 1.0248f, 0.7458f, 0.9267f, 1.3033f, 0.8559f, 0.0456f, 1.3963f, 1.0343f, 1.0148f, 0.3235f}; - + Tensor t1 = create_test_tensor(shape_4d, d1, false); Tensor expected_res = create_test_tensor(shape_4d, exp_d, false); Tensor actual_res = Tensor_mulf(t1, scalar_val); diff --git a/tests/Operator/test_pow.c b/tests/Operator/test_pow.c index 49e87d2..06ecf34 100644 --- a/tests/Operator/test_pow.c +++ b/tests/Operator/test_pow.c @@ -6,7 +6,7 @@ void test_pow_operator() { const char* op_name = "pow"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Scalar power (represented as 1x1 tensors) @@ -18,7 +18,7 @@ void test_pow_operator() { { float d1[] = {3.557707f}; float d2[] = {2.050022f}; - float exp_d[] = {13.486858f}; // 3.557707^2.050022 = 13.486858 + float exp_d[] = {13.486858f}; // 3.557707^2.050022 = 13.486858 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -31,7 +31,7 @@ void test_pow_operator() { { float d1[] = {7.300352f}; float d2[] = {0.500000f}; - float exp_d[] = {2.701916f}; // 7.300352^0.5 = 2.701916 + float exp_d[] = {2.701916f}; // 7.300352^0.5 = 2.701916 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -44,7 +44,7 @@ void test_pow_operator() { { float d1[] = {3.008897f}; float d2[] = {0.000000f}; - float exp_d[] = {1.000000f}; // 3.008897^0 = 1.000000 + float exp_d[] = {1.000000f}; // 3.008897^0 = 1.000000 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -60,7 +60,9 @@ void test_pow_operator() { TensorShape v_shape = {3}; float d1[] = {2.498160f, 4.802857f, 3.927976f}; float d2[] = {2.000000f, 2.000000f, 2.000000f}; - float exp_d[] = {6.240806f, 23.067438f, 15.428994f}; // [2.498160^2, 4.802857^2, 3.927976^2] + float exp_d[] = {6.240806f, + 23.067438f, + 15.428994f}; // [2.498160^2, 4.802857^2, 3.927976^2] Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor t2 = create_test_tensor(v_shape, d2, false); Tensor expected_res = create_test_tensor(v_shape, exp_d, false); @@ -75,7 +77,9 @@ void test_pow_operator() { TensorShape m_shape = {2, 2}; float d1[] = {3.394634f, 1.624075f, 1.623978f, 1.232334f}; float d2[] = {2.665440f, 2.002788f, 2.270181f, 0.551461f}; - float exp_d[] = {25.989442f, 2.641186f, 3.006458f, 1.122104f}; // [3.394634^2.665440, 1.624075^2.002788, 1.623978^2.270181, 1.232334^0.551461] + float exp_d[] = + {25.989442f, 2.641186f, 3.006458f, 1.122104f}; // [3.394634^2.665440, 1.624075^2.002788, + // 1.623978^2.270181, 1.232334^0.551461] Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor t2 = create_test_tensor(m_shape, d2, false); Tensor expected_res = create_test_tensor(m_shape, exp_d, false); @@ -88,9 +92,31 @@ void test_pow_operator() { { const char* tc_name = "pow_3d_tensor"; TensorShape t_shape = {2, 2, 2}; - float d1[] = {4.879639f, 4.329771f, 1.849356f, 1.727300f, 1.733618f, 2.216969f, 3.099026f, 
2.727780f}; - float d2[] = {2.000000f, 2.000000f, 2.000000f, 2.000000f, 2.000000f, 2.000000f, 2.000000f, 2.000000f}; - float exp_d[] = {23.810881f, 18.746913f, 3.420119f, 2.983565f, 3.005432f, 4.914951f, 9.603960f, 7.440784f}; // [4.879639^2, 4.329771^2, 1.849356^2, 1.727300^2, 1.733618^2, 2.216969^2, 3.099026^2, 2.727780^2] + float d1[] = {4.879639f,
+ 4.329771f,
+ 1.849356f,
+ 1.727300f,
+ 1.733618f,
+ 2.216969f,
+ 3.099026f,
+ 2.727780f};
+ float d2[] = {2.000000f,
+ 2.000000f,
+ 2.000000f,
+ 2.000000f,
+ 2.000000f,
+ 2.000000f,
+ 2.000000f,
+ 2.000000f};
+ float exp_d[] = {23.810881f,
+ 18.746913f,
+ 3.420119f,
+ 2.983565f,
+ 3.005432f,
+ 4.914951f,
+ 9.603960f,
+ 7.440784f}; // [4.879639^2, 4.329771^2, 1.849356^2, 1.727300^2, 1.733618^2,
+ // 2.216969^2, 3.099026^2, 2.727780^2]
 Tensor t1 = create_test_tensor(t_shape, d1, false); Tensor t2 = create_test_tensor(t_shape, d2, false); Tensor expected_res = create_test_tensor(t_shape, exp_d, false); @@ -102,18 +128,20 @@ void test_pow_operator() { // Test Case 5: Broadcasting (scalar power applied to vector) { const char* tc_name = "pow_broadcast_vector_scalar"; - TensorShape vec_shape = {3}; + TensorShape vec_shape = {3}; float vec_data[] = {2.164917f, 3.447412f, 1.557975f}; - TensorShape scalar_shape = {1}; - float scalar_data[] = {2.000000f}; // power of 2 - + TensorShape scalar_shape = {1}; + float scalar_data[] = {2.000000f}; // power of 2 + // Expected: broadcast scalar {2} to the vector then apply power - TensorShape expected_shape = {3}; - float exp_data[] = {4.686864f, 11.884647f, 2.427287f}; // [2.164917^2, 3.447412^2, 1.557975^2] + TensorShape expected_shape = {3}; + float exp_data[] = {4.686864f, + 11.884647f, + 2.427287f}; // [2.164917^2, 3.447412^2, 1.557975^2] Tensor t_vec = create_test_tensor(vec_shape, vec_data, false); Tensor t_scalar = create_test_tensor(scalar_shape, scalar_data, false); - + Tensor actual_res = Tensor_pow(t_vec, t_scalar); Tensor expected_res = create_test_tensor(expected_shape, exp_data, false); @@ -126,7 +154,7 @@ TensorShape s_shape = {1}; float d1[] = {-2.000000f}; float d2[] = {3.000000f}; - float exp_d[] = {-8.000000f}; // (-2)^3 = -8 + float exp_d[] = {-8.000000f}; // (-2)^3 = -8 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -141,7 +169,7 @@ TensorShape s_shape = {1}; float d1[] = {4.996321f}; float d2[] = {-2.876786f}; - float exp_d[] = {0.009775f}; // 4.996321^(-2.876786) = 0.009775 + float exp_d[] = {0.009775f}; // 4.996321^(-2.876786) = 0.009775 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -157,7 +185,7 @@ float vec_data[] = {11.515921f, 9.782560f, 4.028242f, 4.027929f}; TensorShape scalar_shape = {1}; float scalar_data[] = {-0.587125f}; - + // Expected: broadcast scalar {-0.587125} to {4} then apply power TensorShape expected_shape = {4}; float exp_data[] = {0.238169f, 0.262108f, 0.441287f, 0.441307f}; @@ -174,17 +202,17 @@ // Test Case 9: Broadcasting with different dimensional tensors { const char* tc_name = "pow_broadcast_matrix_vector"; - TensorShape matrix_shape = {2, 3}; // 2x3 matrix - float matrix_data[] ={2.1854f, 4.7782f, 3.7940f, 3.1940f, 1.2021f,1.2020f}; - TensorShape vector_shape = {3}; // vector with 3 elements + TensorShape 
matrix_shape = {2, 3}; // 2x3 matrix + float matrix_data[] = {2.1854f, 4.7782f, 3.7940f, 3.1940f, 1.2021f, 1.2020f}; + TensorShape vector_shape = {3}; // vector with 3 elements float vector_data[] = {0.6162f, 2.2324f, 1.7022f}; - + // Expected: broadcast vector to shape [2,3] then apply power TensorShape expected_shape = {2, 3}; float exp_data[] = {1.618896f, 32.838981f, 9.676972f, 2.045367f, 1.508202f, 1.367771f}; Tensor t_matrix = create_test_tensor(matrix_shape, matrix_data, false); Tensor t_vector = create_test_tensor(vector_shape, vector_data, false); - + Tensor actual_res = Tensor_pow(t_matrix, t_vector); Tensor expected_res = create_test_tensor(expected_shape, exp_data, false); @@ -197,13 +225,18 @@ void test_pow_operator() { TensorShape s_shape = {1}; float d1[] = {2.799264f}; float d2[] = {9.207805f}; - float exp_d[] = {13070.524894f}; // 2.799264^9.207805 = 13070.524894 + float exp_d[] = {13070.524894f}; // 2.799264^9.207805 = 13070.524894 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_pow(t1, t2); - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, 0.1f); // Large tolerance for large values + compare_tensors(&actual_res, + &expected_res, + op_name, + tc_name, + 1, + 0.1f); // Large tolerance for large values } // Test Case 11: Fractional exponent @@ -212,7 +245,7 @@ void test_pow_operator() { TensorShape s_shape = {1}; float d1[] = {70.149528f}; float d2[] = {0.333333f}; - float exp_d[] = {4.124218f}; // 70.149528^(0.333333) = 4.124218 + float exp_d[] = {4.124218f}; // 70.149528^(0.333333) = 4.124218 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); @@ -224,16 +257,42 @@ void test_pow_operator() { // Test Case 12: 4D tensor power operations { const char* tc_name = "pow_4d_tensor"; - TensorShape t_shape = {2, 2, 1, 2}; // 2x2x1x2 tensor - float d1[] = {1.584617f, 2.582998f, 2.907829f, 1.501168f, 2.988317f, 2.426222f, 2.417480f, 1.510599f}; - float d2[] = {1.092250f, 3.099099f, 2.599444f, 1.186663f, 4.895022f, 1.931085f, 1.362426f, 3.473544f}; - float exp_d[] = {1.653360f, 18.932729f, 16.033239f, 1.619430f, 212.434116f, 5.537756f, 3.328920f, 4.190666f}; // Element-wise power operations + TensorShape t_shape = {2, 2, 1, 2}; // 2x2x1x2 tensor + float d1[] = {1.584617f, + 2.582998f, + 2.907829f, + 1.501168f, + 2.988317f, + 2.426222f, + 2.417480f, + 1.510599f}; + float d2[] = {1.092250f, + 3.099099f, + 2.599444f, + 1.186663f, + 4.895022f, + 1.931085f, + 1.362426f, + 3.473544f}; + float exp_d[] = {1.653360f, + 18.932729f, + 16.033239f, + 1.619430f, + 212.434116f, + 5.537756f, + 3.328920f, + 4.190666f}; // Element-wise power operations Tensor t1 = create_test_tensor(t_shape, d1, false); Tensor t2 = create_test_tensor(t_shape, d2, false); Tensor expected_res = create_test_tensor(t_shape, exp_d, false); Tensor actual_res = Tensor_pow(t1, t2); - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, 0.1f); // Large tolerance for large values + compare_tensors(&actual_res, + &expected_res, + op_name, + tc_name, + 1, + 0.1f); // Large tolerance for large values } // Test Case 13: Power with zero base @@ -242,7 +301,7 @@ void test_pow_operator() { TensorShape s_shape = {1}; float d1[] = {0.000000f}; float d2[] = {5.059696f}; - float exp_d[] = {0.000000f}; // 0.000000^5.059696 = 0.000000 + float exp_d[] = 
{0.000000f}; // 0.000000^5.059696 = 0.000000 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor t2 = create_test_tensor(s_shape, d2, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); diff --git a/tests/Operator/test_reciprocal.c b/tests/Operator/test_reciprocal.c index ec9ea42..3558bd3 100644 --- a/tests/Operator/test_reciprocal.c +++ b/tests/Operator/test_reciprocal.c @@ -6,7 +6,7 @@ void test_reciprocal_operator() { const char* op_name = "reciprocal"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Scalar reciprocal (represented as 1x1 tensors) @@ -16,7 +16,7 @@ // Sub-test 1: Basic reciprocal { float d[] = {6.754841f}; - float exp_d[] = {0.148042f}; // 1 / 6.754841 = 0.148042 + float exp_d[] = {0.148042f}; // 1 / 6.754841 = 0.148042 Tensor t1 = create_test_tensor(s_shape, d, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_reciprocal(t1); @@ -27,14 +27,13 @@ // Sub-test 2: Reciprocal of a large number { float d[] = {188.0f}; - float exp_d[] = {0.00535f}; // 1/188 = 0.00535 + float exp_d[] = {0.005319f}; // 1/188 = 0.005319 Tensor t1 = create_test_tensor(s_shape, d, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_reciprocal(t1); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); } - } // Test Case 2: Vector reciprocal operations { const char* tc_name = "reciprocal_vector_elements"; TensorShape v_shape = {3}; float d[] = {4.370861f, 9.556429f, 7.587945f}; - float exp_d[] = {0.228788f, 0.104642f, 0.131788f}; // [1/4.370861, 1/9.556429, 1/7.587945] + float exp_d[] = {0.228788f, 0.104642f, 0.131788f}; // [1/4.370861, 1/9.556429, 1/7.587945] Tensor t1 = create_test_tensor(v_shape, d, false); Tensor expected_res = create_test_tensor(v_shape, exp_d, false); Tensor actual_res = Tensor_reciprocal(t1); @@ -55,7 +54,10 @@ const char* tc_name = "reciprocal_matrix_2x2"; TensorShape m_shape = {2, 2}; float d[] = {6.387926f, 2.404168f, 2.403951f, 1.522753f}; - float exp_d[] = {0.156545f, 0.415944f, 0.415982f, 0.656706f}; // [1/6.387926, 1/2.404168, 1/2.403951, 1/1.522753] + float exp_d[] = {0.156545f,
+ 0.415944f,
+ 0.415982f,
+ 0.656706f}; // [1/6.387926, 1/2.404168, 1/2.403951, 1/1.522753]
 Tensor t1 = create_test_tensor(m_shape, d, false); Tensor expected_res = create_test_tensor(m_shape, exp_d, false); Tensor actual_res = Tensor_reciprocal(t1); @@ -67,9 +69,24 @@ { const char* tc_name = "reciprocal_3d_tensor"; TensorShape t_shape = {2, 2, 2}; - float d[] = {8.795585f, 6.410035f, 7.372653f, 1.185260f, 9.729189f, 8.491984f, 2.911052f, 2.636425f}; - float exp_d[] = {0.113693f, 0.156005f, 0.135636f, 0.843696f, 0.102783f, 0.117758f, 0.343518f, 0.379302f}; - // exp_d = [1/8.795585, 1/6.410035, 1/7.372653, 1/1.185260, 1/9.729189, 1/8.491984, 1/2.911052, 1/2.636425], 1/9 + float d[] = {8.795585f,
+ 6.410035f,
+ 7.372653f,
+ 1.185260f,
+ 9.729189f,
+ 8.491984f,
+ 2.911052f,
+ 2.636425f};
+ float exp_d[] = {0.113693f,
+ 0.156005f,
+ 0.135636f,
+ 0.843696f,
+ 0.102783f,
+ 0.117758f,
+ 0.343518f,
+ 0.379302f};
+ // exp_d = [1/8.795585, 1/6.410035, 1/7.372653, 1/1.185260, 1/9.729189, 1/8.491984,
+ // 1/2.911052, 1/2.636425]
 Tensor t1 = create_test_tensor(t_shape, d, false); Tensor expected_res = 
create_test_tensor(t_shape, exp_d, false); Tensor actual_res = Tensor_reciprocal(t1); @@ -81,80 +98,89 @@ void test_reciprocal_operator() { { const char* tc_name = "reciprocal_near_zero"; TensorShape s_shape = {1}; - float d1[] = {1e-6f}; // Very small number - float exp_d[] = {1e6f}; // 1 / (1e-6) = 1e6 + float d1[] = {1e-6f}; // Very small number + float exp_d[] = {1e6f}; // 1 / (1e-6) = 1e6 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_reciprocal(t1); - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, 1e-1f); // Using a larger tolerance due to floating point imprecision + compare_tensors(&actual_res, + &expected_res, + op_name, + tc_name, + 1, + 1e-1f); // Using a larger tolerance due to floating point imprecision } - // Test Case 6: Reciprocal of negative numbers - { - const char* tc_name = "reciprocal_negative"; - TensorShape s_shape = {1}; - float d1[] = {-19.352466f}; - float exp_d[] = {-0.051673f}; // 1 / (-19.3525) = -0.0517 - Tensor t1 = create_test_tensor(s_shape, d1, false); - Tensor expected_res = create_test_tensor(s_shape, exp_d, false); - Tensor actual_res = Tensor_reciprocal(t1); - - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + // Test Case 6: Reciprocal of negative numbers + { + const char* tc_name = "reciprocal_negative"; + TensorShape s_shape = {1}; + float d1[] = {-19.352466f}; + float exp_d[] = {-0.051673f}; // 1 / (-19.3525) = -0.0517 + Tensor t1 = create_test_tensor(s_shape, d1, false); + Tensor expected_res = create_test_tensor(s_shape, exp_d, false); + Tensor actual_res = Tensor_reciprocal(t1); + + compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } -// Test Case 7: 4D tensor reciprocal operations -{ - const char* tc_name = "reciprocal_4d_tensor"; - TensorShape t_shape = {2, 1, 2, 1}; // 2x1x2x1 tensor - float d1[] = {19.063572f, 14.907885f, 12.374511f, 3.964354f}; - float exp_d[] = {0.052456f, 0.067079f, 0.080811f, 0.252248f};// Expected: [1/d1[0], 1/d1[1], ...] - Tensor t1 = create_test_tensor(t_shape, d1, false); - Tensor expected_res = create_test_tensor(t_shape, exp_d, false); - Tensor actual_res = Tensor_reciprocal(t1); - - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); -} + // Test Case 7: 4D tensor reciprocal operations + { + const char* tc_name = "reciprocal_4d_tensor"; + TensorShape t_shape = {2, 1, 2, 1}; // 2x1x2x1 tensor + float d1[] = {19.063572f, 14.907885f, 12.374511f, 3.964354f}; + float exp_d[] = {0.052456f, + 0.067079f, + 0.080811f, + 0.252248f}; // Expected: [1/d1[0], 1/d1[1], ...] 
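        /* Aside (annotation, not part of the original change): these cases
         * assume Tensor_reciprocal computes 1.0f / x elementwise, e.g.
         *     1.0f / 19.063572f ~= 0.052456f  (to six decimal places),
         * so the exp_d arrays are just the float quotients. Inputs near zero
         * blow up toward +/-INFINITY in float arithmetic, which is why the
         * "reciprocal_near_zero" case above runs with the looser 1e-1f
         * tolerance while the others keep TEST_FLOAT_TOLERANCE. */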
+ Tensor t1 = create_test_tensor(t_shape, d1, false); + Tensor expected_res = create_test_tensor(t_shape, exp_d, false); + Tensor actual_res = Tensor_reciprocal(t1); + compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + } -// Test Case 8: Mixed positive and negative values -{ - const char* tc_name = "reciprocal_mixed_signs"; - TensorShape v_shape = {4}; - float d1[] = {-17.200274f, -22.095819f, 18.308807f, 5.055750f}; - float exp_d[] = {-0.058139f, -0.045257f, 0.054619f, 0.197795f}; - Tensor t1 = create_test_tensor(v_shape, d1, false); - Tensor expected_res = create_test_tensor(v_shape, exp_d, false); - Tensor actual_res = Tensor_reciprocal(t1); + // Test Case 8: Mixed positive and negative values + { + const char* tc_name = "reciprocal_mixed_signs"; + TensorShape v_shape = {4}; + float d1[] = {-17.200274f, -22.095819f, 18.308807f, 5.055750f}; + float exp_d[] = {-0.058139f, -0.045257f, 0.054619f, 0.197795f}; + Tensor t1 = create_test_tensor(v_shape, d1, false); + Tensor expected_res = create_test_tensor(v_shape, exp_d, false); + Tensor actual_res = Tensor_reciprocal(t1); - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); -} + compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + } -// Test Case 9: Reciprocal of fractional numbers -{ - const char* tc_name = "reciprocal_fractional"; - TensorShape v_shape = {3}; - float d1[] = {0.737265f, 0.118526f, 0.972919f}; - float exp_d[] = {1.356364f, 8.436964f, 1.027835f}; // Reciprocal of fractional numbers results in values > 1.0 - Tensor t1 = create_test_tensor(v_shape, d1, false); - Tensor expected_res = create_test_tensor(v_shape, exp_d, false); - Tensor actual_res = Tensor_reciprocal(t1); - - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); -} + // Test Case 9: Reciprocal of fractional numbers + { + const char* tc_name = "reciprocal_fractional"; + TensorShape v_shape = {3}; + float d1[] = {0.737265f, 0.118526f, 0.972919f}; + float exp_d[] = {1.356364f, + 8.436964f, + 1.027835f}; // Reciprocal of fractional numbers results in values > 1.0 + Tensor t1 = create_test_tensor(v_shape, d1, false); + Tensor expected_res = create_test_tensor(v_shape, exp_d, false); + Tensor actual_res = Tensor_reciprocal(t1); -// Test Case 10: Reciprocal of very large numbers (testing for underflow) -{ - const char* tc_name = "reciprocal_large_numbers"; - TensorShape s_shape = {1}; - float d1[] = {8.341182e+06f}; - float exp_d[] = {1.198871e-07f}; // 1 / (8.34e+06) = 1.20e-07 - Tensor t1 = create_test_tensor(s_shape, d1, false); - Tensor expected_res = create_test_tensor(s_shape, exp_d, false); - Tensor actual_res = Tensor_reciprocal(t1); - - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); -} + compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + } + + // Test Case 10: Reciprocal of very large numbers (testing for underflow) + { + const char* tc_name = "reciprocal_large_numbers"; + TensorShape s_shape = {1}; + float d1[] = {8.341182e+06f}; + float exp_d[] = {1.198871e-07f}; // 1 / (8.34e+06) = 1.20e-07 + Tensor t1 = create_test_tensor(s_shape, d1, false); + Tensor expected_res = create_test_tensor(s_shape, exp_d, false); + Tensor actual_res = Tensor_reciprocal(t1); + + compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + } cten_free(pool_id); } diff --git a/tests/Operator/test_softmax.c 
b/tests/Operator/test_softmax.c index 2dab2fe..9f9ced9 100644 --- a/tests/Operator/test_softmax.c +++ b/tests/Operator/test_softmax.c @@ -6,304 +6,300 @@ void test_softmax_operator() { const char* op_name = "softmax"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1 { const char* tc_name = "softmax_basic"; - TensorShape shape_1 = { 6, 0, 0, 0 }; + TensorShape shape_1 = {6, 0, 0, 0}; int dim_1 = 0; - float input_data_1[] = { - -1.211608f, -0.344907f, 1.322972f, -0.427439f, -1.496399f, 0.685451f - }; - float expected_output_1[] = { - 0.039064f, 0.092935f, 0.492638f, 0.085572f, 0.029383f, 0.260409f - }; + float input_data_1[] = + {-1.211608f, -0.344907f, 1.322972f, -0.427439f, -1.496399f, 0.685451f}; + float expected_output_1[] = + {0.039064f, 0.092935f, 0.492638f, 0.085572f, 0.029383f, 0.260409f}; Tensor t_input_1 = create_test_tensor(shape_1, input_data_1, false); Tensor t_output_1 = nn_softmax(t_input_1, dim_1); Tensor t_expected_1 = create_test_tensor(shape_1, expected_output_1, false); compare_tensors(&t_output_1, &t_expected_1, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 2 { const char* tc_name = "softmax_2d"; - TensorShape shape_2 = { 4, 5, 0, 0 }; + TensorShape shape_2 = {4, 5, 0, 0}; int dim_2 = 0; - float input_data_2[] = { - -0.973596f, -0.593090f, 0.240839f, 0.778621f, -0.619067f, 1.254894f, -0.395984f, -1.496162f, - 0.154189f, 0.167212f, 0.130392f, -0.652786f, 0.904415f, -0.958059f, -1.114413f, -0.863093f, - -0.971413f, 0.044263f, -0.548822f, 0.153366f - }; - float expected_output_2[] = { - 0.069354f, 0.260082f, 0.253852f, 0.505862f, 0.167515f, 0.644007f, 0.316748f, 0.044690f, - 0.270922f, 0.367732f, 0.209183f, 0.245011f, 0.492909f, 0.089084f, 0.102077f, 0.077457f, - 0.178159f, 0.208549f, 0.134131f, 0.362676f - }; + float input_data_2[] = {-0.973596f, -0.593090f, 0.240839f, 0.778621f, -0.619067f, + 1.254894f, -0.395984f, -1.496162f, 0.154189f, 0.167212f, + 0.130392f, -0.652786f, 0.904415f, -0.958059f, -1.114413f, + -0.863093f, -0.971413f, 0.044263f, -0.548822f, 0.153366f}; + float expected_output_2[] = {0.069354f, 0.260082f, 0.253852f, 0.505862f, 0.167515f, + 0.644007f, 0.316748f, 0.044690f, 0.270922f, 0.367732f, + 0.209183f, 0.245011f, 0.492909f, 0.089084f, 0.102077f, + 0.077457f, 0.178159f, 0.208549f, 0.134131f, 0.362676f}; Tensor t_input_2 = create_test_tensor(shape_2, input_data_2, false); Tensor t_output_2 = nn_softmax(t_input_2, dim_2); Tensor t_expected_2 = create_test_tensor(shape_2, expected_output_2, false); compare_tensors(&t_output_2, &t_expected_2, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 3 { const char* tc_name = "softmax_2d_dim_1"; - TensorShape shape_3 = { 4, 5, 0, 0 }; + TensorShape shape_3 = {4, 5, 0, 0}; int dim_3 = 1; - float input_data_3[] = { - -0.635548f, 1.540740f, -0.242792f, 0.434214f, -0.923128f, 0.454926f, 0.762836f, 0.451787f, - -0.751669f, 2.009526f, 0.965157f, -0.943850f, 0.146616f, 0.078624f, -0.412194f, 0.908799f, - -0.616340f, 0.549718f, 0.507248f, 0.581007f - }; - float expected_output_3[] = { - 0.066848f, 0.589165f, 0.099006f, 0.194840f, 0.050141f, 0.119192f, 0.162170f, 0.118818f, - 0.035664f, 0.564157f, 0.443729f, 0.065773f, 0.195717f, 0.182852f, 0.111929f, 0.302508f, - 0.065823f, 0.211246f, 0.202463f, 0.217961f - }; + float input_data_3[] = {-0.635548f, 1.540740f, -0.242792f, 0.434214f, -0.923128f, + 0.454926f, 0.762836f, 0.451787f, -0.751669f, 2.009526f, + 0.965157f, -0.943850f, 0.146616f, 0.078624f, -0.412194f, + 0.908799f, -0.616340f, 0.549718f, 0.507248f, 
0.581007f}; + float expected_output_3[] = {0.066848f, 0.589165f, 0.099006f, 0.194840f, 0.050141f, + 0.119192f, 0.162170f, 0.118818f, 0.035664f, 0.564157f, + 0.443729f, 0.065773f, 0.195717f, 0.182852f, 0.111929f, + 0.302508f, 0.065823f, 0.211246f, 0.202463f, 0.217961f}; Tensor t_input_3 = create_test_tensor(shape_3, input_data_3, false); Tensor t_output_3 = nn_softmax(t_input_3, dim_3); Tensor t_expected_3 = create_test_tensor(shape_3, expected_output_3, false); compare_tensors(&t_output_3, &t_expected_3, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 5 { const char* tc_name = "softmax_3d_dim_1"; - TensorShape shape_5 = { 3, 4, 5, 0 }; + TensorShape shape_5 = {3, 4, 5, 0}; int dim_5 = 1; float input_data_5[] = { - 0.329450f, -0.516812f, -0.198207f, 0.948730f, -1.939420f, -0.845908f, -1.289681f, 0.117924f, - -1.209618f, -0.385438f, -1.246859f, -0.602803f, 0.499607f, 1.421608f, -0.082139f, -0.354326f, - 1.332360f, -1.326013f, -1.678908f, 0.702517f, -0.563351f, -0.613228f, 1.338767f, -1.208467f, - 0.090329f, 0.869903f, -0.215624f, 0.413125f, 0.446294f, -0.154964f, 0.473783f, -0.962919f, - -0.257307f, -0.566939f, 0.528619f, -0.467904f, -0.606036f, 1.747805f, 0.583291f, -0.186993f, - -1.803139f, 2.192201f, 1.364973f, 0.921052f, -0.799728f, 1.227652f, 1.203330f, 0.735748f, - -0.198611f, 0.797526f, -1.293098f, -0.937443f, 1.848753f, -0.077266f, 0.413816f, -2.428300f, - 0.277431f, -0.883364f, -1.196831f, 1.152895f - }; + 0.329450f, -0.516812f, -0.198207f, 0.948730f, -1.939420f, -0.845908f, -1.289681f, + 0.117924f, -1.209618f, -0.385438f, -1.246859f, -0.602803f, 0.499607f, 1.421608f, + -0.082139f, -0.354326f, 1.332360f, -1.326013f, -1.678908f, 0.702517f, -0.563351f, + -0.613228f, 1.338767f, -1.208467f, 0.090329f, 0.869903f, -0.215624f, 0.413125f, + 0.446294f, -0.154964f, 0.473783f, -0.962919f, -0.257307f, -0.566939f, 0.528619f, + -0.467904f, -0.606036f, 1.747805f, 0.583291f, -0.186993f, -1.803139f, 2.192201f, + 1.364973f, 0.921052f, -0.799728f, 1.227652f, 1.203330f, 0.735748f, -0.198611f, + 0.797526f, -1.293098f, -0.937443f, 1.848753f, -0.077266f, 0.413816f, -2.428300f, + 0.277431f, -0.883364f, -1.196831f, 1.152895f}; float expected_output_5[] = { - 0.495012f, 0.114497f, 0.212544f, 0.358119f, 0.038202f, 0.152814f, 0.052862f, 0.291570f, - 0.041368f, 0.180704f, 0.102337f, 0.105063f, 0.427076f, 0.574639f, 0.244730f, 0.249836f, - 0.727578f, 0.068810f, 0.025874f, 0.536364f, 0.109726f, 0.238074f, 0.322130f, 0.070765f, - 0.244478f, 0.460007f, 0.354314f, 0.127653f, 0.370233f, 0.191298f, 0.309551f, 0.167820f, - 0.065293f, 0.134410f, 0.378955f, 0.120715f, 0.239792f, 0.484925f, 0.424592f, 0.185268f, - 0.041816f, 0.639752f, 0.306676f, 0.550911f, 0.061155f, 0.866166f, 0.237985f, 0.163460f, - 0.179812f, 0.302070f, 0.069639f, 0.027979f, 0.497488f, 0.203010f, 0.205809f, 0.022379f, - 0.094284f, 0.032377f, 0.066267f, 0.430966f - }; + 0.495012f, 0.114497f, 0.212544f, 0.358119f, 0.038202f, 0.152814f, 0.052862f, 0.291570f, + 0.041368f, 0.180704f, 0.102337f, 0.105063f, 0.427076f, 0.574639f, 0.244730f, 0.249836f, + 0.727578f, 0.068810f, 0.025874f, 0.536364f, 0.109726f, 0.238074f, 0.322130f, 0.070765f, + 0.244478f, 0.460007f, 0.354314f, 0.127653f, 0.370233f, 0.191298f, 0.309551f, 0.167820f, + 0.065293f, 0.134410f, 0.378955f, 0.120715f, 0.239792f, 0.484925f, 0.424592f, 0.185268f, + 0.041816f, 0.639752f, 0.306676f, 0.550911f, 0.061155f, 0.866166f, 0.237985f, 0.163460f, + 0.179812f, 0.302070f, 0.069639f, 0.027979f, 0.497488f, 0.203010f, 0.205809f, 0.022379f, + 0.094284f, 0.032377f, 0.066267f, 0.430966f}; 
Tensor t_input_5 = create_test_tensor(shape_5, input_data_5, false); Tensor t_output_5 = nn_softmax(t_input_5, dim_5); Tensor t_expected_5 = create_test_tensor(shape_5, expected_output_5, false); compare_tensors(&t_output_5, &t_expected_5, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 6 { const char* tc_name = "softmax_3d_dim_2"; - TensorShape shape_6 = { 3, 4, 5, 0 }; + TensorShape shape_6 = {3, 4, 5, 0}; int dim_6 = 2; float input_data_6[] = { - -0.756514f, -1.056839f, 1.597832f, -2.225006f, -0.249725f, 0.087490f, 1.345537f, -0.307392f, - 0.428569f, 0.085686f, 0.741215f, 0.588628f, -0.053020f, -0.708825f, -0.952247f, 1.000654f, - 1.107366f, 0.641223f, 0.423863f, -1.611730f, -1.041236f, 0.094752f, -2.314693f, -0.517261f, - 0.995701f, 1.425699f, 1.413595f, 0.511550f, -0.910978f, 2.286082f, -0.168117f, 0.493978f, - -1.621782f, 1.217807f, -0.100802f, 0.590658f, 0.337501f, 1.765542f, 0.486405f, 1.376526f, - -1.236085f, 0.626381f, -1.855668f, -0.734225f, 0.681713f, 0.901065f, -2.439517f, -0.263396f, - -1.291715f, 0.858222f, 0.146793f, 0.921066f, -0.744419f, -1.882994f, 0.447125f, 1.785266f, - -0.253973f, -0.175359f, -1.915487f, -0.002335f - }; + -0.756514f, -1.056839f, 1.597832f, -2.225006f, -0.249725f, 0.087490f, 1.345537f, + -0.307392f, 0.428569f, 0.085686f, 0.741215f, 0.588628f, -0.053020f, -0.708825f, + -0.952247f, 1.000654f, 1.107366f, 0.641223f, 0.423863f, -1.611730f, -1.041236f, + 0.094752f, -2.314693f, -0.517261f, 0.995701f, 1.425699f, 1.413595f, 0.511550f, + -0.910978f, 2.286082f, -0.168117f, 0.493978f, -1.621782f, 1.217807f, -0.100802f, + 0.590658f, 0.337501f, 1.765542f, 0.486405f, 1.376526f, -1.236085f, 0.626381f, + -1.855668f, -0.734225f, 0.681713f, 0.901065f, -2.439517f, -0.263396f, -1.291715f, + 0.858222f, 0.146793f, 0.921066f, -0.744419f, -1.882994f, 0.447125f, 1.785266f, + -0.253973f, -0.175359f, -1.915487f, -0.002335f}; float expected_output_6[] = { - 0.070611f, 0.052293f, 0.743625f, 0.016260f, 0.117211f, 0.131632f, 0.463151f, 0.088688f, - 0.185135f, 0.131394f, 0.366454f, 0.314595f, 0.165611f, 0.085956f, 0.067384f, 0.290213f, - 0.322895f, 0.202590f, 0.163012f, 0.021290f, 0.072728f, 0.226492f, 0.020354f, 0.122817f, - 0.557609f, 0.206205f, 0.203724f, 0.082659f, 0.019929f, 0.487482f, 0.121349f, 0.235277f, - 0.028361f, 0.485215f, 0.129798f, 0.123313f, 0.095734f, 0.399260f, 0.111105f, 0.270588f, - 0.060844f, 0.391810f, 0.032744f, 0.100501f, 0.414100f, 0.413706f, 0.014652f, 0.129114f, - 0.046172f, 0.396356f, 0.197595f, 0.428588f, 0.081045f, 0.025957f, 0.266815f, 0.683544f, - 0.088948f, 0.096223f, 0.016887f, 0.114399f - }; + 0.070611f, 0.052293f, 0.743625f, 0.016260f, 0.117211f, 0.131632f, 0.463151f, 0.088688f, + 0.185135f, 0.131394f, 0.366454f, 0.314595f, 0.165611f, 0.085956f, 0.067384f, 0.290213f, + 0.322895f, 0.202590f, 0.163012f, 0.021290f, 0.072728f, 0.226492f, 0.020354f, 0.122817f, + 0.557609f, 0.206205f, 0.203724f, 0.082659f, 0.019929f, 0.487482f, 0.121349f, 0.235277f, + 0.028361f, 0.485215f, 0.129798f, 0.123313f, 0.095734f, 0.399260f, 0.111105f, 0.270588f, + 0.060844f, 0.391810f, 0.032744f, 0.100501f, 0.414100f, 0.413706f, 0.014652f, 0.129114f, + 0.046172f, 0.396356f, 0.197595f, 0.428588f, 0.081045f, 0.025957f, 0.266815f, 0.683544f, + 0.088948f, 0.096223f, 0.016887f, 0.114399f}; Tensor t_input_6 = create_test_tensor(shape_6, input_data_6, false); Tensor t_output_6 = nn_softmax(t_input_6, dim_6); Tensor t_expected_6 = create_test_tensor(shape_6, expected_output_6, false); compare_tensors(&t_output_6, &t_expected_6, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } 
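    /*
     * Reference sketch (annotation, not part of this patch): a minimal,
     * numerically stable softmax along one dimension of a contiguous
     * row-major float buffer. The expected_output_* arrays in these cases
     * can be reproduced with it; cten's nn_softmax is the function actually
     * under test, and the layout assumptions below (row-major storage,
     * trailing dims contiguous) are mine, not taken from the library.
     */
    #include <math.h>
    #include <stddef.h>

    static void softmax_ref(const float* in, float* out,
                            const int* shape, int ndim, int dim) {
        /* Split the index space into outer x axis x inner, where `axis` is
         * the softmax dimension and `inner` the product of trailing dims. */
        size_t outer = 1, inner = 1;
        for (int i = 0; i < dim; i++) outer *= (size_t)shape[i];
        for (int i = dim + 1; i < ndim; i++) inner *= (size_t)shape[i];
        size_t axis = (size_t)shape[dim];
        for (size_t o = 0; o < outer; o++) {
            for (size_t i = 0; i < inner; i++) {
                const float* src = in + o * axis * inner + i;
                float* dst = out + o * axis * inner + i;
                float m = src[0]; /* subtract the max for numerical stability */
                for (size_t a = 1; a < axis; a++)
                    if (src[a * inner] > m) m = src[a * inner];
                float sum = 0.0f;
                for (size_t a = 0; a < axis; a++) {
                    dst[a * inner] = expf(src[a * inner] - m);
                    sum += dst[a * inner];
                }
                for (size_t a = 0; a < axis; a++) /* normalize to sum to 1 */
                    dst[a * inner] /= sum;
            }
        }
    }
    /* e.g. int shape[3] = {3, 4, 5}; softmax_ref(in, out, shape, 3, 1);
     * reproduces the values checked in "softmax_3d_dim_1" above. */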
- + // Test Case 7 { const char* tc_name = "softmax_4d_dim_0"; - TensorShape shape_7 = { 2, 3, 4, 5 }; + TensorShape shape_7 = {2, 3, 4, 5}; int dim_7 = 0; float input_data_7[] = { - -0.511257f, -0.486208f, 1.811264f, -1.887858f, 0.125982f, -1.844349f, -0.887010f, 0.964408f, - -0.424253f, -0.920038f, 0.508277f, 0.178656f, -0.036295f, 1.510589f, 0.413179f, 1.068661f, - -2.145023f, 0.378915f, 1.998491f, -2.765419f, 1.537244f, -0.811624f, -1.216795f, 1.327383f, - -0.726409f, 0.241636f, 1.059124f, 1.986931f, -0.199572f, -1.392530f, -1.172699f, 0.018050f, - -0.371864f, 1.119579f, -0.214250f, -0.692109f, 1.547399f, 0.853238f, 0.812299f, -0.275152f, - -1.434234f, 1.395362f, -0.498836f, -0.007882f, -1.192756f, 1.306992f, -0.337672f, 2.039225f, - 1.120167f, 0.087211f, 1.003423f, -0.288729f, -0.824022f, -0.524947f, -0.489327f, 0.323384f, - 3.372605f, -0.329193f, 0.681299f, -1.262865f, -0.836464f, 1.061926f, -0.135600f, -0.231079f, - -1.152552f, 0.053548f, -0.834973f, -1.011236f, 0.081576f, -0.580494f, -0.799639f, -0.602497f, - 2.375473f, 0.757348f, 0.427160f, 0.556822f, 0.972614f, 0.664390f, 0.395206f, -1.041777f, - 1.490402f, -0.157482f, 2.521987f, 0.666604f, 0.563484f, 2.161451f, 0.480465f, -0.581542f, - 0.612467f, 2.106347f, -0.821566f, 0.057422f, -0.882044f, -0.337474f, 0.275213f, 0.519093f, - 0.629499f, -0.058555f, -1.126014f, -1.360813f, -2.175084f, -2.276683f, -1.839429f, -0.702685f, - -0.197460f, 0.301871f, 2.437877f, -1.052195f, 2.052355f, 0.165766f, -0.066026f, 0.735303f, - 0.877751f, -0.804196f, -1.014561f, -0.524483f, 0.728983f, -0.361918f, 0.156797f, -1.103459f - }; + -0.511257f, -0.486208f, 1.811264f, -1.887858f, 0.125982f, -1.844349f, -0.887010f, + 0.964408f, -0.424253f, -0.920038f, 0.508277f, 0.178656f, -0.036295f, 1.510589f, + 0.413179f, 1.068661f, -2.145023f, 0.378915f, 1.998491f, -2.765419f, 1.537244f, + -0.811624f, -1.216795f, 1.327383f, -0.726409f, 0.241636f, 1.059124f, 1.986931f, + -0.199572f, -1.392530f, -1.172699f, 0.018050f, -0.371864f, 1.119579f, -0.214250f, + -0.692109f, 1.547399f, 0.853238f, 0.812299f, -0.275152f, -1.434234f, 1.395362f, + -0.498836f, -0.007882f, -1.192756f, 1.306992f, -0.337672f, 2.039225f, 1.120167f, + 0.087211f, 1.003423f, -0.288729f, -0.824022f, -0.524947f, -0.489327f, 0.323384f, + 3.372605f, -0.329193f, 0.681299f, -1.262865f, -0.836464f, 1.061926f, -0.135600f, + -0.231079f, -1.152552f, 0.053548f, -0.834973f, -1.011236f, 0.081576f, -0.580494f, + -0.799639f, -0.602497f, 2.375473f, 0.757348f, 0.427160f, 0.556822f, 0.972614f, + 0.664390f, 0.395206f, -1.041777f, 1.490402f, -0.157482f, 2.521987f, 0.666604f, + 0.563484f, 2.161451f, 0.480465f, -0.581542f, 0.612467f, 2.106347f, -0.821566f, + 0.057422f, -0.882044f, -0.337474f, 0.275213f, 0.519093f, 0.629499f, -0.058555f, + -1.126014f, -1.360813f, -2.175084f, -2.276683f, -1.839429f, -0.702685f, -0.197460f, + 0.301871f, 2.437877f, -1.052195f, 2.052355f, 0.165766f, -0.066026f, 0.735303f, + 0.877751f, -0.804196f, -1.014561f, -0.524483f, 0.728983f, -0.361918f, 0.156797f, + -1.103459f}; float expected_output_7[] = { - 0.580593f, 0.175356f, 0.875104f, 0.160195f, 0.782200f, 0.130347f, 0.486994f, 0.878216f, - 0.376172f, 0.415920f, 0.787164f, 0.685929f, 0.082280f, 0.679884f, 0.496505f, 0.625237f, - 0.042386f, 0.429112f, 0.832477f, 0.151403f, 0.511708f, 0.342057f, 0.023231f, 0.659435f, - 0.215871f, 0.127882f, 0.640759f, 0.928805f, 0.307456f, 0.029344f, 0.413108f, 0.490158f, - 0.624849f, 0.811082f, 0.380020f, 0.229488f, 0.714614f, 0.713367f, 0.874167f, 0.747564f, - 0.677182f, 0.975206f, 0.792587f, 0.667035f, 0.269867f, 
0.732064f, 0.058660f, 0.956537f, - 0.282481f, 0.480372f, 0.744492f, 0.264243f, 0.154234f, 0.569362f, 0.628371f, 0.700119f, - 0.933617f, 0.508181f, 0.628200f, 0.460233f, 0.419407f, 0.824644f, 0.124896f, 0.839805f, - 0.217800f, 0.869653f, 0.513006f, 0.121784f, 0.623828f, 0.584080f, 0.212836f, 0.314071f, - 0.917720f, 0.320115f, 0.503495f, 0.374763f, 0.957614f, 0.570888f, 0.167523f, 0.848597f, - 0.488292f, 0.657943f, 0.976769f, 0.340565f, 0.784129f, 0.872118f, 0.359241f, 0.071195f, - 0.692544f, 0.970656f, 0.586892f, 0.509842f, 0.375151f, 0.188918f, 0.619980f, 0.770512f, - 0.285386f, 0.286633f, 0.125833f, 0.252436f, 0.322818f, 0.024794f, 0.207413f, 0.332965f, - 0.730133f, 0.267936f, 0.941340f, 0.043463f, 0.717519f, 0.519628f, 0.255508f, 0.735757f, - 0.845766f, 0.430638f, 0.371629f, 0.299881f, 0.066383f, 0.491819f, 0.371800f, 0.539767f - }; + 0.580593f, 0.175356f, 0.875104f, 0.160195f, 0.782200f, 0.130347f, 0.486994f, 0.878216f, + 0.376172f, 0.415920f, 0.787164f, 0.685929f, 0.082280f, 0.679884f, 0.496505f, 0.625237f, + 0.042386f, 0.429112f, 0.832477f, 0.151403f, 0.511708f, 0.342057f, 0.023231f, 0.659435f, + 0.215871f, 0.127882f, 0.640759f, 0.928805f, 0.307456f, 0.029344f, 0.413108f, 0.490158f, + 0.624849f, 0.811082f, 0.380020f, 0.229488f, 0.714614f, 0.713367f, 0.874167f, 0.747564f, + 0.677182f, 0.975206f, 0.792587f, 0.667035f, 0.269867f, 0.732064f, 0.058660f, 0.956537f, + 0.282481f, 0.480372f, 0.744492f, 0.264243f, 0.154234f, 0.569362f, 0.628371f, 0.700119f, + 0.933617f, 0.508181f, 0.628200f, 0.460233f, 0.419407f, 0.824644f, 0.124896f, 0.839805f, + 0.217800f, 0.869653f, 0.513006f, 0.121784f, 0.623828f, 0.584080f, 0.212836f, 0.314071f, + 0.917720f, 0.320115f, 0.503495f, 0.374763f, 0.957614f, 0.570888f, 0.167523f, 0.848597f, + 0.488292f, 0.657943f, 0.976769f, 0.340565f, 0.784129f, 0.872118f, 0.359241f, 0.071195f, + 0.692544f, 0.970656f, 0.586892f, 0.509842f, 0.375151f, 0.188918f, 0.619980f, 0.770512f, + 0.285386f, 0.286633f, 0.125833f, 0.252436f, 0.322818f, 0.024794f, 0.207413f, 0.332965f, + 0.730133f, 0.267936f, 0.941340f, 0.043463f, 0.717519f, 0.519628f, 0.255508f, 0.735757f, + 0.845766f, 0.430638f, 0.371629f, 0.299881f, 0.066383f, 0.491819f, 0.371800f, 0.539767f}; Tensor t_input_7 = create_test_tensor(shape_7, input_data_7, false); Tensor t_output_7 = nn_softmax(t_input_7, dim_7); Tensor t_expected_7 = create_test_tensor(shape_7, expected_output_7, false); compare_tensors(&t_output_7, &t_expected_7, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 8 { const char* tc_name = "softmax_4d_dim_1"; - TensorShape shape_8 = { 2, 3, 4, 5 }; + TensorShape shape_8 = {2, 3, 4, 5}; int dim_8 = 1; float input_data_8[] = { - -1.257495f, -0.301707f, -1.078206f, -0.869783f, -0.487602f, -0.677509f, 0.162030f, -0.905193f, - -0.224179f, 0.913948f, 0.087469f, 0.984896f, -0.373979f, 0.421240f, 1.695008f, -0.253288f, - -2.464733f, 0.410508f, 0.693939f, 0.704088f, -0.715867f, -0.383032f, -0.120659f, 0.230918f, - -0.320296f, 0.395113f, 0.191242f, -0.567990f, 0.270543f, -0.721094f, 0.464662f, -0.195229f, - 1.703504f, 0.929006f, -0.398823f, -1.703876f, -0.537868f, 0.680445f, 0.548786f, 0.262192f, - 0.891745f, 1.893878f, -1.034683f, 0.883484f, -0.858102f, 0.530082f, -0.637206f, -0.330277f, - 0.298938f, 0.188639f, 0.477610f, 0.458072f, 1.039523f, 0.251319f, -0.154824f, 0.237238f, - 0.253144f, 1.160356f, 0.298290f, -0.897417f, -2.845256f, 0.800772f, -0.719587f, 0.205090f, - 0.121262f, -0.199399f, -0.823564f, 0.250895f, -0.711147f, 0.262902f, 0.009090f, 1.922074f, - 1.151204f, -0.220231f, 0.648669f, 0.130951f, 
-1.111660f, -1.108665f, 0.972070f, 1.388218f, - 1.991043f, 0.932965f, -0.278309f, 1.057666f, 0.982868f, -0.582407f, 0.422493f, -1.071779f, - -1.034922f, 0.807841f, -0.172142f, -0.949420f, -0.186462f, 1.273863f, -0.234617f, 0.575369f, - -0.792975f, -2.354831f, 0.391219f, 1.058017f, -1.233542f, 2.169375f, -0.878724f, -0.985356f, - 0.411549f, -0.461113f, -2.248060f, -0.861590f, 0.260213f, 0.978174f, 1.910362f, 1.583116f, - 0.565735f, 0.682193f, 2.205384f, -0.690529f, 0.997477f, 1.452847f, 1.229271f, -0.092104f - }; + -1.257495f, -0.301707f, -1.078206f, -0.869783f, -0.487602f, -0.677509f, 0.162030f, + -0.905193f, -0.224179f, 0.913948f, 0.087469f, 0.984896f, -0.373979f, 0.421240f, + 1.695008f, -0.253288f, -2.464733f, 0.410508f, 0.693939f, 0.704088f, -0.715867f, + -0.383032f, -0.120659f, 0.230918f, -0.320296f, 0.395113f, 0.191242f, -0.567990f, + 0.270543f, -0.721094f, 0.464662f, -0.195229f, 1.703504f, 0.929006f, -0.398823f, + -1.703876f, -0.537868f, 0.680445f, 0.548786f, 0.262192f, 0.891745f, 1.893878f, + -1.034683f, 0.883484f, -0.858102f, 0.530082f, -0.637206f, -0.330277f, 0.298938f, + 0.188639f, 0.477610f, 0.458072f, 1.039523f, 0.251319f, -0.154824f, 0.237238f, + 0.253144f, 1.160356f, 0.298290f, -0.897417f, -2.845256f, 0.800772f, -0.719587f, + 0.205090f, 0.121262f, -0.199399f, -0.823564f, 0.250895f, -0.711147f, 0.262902f, + 0.009090f, 1.922074f, 1.151204f, -0.220231f, 0.648669f, 0.130951f, -1.111660f, + -1.108665f, 0.972070f, 1.388218f, 1.991043f, 0.932965f, -0.278309f, 1.057666f, + 0.982868f, -0.582407f, 0.422493f, -1.071779f, -1.034922f, 0.807841f, -0.172142f, + -0.949420f, -0.186462f, 1.273863f, -0.234617f, 0.575369f, -0.792975f, -2.354831f, + 0.391219f, 1.058017f, -1.233542f, 2.169375f, -0.878724f, -0.985356f, 0.411549f, + -0.461113f, -2.248060f, -0.861590f, 0.260213f, 0.978174f, 1.910362f, 1.583116f, + 0.565735f, 0.682193f, 2.205384f, -0.690529f, 0.997477f, 1.452847f, 1.229271f, + -0.092104f}; float expected_output_8[] = { - 0.088518f, 0.091683f, 0.215064f, 0.102253f, 0.348128f, 0.137581f, 0.403337f, 0.239349f, - 0.231090f, 0.595550f, 0.254105f, 0.526949f, 0.076367f, 0.285282f, 0.780959f, 0.348723f, - 0.043448f, 0.225909f, 0.393990f, 0.542179f, 0.152145f, 0.084522f, 0.560305f, 0.307399f, - 0.411528f, 0.402154f, 0.415293f, 0.335333f, 0.378997f, 0.116099f, 0.370533f, 0.161900f, - 0.609740f, 0.474017f, 0.096225f, 0.081752f, 0.298398f, 0.295914f, 0.340758f, 0.348522f, - 0.759337f, 0.823795f, 0.224631f, 0.590348f, 0.240344f, 0.460265f, 0.181370f, 0.425318f, - 0.389913f, 0.288351f, 0.375362f, 0.311151f, 0.313893f, 0.240701f, 0.122816f, 0.569525f, - 0.658154f, 0.478177f, 0.265251f, 0.109299f, 0.007575f, 0.164713f, 0.293464f, 0.273990f, - 0.212596f, 0.407907f, 0.211989f, 0.626895f, 0.229098f, 0.209677f, 0.117252f, 0.565251f, - 0.549661f, 0.126249f, 0.162426f, 0.333404f, 0.094193f, 0.070210f, 0.350542f, 0.513784f, - 0.954464f, 0.187991f, 0.456246f, 0.642694f, 0.503205f, 0.278114f, 0.737001f, 0.167019f, - 0.165732f, 0.361588f, 0.097816f, 0.032001f, 0.144263f, 0.562476f, 0.067151f, 0.519971f, - 0.129546f, 0.020193f, 0.196101f, 0.369297f, 0.037961f, 0.647296f, 0.250289f, 0.083316f, - 0.284200f, 0.313979f, 0.051011f, 0.206086f, 0.605170f, 0.428735f, 0.784932f, 0.402748f, - 0.306076f, 0.311275f, 0.770423f, 0.146625f, 0.776261f, 0.909597f, 0.453357f, 0.116919f - }; + 0.088518f, 0.091683f, 0.215064f, 0.102253f, 0.348128f, 0.137581f, 0.403337f, 0.239349f, + 0.231090f, 0.595550f, 0.254105f, 0.526949f, 0.076367f, 0.285282f, 0.780959f, 0.348723f, + 0.043448f, 0.225909f, 0.393990f, 0.542179f, 0.152145f, 
0.084522f, 0.560305f, 0.307399f, + 0.411528f, 0.402154f, 0.415293f, 0.335333f, 0.378997f, 0.116099f, 0.370533f, 0.161900f, + 0.609740f, 0.474017f, 0.096225f, 0.081752f, 0.298398f, 0.295914f, 0.340758f, 0.348522f, + 0.759337f, 0.823795f, 0.224631f, 0.590348f, 0.240344f, 0.460265f, 0.181370f, 0.425318f, + 0.389913f, 0.288351f, 0.375362f, 0.311151f, 0.313893f, 0.240701f, 0.122816f, 0.569525f, + 0.658154f, 0.478177f, 0.265251f, 0.109299f, 0.007575f, 0.164713f, 0.293464f, 0.273990f, + 0.212596f, 0.407907f, 0.211989f, 0.626895f, 0.229098f, 0.209677f, 0.117252f, 0.565251f, + 0.549661f, 0.126249f, 0.162426f, 0.333404f, 0.094193f, 0.070210f, 0.350542f, 0.513784f, + 0.954464f, 0.187991f, 0.456246f, 0.642694f, 0.503205f, 0.278114f, 0.737001f, 0.167019f, + 0.165732f, 0.361588f, 0.097816f, 0.032001f, 0.144263f, 0.562476f, 0.067151f, 0.519971f, + 0.129546f, 0.020193f, 0.196101f, 0.369297f, 0.037961f, 0.647296f, 0.250289f, 0.083316f, + 0.284200f, 0.313979f, 0.051011f, 0.206086f, 0.605170f, 0.428735f, 0.784932f, 0.402748f, + 0.306076f, 0.311275f, 0.770423f, 0.146625f, 0.776261f, 0.909597f, 0.453357f, 0.116919f}; Tensor t_input_8 = create_test_tensor(shape_8, input_data_8, false); Tensor t_output_8 = nn_softmax(t_input_8, dim_8); Tensor t_expected_8 = create_test_tensor(shape_8, expected_output_8, false); compare_tensors(&t_output_8, &t_expected_8, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 9 { const char* tc_name = "softmax_4d_dim_2"; - TensorShape shape_9 = { 2, 3, 4, 5 }; + TensorShape shape_9 = {2, 3, 4, 5}; int dim_9 = 2; float input_data_9[] = { - -1.236507f, 0.666192f, 2.592585f, 1.202181f, 0.778915f, -0.988539f, -1.170790f, -0.664889f, - -0.133236f, 0.807191f, 0.587307f, 1.008587f, -0.632058f, -0.340653f, -0.811564f, -0.874503f, - -1.419854f, -0.396616f, 0.403904f, 0.289703f, 0.134876f, -0.566567f, -1.015988f, 0.774370f, - -0.585283f, 1.023082f, -3.720424f, 0.528280f, 0.629004f, 0.682999f, 1.163303f, -0.648755f, - -0.093579f, -0.965785f, 0.615421f, -0.579691f, 0.879971f, -1.569465f, 0.572907f, 0.395161f, - 0.707526f, -0.710514f, 0.541310f, -1.303612f, -0.096734f, -0.719989f, 0.291270f, 2.981184f, - -0.085921f, -1.003435f, 1.393750f, -1.186577f, -0.097973f, 0.483390f, -0.115178f, 1.874009f, - 0.961650f, -0.905726f, 1.063592f, -1.107064f, -0.975180f, 0.405832f, 0.111302f, 0.681526f, - 0.707183f, 0.418216f, -1.533945f, 0.452553f, -0.524506f, -0.095175f, 1.186117f, 0.202994f, - -1.421134f, -0.358176f, 0.012797f, 2.383064f, 0.235132f, -1.752441f, 0.091681f, 3.775636f, - -0.661333f, -0.440233f, -0.432518f, -2.818887f, -0.088851f, -1.265370f, 1.239664f, 0.049627f, - -0.196688f, -1.137375f, 1.724149f, -0.581866f, -0.415181f, 0.563136f, 0.445962f, -0.329975f, - -0.647561f, 1.692823f, 0.457386f, -0.711722f, -0.125222f, -0.381787f, 0.581898f, 0.475846f, - 1.356747f, 1.357815f, 0.333107f, 0.230856f, -0.038589f, 0.075892f, -1.058006f, 0.743106f, - 0.841856f, 1.096734f, 2.172618f, -0.493073f, 0.838112f, -0.760886f, 1.512828f, -0.775099f - }; + -1.236507f, 0.666192f, 2.592585f, 1.202181f, 0.778915f, -0.988539f, -1.170790f, + -0.664889f, -0.133236f, 0.807191f, 0.587307f, 1.008587f, -0.632058f, -0.340653f, + -0.811564f, -0.874503f, -1.419854f, -0.396616f, 0.403904f, 0.289703f, 0.134876f, + -0.566567f, -1.015988f, 0.774370f, -0.585283f, 1.023082f, -3.720424f, 0.528280f, + 0.629004f, 0.682999f, 1.163303f, -0.648755f, -0.093579f, -0.965785f, 0.615421f, + -0.579691f, 0.879971f, -1.569465f, 0.572907f, 0.395161f, 0.707526f, -0.710514f, + 0.541310f, -1.303612f, -0.096734f, -0.719989f, 0.291270f, 
2.981184f, -0.085921f, + -1.003435f, 1.393750f, -1.186577f, -0.097973f, 0.483390f, -0.115178f, 1.874009f, + 0.961650f, -0.905726f, 1.063592f, -1.107064f, -0.975180f, 0.405832f, 0.111302f, + 0.681526f, 0.707183f, 0.418216f, -1.533945f, 0.452553f, -0.524506f, -0.095175f, + 1.186117f, 0.202994f, -1.421134f, -0.358176f, 0.012797f, 2.383064f, 0.235132f, + -1.752441f, 0.091681f, 3.775636f, -0.661333f, -0.440233f, -0.432518f, -2.818887f, + -0.088851f, -1.265370f, 1.239664f, 0.049627f, -0.196688f, -1.137375f, 1.724149f, + -0.581866f, -0.415181f, 0.563136f, 0.445962f, -0.329975f, -0.647561f, 1.692823f, + 0.457386f, -0.711722f, -0.125222f, -0.381787f, 0.581898f, 0.475846f, 1.356747f, + 1.357815f, 0.333107f, 0.230856f, -0.038589f, 0.075892f, -1.058006f, 0.743106f, + 0.841856f, 1.096734f, 2.172618f, -0.493073f, 0.838112f, -0.760886f, 1.512828f, + -0.775099f}; float expected_output_9[] = { - 0.100877f, 0.371500f, 0.886067f, 0.518961f, 0.351418f, 0.129266f, 0.059179f, 0.034101f, - 0.136512f, 0.361496f, 0.624978f, 0.523190f, 0.035239f, 0.110941f, 0.071629f, 0.144880f, - 0.046132f, 0.044594f, 0.233586f, 0.215458f, 0.148880f, 0.160974f, 0.113962f, 0.349928f, - 0.094851f, 0.361892f, 0.006872f, 0.533861f, 0.302585f, 0.337171f, 0.416366f, 0.148273f, - 0.286654f, 0.061410f, 0.315139f, 0.072862f, 0.683881f, 0.065522f, 0.286078f, 0.252839f, - 0.155357f, 0.103434f, 0.075560f, 0.047577f, 0.363679f, 0.037271f, 0.281664f, 0.866793f, - 0.160782f, 0.146873f, 0.308570f, 0.064255f, 0.039871f, 0.284109f, 0.357033f, 0.498802f, - 0.550647f, 0.017777f, 0.507532f, 0.132415f, 0.023557f, 0.356732f, 0.359996f, 0.453031f, - 0.042633f, 0.094901f, 0.051275f, 0.506409f, 0.135630f, 0.019111f, 0.204534f, 0.291240f, - 0.077762f, 0.160173f, 0.021290f, 0.677007f, 0.300752f, 0.055832f, 0.251166f, 0.916967f, - 0.072444f, 0.124290f, 0.083245f, 0.014150f, 0.278243f, 0.039598f, 0.666817f, 0.134818f, - 0.194784f, 0.097511f, 0.787054f, 0.107876f, 0.084700f, 0.416428f, 0.474996f, 0.100904f, - 0.101017f, 0.697236f, 0.374639f, 0.149250f, 0.154036f, 0.105145f, 0.306565f, 0.159258f, - 0.273406f, 0.678730f, 0.214913f, 0.215808f, 0.095211f, 0.075952f, 0.060607f, 0.323834f, - 0.397577f, 0.296313f, 0.618211f, 0.106627f, 0.356109f, 0.080050f, 0.449218f, 0.032431f - }; + 0.100877f, 0.371500f, 0.886067f, 0.518961f, 0.351418f, 0.129266f, 0.059179f, 0.034101f, + 0.136512f, 0.361496f, 0.624978f, 0.523190f, 0.035239f, 0.110941f, 0.071629f, 0.144880f, + 0.046132f, 0.044594f, 0.233586f, 0.215458f, 0.148880f, 0.160974f, 0.113962f, 0.349928f, + 0.094851f, 0.361892f, 0.006872f, 0.533861f, 0.302585f, 0.337171f, 0.416366f, 0.148273f, + 0.286654f, 0.061410f, 0.315139f, 0.072862f, 0.683881f, 0.065522f, 0.286078f, 0.252839f, + 0.155357f, 0.103434f, 0.075560f, 0.047577f, 0.363679f, 0.037271f, 0.281664f, 0.866793f, + 0.160782f, 0.146873f, 0.308570f, 0.064255f, 0.039871f, 0.284109f, 0.357033f, 0.498802f, + 0.550647f, 0.017777f, 0.507532f, 0.132415f, 0.023557f, 0.356732f, 0.359996f, 0.453031f, + 0.042633f, 0.094901f, 0.051275f, 0.506409f, 0.135630f, 0.019111f, 0.204534f, 0.291240f, + 0.077762f, 0.160173f, 0.021290f, 0.677007f, 0.300752f, 0.055832f, 0.251166f, 0.916967f, + 0.072444f, 0.124290f, 0.083245f, 0.014150f, 0.278243f, 0.039598f, 0.666817f, 0.134818f, + 0.194784f, 0.097511f, 0.787054f, 0.107876f, 0.084700f, 0.416428f, 0.474996f, 0.100904f, + 0.101017f, 0.697236f, 0.374639f, 0.149250f, 0.154036f, 0.105145f, 0.306565f, 0.159258f, + 0.273406f, 0.678730f, 0.214913f, 0.215808f, 0.095211f, 0.075952f, 0.060607f, 0.323834f, + 0.397577f, 0.296313f, 0.618211f, 0.106627f, 
0.356109f, 0.080050f, 0.449218f, 0.032431f}; Tensor t_input_9 = create_test_tensor(shape_9, input_data_9, false); Tensor t_output_9 = nn_softmax(t_input_9, dim_9); Tensor t_expected_9 = create_test_tensor(shape_9, expected_output_9, false); compare_tensors(&t_output_9, &t_expected_9, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 10 { const char* tc_name = "softmax_4d_dim_3"; - TensorShape shape_10 = { 2, 3, 4, 5 }; + TensorShape shape_10 = {2, 3, 4, 5}; int dim_10 = 3; float input_data_10[] = { - -0.134424f, -0.480058f, -0.321428f, -0.705847f, 0.022239f, 0.145085f, 0.526791f, 0.674307f, - 0.288172f, 0.167516f, -0.010255f, 1.812615f, -0.116002f, 1.495559f, 1.053778f, -0.978276f, - 0.572919f, -0.345361f, 0.304904f, 0.770001f, 0.326273f, -0.870936f, -0.690999f, -0.568904f, - 2.087554f, -0.191169f, 1.430569f, -1.059834f, 0.374943f, -0.534675f, -0.422606f, -1.609942f, - -0.922619f, 2.244947f, -0.097903f, 0.121999f, -0.149312f, -0.649967f, -1.045418f, -0.361698f, - -0.163933f, -0.846074f, -0.327313f, 0.547455f, 2.043521f, 0.119833f, -0.708708f, 1.374049f, - 0.976319f, -0.034966f, 0.579193f, 0.462073f, -0.338565f, 0.910230f, 0.656344f, 1.844478f, - -0.310301f, -0.241161f, -1.556111f, -1.152509f, 1.286693f, 0.303133f, 0.007123f, 1.411441f, - 2.773875f, -1.916589f, 0.379059f, -0.869618f, 0.754217f, 1.803710f, -1.470451f, 1.252759f, - -1.259440f, 1.148975f, -0.794163f, 0.289906f, -1.423498f, 1.268674f, 0.049445f, 0.448380f, - 0.195606f, 0.325677f, 0.705436f, 0.019392f, -0.275582f, -0.821970f, -0.685265f, -1.903032f, - 0.322047f, 0.067131f, 1.327628f, 0.337458f, 0.622310f, 0.498616f, -1.797895f, 0.285230f, - 0.391524f, -1.209723f, 1.226868f, 0.358081f, 1.487907f, 0.660574f, -0.156783f, -0.198020f, - -0.855000f, -1.496671f, -0.828395f, 1.279418f, 0.645864f, -1.312350f, -0.903121f, 1.460771f, - -0.527595f, -1.623403f, -0.513170f, 0.545358f, -0.146408f, -0.963346f, -0.131965f, -0.910921f - }; + -0.134424f, -0.480058f, -0.321428f, -0.705847f, 0.022239f, 0.145085f, 0.526791f, + 0.674307f, 0.288172f, 0.167516f, -0.010255f, 1.812615f, -0.116002f, 1.495559f, + 1.053778f, -0.978276f, 0.572919f, -0.345361f, 0.304904f, 0.770001f, 0.326273f, + -0.870936f, -0.690999f, -0.568904f, 2.087554f, -0.191169f, 1.430569f, -1.059834f, + 0.374943f, -0.534675f, -0.422606f, -1.609942f, -0.922619f, 2.244947f, -0.097903f, + 0.121999f, -0.149312f, -0.649967f, -1.045418f, -0.361698f, -0.163933f, -0.846074f, + -0.327313f, 0.547455f, 2.043521f, 0.119833f, -0.708708f, 1.374049f, 0.976319f, + -0.034966f, 0.579193f, 0.462073f, -0.338565f, 0.910230f, 0.656344f, 1.844478f, + -0.310301f, -0.241161f, -1.556111f, -1.152509f, 1.286693f, 0.303133f, 0.007123f, + 1.411441f, 2.773875f, -1.916589f, 0.379059f, -0.869618f, 0.754217f, 1.803710f, + -1.470451f, 1.252759f, -1.259440f, 1.148975f, -0.794163f, 0.289906f, -1.423498f, + 1.268674f, 0.049445f, 0.448380f, 0.195606f, 0.325677f, 0.705436f, 0.019392f, + -0.275582f, -0.821970f, -0.685265f, -1.903032f, 0.322047f, 0.067131f, 1.327628f, + 0.337458f, 0.622310f, 0.498616f, -1.797895f, 0.285230f, 0.391524f, -1.209723f, + 1.226868f, 0.358081f, 1.487907f, 0.660574f, -0.156783f, -0.198020f, -0.855000f, + -1.496671f, -0.828395f, 1.279418f, 0.645864f, -1.312350f, -0.903121f, 1.460771f, + -0.527595f, -1.623403f, -0.513170f, 0.545358f, -0.146408f, -0.963346f, -0.131965f, + -0.910921f}; float expected_output_10[] = { - 0.234108f, 0.165695f, 0.194178f, 0.132206f, 0.273813f, 0.157756f, 0.231078f, 0.267808f, - 0.182023f, 0.161335f, 0.064537f, 0.399455f, 0.058060f, 0.290919f, 0.187029f, 0.058987f, 
- 0.278246f, 0.111077f, 0.212830f, 0.338860f, 0.126710f, 0.038271f, 0.045816f, 0.051766f, - 0.737438f, 0.111706f, 0.565442f, 0.046862f, 0.196759f, 0.079231f, 0.056498f, 0.017234f, - 0.034267f, 0.813830f, 0.078171f, 0.317244f, 0.241860f, 0.146599f, 0.098717f, 0.195581f, - 0.074161f, 0.037491f, 0.062983f, 0.151053f, 0.674313f, 0.122651f, 0.053560f, 0.429902f, - 0.288827f, 0.105061f, 0.210018f, 0.186806f, 0.083884f, 0.292431f, 0.226862f, 0.755602f, - 0.087596f, 0.093867f, 0.025202f, 0.037733f, 0.138704f, 0.051872f, 0.038581f, 0.157132f, - 0.613710f, 0.014387f, 0.142873f, 0.040988f, 0.207912f, 0.593840f, 0.030158f, 0.459283f, - 0.037243f, 0.414007f, 0.059308f, 0.172431f, 0.031081f, 0.458870f, 0.135577f, 0.202041f, - 0.189875f, 0.216251f, 0.316144f, 0.159199f, 0.118532f, 0.124100f, 0.142279f, 0.042099f, - 0.389594f, 0.301928f, 0.426283f, 0.158370f, 0.210563f, 0.186064f, 0.018720f, 0.167332f, - 0.186098f, 0.037526f, 0.429068f, 0.179977f, 0.523120f, 0.228715f, 0.101000f, 0.096920f, - 0.050245f, 0.034806f, 0.067903f, 0.558854f, 0.296585f, 0.041851f, 0.066438f, 0.706390f, - 0.096718f, 0.032330f, 0.098123f, 0.406015f, 0.203288f, 0.089809f, 0.206245f, 0.094643f - }; + 0.234108f, 0.165695f, 0.194178f, 0.132206f, 0.273813f, 0.157756f, 0.231078f, 0.267808f, + 0.182023f, 0.161335f, 0.064537f, 0.399455f, 0.058060f, 0.290919f, 0.187029f, 0.058987f, + 0.278246f, 0.111077f, 0.212830f, 0.338860f, 0.126710f, 0.038271f, 0.045816f, 0.051766f, + 0.737438f, 0.111706f, 0.565442f, 0.046862f, 0.196759f, 0.079231f, 0.056498f, 0.017234f, + 0.034267f, 0.813830f, 0.078171f, 0.317244f, 0.241860f, 0.146599f, 0.098717f, 0.195581f, + 0.074161f, 0.037491f, 0.062983f, 0.151053f, 0.674313f, 0.122651f, 0.053560f, 0.429902f, + 0.288827f, 0.105061f, 0.210018f, 0.186806f, 0.083884f, 0.292431f, 0.226862f, 0.755602f, + 0.087596f, 0.093867f, 0.025202f, 0.037733f, 0.138704f, 0.051872f, 0.038581f, 0.157132f, + 0.613710f, 0.014387f, 0.142873f, 0.040988f, 0.207912f, 0.593840f, 0.030158f, 0.459283f, + 0.037243f, 0.414007f, 0.059308f, 0.172431f, 0.031081f, 0.458870f, 0.135577f, 0.202041f, + 0.189875f, 0.216251f, 0.316144f, 0.159199f, 0.118532f, 0.124100f, 0.142279f, 0.042099f, + 0.389594f, 0.301928f, 0.426283f, 0.158370f, 0.210563f, 0.186064f, 0.018720f, 0.167332f, + 0.186098f, 0.037526f, 0.429068f, 0.179977f, 0.523120f, 0.228715f, 0.101000f, 0.096920f, + 0.050245f, 0.034806f, 0.067903f, 0.558854f, 0.296585f, 0.041851f, 0.066438f, 0.706390f, + 0.096718f, 0.032330f, 0.098123f, 0.406015f, 0.203288f, 0.089809f, 0.206245f, 0.094643f}; Tensor t_input_10 = create_test_tensor(shape_10, input_data_10, false); Tensor t_output_10 = nn_softmax(t_input_10, dim_10); Tensor t_expected_10 = create_test_tensor(shape_10, expected_output_10, false); diff --git a/tests/Operator/test_square.c b/tests/Operator/test_square.c index a937f76..8ac1655 100644 --- a/tests/Operator/test_square.c +++ b/tests/Operator/test_square.c @@ -6,7 +6,7 @@ void test_square_operator() { const char* op_name = "square"; - PoolId pool_id = 0; + PoolId pool_id = 0; cten_begin_malloc(pool_id); // Test Case 1: Scalar square (represented as 1x1 tensors) @@ -17,7 +17,7 @@ void test_square_operator() { // Sub-test 1: Basic square { float d[] = {6.754841f}; - float exp_d[] = {45.627879f}; // 6.754841^2 = 45.6278799 + float exp_d[] = {45.627879f}; // 6.754841^2 = 45.6278799 Tensor t1 = create_test_tensor(s_shape, d, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -25,10 +25,10 @@ void test_square_operator() { 
compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - // Sub-test 2: Square of a negative number + // Sub-test 2: Square of a negative number { float d[] = {-3.475264f}; - float exp_d[] = {12.077459f}; // (-3.475264)^2 = 12.07745916 + float exp_d[] = {12.077459f}; // (-3.475264)^2 = 12.07745916 Tensor t1 = create_test_tensor(s_shape, d, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -39,7 +39,7 @@ void test_square_operator() { // Sub-test 3: Square of zero { float d[] = {0.0f}; - float exp_d[] = {0.0f}; // 0.0^2 = 0.0 + float exp_d[] = {0.0f}; // 0.0^2 = 0.0 Tensor t1 = create_test_tensor(s_shape, d, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -50,7 +50,7 @@ void test_square_operator() { // Sub-test 4: Square of a fractional number { float d[] = {0.5f}; - float exp_d[] = {0.25f}; // 0.5^2 = 0.25 + float exp_d[] = {0.25f}; // 0.5^2 = 0.25 Tensor t1 = create_test_tensor(s_shape, d, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -64,7 +64,9 @@ void test_square_operator() { const char* tc_name = "square_vector_elements"; TensorShape v_shape = {3}; float d[] = {4.370861f, 9.556429f, 7.587945f}; - float exp_d[] = {19.104426f, 91.325331f, 57.576917f}; // [4.370861^2, 9.556429^2, 7.587945^2] + float exp_d[] = {19.104426f, + 91.325331f, + 57.576917f}; // [4.370861^2, 9.556429^2, 7.587945^2] Tensor t1 = create_test_tensor(v_shape, d, false); Tensor expected_res = create_test_tensor(v_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -77,7 +79,10 @@ void test_square_operator() { const char* tc_name = "square_matrix_2x2"; TensorShape m_shape = {2, 2}; float d[] = {6.387926f, 2.404168f, 2.403951f, 1.522753f}; - float exp_d[] = {40.805603f, 5.780023f, 5.778979f, 2.318775f}; // [6.387926^2, 2.404168^2, 2.403951^2, 1.522753^2] + float exp_d[] = {40.805603f, + 5.780023f, + 5.778979f, + 2.318775f}; // [6.387926^2, 2.404168^2, 2.403951^2, 1.522753^2] Tensor t1 = create_test_tensor(m_shape, d, false); Tensor expected_res = create_test_tensor(m_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -89,8 +94,23 @@ void test_square_operator() { { const char* tc_name = "square_3d_tensor"; TensorShape t_shape = {2, 2, 2}; - float d[] = {8.795585f, 6.410035f, 7.372653f, 1.185260f, 9.729189f, 8.491984f, 2.911052f, 2.636425f}; - float exp_d[] = {77.362321f, 41.088550f, 54.356015f, 1.404842f, 94.657112f, 72.113788f, 8.474224f, 6.950735f}; // [8.795585^2, 6.410035^2, 7.372653^2, 1.185260^2, 9.729189^2, 8.491984^2, 2.911052^2, 2.636425^2] + float d[] = {8.795585f, + 6.410035f, + 7.372653f, + 1.185260f, + 9.729189f, + 8.491984f, + 2.911052f, + 2.636425f}; + float exp_d[] = {77.362321f, + 41.088550f, + 54.356015f, + 1.404842f, + 94.657112f, + 72.113788f, + 8.474224f, + 6.950735f}; // [8.795585^2, 6.410035^2, 7.372653^2, 1.185260^2, 9.729189^2, + // 8.491984^2, 2.911052^2, 2.636425^2] Tensor t1 = create_test_tensor(t_shape, d, false); Tensor expected_res = create_test_tensor(t_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -101,9 +121,12 @@ void test_square_operator() { // Test Case 5: 4D tensor square operations { const char* tc_name = "square_4d_tensor"; - TensorShape t_shape = {2, 1, 2, 1}; // 4 elements + TensorShape t_shape = {2, 1, 2, 1}; // 4 elements float d1[] = {-0.376380f, 1.352143f, 0.695982f, 0.295975f}; - float exp_d[] = 
{0.141662f, 1.828290f, 0.484391f, 0.087601f}; // Expected: [d1[0]^2, d1[1]^2, ...] + float exp_d[] = {0.141662f, + 1.828290f, + 0.484391f, + 0.087601f}; // Expected: [d1[0]^2, d1[1]^2, ...] Tensor t1 = create_test_tensor(t_shape, d1, false); Tensor expected_res = create_test_tensor(t_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -116,12 +139,17 @@ void test_square_operator() { const char* tc_name = "square_large_numbers"; TensorShape s_shape = {1}; float d1[] = {1624.074562f}; - float exp_d[] = {2.637618e+06f}; // 1624.07^2 = 2637618.18 + float exp_d[] = {2.637618e+06f}; // 1624.07^2 = 2637618.18 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); - compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, 1.0f); // Using larger tolerance for large values + compare_tensors(&actual_res, + &expected_res, + op_name, + tc_name, + 1, + 1.0f); // Using larger tolerance for large values } // Test Case 7: Very small numbers @@ -129,7 +157,7 @@ void test_square_operator() { const char* tc_name = "square_small_numbers"; TensorShape s_shape = {1}; float d1[] = {0.000164f}; - float exp_d[] = {2.703873e-08f}; // (1.644e-04)^2 = 2.704e-08 + float exp_d[] = {2.703873e-08f}; // (1.644e-04)^2 = 2.704e-08 Tensor t1 = create_test_tensor(s_shape, d1, false); Tensor expected_res = create_test_tensor(s_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -141,14 +169,8 @@ void test_square_operator() { { const char* tc_name = "square_mixed_sign_vector"; TensorShape v_shape = {5}; - float d1[] = { - -8.838327f, 7.323523f, 2.022300f, 4.161451f, - -9.588310f - }; - float exp_d[] = { - 78.116028f, 53.633991f, 4.089698f, 17.317677f, - 91.935692f - }; + float d1[] = {-8.838327f, 7.323523f, 2.022300f, 4.161451f, -9.588310f}; + float exp_d[] = {78.116028f, 53.633991f, 4.089698f, 17.317677f, 91.935692f}; Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor expected_res = create_test_tensor(v_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); @@ -161,7 +183,10 @@ void test_square_operator() { const char* tc_name = "square_zeros_and_ones"; TensorShape v_shape = {4}; float d1[] = {1.000000f, 1.000000f, 0.000000f, 0.000000f}; - float exp_d[] = {1.000000f, 1.000000f, 0.000000f, 0.000000f}; // Expected: [0^2, 1^2, ...] should be the same as input + float exp_d[] = {1.000000f, + 1.000000f, + 0.000000f, + 0.000000f}; // Expected: [0^2, 1^2, ...] 
should be the same as input Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor expected_res = create_test_tensor(v_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); diff --git a/tests/Operator/test_sub.c b/tests/Operator/test_sub.c index 09c1f80..623f087 100644 --- a/tests/Operator/test_sub.c +++ b/tests/Operator/test_sub.c @@ -14,7 +14,7 @@ void test_sub_operator() { { const char* tc_name = "sub_scalar"; TensorShape s_shape = {1}; - float d1[] = {5.0f}; + float d1[] = {5.0f}; float d2[] = {3.0f}; float exp_d[] = {2.0f}; Tensor t1 = create_test_tensor(s_shape, d1, false); @@ -59,34 +59,46 @@ void test_sub_operator() { // Example: [[1,2],[3,4]] - [1] (shape {1}) -> PyTorch result: [[0,1],[2,3]] { const char* tc_name = "sub_broadcast_matrix_minus_scalar_tensor"; - TensorShape mat_shape = {2, 2}; float mat_data[] = {1.0f, 2.0f, 3.0f, 4.0f}; - TensorShape scalar_shape = {1}; float scalar_data[] = {1.0f}; - - TensorShape expected_shape = {2, 2}; float exp_data[] = {0.0f, 1.0f, 2.0f, 3.0f}; + TensorShape mat_shape = {2, 2}; + float mat_data[] = {1.0f, 2.0f, 3.0f, 4.0f}; + TensorShape scalar_shape = {1}; + float scalar_data[] = {1.0f}; + + TensorShape expected_shape = {2, 2}; + float exp_data[] = {0.0f, 1.0f, 2.0f, 3.0f}; Tensor t_mat = create_test_tensor(mat_shape, mat_data, false); Tensor t_scalar_original = create_test_tensor(scalar_shape, scalar_data, false); - - Tensor actual_res = Tensor_sub(t_mat, t_scalar_original); + + Tensor actual_res = Tensor_sub(t_mat, t_scalar_original); Tensor expected_res = create_test_tensor(expected_shape, exp_data, false); compare_tensors(&actual_res, &expected_res, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); } - + // Test Case 5: Advanced Broadcasting { const char* tc_name = "sub_advanced_broadcasting"; - + // Sub-test 1: Multi-dimensional broadcasting {3,1} - {1,4} -> {3,4} { TensorShape s1_shape = {3, 1}; float d1[] = {10.0f, 20.0f, 30.0f}; TensorShape s2_shape = {1, 4}; float d2[] = {1.0f, 2.0f, 3.0f, 4.0f}; - TensorShape exp_shape = {3, 4}; - float exp_d[] = {9.0f, 8.0f, 7.0f, 6.0f, // 10-[1,2,3,4] - 19.0f, 18.0f, 17.0f, 16.0f, // 20-[1,2,3,4] - 29.0f, 28.0f, 27.0f, 26.0f}; // 30-[1,2,3,4] + TensorShape exp_shape = {3, 4}; + float exp_d[] = {9.0f, + 8.0f, + 7.0f, + 6.0f, // 10-[1,2,3,4] + 19.0f, + 18.0f, + 17.0f, + 16.0f, // 20-[1,2,3,4] + 29.0f, + 28.0f, + 27.0f, + 26.0f}; // 30-[1,2,3,4] Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); @@ -105,13 +117,31 @@ void test_sub_operator() { TensorShape exp_shape = {2, 3, 4}; float exp_d[] = { // First 2x3 slice - 9.0f, 8.0f, 7.0f, 6.0f, // 10-[1,2,3,4] - 19.0f, 18.0f, 17.0f, 16.0f, // 20-[1,2,3,4] - 29.0f, 28.0f, 27.0f, 26.0f, // 30-[1,2,3,4] + 9.0f, + 8.0f, + 7.0f, + 6.0f, // 10-[1,2,3,4] + 19.0f, + 18.0f, + 17.0f, + 16.0f, // 20-[1,2,3,4] + 29.0f, + 28.0f, + 27.0f, + 26.0f, // 30-[1,2,3,4] // Second 2x3 slice - 39.0f, 38.0f, 37.0f, 36.0f, // 40-[1,2,3,4] - 49.0f, 48.0f, 47.0f, 46.0f, // 50-[1,2,3,4] - 59.0f, 58.0f, 57.0f, 56.0f // 60-[1,2,3,4] + 39.0f, + 38.0f, + 37.0f, + 36.0f, // 40-[1,2,3,4] + 49.0f, + 48.0f, + 47.0f, + 46.0f, // 50-[1,2,3,4] + 59.0f, + 58.0f, + 57.0f, + 56.0f // 60-[1,2,3,4] }; Tensor t1 = create_test_tensor(s1_shape, d1, false); @@ -121,55 +151,165 @@ void test_sub_operator() { compare_tensors(&actual_res, &expected_res, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); } - + // Sub-test 3: 4D broadcasting {1,3,1,5} - {2,1,4,1} -> {2,3,4,5} { - TensorShape s1_shape = {1, 3, 1, 5}; + TensorShape s1_shape = {1, 3, 1, 5}; 
TensorShape s2_shape = {2, 1, 4, 1}; TensorShape exp_shape = {2, 3, 4, 5}; - - float d1[] = {0.3745f, 0.9507f, 0.732f, 0.5987f, 0.156f, 0.1576f, 0.0721f, 0.8381f, 0.5801f, 0.5153f, 0.0206f, 0.9699f, 0.8324f, 0.2123f, 0.1818f}; + + float d1[] = {0.3745f, + 0.9507f, + 0.732f, + 0.5987f, + 0.156f, + 0.1576f, + 0.0721f, + 0.8381f, + 0.5801f, + 0.5153f, + 0.0206f, + 0.9699f, + 0.8324f, + 0.2123f, + 0.1818f}; float d2[] = {0.1834f, 0.3042f, 0.5248f, 0.4319f, 0.2912f, 0.6119f, 0.1395f, 0.2921f}; float exp_d[] = { // Batch 0 - 0.1911f, 0.7673f, 0.5486f, 0.4153f, -0.0274f, - 0.0703f, 0.6465f, 0.4278f, 0.2945f, -0.1482f, - -0.1503f, 0.4259f, 0.2072f, 0.0739f, -0.3688f, - -0.0574f, 0.5188f, 0.3001f, 0.1668f, -0.2759f, - - -0.0258f, -0.1113f, 0.6547f, 0.3967f, 0.3319f, - -0.1466f, -0.2321f, 0.5339f, 0.2759f, 0.2111f, - -0.3672f, -0.4527f, 0.3133f, 0.0553f, -0.0095f, - -0.2743f, -0.3598f, 0.4062f, 0.1482f, 0.0834f, - - -0.1628f, 0.7865f, 0.649f, 0.0289f, -0.0016f, - -0.2836f, 0.6657f, 0.5282f, -0.0919f, -0.1224f, - -0.5042f, 0.4451f, 0.3076f, -0.3125f, -0.343f, - -0.4113f, 0.538f, 0.4005f, -0.2196f, -0.2501f, - + 0.1911f, + 0.7673f, + 0.5486f, + 0.4153f, + -0.0274f, + 0.0703f, + 0.6465f, + 0.4278f, + 0.2945f, + -0.1482f, + -0.1503f, + 0.4259f, + 0.2072f, + 0.0739f, + -0.3688f, + -0.0574f, + 0.5188f, + 0.3001f, + 0.1668f, + -0.2759f, + + -0.0258f, + -0.1113f, + 0.6547f, + 0.3967f, + 0.3319f, + -0.1466f, + -0.2321f, + 0.5339f, + 0.2759f, + 0.2111f, + -0.3672f, + -0.4527f, + 0.3133f, + 0.0553f, + -0.0095f, + -0.2743f, + -0.3598f, + 0.4062f, + 0.1482f, + 0.0834f, + + -0.1628f, + 0.7865f, + 0.649f, + 0.0289f, + -0.0016f, + -0.2836f, + 0.6657f, + 0.5282f, + -0.0919f, + -0.1224f, + -0.5042f, + 0.4451f, + 0.3076f, + -0.3125f, + -0.343f, + -0.4113f, + 0.538f, + 0.4005f, + -0.2196f, + -0.2501f, + // Batch 1 - 0.0833f, 0.6595f, 0.4408f, 0.3075f, -0.1352f, - -0.2374f, 0.3388f, 0.1201f, -0.0132f, -0.4559f, - 0.235f, 0.8112f, 0.5925f, 0.4592f, 0.0165f, - 0.0824f, 0.6586f, 0.4399f, 0.3066f, -0.1361f, - - -0.1336f, -0.2191f, 0.5469f, 0.2889f, 0.2241f, - -0.4543f, -0.5398f, 0.2262f, -0.0318f, -0.0966f, - 0.0181f, -0.0674f, 0.6986f, 0.4406f, 0.3758f, - -0.1345f, -0.22f, 0.546f, 0.288f, 0.2232f, - - -0.2706f, 0.6787f, 0.5412f, -0.0789f, -0.1094f, - -0.5913f, 0.358f, 0.2205f, -0.3996f, -0.4301f, - -0.1189f, 0.8304f, 0.6929f, 0.0728f, 0.0423f, - -0.2715f, 0.6778f, 0.5403f, -0.0798f, -0.1103f, + 0.0833f, + 0.6595f, + 0.4408f, + 0.3075f, + -0.1352f, + -0.2374f, + 0.3388f, + 0.1201f, + -0.0132f, + -0.4559f, + 0.235f, + 0.8112f, + 0.5925f, + 0.4592f, + 0.0165f, + 0.0824f, + 0.6586f, + 0.4399f, + 0.3066f, + -0.1361f, + + -0.1336f, + -0.2191f, + 0.5469f, + 0.2889f, + 0.2241f, + -0.4543f, + -0.5398f, + 0.2262f, + -0.0318f, + -0.0966f, + 0.0181f, + -0.0674f, + 0.6986f, + 0.4406f, + 0.3758f, + -0.1345f, + -0.22f, + 0.546f, + 0.288f, + 0.2232f, + + -0.2706f, + 0.6787f, + 0.5412f, + -0.0789f, + -0.1094f, + -0.5913f, + 0.358f, + 0.2205f, + -0.3996f, + -0.4301f, + -0.1189f, + 0.8304f, + 0.6929f, + 0.0728f, + 0.0423f, + -0.2715f, + 0.6778f, + 0.5403f, + -0.0798f, + -0.1103f, }; - + Tensor t1 = create_test_tensor(s1_shape, d1, false); Tensor t2 = create_test_tensor(s2_shape, d2, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor actual_res = Tensor_sub(t1, t2); - + compare_tensors(&actual_res, &expected_res, op_name, tc_name, 3, TEST_FLOAT_TOLERANCE); } } @@ -177,44 +317,59 @@ void test_sub_operator() { // Test Case 6: Order Dependency { const char* tc_name = "sub_order_dependency"; - + // Sub-test 1: a - b 
≠ b - a verification { TensorShape v_shape = {2}; - + // First: [5.0, 3.0] - [2.0, 1.0] = [3.0, 2.0] float d1[] = {5.0f, 3.0f}; float d2[] = {2.0f, 1.0f}; float exp_d1[] = {3.0f, 2.0f}; - + Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor t2 = create_test_tensor(v_shape, d2, false); Tensor expected_res1 = create_test_tensor(v_shape, exp_d1, false); Tensor actual_res1 = Tensor_sub(t1, t2); - compare_tensors(&actual_res1, &expected_res1, op_name, tc_name, 1, TEST_FLOAT_TOLERANCE); + compare_tensors(&actual_res1, + &expected_res1, + op_name, + tc_name, + 1, + TEST_FLOAT_TOLERANCE); // Second: [2.0, 1.0] - [5.0, 3.0] = [-3.0, -2.0] float exp_d2[] = {-3.0f, -2.0f}; Tensor expected_res2 = create_test_tensor(v_shape, exp_d2, false); Tensor actual_res2 = Tensor_sub(t2, t1); - compare_tensors(&actual_res2, &expected_res2, op_name, tc_name, 2, TEST_FLOAT_TOLERANCE); + compare_tensors(&actual_res2, + &expected_res2, + op_name, + tc_name, + 2, + TEST_FLOAT_TOLERANCE); } } // Test Case 7: Higher Dimensional Tensors { const char* tc_name = "sub_higher_dimensional_tensors"; - + // Sub-test 1: 3D tensor subtraction (same shape) { TensorShape shape_3d = {2, 3, 4}; - float d1[] = {0.2865f, 0.5908f, 0.0305f, 0.0373f, 0.8226f, 0.3602f, 0.1271f, 0.5222f, 0.7700f, 0.2158f, 0.6229f, 0.0853f, 0.0517f, 0.5314f, 0.5406f, 0.6374f, 0.7261f, 0.9759f, 0.5163f, 0.3230f, 0.7952f, 0.2708f, 0.4390f, 0.0785f}; - float d2[] = {0.0254f, 0.9626f, 0.8360f, 0.6960f, 0.4090f, 0.1733f, 0.1564f, 0.2502f, 0.5492f, 0.7146f, 0.6602f, 0.2799f, 0.9549f, 0.7379f, 0.5544f, 0.6117f, 0.4196f, 0.2477f, 0.3560f, 0.7578f, 0.0144f, 0.1161f, 0.0460f, 0.0407f}; - float exp_d[] = {0.2611f, -0.3718f, -0.8055f, -0.6587f, 0.4136f, 0.1869f, -0.0293f, 0.2720f, - 0.2208f, -0.4988f, -0.0373f, -0.1946f, -0.9032f, -0.2065f, -0.0138f, 0.0257f, - 0.3065f, 0.7282f, 0.1603f, -0.4348f, 0.7808f, 0.1547f, 0.3930f, 0.0378f}; + float d1[] = {0.2865f, 0.5908f, 0.0305f, 0.0373f, 0.8226f, 0.3602f, 0.1271f, 0.5222f, + 0.7700f, 0.2158f, 0.6229f, 0.0853f, 0.0517f, 0.5314f, 0.5406f, 0.6374f, + 0.7261f, 0.9759f, 0.5163f, 0.3230f, 0.7952f, 0.2708f, 0.4390f, 0.0785f}; + float d2[] = {0.0254f, 0.9626f, 0.8360f, 0.6960f, 0.4090f, 0.1733f, 0.1564f, 0.2502f, + 0.5492f, 0.7146f, 0.6602f, 0.2799f, 0.9549f, 0.7379f, 0.5544f, 0.6117f, + 0.4196f, 0.2477f, 0.3560f, 0.7578f, 0.0144f, 0.1161f, 0.0460f, 0.0407f}; + float exp_d[] = {0.2611f, -0.3718f, -0.8055f, -0.6587f, 0.4136f, 0.1869f, + -0.0293f, 0.2720f, 0.2208f, -0.4988f, -0.0373f, -0.1946f, + -0.9032f, -0.2065f, -0.0138f, 0.0257f, 0.3065f, 0.7282f, + 0.1603f, -0.4348f, 0.7808f, 0.1547f, 0.3930f, 0.0378f}; Tensor t1 = create_test_tensor(shape_3d, d1, false); Tensor t2 = create_test_tensor(shape_3d, d2, false); @@ -227,23 +382,52 @@ void test_sub_operator() { // Sub-test 2: 4D tensor subtraction (same shape) { TensorShape shape_4d = {2, 3, 4, 5}; - float d1[] = {0.8555f, 0.7037f, 0.4742f, 0.0978f, 0.4916f, 0.4735f, 0.1732f, 0.4339f, 0.3985f, 0.6159f, 0.6351f, 0.0453f, 0.3746f, 0.6259f, 0.5031f, 0.8565f, 0.6587f, 0.1629f, 0.0706f, 0.6424f, 0.0265f, 0.5858f, 0.9402f, 0.5755f, 0.3882f, 0.6433f, 0.4583f, 0.5456f, 0.9415f, 0.3861f, 0.9612f, 0.9054f, 0.1958f, 0.0694f, 0.1008f, 0.0182f, 0.0944f, 0.6830f, 0.0712f, 0.3190f, 0.8449f, 0.0233f, 0.8145f, 0.2819f, 0.1182f, 0.6967f, 0.6289f, 0.8775f, 0.7351f, 0.8035f, 0.2820f, 0.1774f, 0.7506f, 0.8068f, 0.9905f, 0.4126f, 0.3720f, 0.7764f, 0.3408f, 0.9308f, 0.8584f, 0.4290f, 0.7509f, 0.7545f, 0.1031f, 0.9026f, 0.5053f, 0.8265f, 0.3200f, 0.8955f, 0.3892f, 0.0108f, 0.9054f, 0.0913f, 0.3193f, 
0.9501f, 0.9506f, 0.5734f, 0.6318f, 0.4484f, 0.2932f, 0.3287f, 0.6725f, 0.7524f, 0.7916f, 0.7896f, 0.0912f, 0.4944f, 0.0576f, 0.5495f, 0.4415f, 0.8877f, 0.3509f, 0.1171f, 0.1430f, 0.7615f, 0.6182f, 0.1011f, 0.0841f, 0.7010f, 0.0728f, 0.8219f, 0.7062f, 0.0813f, 0.0848f, 0.9866f, 0.3743f, 0.3706f, 0.8128f, 0.9472f, 0.9860f, 0.7534f, 0.3763f, 0.0835f, 0.7771f, 0.5584f, 0.4242f, 0.9064f, 0.1112f, 0.4926f}; - float d2[] = {0.0114f, 0.4687f, 0.0563f, 0.1188f, 0.1175f, 0.6492f, 0.7460f, 0.5834f, 0.9622f, 0.3749f, 0.2857f, 0.8686f, 0.2236f, 0.9632f, 0.0122f, 0.9699f, 0.0432f, 0.8911f, 0.5277f, 0.9930f, 0.0738f, 0.5539f, 0.9693f, 0.5231f, 0.6294f, 0.6957f, 0.4545f, 0.6276f, 0.5843f, 0.9012f, 0.0454f, 0.2810f, 0.9504f, 0.8903f, 0.4557f, 0.6201f, 0.2774f, 0.1881f, 0.4637f, 0.3534f, 0.5837f, 0.0777f, 0.9744f, 0.9862f, 0.6982f, 0.5361f, 0.3095f, 0.8138f, 0.6847f, 0.1626f, 0.9109f, 0.8225f, 0.9498f, 0.7257f, 0.6134f, 0.4182f, 0.9327f, 0.8661f, 0.0452f, 0.0264f, 0.3765f, 0.8106f, 0.9873f, 0.1504f, 0.5941f, 0.3809f, 0.9699f, 0.8421f, 0.8383f, 0.4687f, 0.4148f, 0.2734f, 0.0564f, 0.8647f, 0.8129f, 0.9997f, 0.9966f, 0.5554f, 0.7690f, 0.9448f, 0.8496f, 0.2473f, 0.4505f, 0.1292f, 0.9541f, 0.6062f, 0.2286f, 0.6717f, 0.6181f, 0.3582f, 0.1136f, 0.6716f, 0.5203f, 0.7723f, 0.5202f, 0.8522f, 0.5519f, 0.5609f, 0.8767f, 0.4035f, 0.1340f, 0.0288f, 0.7551f, 0.6203f, 0.7041f, 0.2130f, 0.1364f, 0.0145f, 0.3506f, 0.5899f, 0.3922f, 0.4375f, 0.9042f, 0.3483f, 0.5140f, 0.7837f, 0.3965f, 0.6221f, 0.8624f, 0.9495f}; - float exp_d[] = {0.8441f, 0.2350f, 0.4179f, -0.0210f, 0.3741f, -0.1757f, -0.5728f, -0.1495f, - -0.5637f, 0.2410f, 0.3494f, -0.8233f, 0.1510f, -0.3373f, 0.4909f, -0.1134f, - 0.6155f, -0.7282f, -0.4571f, -0.3506f, -0.0473f, 0.0319f, -0.0291f, 0.0524f, - -0.2412f, -0.0524f, 0.0038f, -0.0820f, 0.3572f, -0.5151f, 0.9158f, 0.6244f, - -0.7546f, -0.8209f, -0.3549f, -0.6019f, -0.1830f, 0.4949f, -0.3925f, -0.0344f, - 0.2612f, -0.0544f, -0.1599f, -0.7043f, -0.5800f, 0.1606f, 0.3194f, 0.0637f, - 0.0504f, 0.6409f, -0.6289f, -0.6451f, -0.1992f, 0.0811f, 0.3771f, -0.0056f, - -0.5607f, -0.0897f, 0.2956f, 0.9044f, 0.4819f, -0.3816f, -0.2364f, 0.6041f, - -0.4910f, 0.5217f, -0.4646f, -0.0156f, -0.5183f, 0.4268f, -0.0256f, -0.2626f, - 0.8490f, -0.7734f, -0.4936f, -0.0496f, -0.0460f, 0.0180f, -0.1372f, -0.4964f, - -0.5564f, 0.0814f, 0.2220f, 0.6232f, -0.1625f, 0.1834f, -0.1374f, -0.1773f, - -0.5605f, 0.1913f, 0.3279f, 0.2161f, -0.1694f, -0.6552f, -0.3772f, -0.0907f, - 0.0663f, -0.4598f, -0.7926f, 0.2975f, -0.0612f, 0.7931f, -0.0489f, -0.5390f, - -0.6193f, 0.7736f, 0.2379f, 0.3561f, 0.4622f, 0.3573f, 0.5938f, 0.3159f, - -0.5279f, -0.2648f, 0.2631f, -0.2253f, 0.0277f, 0.2843f, -0.7512f, -0.4569f}; + float d1[] = {0.8555f, 0.7037f, 0.4742f, 0.0978f, 0.4916f, 0.4735f, 0.1732f, 0.4339f, + 0.3985f, 0.6159f, 0.6351f, 0.0453f, 0.3746f, 0.6259f, 0.5031f, 0.8565f, + 0.6587f, 0.1629f, 0.0706f, 0.6424f, 0.0265f, 0.5858f, 0.9402f, 0.5755f, + 0.3882f, 0.6433f, 0.4583f, 0.5456f, 0.9415f, 0.3861f, 0.9612f, 0.9054f, + 0.1958f, 0.0694f, 0.1008f, 0.0182f, 0.0944f, 0.6830f, 0.0712f, 0.3190f, + 0.8449f, 0.0233f, 0.8145f, 0.2819f, 0.1182f, 0.6967f, 0.6289f, 0.8775f, + 0.7351f, 0.8035f, 0.2820f, 0.1774f, 0.7506f, 0.8068f, 0.9905f, 0.4126f, + 0.3720f, 0.7764f, 0.3408f, 0.9308f, 0.8584f, 0.4290f, 0.7509f, 0.7545f, + 0.1031f, 0.9026f, 0.5053f, 0.8265f, 0.3200f, 0.8955f, 0.3892f, 0.0108f, + 0.9054f, 0.0913f, 0.3193f, 0.9501f, 0.9506f, 0.5734f, 0.6318f, 0.4484f, + 0.2932f, 0.3287f, 0.6725f, 0.7524f, 0.7916f, 0.7896f, 0.0912f, 0.4944f, + 0.0576f, 0.5495f, 
0.4415f, 0.8877f, 0.3509f, 0.1171f, 0.1430f, 0.7615f, + 0.6182f, 0.1011f, 0.0841f, 0.7010f, 0.0728f, 0.8219f, 0.7062f, 0.0813f, + 0.0848f, 0.9866f, 0.3743f, 0.3706f, 0.8128f, 0.9472f, 0.9860f, 0.7534f, + 0.3763f, 0.0835f, 0.7771f, 0.5584f, 0.4242f, 0.9064f, 0.1112f, 0.4926f}; + float d2[] = {0.0114f, 0.4687f, 0.0563f, 0.1188f, 0.1175f, 0.6492f, 0.7460f, 0.5834f, + 0.9622f, 0.3749f, 0.2857f, 0.8686f, 0.2236f, 0.9632f, 0.0122f, 0.9699f, + 0.0432f, 0.8911f, 0.5277f, 0.9930f, 0.0738f, 0.5539f, 0.9693f, 0.5231f, + 0.6294f, 0.6957f, 0.4545f, 0.6276f, 0.5843f, 0.9012f, 0.0454f, 0.2810f, + 0.9504f, 0.8903f, 0.4557f, 0.6201f, 0.2774f, 0.1881f, 0.4637f, 0.3534f, + 0.5837f, 0.0777f, 0.9744f, 0.9862f, 0.6982f, 0.5361f, 0.3095f, 0.8138f, + 0.6847f, 0.1626f, 0.9109f, 0.8225f, 0.9498f, 0.7257f, 0.6134f, 0.4182f, + 0.9327f, 0.8661f, 0.0452f, 0.0264f, 0.3765f, 0.8106f, 0.9873f, 0.1504f, + 0.5941f, 0.3809f, 0.9699f, 0.8421f, 0.8383f, 0.4687f, 0.4148f, 0.2734f, + 0.0564f, 0.8647f, 0.8129f, 0.9997f, 0.9966f, 0.5554f, 0.7690f, 0.9448f, + 0.8496f, 0.2473f, 0.4505f, 0.1292f, 0.9541f, 0.6062f, 0.2286f, 0.6717f, + 0.6181f, 0.3582f, 0.1136f, 0.6716f, 0.5203f, 0.7723f, 0.5202f, 0.8522f, + 0.5519f, 0.5609f, 0.8767f, 0.4035f, 0.1340f, 0.0288f, 0.7551f, 0.6203f, + 0.7041f, 0.2130f, 0.1364f, 0.0145f, 0.3506f, 0.5899f, 0.3922f, 0.4375f, + 0.9042f, 0.3483f, 0.5140f, 0.7837f, 0.3965f, 0.6221f, 0.8624f, 0.9495f}; + float exp_d[] = { + 0.8441f, 0.2350f, 0.4179f, -0.0210f, 0.3741f, -0.1757f, -0.5728f, -0.1495f, + -0.5637f, 0.2410f, 0.3494f, -0.8233f, 0.1510f, -0.3373f, 0.4909f, -0.1134f, + 0.6155f, -0.7282f, -0.4571f, -0.3506f, -0.0473f, 0.0319f, -0.0291f, 0.0524f, + -0.2412f, -0.0524f, 0.0038f, -0.0820f, 0.3572f, -0.5151f, 0.9158f, 0.6244f, + -0.7546f, -0.8209f, -0.3549f, -0.6019f, -0.1830f, 0.4949f, -0.3925f, -0.0344f, + 0.2612f, -0.0544f, -0.1599f, -0.7043f, -0.5800f, 0.1606f, 0.3194f, 0.0637f, + 0.0504f, 0.6409f, -0.6289f, -0.6451f, -0.1992f, 0.0811f, 0.3771f, -0.0056f, + -0.5607f, -0.0897f, 0.2956f, 0.9044f, 0.4819f, -0.3816f, -0.2364f, 0.6041f, + -0.4910f, 0.5217f, -0.4646f, -0.0156f, -0.5183f, 0.4268f, -0.0256f, -0.2626f, + 0.8490f, -0.7734f, -0.4936f, -0.0496f, -0.0460f, 0.0180f, -0.1372f, -0.4964f, + -0.5564f, 0.0814f, 0.2220f, 0.6232f, -0.1625f, 0.1834f, -0.1374f, -0.1773f, + -0.5605f, 0.1913f, 0.3279f, 0.2161f, -0.1694f, -0.6552f, -0.3772f, -0.0907f, + 0.0663f, -0.4598f, -0.7926f, 0.2975f, -0.0612f, 0.7931f, -0.0489f, -0.5390f, + -0.6193f, 0.7736f, 0.2379f, 0.3561f, 0.4622f, 0.3573f, 0.5938f, 0.3159f, + -0.5279f, -0.2648f, 0.2631f, -0.2253f, 0.0277f, 0.2843f, -0.7512f, -0.4569f}; Tensor t1 = create_test_tensor(shape_4d, d1, false); Tensor t2 = create_test_tensor(shape_4d, d2, false); diff --git a/tests/Operator/test_sum.c b/tests/Operator/test_sum.c index bb3b55e..ceb02fc 100644 --- a/tests/Operator/test_sum.c +++ b/tests/Operator/test_sum.c @@ -28,7 +28,7 @@ void test_sum_operator() { const char* tc_name = "sum_vector"; TensorShape v_shape = {3}; float d1[] = {1.0f, 2.0f, 3.0f}; - float exp_d[] = {6.0f}; // Sum is 1+2+3 = 6 + float exp_d[] = {6.0f}; // Sum is 1+2+3 = 6 TensorShape exp_shape = {1}; Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); @@ -42,7 +42,7 @@ void test_sum_operator() { const char* tc_name = "sum_matrix"; TensorShape m_shape = {2, 2}; float d1[] = {1.0f, 2.0f, 3.0f, 4.0f}; - float exp_d[] = {10.0f}; // Sum is 1+2+3+4 = 10 + float exp_d[] = {10.0f}; // Sum is 1+2+3+4 = 10 TensorShape exp_shape = {1, 0, 0, 0}; Tensor t1 = 
create_test_tensor(m_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); @@ -56,7 +56,7 @@ void test_sum_operator() { const char* tc_name = "sum_vector_negative"; TensorShape v_shape = {4}; float d1[] = {-1.0f, 2.0f, -3.0f, 0.5f}; - float exp_d[] = {-1.5f}; // Sum is -1+2-3+0.5 = -1.5 + float exp_d[] = {-1.5f}; // Sum is -1+2-3+0.5 = -1.5 TensorShape exp_shape = {1}; Tensor t1 = create_test_tensor(v_shape, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); @@ -68,16 +68,17 @@ void test_sum_operator() { // Test Case 5: Large Tensor Reductions { const char* tc_name = "sum_large_tensor_reductions"; - + // Sub-test 1: Large tensor sum (1,000 elements) { TensorShape large_shape = {1000}; float large_data[1000]; - for(int i = 0; i < 1000; i++) large_data[i] = 1.0f; - - float exp_d[] = {1000.0f}; // Sum of 1000 ones + for(int i = 0; i < 1000; i++) + large_data[i] = 1.0f; + + float exp_d[] = {1000.0f}; // Sum of 1000 ones TensorShape exp_shape = {1}; - + Tensor t1 = create_test_tensor(large_shape, large_data, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor actual_res = Tensor_sum(t1); @@ -89,11 +90,12 @@ void test_sum_operator() { { TensorShape stress_shape = {5000}; float stress_data[5000]; - for(int i = 0; i < 5000; i++) stress_data[i] = 1.0f; - - float exp_d[] = {5000.0f}; // Sum of 5000 ones + for(int i = 0; i < 5000; i++) + stress_data[i] = 1.0f; + + float exp_d[] = {5000.0f}; // Sum of 5000 ones TensorShape exp_shape = {1}; - + Tensor t1 = create_test_tensor(stress_shape, stress_data, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor actual_res = Tensor_sum(t1); @@ -105,17 +107,36 @@ void test_sum_operator() { // Test Case 4: Higher Dimensional Tensor { const char* tc_name = "sum_higher_dimensional_tensors"; - + // Sub-test 1: 3D tensor sum along axis 1 (3x4x5 -> 3x5) // Here testing negative index for axis so 1 become -2 { TensorShape shape_3d = {3, 4, 5}; - float d1[] = {0.3831f, 0.5189f, 0.0470f, 0.1663f, 0.7380f, 0.0828f, 0.6032f, 0.2453f, 0.3893f, 0.2887f, 0.3557f, 0.7190f, 0.2971f, 0.5664f, 0.4761f, 0.6637f, 0.9368f, 0.7326f, 0.2149f, 0.0312f, 0.2623f, 0.5951f, 0.0514f, 0.4964f, 0.5968f, 0.3342f, 0.7709f, 0.1066f, 0.0751f, 0.7282f, 0.4955f, 0.6884f, 0.4348f, 0.2464f, 0.8191f, 0.7994f, 0.6947f, 0.2721f, 0.5902f, 0.3610f, 0.0916f, 0.9173f, 0.1368f, 0.9502f, 0.4460f, 0.1851f, 0.5419f, 0.8729f, 0.7322f, 0.8066f, 0.6588f, 0.6923f, 0.8492f, 0.2497f, 0.4894f, 0.2212f, 0.9877f, 0.9441f, 0.0394f, 0.7056f}; + float d1[] = {0.3831f, 0.5189f, 0.0470f, 0.1663f, 0.7380f, 0.0828f, 0.6032f, 0.2453f, + 0.3893f, 0.2887f, 0.3557f, 0.7190f, 0.2971f, 0.5664f, 0.4761f, 0.6637f, + 0.9368f, 0.7326f, 0.2149f, 0.0312f, 0.2623f, 0.5951f, 0.0514f, 0.4964f, + 0.5968f, 0.3342f, 0.7709f, 0.1066f, 0.0751f, 0.7282f, 0.4955f, 0.6884f, + 0.4348f, 0.2464f, 0.8191f, 0.7994f, 0.6947f, 0.2721f, 0.5902f, 0.3610f, + 0.0916f, 0.9173f, 0.1368f, 0.9502f, 0.4460f, 0.1851f, 0.5419f, 0.8729f, + 0.7322f, 0.8066f, 0.6588f, 0.6923f, 0.8492f, 0.2497f, 0.4894f, 0.2212f, + 0.9877f, 0.9441f, 0.0394f, 0.7056f}; TensorShape exp_shape = {3, 5}; - float exp_d[] = {1.485300f, 2.777900f, 1.322000f, 1.336900f, 1.534000f, - 1.891400f, 2.749100f, 0.864900f, 1.408100f, 2.505100f, - 1.156700f, 3.139200f, 2.803000f, 1.971500f, 2.447600f}; - + float exp_d[] = {1.485300f, + 2.777900f, + 1.322000f, + 1.336900f, + 1.534000f, + 1.891400f, + 2.749100f, + 0.864900f, + 1.408100f, + 2.505100f, + 1.156700f, + 3.139200f, + 2.803000f, + 
1.971500f, + 2.447600f}; + Tensor t1 = create_test_tensor(shape_3d, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor actual_res = Tensor_sum(t1, -2); @@ -126,15 +147,28 @@ void test_sum_operator() { // Sub-test 2: 4D tensor sum along axis 2 (2x3x4x5 -> 2x3x5) { TensorShape shape_4d = {2, 3, 4, 5}; - float d1[] = {0.9252f, 0.1806f, 0.5679f, 0.9155f, 0.0339f, 0.6974f, 0.2973f, 0.9244f, 0.9711f, 0.9443f, 0.4742f, 0.8620f, 0.8445f, 0.3191f, 0.8289f, 0.0370f, 0.5963f, 0.2300f, 0.1206f, 0.0770f, 0.6963f, 0.3399f, 0.7248f, 0.0654f, 0.3153f, 0.5395f, 0.7907f, 0.3188f, 0.6259f, 0.8860f, 0.6159f, 0.2330f, 0.0244f, 0.8701f, 0.0213f, 0.8747f, 0.5289f, 0.9391f, 0.7988f, 0.9979f, 0.3507f, 0.7672f, 0.4019f, 0.4799f, 0.6275f, 0.8737f, 0.9841f, 0.7683f, 0.4178f, 0.4214f, 0.7376f, 0.2388f, 0.1105f, 0.3546f, 0.2872f, 0.2963f, 0.2336f, 0.0421f, 0.0179f, 0.9877f, 0.4278f, 0.3843f, 0.6796f, 0.2183f, 0.9500f, 0.7863f, 0.0894f, 0.4176f, 0.8791f, 0.9447f, 0.4674f, 0.6134f, 0.1670f, 0.9912f, 0.2317f, 0.9427f, 0.6496f, 0.6077f, 0.5127f, 0.2307f, 0.1765f, 0.2205f, 0.1864f, 0.7796f, 0.3501f, 0.0578f, 0.9691f, 0.8838f, 0.9278f, 0.9949f, 0.1739f, 0.3962f, 0.7582f, 0.6960f, 0.1539f, 0.8158f, 0.2244f, 0.2238f, 0.5370f, 0.5929f, 0.5801f, 0.0915f, 0.8775f, 0.2656f, 0.1295f, 0.8887f, 0.9557f, 0.8621f, 0.8095f, 0.6552f, 0.5509f, 0.0870f, 0.4085f, 0.3727f, 0.2598f, 0.7234f, 0.4959f, 0.0810f, 0.2202f, 0.6833f}; + float d1[] = {0.9252f, 0.1806f, 0.5679f, 0.9155f, 0.0339f, 0.6974f, 0.2973f, 0.9244f, + 0.9711f, 0.9443f, 0.4742f, 0.8620f, 0.8445f, 0.3191f, 0.8289f, 0.0370f, + 0.5963f, 0.2300f, 0.1206f, 0.0770f, 0.6963f, 0.3399f, 0.7248f, 0.0654f, + 0.3153f, 0.5395f, 0.7907f, 0.3188f, 0.6259f, 0.8860f, 0.6159f, 0.2330f, + 0.0244f, 0.8701f, 0.0213f, 0.8747f, 0.5289f, 0.9391f, 0.7988f, 0.9979f, + 0.3507f, 0.7672f, 0.4019f, 0.4799f, 0.6275f, 0.8737f, 0.9841f, 0.7683f, + 0.4178f, 0.4214f, 0.7376f, 0.2388f, 0.1105f, 0.3546f, 0.2872f, 0.2963f, + 0.2336f, 0.0421f, 0.0179f, 0.9877f, 0.4278f, 0.3843f, 0.6796f, 0.2183f, + 0.9500f, 0.7863f, 0.0894f, 0.4176f, 0.8791f, 0.9447f, 0.4674f, 0.6134f, + 0.1670f, 0.9912f, 0.2317f, 0.9427f, 0.6496f, 0.6077f, 0.5127f, 0.2307f, + 0.1765f, 0.2205f, 0.1864f, 0.7796f, 0.3501f, 0.0578f, 0.9691f, 0.8838f, + 0.9278f, 0.9949f, 0.1739f, 0.3962f, 0.7582f, 0.6960f, 0.1539f, 0.8158f, + 0.2244f, 0.2238f, 0.5370f, 0.5929f, 0.5801f, 0.0915f, 0.8775f, 0.2656f, + 0.1295f, 0.8887f, 0.9557f, 0.8621f, 0.8095f, 0.6552f, 0.5509f, 0.0870f, + 0.4085f, 0.3727f, 0.2598f, 0.7234f, 0.4959f, 0.0810f, 0.2202f, 0.6833f}; TensorShape exp_shape = {2, 3, 5}; - float exp_d[] = {2.133800f, 1.936200f, 2.566800f, 2.326300f, 1.884100f, - 2.726400f, 1.892500f, 2.007100f, 2.360200f, 2.220500f, - 2.258300f, 2.223700f, 1.322800f, 1.270200f, 2.323800f, - 2.624200f, 1.736700f, 1.871900f, 2.601300f, 2.357100f, - 1.224000f, 1.810200f, 2.052200f, 2.940400f, 2.091800f, - 2.743100f, 1.630100f, 2.229100f, 1.668000f, 1.727800f}; - + float exp_d[] = {2.133800f, 1.936200f, 2.566800f, 2.326300f, 1.884100f, 2.726400f, + 1.892500f, 2.007100f, 2.360200f, 2.220500f, 2.258300f, 2.223700f, + 1.322800f, 1.270200f, 2.323800f, 2.624200f, 1.736700f, 1.871900f, + 2.601300f, 2.357100f, 1.224000f, 1.810200f, 2.052200f, 2.940400f, + 2.091800f, 2.743100f, 1.630100f, 2.229100f, 1.668000f, 1.727800f}; + Tensor t1 = create_test_tensor(shape_4d, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor actual_res = Tensor_sum(t1, 2); @@ -146,37 +180,34 @@ void test_sum_operator() { { TensorShape shape_4d = {2, 3, 
4, 5}; float d1[120] = { - 0.374540f, 0.950714f, 0.731994f, 0.598658f, 0.156019f, - 0.155995f, 0.058084f, 0.866176f, 0.601115f, 0.708073f, - 0.020584f, 0.969910f, 0.832443f, 0.212339f, 0.181825f, - 0.183405f, 0.304242f, 0.524756f, 0.431945f, 0.291229f, - 0.611853f, 0.139494f, 0.292145f, 0.366362f, 0.456070f, - 0.785176f, 0.199674f, 0.514234f, 0.592415f, 0.046450f, - 0.607545f, 0.170524f, 0.065052f, 0.948886f, 0.965632f, - 0.808397f, 0.304614f, 0.097672f, 0.684233f, 0.440152f, - 0.122038f, 0.495177f, 0.034389f, 0.909320f, 0.258780f, - 0.662522f, 0.311711f, 0.520068f, 0.546710f, 0.184854f, - 0.969585f, 0.775133f, 0.939499f, 0.894827f, 0.597900f, - 0.921874f, 0.088493f, 0.195983f, 0.045227f, 0.325330f, - 0.388677f, 0.271349f, 0.828738f, 0.356753f, 0.280935f, - 0.542696f, 0.140924f, 0.802197f, 0.074551f, 0.986887f, - 0.772245f, 0.198716f, 0.005522f, 0.815461f, 0.706857f, - 0.729007f, 0.771270f, 0.074045f, 0.358466f, 0.115869f, - 0.863103f, 0.623298f, 0.330898f, 0.063558f, 0.310982f, - 0.325183f, 0.729606f, 0.637557f, 0.887213f, 0.472215f, - 0.119594f, 0.713245f, 0.760785f, 0.561277f, 0.770967f, - 0.493796f, 0.522733f, 0.427541f, 0.025419f, 0.107891f, - 0.031429f, 0.636410f, 0.314356f, 0.508571f, 0.907566f, - 0.249292f, 0.410383f, 0.755551f, 0.228798f, 0.076980f, - 0.289751f, 0.161221f, 0.929698f, 0.808120f, 0.633404f, - 0.871461f, 0.803672f, 0.186570f, 0.892559f, 0.539342f - }; + 0.374540f, 0.950714f, 0.731994f, 0.598658f, 0.156019f, 0.155995f, 0.058084f, + 0.866176f, 0.601115f, 0.708073f, 0.020584f, 0.969910f, 0.832443f, 0.212339f, + 0.181825f, 0.183405f, 0.304242f, 0.524756f, 0.431945f, 0.291229f, 0.611853f, + 0.139494f, 0.292145f, 0.366362f, 0.456070f, 0.785176f, 0.199674f, 0.514234f, + 0.592415f, 0.046450f, 0.607545f, 0.170524f, 0.065052f, 0.948886f, 0.965632f, + 0.808397f, 0.304614f, 0.097672f, 0.684233f, 0.440152f, 0.122038f, 0.495177f, + 0.034389f, 0.909320f, 0.258780f, 0.662522f, 0.311711f, 0.520068f, 0.546710f, + 0.184854f, 0.969585f, 0.775133f, 0.939499f, 0.894827f, 0.597900f, 0.921874f, + 0.088493f, 0.195983f, 0.045227f, 0.325330f, 0.388677f, 0.271349f, 0.828738f, + 0.356753f, 0.280935f, 0.542696f, 0.140924f, 0.802197f, 0.074551f, 0.986887f, + 0.772245f, 0.198716f, 0.005522f, 0.815461f, 0.706857f, 0.729007f, 0.771270f, + 0.074045f, 0.358466f, 0.115869f, 0.863103f, 0.623298f, 0.330898f, 0.063558f, + 0.310982f, 0.325183f, 0.729606f, 0.637557f, 0.887213f, 0.472215f, 0.119594f, + 0.713245f, 0.760785f, 0.561277f, 0.770967f, 0.493796f, 0.522733f, 0.427541f, + 0.025419f, 0.107891f, 0.031429f, 0.636410f, 0.314356f, 0.508571f, 0.907566f, + 0.249292f, 0.410383f, 0.755551f, 0.228798f, 0.076980f, 0.289751f, 0.161221f, + 0.929698f, 0.808120f, 0.633404f, 0.871461f, 0.803672f, 0.186570f, 0.892559f, + 0.539342f}; TensorShape exp_shape = {2, 4}; - float exp_d[8] = { - 6.497553f, 6.753257f, 9.151683f, 5.647553f, - 6.716625f, 7.320034f, 8.246864f, 6.919641f - }; - + float exp_d[8] = {6.497553f, + 6.753257f, + 9.151683f, + 5.647553f, + 6.716625f, + 7.320034f, + 8.246864f, + 6.919641f}; + Tensor t1 = create_test_tensor(shape_4d, d1, false); Tensor expected_res = create_test_tensor(exp_shape, exp_d, false); Tensor temp_res = Tensor_sum(t1, 3); diff --git a/tests/csv_reporter.c b/tests/csv_reporter.c index c43d678..a3c4856 100644 --- a/tests/csv_reporter.c +++ b/tests/csv_reporter.c @@ -5,15 +5,15 @@ #include #define INITIAL_CAPACITY 16 -#define MAX_SUB_TESTS 64 +#define MAX_SUB_TESTS 64 typedef struct { - char *operator_name; - char *test_point_name; - char *results[MAX_SUB_TESTS]; + char* operator_name; 
+ char* test_point_name; + char* results[MAX_SUB_TESTS]; } TestRow; -static TestRow *unique_rows_buffer = NULL; +static TestRow* unique_rows_buffer = NULL; static size_t unique_rows_count = 0; static size_t unique_rows_capacity = 0; @@ -21,26 +21,24 @@ static char csv_filename[FILENAME_MAX]; static int overall_max_sub_test_index = 0; static char* cten_strdup(const char* s) { - if (!s) return NULL; + if(!s) return NULL; size_t len = strlen(s) + 1; char* new_s = (char*)malloc(len); - if (new_s) { - memcpy(new_s, s, len); - } + if(new_s) { memcpy(new_s, s, len); } return new_s; } -int csv_reporter_init(const char *filename) { - if (unique_rows_capacity > 0 && unique_rows_buffer) { - for (size_t i = 0; i < unique_rows_count; ++i) { +int csv_reporter_init(const char* filename) { + if(unique_rows_capacity > 0 && unique_rows_buffer) { + for(size_t i = 0; i < unique_rows_count; ++i) { free(unique_rows_buffer[i].operator_name); free(unique_rows_buffer[i].test_point_name); - for(int j=0; j < MAX_SUB_TESTS; ++j) { - free(unique_rows_buffer[i].results[j]); + for(int j = 0; j < MAX_SUB_TESTS; ++j) { + free(unique_rows_buffer[i].results[j]); } } free(unique_rows_buffer); - unique_rows_buffer = NULL; + unique_rows_buffer = NULL; } strncpy(csv_filename, filename, FILENAME_MAX - 1); @@ -50,41 +48,52 @@ int csv_reporter_init(const char *filename) { unique_rows_capacity = unique_rows_buffer ? INITIAL_CAPACITY : 0; unique_rows_count = 0; overall_max_sub_test_index = 0; - + return unique_rows_buffer ? 0 : -1; } -void csv_reporter_record_result(const char *operator_name, const char *test_point_name, int sub_test_index, const char *result_detail) { - if (!unique_rows_buffer || unique_rows_capacity == 0) { - fprintf(stderr, "CSV Reporter: Record called before successful init or after failed init. Discarding result.\n"); +void csv_reporter_record_result(const char* operator_name, + const char* test_point_name, + int sub_test_index, + const char* result_detail) { + if(!unique_rows_buffer || unique_rows_capacity == 0) { + fprintf( + stderr, + "CSV Reporter: Record called before successful init or after failed init. Discarding result.\n"); return; } - if (sub_test_index <= 0 || sub_test_index > MAX_SUB_TESTS) { - fprintf(stderr, "CSV Reporter: Invalid sub_test_index %d for %s - %s. Must be 1-%d. Discarding.\n", - sub_test_index, operator_name, test_point_name, MAX_SUB_TESTS); + if(sub_test_index <= 0 || sub_test_index > MAX_SUB_TESTS) { + fprintf(stderr, + "CSV Reporter: Invalid sub_test_index %d for %s - %s. Must be 1-%d. Discarding.\n", + sub_test_index, + operator_name, + test_point_name, + MAX_SUB_TESTS); return; } - TestRow *row_to_update = NULL; + TestRow* row_to_update = NULL; // Find existing row - for (size_t i = 0; i < unique_rows_count; ++i) { - if (strcmp(unique_rows_buffer[i].operator_name, operator_name) == 0 && - strcmp(unique_rows_buffer[i].test_point_name, test_point_name) == 0) { + for(size_t i = 0; i < unique_rows_count; ++i) { + if(strcmp(unique_rows_buffer[i].operator_name, operator_name) == 0 && + strcmp(unique_rows_buffer[i].test_point_name, test_point_name) == 0) { row_to_update = &unique_rows_buffer[i]; break; } } // If no existing row, create a new one - if (!row_to_update) { - if (unique_rows_count >= unique_rows_capacity) { + if(!row_to_update) { + if(unique_rows_count >= unique_rows_capacity) { size_t new_cap = unique_rows_capacity > 0 ? 
unique_rows_capacity * 2 : INITIAL_CAPACITY; TestRow* new_urb = (TestRow*)realloc(unique_rows_buffer, new_cap * sizeof(TestRow)); - if (!new_urb) { - fprintf(stderr, "CSV Reporter: Failed to reallocate buffer for new row. Discarding result.\n"); - return; // Cannot allocate more + if(!new_urb) { + fprintf( + stderr, + "CSV Reporter: Failed to reallocate buffer for new row. Discarding result.\n"); + return; // Cannot allocate more } unique_rows_buffer = new_urb; unique_rows_capacity = new_cap; @@ -92,53 +101,64 @@ void csv_reporter_record_result(const char *operator_name, const char *test_poin row_to_update = &unique_rows_buffer[unique_rows_count]; row_to_update->operator_name = cten_strdup(operator_name); row_to_update->test_point_name = cten_strdup(test_point_name); - if (!row_to_update->operator_name || !row_to_update->test_point_name) { - fprintf(stderr, "CSV Reporter: Failed to allocate memory for new row names. Discarding result.\n"); - free(row_to_update->operator_name); + if(!row_to_update->operator_name || !row_to_update->test_point_name) { + fprintf( + stderr, + "CSV Reporter: Failed to allocate memory for new row names. Discarding result.\n"); + free(row_to_update->operator_name); free(row_to_update->test_point_name); return; } - for(int k=0; k < MAX_SUB_TESTS; ++k) row_to_update->results[k] = NULL; + for(int k = 0; k < MAX_SUB_TESTS; ++k) + row_to_update->results[k] = NULL; unique_rows_count++; } - + // Store result detail in the correct sub-test slot free(row_to_update->results[sub_test_index - 1]); row_to_update->results[sub_test_index - 1] = cten_strdup(result_detail); - if (!row_to_update->results[sub_test_index - 1] && result_detail != NULL) { - fprintf(stderr, "CSV Reporter: Failed to allocate memory for result detail. Result for %s-%s sub-test %d will be missing.\n", - operator_name, test_point_name, sub_test_index); + if(!row_to_update->results[sub_test_index - 1] && result_detail != NULL) { + fprintf( + stderr, + "CSV Reporter: Failed to allocate memory for result detail. Result for %s-%s sub-test %d will be missing.\n", + operator_name, + test_point_name, + sub_test_index); } - if (sub_test_index > overall_max_sub_test_index) { - overall_max_sub_test_index = sub_test_index; - } -} + if(sub_test_index > overall_max_sub_test_index) { overall_max_sub_test_index = sub_test_index; } +} void csv_reporter_close() { - if (!unique_rows_buffer) { + if(!unique_rows_buffer) { fprintf(stderr, "CSV Reporter: Close called but not initialized or buffer is NULL.\n"); return; } - FILE *file = fopen(csv_filename, "w"); - if (!file) { + FILE* file = fopen(csv_filename, "w"); + if(!file) { perror("CSV Reporter: Error opening CSV file for writing"); - goto cleanup; // Still try to free memory + goto cleanup; // Still try to free memory } fprintf(file, "Operator,TestPoint"); - for (int i = 1; i <= overall_max_sub_test_index; ++i) { + for(int i = 1; i <= overall_max_sub_test_index; ++i) { fprintf(file, ",%d", i); } fprintf(file, "\n"); - for (size_t i = 0; i < unique_rows_count; ++i) { - fprintf(file, "%s,%s", - unique_rows_buffer[i].operator_name ? unique_rows_buffer[i].operator_name : "ERROR_NULL_OP", - unique_rows_buffer[i].test_point_name ? unique_rows_buffer[i].test_point_name : "ERROR_NULL_TP"); - for (int j = 0; j < overall_max_sub_test_index; ++j) { // Iterate up to overall_max_sub_test_index columns - fprintf(file, ",%s", unique_rows_buffer[i].results[j] ? 
unique_rows_buffer[i].results[j] : ""); + for(size_t i = 0; i < unique_rows_count; ++i) { + fprintf(file, + "%s,%s", + unique_rows_buffer[i].operator_name ? unique_rows_buffer[i].operator_name + : "ERROR_NULL_OP", + unique_rows_buffer[i].test_point_name ? unique_rows_buffer[i].test_point_name + : "ERROR_NULL_TP"); + for(int j = 0; j < overall_max_sub_test_index; + ++j) { // Iterate up to overall_max_sub_test_index columns + fprintf(file, + ",%s", + unique_rows_buffer[i].results[j] ? unique_rows_buffer[i].results[j] : ""); } fprintf(file, "\n"); } @@ -146,11 +166,11 @@ void csv_reporter_close() { fclose(file); cleanup: - if (unique_rows_buffer) { - for (size_t i = 0; i < unique_rows_count; ++i) { + if(unique_rows_buffer) { + for(size_t i = 0; i < unique_rows_count; ++i) { free(unique_rows_buffer[i].operator_name); free(unique_rows_buffer[i].test_point_name); - for (int j = 0; j < MAX_SUB_TESTS; ++j) { + for(int j = 0; j < MAX_SUB_TESTS; ++j) { free(unique_rows_buffer[i].results[j]); } } diff --git a/tests/csv_reporter.h b/tests/csv_reporter.h index b592f80..011a60b 100644 --- a/tests/csv_reporter.h +++ b/tests/csv_reporter.h @@ -1,19 +1,23 @@ #ifndef CSV_REPORTER_H #define CSV_REPORTER_H -#include +#include // Returns 0 on success, -1 on failure. -int csv_reporter_init(const char *filename); +int csv_reporter_init(const char* filename); // Records a test result for a specific sub-test within a test point. // operator_name: Name of the operator being tested (e.g., "add"). // test_point_name: The base name for the test case, defining a row in the CSV (e.g., "add_scalar"). -// sub_test_index: A 1-based index specifying the sub-test column for this result (e.g., 1, 2, 3...). -// result_detail: A string describing the result (e.g., "/" for pass, or "observed/expected/platform" for fail). -void csv_reporter_record_result(const char *operator_name, const char *test_point_name, int sub_test_index, const char *result_detail); +// sub_test_index: A 1-based index specifying the sub-test column for this result (e.g., 1, +// 2, 3...). result_detail: A string describing the result (e.g., "/" for pass, or +// "observed/expected/platform" for fail). 
+void csv_reporter_record_result(const char* operator_name, + const char* test_point_name, + int sub_test_index, + const char* result_detail); void csv_reporter_close(); #endif diff --git a/tests/cten_tests.c b/tests/cten_tests.c index e506df0..c68338f 100644 --- a/tests/cten_tests.c +++ b/tests/cten_tests.c @@ -6,9 +6,9 @@ #include #if defined(_WIN32) || defined(_WIN64) - #define PATH_SEPARATOR_CHAR '\\' +#define PATH_SEPARATOR_CHAR '\\' #else - #define PATH_SEPARATOR_CHAR '/' +#define PATH_SEPARATOR_CHAR '/' #endif #define XSTR(s) STR(s) @@ -56,20 +56,25 @@ int main() { const char* build_dir = XSTR(CTEN_BUILD_DIR_PATH); char clean_build_dir[256]; size_t len = strlen(build_dir); - if (len > 1 && build_dir[0] == '"' && build_dir[len-1] == '"') { + if(len > 1 && build_dir[0] == '"' && build_dir[len - 1] == '"') { strncpy(clean_build_dir, build_dir + 1, len - 2); clean_build_dir[len - 2] = '\0'; } else { strcpy(clean_build_dir, build_dir); } - snprintf(report_path, sizeof(report_path), "%s%ccten_test_report_%s.csv", clean_build_dir, PATH_SEPARATOR_CHAR, PLATFORM_NAME); + snprintf(report_path, + sizeof(report_path), + "%s%ccten_test_report_%s.csv", + clean_build_dir, + PATH_SEPARATOR_CHAR, + PLATFORM_NAME); #else snprintf(report_path, sizeof(report_path), "cten_test_report_%s.csv", PLATFORM_NAME); #endif printf("Test report will be generated at: %s\n", report_path); - if (csv_reporter_init(report_path) != 0) { + if(csv_reporter_init(report_path) != 0) { fprintf(stderr, "Failed to initialize CSV reporter. Aborting tests.\n"); cten_finalize(); return 1; @@ -129,13 +134,13 @@ int main() { test_matmul_backward(); printf("Matmul backward tests finished.\n"); - + test_sub_backward(); printf("Sub backward tests finished.\n"); - + test_relu_backward(); printf("ReLU backward tests finished.\n"); - + test_linear_backward(); printf("Linear backward tests finished.\n"); @@ -147,27 +152,28 @@ int main() { test_sum_backward(); printf("Sum backward tests finished.\n"); - + test_mean_backward(); printf("Mean backward tests finished.\n"); - + test_div_backward(); printf("Div backward tests finished.\n"); - + test_pow_backward(); printf("Pow backward tests finished.\n"); - + test_abs_backward(); printf("Abs backward tests finished.\n"); - + test_softmax_backward(); printf("Softmax backward tests finished.\n"); - + // other tests - + csv_reporter_close(); cten_finalize(); - printf("cTensor Test Suite finished. Report generated: cten_test_report_%s.csv\n", PLATFORM_NAME); + printf("cTensor Test Suite finished. 
Report generated: cten_test_report_%s.csv\n", + PLATFORM_NAME); return 0; } diff --git a/tests/test_config.h b/tests/test_config.h index 92e3630..9f87d11 100644 --- a/tests/test_config.h +++ b/tests/test_config.h @@ -4,13 +4,13 @@ #define TEST_FLOAT_TOLERANCE 1e-4f #if defined(_WIN32) || defined(_WIN64) - #define PLATFORM_NAME "windows" +#define PLATFORM_NAME "windows" #elif defined(__linux__) - #define PLATFORM_NAME "linux" +#define PLATFORM_NAME "linux" #elif defined(__APPLE__) || defined(__MACH__) - #define PLATFORM_NAME "macos" +#define PLATFORM_NAME "macos" #else - #define PLATFORM_NAME "unknown" +#define PLATFORM_NAME "unknown" #endif #define CTENSOR_MAX_DIMS 4 diff --git a/tests/test_utils.c b/tests/test_utils.c index 1fb7184..f8350ec 100644 --- a/tests/test_utils.c +++ b/tests/test_utils.c @@ -4,63 +4,74 @@ #include #include #include -#include +#include -bool compare_floats(float a, float b, float tolerance) { - return fabs(a - b) < tolerance; -} +bool compare_floats(float a, float b, float tolerance) { return fabs(a - b) < tolerance; } Tensor create_test_tensor(TensorShape shape, float* data, bool requires_grad) { Tensor t = Tensor_new(shape, requires_grad); - if (t.data != NULL && data != NULL) { + if(t.data != NULL && data != NULL) { memcpy(t.data->flex, data, t.data->numel * sizeof(float)); } return t; } void print_tensor(const Tensor* t, const char* name) { - if (name) { + if(name) { printf("Tensor %s (Shape: (", name); } else { printf("Tensor (Shape: ("); } TensorShape shape_copy_for_print; memcpy(shape_copy_for_print, t->shape, sizeof(TensorShape)); - for (int i = 0; i < CTENSOR_MAX_DIMS && shape_copy_for_print[i] != 0; ++i) { - printf("%d%s", shape_copy_for_print[i], (shape_copy_for_print[i+1] != 0 && i < CTENSOR_MAX_DIMS -1) ? ", " : ""); + for(int i = 0; i < CTENSOR_MAX_DIMS && shape_copy_for_print[i] != 0; ++i) { + printf("%d%s", + shape_copy_for_print[i], + (shape_copy_for_print[i + 1] != 0 && i < CTENSOR_MAX_DIMS - 1) ? 
", " : ""); } printf(")):\n"); - if (t->data == NULL) { + if(t->data == NULL) { printf(" [Data is NULL]\n"); return; } - if (t->data->numel == 0) { + if(t->data->numel == 0) { printf(" [Empty tensor]\n"); return; } - for (size_t i = 0; i < t->data->numel; ++i) { + for(size_t i = 0; i < t->data->numel; ++i) { printf("%.4f ", t->data->flex[i]); - if (t->shape[1] != 0 && (i + 1) % t->shape[1] == 0) { - printf("\n"); - } - } - if (t->shape[1] == 0 && t->data->numel > 0) { - printf("\n"); + if(t->shape[1] != 0 && (i + 1) % t->shape[1] == 0) { printf("\n"); } } + if(t->shape[1] == 0 && t->data->numel > 0) { printf("\n"); } printf("\n"); } -bool compare_tensors(const Tensor* t_observed, const Tensor* t_expected, const char* operator_name, const char* test_point_name, int sub_test_index, float tolerance) { +bool compare_tensors(const Tensor* t_observed, + const Tensor* t_expected, + const char* operator_name, + const char* test_point_name, + int sub_test_index, + float tolerance) { char failure_detail_buffer[512]; - if (t_observed == NULL || t_expected == NULL) { + if(t_observed == NULL || t_expected == NULL) { const char* detail = "observed_is_NULL"; - if (t_observed == NULL && t_expected == NULL) detail = "both_are_NULL"; - else if (t_expected == NULL) detail = "expected_is_NULL"; - snprintf(failure_detail_buffer, sizeof(failure_detail_buffer), "%s/%s/%s", detail, "not_null", PLATFORM_NAME); - csv_reporter_record_result(operator_name, test_point_name, sub_test_index, failure_detail_buffer); + if(t_observed == NULL && t_expected == NULL) + detail = "both_are_NULL"; + else if(t_expected == NULL) + detail = "expected_is_NULL"; + snprintf(failure_detail_buffer, + sizeof(failure_detail_buffer), + "%s/%s/%s", + detail, + "not_null", + PLATFORM_NAME); + csv_reporter_record_result(operator_name, + test_point_name, + sub_test_index, + failure_detail_buffer); return false; } @@ -70,40 +81,77 @@ bool compare_tensors(const Tensor* t_observed, const Tensor* t_expected, const c memcpy(shape_exp_copy_for_dim, t_expected->shape, sizeof(TensorShape)); int dim_obs = TensorShape_dim(shape_obs_copy_for_dim); int dim_exp = TensorShape_dim(shape_exp_copy_for_dim); - if (dim_obs != dim_exp) { - snprintf(failure_detail_buffer, sizeof(failure_detail_buffer), "%d/%d/%s_dim_mismatch", dim_obs, dim_exp, PLATFORM_NAME); - csv_reporter_record_result(operator_name, test_point_name, sub_test_index, failure_detail_buffer); + if(dim_obs != dim_exp) { + snprintf(failure_detail_buffer, + sizeof(failure_detail_buffer), + "%d/%d/%s_dim_mismatch", + dim_obs, + dim_exp, + PLATFORM_NAME); + csv_reporter_record_result(operator_name, + test_point_name, + sub_test_index, + failure_detail_buffer); return false; } // 2. 
Compare shapes - for (int i = 0; i < dim_obs; ++i) { - if (t_observed->shape[i] != t_expected->shape[i]) { - snprintf(failure_detail_buffer, sizeof(failure_detail_buffer), "%d/%d/%s_shape_mismatch_at_dim%d", t_observed->shape[i], t_expected->shape[i], PLATFORM_NAME, i); - csv_reporter_record_result(operator_name, test_point_name, sub_test_index, failure_detail_buffer); + for(int i = 0; i < dim_obs; ++i) { + if(t_observed->shape[i] != t_expected->shape[i]) { + snprintf(failure_detail_buffer, + sizeof(failure_detail_buffer), + "%d/%d/%s_shape_mismatch_at_dim%d", + t_observed->shape[i], + t_expected->shape[i], + PLATFORM_NAME, + i); + csv_reporter_record_result(operator_name, + test_point_name, + sub_test_index, + failure_detail_buffer); return false; } } // Check for NULL data buffers - if (t_observed->data == NULL || t_expected->data == NULL) { + if(t_observed->data == NULL || t_expected->data == NULL) { const char* detail = "observed_data_is_NULL"; - if (t_observed->data == NULL && t_expected->data == NULL && t_observed->shape[0] == 0); - else if (t_observed->data == NULL && t_expected->data == NULL) detail = "both_data_are_NULL"; - else if (t_expected->data == NULL) detail = "expected_data_is_NULL"; + if(t_observed->data == NULL && t_expected->data == NULL && t_observed->shape[0] == 0) + ; + else if(t_observed->data == NULL && t_expected->data == NULL) + detail = "both_data_are_NULL"; + else if(t_expected->data == NULL) + detail = "expected_data_is_NULL"; else { - snprintf(failure_detail_buffer, sizeof(failure_detail_buffer), "%s/%s/%s", detail, "non-NULL_data_expected_or_vice_versa", PLATFORM_NAME); - csv_reporter_record_result(operator_name, test_point_name, sub_test_index, failure_detail_buffer); + snprintf(failure_detail_buffer, + sizeof(failure_detail_buffer), + "%s/%s/%s", + detail, + "non-NULL_data_expected_or_vice_versa", + PLATFORM_NAME); + csv_reporter_record_result(operator_name, + test_point_name, + sub_test_index, + failure_detail_buffer); return false; } } - + // Handle case where one data is NULL but other is not (and not an empty tensor case) - if ((t_observed->data == NULL && t_expected->data != NULL && t_expected->data->numel > 0) || - (t_observed->data != NULL && t_expected->data == NULL && t_observed->data->numel > 0)) { - const char* detail = (t_observed->data == NULL) ? "observed_data_NULL_expected_not_NULL" : "observed_data_not_NULL_expected_NULL"; - snprintf(failure_detail_buffer, sizeof(failure_detail_buffer), "%s/%s/%s", detail, "data_buffer_discrepancy", PLATFORM_NAME); - csv_reporter_record_result(operator_name, test_point_name, sub_test_index, failure_detail_buffer); + if((t_observed->data == NULL && t_expected->data != NULL && t_expected->data->numel > 0) || + (t_observed->data != NULL && t_expected->data == NULL && t_observed->data->numel > 0)) { + const char* detail = (t_observed->data == NULL) ? "observed_data_NULL_expected_not_NULL" + : "observed_data_not_NULL_expected_NULL"; + snprintf(failure_detail_buffer, + sizeof(failure_detail_buffer), + "%s/%s/%s", + detail, + "data_buffer_discrepancy", + PLATFORM_NAME); + csv_reporter_record_result(operator_name, + test_point_name, + sub_test_index, + failure_detail_buffer); return false; } @@ -112,29 +160,44 @@ bool compare_tensors(const Tensor* t_observed, const Tensor* t_expected, const c size_t numel_exp = (t_expected->data) ? t_expected->data->numel : 0; // 3. 
Compare number of elements
-    if (numel_obs != numel_exp) {
-        snprintf(failure_detail_buffer, sizeof(failure_detail_buffer), "%zu/%zu/%s_numel_mismatch", numel_obs, numel_exp, PLATFORM_NAME);
-        csv_reporter_record_result(operator_name, test_point_name, sub_test_index, failure_detail_buffer);
+    if(numel_obs != numel_exp) {
+        snprintf(failure_detail_buffer,
+                 sizeof(failure_detail_buffer),
+                 "%zu/%zu/%s_numel_mismatch",
+                 numel_obs,
+                 numel_exp,
+                 PLATFORM_NAME);
+        csv_reporter_record_result(operator_name,
+                                   test_point_name,
+                                   sub_test_index,
+                                   failure_detail_buffer);
         return false;
     }
 
-    // If numel is 0, and all previous checks passed, tensors are considered equal (e.g. two empty tensors of same shape)
-    if (numel_obs == 0) {
-        return true;
-    }
+    // If numel is 0, and all previous checks passed, tensors are considered equal (e.g. two empty
+    // tensors of same shape)
+    if(numel_obs == 0) { return true; }
 
     // 4. Compare data element-wise (only if data buffers are not NULL and numel > 0)
-    for (size_t i = 0; i < numel_obs; ++i) {
-        if (!compare_floats(t_observed->data->flex[i], t_expected->data->flex[i], tolerance)) {
-            snprintf(failure_detail_buffer, sizeof(failure_detail_buffer), "%.*g/%.*g/%s",
-                     15, t_observed->data->flex[i], 15, t_expected->data->flex[i], PLATFORM_NAME);
-            csv_reporter_record_result(operator_name, test_point_name, sub_test_index, failure_detail_buffer);
-            return false; // Fail on first mismatch
+    for(size_t i = 0; i < numel_obs; ++i) {
+        if(!compare_floats(t_observed->data->flex[i], t_expected->data->flex[i], tolerance)) {
+            snprintf(failure_detail_buffer,
+                     sizeof(failure_detail_buffer),
+                     "%.*g/%.*g/%s",
+                     15,
+                     t_observed->data->flex[i],
+                     15,
+                     t_expected->data->flex[i],
+                     PLATFORM_NAME);
+            csv_reporter_record_result(operator_name,
+                                       test_point_name,
+                                       sub_test_index,
+                                       failure_detail_buffer);
+            return false; // Fail on first mismatch
         }
     }
-    
+
     // All tests passed, record success
     csv_reporter_record_result(operator_name, test_point_name, sub_test_index, "/");
     return true;
 }
-
diff --git a/tests/test_utils.h b/tests/test_utils.h
index c8de456..8817417 100644
--- a/tests/test_utils.h
+++ b/tests/test_utils.h
@@ -5,7 +5,12 @@
 #include
 
 bool compare_floats(float a, float b, float tolerance);
-bool compare_tensors(const Tensor* t_observed, const Tensor* t_expected, const char* operator_name, const char* test_point_name, int sub_test_index, float tolerance);
+bool compare_tensors(const Tensor* t_observed,
+                     const Tensor* t_expected,
+                     const char* operator_name,
+                     const char* test_point_name,
+                     int sub_test_index,
+                     float tolerance);
 
 Tensor create_test_tensor(TensorShape shape, float* data, bool requires_grad);
 void print_tensor(const Tensor* t, const char* name);

From 0b77b081bec106718410bdd70e16855cffccb9ea Mon Sep 17 00:00:00 2001
From: Advaitgaur004
Date: Tue, 19 Aug 2025 22:55:37 +0530
Subject: [PATCH 2/2] Make the build process simpler for Linux users

- It follows the single source of truth principle.
- All build logic (which files to compile, which flags to use, how to link
  libraries) is now located only in the CMakeLists.txt file.
---
 build.sh   | 10 ++++++++++
 build_g.sh | 15 ---------------
 2 files changed, 10 insertions(+), 15 deletions(-)
 create mode 100644 build.sh
 delete mode 100644 build_g.sh

diff --git a/build.sh b/build.sh
new file mode 100644
index 0000000..590123c
--- /dev/null
+++ b/build.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+
+set -e
+mkdir -p build
+cd build
+cmake ..
+cmake --build .
+
+echo "\nBuild complete. The cTensor tests executable is in the 'build/bin' directory. Run 'build/bin/cten_tests' to run the tests.\n"
+echo "\nTo run the main (demo) executable, run 'build/cten_exe'.\n"
\ No newline at end of file
diff --git a/build_g.sh b/build_g.sh
deleted file mode 100644
index b6aa9dd..0000000
--- a/build_g.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-set -e
-
-SRC=$(find src/ -name "*.c")
-
-FLAGS="-std=c11 -lm -Iinclude -O0 -Wfatal-errors -g -DDEBUG"
-
-SANITIZE_FLAGS="-fsanitize=address,leak,undefined"
-
-if [ "$(uname)" == "Darwin" ]; then
-    SANITIZE_FLAGS="-fsanitize=address,undefined"
-fi
-
-echo "Compiling C files..."
-clang $FLAGS $SANITIZE_FLAGS $SRC src2/main.c -o main
-
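---

Usage note (not part of the patch): a minimal sketch of the build flow this second patch enables, assuming cmake and a C compiler are on PATH; the paths come from the messages build.sh itself echoes above.

    sh build.sh              # configure and build everything; CMakeLists.txt holds all build logic
    ./build/bin/cten_tests   # run the cTensor test suite (writes the per-platform CSV report)
    ./build/cten_exe         # run the main (demo) executable

With build_g.sh removed, any change to sources, flags, or linking is made once in CMakeLists.txt rather than duplicated in a second script.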