From a94cac97453bbaf95740866508c825f4b3889464 Mon Sep 17 00:00:00 2001 From: Advaitgaur004 Date: Wed, 20 Aug 2025 14:26:00 +0530 Subject: [PATCH 1/5] minor-cleanup left --- tests/Operator/test_div.c | 8 +++++--- tests/Operator/test_pow.c | 27 +++++++++++++++------------ tests/Operator/test_square.c | 19 ++++++++++--------- 3 files changed, 30 insertions(+), 24 deletions(-) diff --git a/tests/Operator/test_div.c b/tests/Operator/test_div.c index c8dbf9b..3f8d540 100644 --- a/tests/Operator/test_div.c +++ b/tests/Operator/test_div.c @@ -65,9 +65,11 @@ void test_div_operator() { TensorShape m_shape = {2, 2}; float d1[] = {1.522753f, 8.795585f, 6.410035f, 7.372653f}; float d2[] = {0.592630f, 4.864594f, 4.245992f, 1.455526f}; - float exp_d[] = - {2.569482f, 1.808082f, 1.509667f, 5.065284f}; // [1.522753/0.592630, 8.795585/4.864594, 6.410035/4.245992, - // 7.372653/1.455526] + float exp_d[] = {2.569482f, + 1.808082f, + 1.509667f, + 5.065284f}; // [1.522753/0.592630, 8.795585/4.864594, 6.410035/4.245992, + // 7.372653/1.455526] Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor t2 = create_test_tensor(m_shape, d2, false); Tensor expected_res = create_test_tensor(m_shape, exp_d, false); diff --git a/tests/Operator/test_pow.c b/tests/Operator/test_pow.c index 06ecf34..1f165fb 100644 --- a/tests/Operator/test_pow.c +++ b/tests/Operator/test_pow.c @@ -77,9 +77,11 @@ void test_pow_operator() { TensorShape m_shape = {2, 2}; float d1[] = {3.394634f, 1.624075f, 1.623978f, 1.232334f}; float d2[] = {2.665440f, 2.002788f, 2.270181f, 0.551461f}; - float exp_d[] = - {25.989442f, 2.641186f, 3.006458f, 1.122104f}; // [3.394634^2.665440, 1.624075^2.002788, - // 1.623978^2.270181, 1.232334^0.551461] + float exp_d[] = {25.989442f, + 2.641186f, + 3.006458f, + 1.122104f}; // [3.394634^2.665440, 1.624075^2.002788, + // 1.623978^2.270181, 1.232334^0.551461] Tensor t1 = create_test_tensor(m_shape, d1, false); Tensor t2 = create_test_tensor(m_shape, d2, false); Tensor expected_res = create_test_tensor(m_shape, exp_d, false); @@ -108,15 +110,16 @@ void test_pow_operator() { 2.000000f, 2.000000f, 2.000000f}; - float exp_d[] = {23.810881f, - 18.746913f, - 3.420119f, - 2.983565f, - 3.005432f, - 4.914951f, - 9.603960f, - 7.440784f}; // [4.879639^2, 4.329771^2, 1.849356^2, 1.727300^2, 1.733618^2, - // 2.216969^2, 3.099026^2, 2.727780^2] + float exp_d[] = { + 23.810881f, + 18.746913f, + 3.420119f, + 2.983565f, + 3.005432f, + 4.914951f, + 9.603960f, + 7.440784f}; // [4.879639^2, 4.329771^2, 1.849356^2, 1.727300^2, 1.733618^2, + // 2.216969^2, 3.099026^2, 2.727780^2] Tensor t1 = create_test_tensor(t_shape, d1, false); Tensor t2 = create_test_tensor(t_shape, d2, false); Tensor expected_res = create_test_tensor(t_shape, exp_d, false); diff --git a/tests/Operator/test_square.c b/tests/Operator/test_square.c index 8ac1655..92eeb74 100644 --- a/tests/Operator/test_square.c +++ b/tests/Operator/test_square.c @@ -102,15 +102,16 @@ void test_square_operator() { 8.491984f, 2.911052f, 2.636425f}; - float exp_d[] = {77.362321f, - 41.088550f, - 54.356015f, - 1.404842f, - 94.657112f, - 72.113788f, - 8.474224f, - 6.950735f}; // [8.795585^2, 6.410035^2, 7.372653^2, 1.185260^2, 9.729189^2, - // 8.491984^2, 2.911052^2, 2.636425^2] + float exp_d[] = { + 77.362321f, + 41.088550f, + 54.356015f, + 1.404842f, + 94.657112f, + 72.113788f, + 8.474224f, + 6.950735f}; // [8.795585^2, 6.410035^2, 7.372653^2, 1.185260^2, 9.729189^2, + // 8.491984^2, 2.911052^2, 2.636425^2] Tensor t1 = create_test_tensor(t_shape, d, false); Tensor expected_res = 
create_test_tensor(t_shape, exp_d, false); Tensor actual_res = Tensor_square(t1); From 144a3977ce5228965ca6a4f89a64615d3686b54c Mon Sep 17 00:00:00 2001 From: Advaitgaur004 Date: Wed, 20 Aug 2025 14:27:10 +0530 Subject: [PATCH 2/5] Doxygen-style doc written - cten.h --- include/cten.h | 676 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 663 insertions(+), 13 deletions(-) diff --git a/include/cten.h b/include/cten.h index f1b1861..9a072d4 100644 --- a/include/cten.h +++ b/include/cten.h @@ -1,3 +1,11 @@ +/** + * @file cten.h + * @brief CTensor - A lightweight tensor library for C with automatic differentiation + * + * CTensor provides tensor operations, neural network functions, and automatic + * differentiation capabilities for machine learning applications in C. + */ + #pragma once #include @@ -15,143 +23,632 @@ #define Tensor_mean(...) _CTEN_PICK(__VA_ARGS__, Tensor_mean_dim, Tensor_mean_all)(__VA_ARGS__) #define Tensor_sum(...) _CTEN_PICK(__VA_ARGS__, Tensor_sum_dim, Tensor_sum_all)(__VA_ARGS__) +/** @brief Tensor shape type supporting up to 4 dimensions */ typedef int TensorShape[4]; typedef struct GradNode GradNode; +/** + * @brief Float buffer structure with flexible array member + * @details Stores tensor data with element count and flexible array + */ typedef struct FloatBuffer { - int numel; - float flex[]; + int numel; /**< Number of elements in the buffer */ + float flex[]; /**< Flexible array member containing the actual data */ } FloatBuffer; +/** + * @brief Main tensor structure + * @details Contains tensor shape, data buffer, and gradient computation node + */ typedef struct Tensor { - TensorShape shape; - FloatBuffer* data; - GradNode* node; + TensorShape shape; /**< Tensor dimensions [dim0, dim1, dim2, dim3] */ + FloatBuffer* data; /**< Pointer to data buffer */ + GradNode* node; /**< Gradient computation node (NULL if no gradients) */ } Tensor; +/** + * @brief Gradient computation node for automatic differentiation + * @details Stores gradient function, inputs, and metadata for backpropagation + */ typedef struct GradNode { - struct Tensor grad; - struct Tensor (*grad_fn)(struct Tensor self, int i); - struct Tensor inputs[4]; - int n_inputs; - const char* name; - int params[4]; + struct Tensor grad; /**< Accumulated gradient */ + struct Tensor (*grad_fn)(struct Tensor self, int i); /**< Gradient function */ + struct Tensor inputs[4]; /**< Input tensors */ + int n_inputs; /**< Number of inputs */ + const char* name; /**< Operation name for debugging */ + int params[4]; /**< Additional parameters */ } GradNode; +/** + * @brief Result structure for max/min operations along a dimension + * @details Contains both the values and their corresponding indices + */ typedef struct { - Tensor values; - Tensor indices; + Tensor values; /**< Maximum/minimum values */ + Tensor indices; /**< Indices of maximum/minimum values */ } TensorMaxMinResult; +/** + * @brief Initialize the CTensor library + * @details Sets up internal memory management system. Must be called before using CTensor. + */ void cten_initilize(); + +/** + * @brief Finalize and cleanup the CTensor library + * @details Frees all allocated memory and cleans up internal structures. + * Should be called when done using CTensor. 
+ */ void cten_finalize(); /* TensorShape */ + +/** + * @brief Calculate total number of elements in a tensor shape + * @param shape The tensor shape + * @return Number of elements (product of all dimensions) + */ int TensorShape_numel(TensorShape shape); + +/** + * @brief Get the number of dimensions in a tensor shape + * @param shape The tensor shape + * @return Number of non-zero dimensions + */ int TensorShape_dim(TensorShape shape); + +/** + * @brief Normalize dimension index (handle negative indices) + * @param shape The tensor shape + * @param dim The dimension index (can be negative) + * @return Normalized positive dimension index + */ int TensorShape_asdim(TensorShape shape, int dim); + +/** + * @brief Convert tensor shape to string representation + * @param shape The tensor shape + * @param buf Buffer to write the string to + * @param size Size of the buffer + * @return Number of characters written + */ int TensorShape_tostring(TensorShape shape, char* buf, int size); /* Tensor Basic */ + +/** + * @brief Create a new tensor with uninitialized data + * @param shape The desired tensor shape + * @param requires_grad Whether to track gradients for this tensor + * @return New tensor with allocated memory + */ Tensor Tensor_new(TensorShape shape, bool requires_grad); + +/** + * @brief Create a tensor filled with zeros + * @param shape The desired tensor shape + * @param requires_grad Whether to track gradients for this tensor + * @return New tensor filled with zeros + */ Tensor Tensor_zeros(TensorShape shape, bool requires_grad); + +/** + * @brief Create a tensor filled with ones + * @param shape The desired tensor shape + * @param requires_grad Whether to track gradients for this tensor + * @return New tensor filled with ones + */ Tensor Tensor_ones(TensorShape shape, bool requires_grad); + +/** + * @brief Transpose a 2D tensor + * @param self The input tensor (must be 2D) + * @return Transposed tensor + */ Tensor Tensor_transpose(Tensor self); +/** + * @brief Get element value at specified indices + * @param self The tensor + * @param i First dimension index + * @param j Second dimension index + * @param k Third dimension index + * @param l Fourth dimension index + * @return Element value at the specified position + */ float Tensor_get(Tensor self, int i, int j, int k, int l); + +/** + * @brief Set element value at specified indices + * @param self The tensor + * @param i First dimension index + * @param j Second dimension index + * @param k Third dimension index + * @param l Fourth dimension index + * @param value Value to set + */ void Tensor_set(Tensor self, int i, int j, int k, int l, float value); + +/** + * @brief Perform backward pass (backpropagation) + * @param self The tensor to backpropagate from + * @param grad The gradient to backpropagate + */ void Tensor_backward(Tensor self, Tensor grad); + +/** + * @brief Apply a function to all tensors in the computation graph + * @param self The root tensor + * @param f Function to apply to each tensor (can be NULL) + * @param ctx Context pointer passed to the function + * @return Number of tensors visited in the computation graph + */ int Tensor_backward_apply(Tensor self, void (*f)(Tensor, void*), void* ctx); +/** + * @brief Print tensor contents to stdout + * @param self The tensor to print + */ void Tensor_print(Tensor self); /* Tensor Operations */ + +/** + * @brief Element-wise addition of two tensors + * @param self First tensor + * @param other Second tensor + * @return Result of self + other (with broadcasting if needed) + 
*/ Tensor Tensor_add(Tensor self, Tensor other); + +/** + * @brief Element-wise subtraction of two tensors + * @param self First tensor + * @param other Second tensor + * @return Result of self - other (with broadcasting if needed) + */ Tensor Tensor_sub(Tensor self, Tensor other); + +/** + * @brief Element-wise multiplication of two tensors + * @param self First tensor + * @param other Second tensor + * @return Result of self * other (with broadcasting if needed) + */ Tensor Tensor_mul(Tensor self, Tensor other); + +/** + * @brief Element-wise division of two tensors + * @param self First tensor + * @param other Second tensor + * @return Result of self / other (with broadcasting if needed) + */ Tensor Tensor_div(Tensor self, Tensor other); + +/** + * @brief Element-wise power of two tensors + * @param self Base tensor + * @param other Exponent tensor + * @return Result of self ^ other (with broadcasting if needed) + */ Tensor Tensor_pow(Tensor self, Tensor other); +/** + * @brief Add a scalar to all elements of a tensor + * @param self The tensor + * @param other Scalar value to add + * @return Result of self + other + */ Tensor Tensor_addf(Tensor self, float other); + +/** + * @brief Subtract a scalar from all elements of a tensor + * @param self The tensor + * @param other Scalar value to subtract + * @return Result of self - other + */ Tensor Tensor_subf(Tensor self, float other); + +/** + * @brief Multiply all elements of a tensor by a scalar + * @param self The tensor + * @param other Scalar value to multiply by + * @return Result of self * other + */ Tensor Tensor_mulf(Tensor self, float other); + +/** + * @brief Divide all elements of a tensor by a scalar + * @param self The tensor + * @param other Scalar value to divide by + * @return Result of self / other + */ Tensor Tensor_divf(Tensor self, float other); + +/** + * @brief Raise all elements of a tensor to a scalar power + * @param self The tensor + * @param other Scalar exponent + * @return Result of self ^ other + */ Tensor Tensor_powf(Tensor self, float other); +/** + * @brief Matrix multiplication of two tensors + * @param self First tensor (left operand) + * @param other Second tensor (right operand) + * @return Result of matrix multiplication self @ other + */ Tensor Tensor_matmul(Tensor self, Tensor other); +/** + * @brief Element-wise negation + * @param self The tensor + * @return Result of -self + */ Tensor Tensor_neg(Tensor self); + +/** + * @brief Element-wise absolute value + * @param self The tensor + * @return Result of |self| + */ Tensor Tensor_abs(Tensor self); + +/** + * @brief Element-wise square + * @param self The tensor + * @return Result of self^2 + */ Tensor Tensor_square(Tensor self); + +/** + * @brief Element-wise reciprocal + * @param self The tensor + * @return Result of 1/self + */ Tensor Tensor_reciprocal(Tensor self); /* Helper functions that the macros dispatch to */ + +/** + * @brief Calculate mean of all elements in a tensor + * @param self The tensor + * @return Scalar tensor containing the mean + */ Tensor Tensor_mean_all(Tensor self); + +/** + * @brief Calculate mean along a specific dimension + * @param self The tensor + * @param dim The dimension to reduce + * @return Tensor with reduced dimension + */ Tensor Tensor_mean_dim(Tensor self, int dim); + +/** + * @brief Calculate sum of all elements in a tensor + * @param self The tensor + * @return Scalar tensor containing the sum + */ Tensor Tensor_sum_all(Tensor self); + +/** + * @brief Calculate sum along a specific dimension + * @param self 
The tensor + * @param dim The dimension to reduce + * @return Tensor with reduced dimension + */ Tensor Tensor_sum_dim(Tensor self, int dim); +/** + * @brief Find maximum value among all elements + * @param self The tensor + * @return Scalar tensor containing the maximum value + */ Tensor Tensor_max_all(Tensor self); + +/** + * @brief Find maximum values and indices along a specific dimension + * @param self The tensor + * @param dim The dimension to reduce + * @return TensorMaxMinResult containing values and indices + */ TensorMaxMinResult Tensor_max_dim(Tensor self, int dim); + +/** + * @brief Find minimum value among all elements + * @param self The tensor + * @return Scalar tensor containing the minimum value + */ Tensor Tensor_min_all(Tensor self); + +/** + * @brief Find minimum values and indices along a specific dimension + * @param self The tensor + * @param dim The dimension to reduce + * @return TensorMaxMinResult containing values and indices + */ TensorMaxMinResult Tensor_min_dim(Tensor self, int dim); +/** + * @brief Find indices of maximum values + * @param self The tensor + * @param out Output array to store indices + */ void Tensor_argmax(Tensor self, int* out); /* Neural Networks */ + +/** + * @brief Element-wise natural logarithm + * @param self The tensor + * @return Result of ln(self) + */ Tensor nn_log(Tensor self); + +/** + * @brief Element-wise exponential function + * @param self The tensor + * @return Result of exp(self) + */ Tensor nn_exp(Tensor self); +/** + * @brief Element-wise sine function + * @param self The tensor + * @return Result of sin(self) + */ Tensor nn_sin(Tensor self); + +/** + * @brief Element-wise cosine function + * @param self The tensor + * @return Result of cos(self) + */ Tensor nn_cos(Tensor self); + +/** + * @brief Element-wise tangent function + * @param self The tensor + * @return Result of tan(self) + */ Tensor nn_tan(Tensor self); +/** + * @brief Linear transformation (fully connected layer) + * @param input Input tensor [batch_size, in_features] + * @param weight Weight tensor [in_features, out_features] + * @param bias Bias tensor [out_features] + * @return Result of input @ weight + bias + */ Tensor nn_linear(Tensor input, Tensor weight, Tensor bias); + +/** + * @brief ReLU activation function + * @param input The input tensor + * @return Result of max(0, input) + */ Tensor nn_relu(Tensor input); + +/** + * @brief Sigmoid activation function + * @param input The input tensor + * @return Result of 1 / (1 + exp(-input)) + */ Tensor nn_sigmoid(Tensor input); + +/** + * @brief Hyperbolic tangent activation function + * @param input The input tensor + * @return Result of tanh(input) + */ Tensor nn_tanh(Tensor input); + +/** + * @brief ELU (Exponential Linear Unit) activation function + * @param self The input tensor + * @param alpha ELU parameter (typically 1.0) + * @return Result of ELU(self, alpha) + */ Tensor nn_elu(Tensor self, float alpha); + +/** + * @brief SELU (Scaled Exponential Linear Unit) activation function + * @param self The input tensor + * @return Result of SELU(self) + */ Tensor nn_selu(Tensor self); + +/** + * @brief Softmax function along a specified dimension + * @param input The input tensor + * @param dim The dimension to apply softmax + * @return Softmax probabilities + */ Tensor nn_softmax(Tensor input, int dim); + +/** + * @brief Initialize tensor with Glorot/Xavier initialization + * @param shape The tensor shape (typically [fan_in, fan_out]) + * @param requires_grad Whether to track gradients + * @return Tensor 
initialized with Glorot distribution + */ Tensor Glorot_init(TensorShape shape, bool requires_grad); + +/** + * @brief Cross-entropy loss function + * @param y_true True labels (one-hot encoded) [batch_size, num_classes] + * @param y_pred Predicted probabilities [batch_size, num_classes] + * @return Scalar tensor containing the cross-entropy loss + */ Tensor nn_crossentropy(Tensor y_true, Tensor y_pred); + +/** + * @brief Softmax followed by cross-entropy loss (numerically stable) + * @param y_true True labels (one-hot encoded) [batch_size, num_classes] + * @param logits Raw logits [batch_size, num_classes] + * @return Scalar tensor containing the softmax cross-entropy loss + */ Tensor nn_softmax_crossentropy(Tensor y_true, Tensor logits); + +/** + * @brief Mean Squared Error loss function + * @param y_true True values + * @param y_pred Predicted values + * @return Scalar tensor containing MSE loss + */ Tensor nn_mse_loss(Tensor y_true, Tensor y_pred); + +/** + * @brief Mean Absolute Error loss function + * @param y_true True values + * @param y_pred Predicted values + * @return Scalar tensor containing MAE loss + */ Tensor nn_mae_loss(Tensor y_true, Tensor y_pred); + +/** + * @brief Huber loss function (smooth L1 loss) + * @param y_true True values + * @param y_pred Predicted values + * @param delta Threshold parameter + * @return Scalar tensor containing Huber loss + */ Tensor nn_huber_loss(Tensor y_true, Tensor y_pred, float delta); /* Memory Management */ + +/** @brief Pool identifier type for memory management */ typedef int64_t PoolId; +/** + * @brief Begin memory allocation in a specific pool + * @param id Pool identifier + * @details All subsequent tensor allocations will be assigned to this pool + */ void cten_begin_malloc(PoolId id); + +/** + * @brief End the current memory allocation pool + * @details Returns to the previous pool in the stack + */ void cten_end_malloc(); + +/** + * @brief Free all tensors allocated in a specific pool + * @param id Pool identifier to free + */ void cten_free(PoolId id); /* Optimizer */ + +/** @brief SGD optimizer structure */ typedef struct optim_sgd optim_sgd; +/** @brief AdaGrad optimizer structure */ typedef struct optim_adagrad optim_adagrad; +/** @brief RMSprop optimizer structure */ typedef struct optim_rmsprop optim_rmsprop; +/** @brief Adam optimizer structure */ typedef struct optim_adam optim_adam; // SGD + +/** + * @brief Create new SGD optimizer + * @param n_params Number of parameter tensors + * @param params Array of parameter tensors to optimize + * @param weight_decay L2 regularization coefficient + * @return Pointer to SGD optimizer instance + */ optim_sgd* optim_sgd_new(int n_params, Tensor* params, float weight_decay); + +/** + * @brief Configure SGD optimizer parameters + * @param self SGD optimizer instance + * @param lr Learning rate + * @param momentum Momentum coefficient + */ void optim_sgd_config(optim_sgd* self, float lr, float momentum); + +/** + * @brief Zero out all gradients + * @param self SGD optimizer instance + */ void optim_sgd_zerograd(optim_sgd* self); + +/** + * @brief Perform one optimization step + * @param self SGD optimizer instance + */ void optim_sgd_step(optim_sgd* self); // AdaGrad + +/** + * @brief Create new AdaGrad optimizer + * @param n_params Number of parameter tensors + * @param params Array of parameter tensors to optimize + * @param lr Learning rate + * @param ε Small constant for numerical stability + * @param weight_decay L2 regularization coefficient + * @return Pointer to AdaGrad 
optimizer instance + */ optim_adagrad* optim_adagrad_new(int n_params, Tensor* params, float lr, float ε, float weight_decay); + +/** + * @brief Zero out all gradients + * @param self AdaGrad optimizer instance + */ void optim_adagrad_zerograd(optim_adagrad* self); + +/** + * @brief Perform one optimization step + * @param self AdaGrad optimizer instance + */ void optim_adagrad_step(optim_adagrad* self); // RMSProp + +/** + * @brief Create new RMSProp optimizer + * @param n_params Number of parameter tensors + * @param params Array of parameter tensors to optimize + * @param lr Learning rate + * @param β Decay rate for moving average + * @param ε Small constant for numerical stability + * @param weight_decay L2 regularization coefficient + * @return Pointer to RMSProp optimizer instance + */ optim_rmsprop* optim_rmsprop_new(int n_params, Tensor* params, float lr, float β, float ε, float weight_decay); + +/** + * @brief Zero out all gradients + * @param self RMSProp optimizer instance + */ void optim_rmsprop_zerograd(optim_rmsprop* self); + +/** + * @brief Perform one optimization step + * @param self RMSProp optimizer instance + */ void optim_rmsprop_step(optim_rmsprop* self); // Adam + +/** + * @brief Create new Adam optimizer + * @param n_params Number of parameter tensors + * @param params Array of parameter tensors to optimize + * @param lr Learning rate + * @param β1 Exponential decay rate for first moment estimates + * @param β2 Exponential decay rate for second moment estimates + * @param ε Small constant for numerical stability + * @param weight_decay L2 regularization coefficient + * @return Pointer to Adam optimizer instance + */ optim_adam* optim_adam_new(int n_params, Tensor* params, float lr, @@ -159,42 +656,195 @@ optim_adam* optim_adam_new(int n_params, float β2, float ε, float weight_decay); + +/** + * @brief Zero out all gradients + * @param self Adam optimizer instance + */ void optim_adam_zerograd(optim_adam* self); + +/** + * @brief Perform one optimization step + * @param self Adam optimizer instance + */ void optim_adam_step(optim_adam* self); /* Gradient Clipping */ + +/** + * @brief Clip gradients by global norm + * @param params Array of parameter tensors + * @param n_params Number of parameters + * @param max_norm Maximum allowed gradient norm + */ void cten_clip_grad_norm(Tensor* params, int n_params, float max_norm); + +/** + * @brief Clip gradients by absolute value + * @param params Array of parameter tensors + * @param n_params Number of parameters + * @param max_value Maximum absolute value for gradients + */ void cten_clip_grad_value(Tensor* params, int n_params, float max_value); + +/** + * @brief Clip gradients to a value range + * @param params Array of parameter tensors + * @param n_params Number of parameters + * @param min_value Minimum gradient value + * @param max_value Maximum gradient value + */ void cten_clip_grad_value_range(Tensor* params, int n_params, float min_value, float max_value); + +/** + * @brief Clip positive gradients to maximum value + * @param params Array of parameter tensors + * @param n_params Number of parameters + * @param max_value Maximum value for positive gradients + */ void cten_clip_grad_positive(Tensor* params, int n_params, float max_value); + +/** + * @brief Clip negative gradients to minimum value + * @param params Array of parameter tensors + * @param n_params Number of parameters + * @param min_value Minimum value for negative gradients + */ void cten_clip_grad_negative(Tensor* params, int n_params, float 
min_value); /* Misc */ + +/** + * @brief Enter evaluation mode (disables gradient computation) + * @details Gradients will not be computed for operations in eval mode + */ void cten_begin_eval(); + +/** + * @brief Check if currently in evaluation mode + * @return true if in evaluation mode, false otherwise + */ bool cten_is_eval(); + +/** + * @brief Exit evaluation mode (re-enables gradient computation) + */ void cten_end_eval(); + +/** + * @brief Check if variadic argument is present (utility function) + * @param args Variadic argument list + * @return Always returns false (placeholder implementation) + */ bool va_arg_is_present(va_list args); /* Utils */ + +/** + * @brief Normalize dataset using training statistics + * @param X Input dataset [n_samples][n_features] + * @param X_norm Output normalized dataset [n_samples][n_features] + * @param n_samples Total number of samples + * @param n_train_samples Number of training samples (used for computing stats) + * @param n_features Number of features + * @details Computes mean and std from training samples, applies to all samples + */ void Tensor_normalize_dataset(const float (*X)[4], float (*X_norm)[4], int n_samples, int n_train_samples, int n_features); + +/** + * @brief Detach tensor from computation graph + * @param self The tensor to detach + * @return New tensor with same data but no gradient tracking + * @details Creates a copy that doesn't participate in backpropagation + */ Tensor Tensor_detach(Tensor self); + +/** + * @brief Shuffle dataset randomly + * @param X Input features [n_samples][n_features] + * @param y Input labels [n_samples] + * @param X_shuffled Output shuffled features [n_samples][n_features] + * @param y_shuffled Output shuffled labels [n_samples] + * @param n_samples Number of samples + * @param n_features Number of features + */ void Tensor_shuffle_dataset(const float (*X)[4], const int* y, float (*X_shuffled)[4], int* y_shuffled, int n_samples, int n_features); + +/** + * @brief Assert condition with formatted message + * @param cond Condition to check + * @param fmt Format string for error message + * @param ... 
Format arguments + */ void cten_assert(bool cond, const char* fmt, ...); + +/** + * @brief Assert that two tensor shapes are equal + * @param title Description for assertion + * @param a First tensor shape + * @param b Second tensor shape + */ void cten_assert_shape(const char* title, TensorShape a, TensorShape b); + +/** + * @brief Assert that two dimensions are equal + * @param title Description for assertion + * @param a First dimension + * @param b Second dimension + */ void cten_assert_dim(const char* title, int a, int b); + +/** + * @brief Perform element-wise broadcasting on two tensors + * @param a Pointer to first tensor (modified in-place if broadcasting needed) + * @param b Pointer to second tensor (modified in-place if broadcasting needed) + * @return true if broadcasting successful, false if incompatible shapes + * @details Modifies tensors in-place to have compatible shapes for element-wise ops + */ bool cten_elemwise_broadcast(Tensor* a, Tensor* b); + +/** + * @brief Load the Iris dataset + * @param X Pointer to receive features array [150][4] + * @param y Pointer to receive labels array [150] + * @return Number of samples loaded (150 for Iris) + */ int load_iris_dataset(const float (**X)[4], const int** y); + +/** + * @brief Reduce tensor along a dimension using specified operation + * @param self The input tensor + * @param dim The dimension to reduce + * @param operation Operation name ("sum", "mean", etc.) + * @return Tensor with reduced dimension + * @details Internal function used by reduction operations + */ Tensor Tensor_reduce_dim(Tensor self, int dim, const char* operation); + +/** + * @brief Reduce gradient tensor to match original shape after broadcasting + * @param grad The gradient tensor + * @param original_shape The original tensor shape before broadcasting + * @param broadcasted_shape The shape after broadcasting + * @return Reduced gradient tensor matching original shape + */ Tensor reduce_gradient_for_broadcasting(Tensor grad, TensorShape original_shape, TensorShape broadcasted_shape); + +/** + * @brief Add a singleton dimension at specified position + * @param self The input tensor + * @param dim Position to insert new dimension + * @return Tensor with added dimension of size 1 + */ Tensor Tensor_unsqueeze(Tensor self, int dim); \ No newline at end of file From 095192187d8cee56e7c6d69e43eca4c8782fd50d Mon Sep 17 00:00:00 2001 From: Advaitgaur004 Date: Wed, 20 Aug 2025 14:51:47 +0530 Subject: [PATCH 3/5] api reference added --- API.md | 771 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 3 + 2 files changed, 774 insertions(+) create mode 100644 API.md diff --git a/API.md b/API.md new file mode 100644 index 0000000..6e0bd73 --- /dev/null +++ b/API.md @@ -0,0 +1,771 @@ +# cTensor API Reference + +This document provides a detailed API reference for the cTensor library, a lightweight tensor library for C with automatic differentiation. + +## Table of Contents + +1. [Core Data Structures](#core-data-structures) +2. [Library Initialization & Finalization](#library-initialization--finalization) +3. [TensorShape Utilities](#tensorshape-utilities) +4. [Tensor Creation & Management](#tensor-creation--management) +5. [Tensor Operations](#tensor-operations) + * [Element-wise Arithmetic](#element-wise-arithmetic) + * [Matrix & Unary Operations](#matrix--unary-operations) + * [Reduction Operations](#reduction-operations) +6. 
[Neural Network Functions](#neural-network-functions) + * [Layers & Initializers](#layers--initializers) + * [Activation Functions](#activation-functions) + * [Loss Functions](#loss-functions) + * [Mathematical Functions](#mathematical-functions) +7. [Automatic Differentiation](#automatic-differentiation) +8. [Optimizers](#optimizers) + * [SGD (Stochastic Gradient Descent)](#sgd-stochastic-gradient-descent) + * [AdaGrad](#adagrad) + * [RMSprop](#rmsprop) + * [Adam](#adam) +9. [Gradient Clipping](#gradient-clipping) +10. [Memory Management](#memory-management) +11. [Utilities & Miscellaneous](#utilities--miscellaneous) + +----- + +## Core Data Structures + +These are the fundamental data types used throughout the cTensor library. + +### `TensorShape` + +A type definition for tensor shapes, supporting up to 4 dimensions. + +```c +typedef int TensorShape[4]; +``` + +----- + +### `FloatBuffer` + +A structure storing the raw tensor data. + +```c +typedef struct FloatBuffer { + int numel; /**< Number of elements in the buffer */ + float flex[]; /**< Flexible array member containing the actual data */ +} FloatBuffer; +``` + +----- + +### `Tensor` + +The main tensor structure, containing its shape, data, and a node for gradient computation. + +```c +typedef struct Tensor { + TensorShape shape; /**< Tensor dimensions [dim0, dim1, dim2, dim3] */ + FloatBuffer* data; /**< Pointer to data buffer */ + GradNode* node; /**< Gradient computation node (NULL if no gradients) */ +} Tensor; +``` + +----- + +### `GradNode` + +A node in the computation graph used for automatic differentiation. + +```c +typedef struct GradNode { + struct Tensor grad; + struct Tensor (*grad_fn)(struct Tensor self, int i); + struct Tensor inputs[4]; + int n_inputs; + const char* name; + int params[4]; +} GradNode; +``` + +**Fields:** + + * `grad`: The accumulated gradient for the tensor associated with this node. + * `grad_fn`: A function pointer to the gradient function used in backpropagation. + * `inputs`: An array of input tensors that produced the current tensor. + * `n_inputs`: The number of input tensors. + * `name`: The name of the operation for debugging. + * `params`: Additional integer parameters required by the operation. + +----- + +### `TensorMaxMinResult` + +A structure to hold the results of `max` or `min` operations along a dimension. + +```c +typedef struct { + Tensor values; /**< Maximum/minimum values */ + Tensor indices; /**< Indices of maximum/minimum values */ +} TensorMaxMinResult; +``` + +----- + +## Library Initialization & Finalization + +### `cten_initilize` + +Initializes the CTensor library and its internal memory management system. **Must be called before any other CTensor function.** + +```c +void cten_initilize(); +``` + +----- + +### `cten_finalize` + +Frees all allocated memory and cleans up internal library structures. Should be called when finished using CTensor. + +```c +void cten_finalize(); +``` + +----- + +## TensorShape Utilities + +Functions for working with `TensorShape` types. + +### `TensorShape_numel` + +Calculates the total number of elements in a tensor shape (product of dimensions). + +```c +int TensorShape_numel(TensorShape shape); +``` + +----- + +### `TensorShape_dim` + +Gets the number of dimensions in a tensor shape (number of non-zero dimensions). + +```c +int TensorShape_dim(TensorShape shape); +``` + +----- + +### `TensorShape_asdim` + +Normalizes a dimension index to handle negative indices (e.g., -1 for the last dimension). 
+ +```c +int TensorShape_asdim(TensorShape shape, int dim); +``` + +----- + +### `TensorShape_tostring` + +Converts a tensor shape to its string representation. + +```c +int TensorShape_tostring(TensorShape shape, char* buf, int size); +``` + +----- + +## Tensor Creation & Management + +Functions for creating and manipulating `Tensor` objects. + +### `Tensor_new` + +Creates a new tensor with **uninitialized data**. + +```c +Tensor Tensor_new(TensorShape shape, bool requires_grad); +``` + +----- + +### `Tensor_zeros` + +Creates a new tensor filled with **zeros**. + +```c +Tensor Tensor_zeros(TensorShape shape, bool requires_grad); +``` + +----- + +### `Tensor_ones` + +Creates a new tensor filled with **ones**. + +```c +Tensor Tensor_ones(TensorShape shape, bool requires_grad); +``` + +----- + +### `Tensor_detach` + +**Detaches a tensor from the computation graph.** The new tensor shares the same data but does not require gradients. + +```c +Tensor Tensor_detach(Tensor self); +``` + +----- + +### `Tensor_unsqueeze` + +Adds a singleton dimension (a dimension of size 1) at a specified position. + +```c +Tensor Tensor_unsqueeze(Tensor self, int dim); +``` + +----- + +### `Tensor_get` + +Gets the element value at the specified indices. + +```c +float Tensor_get(Tensor self, int i, int j, int k, int l); +``` + +----- + +### `Tensor_set` + +Sets the element value at the specified indices. + +```c +void Tensor_set(Tensor self, int i, int j, int k, int l, float value); +``` + +----- + +### `Tensor_print` + +Prints the contents of a tensor to `stdout`. + +```c +void Tensor_print(Tensor self); +``` + +----- + +## Tensor Operations + +### Element-wise Arithmetic + +These functions perform element-wise arithmetic. They support **broadcasting** to handle operands with different but compatible shapes. + +| Function | Description | +|---|---| +| `Tensor_add(a, b)` | Adds two tensors. | +| `Tensor_sub(a, b)` | Subtracts tensor `b` from `a`. | +| `Tensor_mul(a, b)` | Multiplies two tensors. | +| `Tensor_div(a, b)` | Divides tensor `a` by `b`. | +| `Tensor_pow(a, b)` | Raises tensor `a` to the power of `b`. | +| `Tensor_addf(a, s)` | Adds a scalar `s` to a tensor. | +| `Tensor_subf(a, s)` | Subtracts a scalar `s` from a tensor. | +| `Tensor_mulf(a, s)` | Multiplies a tensor by a scalar `s`. | +| `Tensor_divf(a, s)` | Divides a tensor by a scalar `s`. | +| `Tensor_powf(a, s)` | Raises a tensor to the power of a scalar `s`. | + +----- + +### Matrix & Unary Operations + +### `Tensor_matmul` + +Performs **matrix multiplication** of two tensors. + +```c +Tensor Tensor_matmul(Tensor self, Tensor other); +``` + +----- + +### `Tensor_transpose` + +Transposes a 2D tensor. + +```c +Tensor Tensor_transpose(Tensor self); +``` + +----- + +### `Tensor_neg` + +Performs element-wise negation (`-self`). + +```c +Tensor Tensor_neg(Tensor self); +``` + +----- + +### `Tensor_abs` + +Computes the element-wise absolute value (`|self|`). + +```c +Tensor Tensor_abs(Tensor self); +``` + +----- + +### `Tensor_square` + +Computes the element-wise square (`self^2`). + +```c +Tensor Tensor_square(Tensor self); +``` + +----- + +### `Tensor_reciprocal` + +Computes the element-wise reciprocal (`1/self`). + +```c +Tensor Tensor_reciprocal(Tensor self); +``` + +----- + +### Reduction Operations + +These operations reduce a tensor to a single value or along a specified dimension. They are exposed via macros for a simpler API. 
+ +#### Sum + +**Usage:** + +```c +// Sum of all elements (returns a scalar tensor) +Tensor sum_all = Tensor_sum(my_tensor); + +// Sum along dimension 1 (returns a tensor with the dimension removed) +Tensor sum_dim = Tensor_sum(my_tensor, 1); +``` + +## **Underlying Functions:** `Tensor_sum_all(Tensor self)`, `Tensor_sum_dim(Tensor self, int dim)` + +#### Mean + +**Usage:** + +```c +// Mean of all elements +Tensor mean_all = Tensor_mean(my_tensor); + +// Mean along dimension 1 +Tensor mean_dim = Tensor_mean(my_tensor, 1); +``` + +## **Underlying Functions:** `Tensor_mean_all(Tensor self)`, `Tensor_mean_dim(Tensor self, int dim)` + +#### Max + +**Usage:** + +```c +// Max of all elements +Tensor max_val = Tensor_max(my_tensor); + +// Max along dimension 1 (returns values and indices) +TensorMaxMinResult max_res = Tensor_max(my_tensor, 1); +Tensor max_vals = max_res.values; +Tensor max_indices = max_res.indices; +``` + +## **Underlying Functions:** `Tensor_max_all(Tensor self)`, `TensorMaxMinResult Tensor_max_dim(Tensor self, int dim)` + +#### Min + +**Usage:** + +```c +// Min of all elements +Tensor min_val = Tensor_min(my_tensor); + +// Min along dimension 1 (returns values and indices) +TensorMaxMinResult min_res = Tensor_min(my_tensor, 1); +``` + +## **Underlying Functions:** `Tensor_min_all(Tensor self)`, `TensorMaxMinResult Tensor_min_dim(Tensor self, int dim)` + +#### Argmax + +### `Tensor_argmax` + +Finds the indices of the maximum values along the last dimension. + +```c +void Tensor_argmax(Tensor self, int* out); +``` + +----- + +## Neural Network Functions + +### Layers & Initializers + +### `nn_linear` + +Applies a linear transformation (`input @ weight + bias`). + +```c +Tensor nn_linear(Tensor input, Tensor weight, Tensor bias); +``` + +----- + +### `Glorot_init` + +Initializes a tensor with weights sampled from a **Glorot (Xavier)** uniform distribution. + +```c +Tensor Glorot_init(TensorShape shape, bool requires_grad); +``` + +----- + +### Activation Functions + +| Function | Description | +|---|---| +| `nn_relu(input)` | Rectified Linear Unit: `max(0, input)`. | +| `nn_sigmoid(input)` | Sigmoid: `1 / (1 + exp(-input))`. | +| `nn_tanh(input)` | Hyperbolic Tangent. | +| `nn_elu(self, alpha)` | Exponential Linear Unit. | +| `nn_selu(self)` | Scaled Exponential Linear Unit. | +| `nn_softmax(input, dim)` | Softmax function along a specified dimension. | + +----- + +### Loss Functions + +### `nn_crossentropy` + +Computes the **cross-entropy loss** between true labels and predicted probabilities. + +```c +Tensor nn_crossentropy(Tensor y_true, Tensor y_pred); +``` + +----- + +### `nn_softmax_crossentropy` + +A numerically stable combination of Softmax and Cross-Entropy loss. + +```c +Tensor nn_softmax_crossentropy(Tensor y_true, Tensor logits); +``` + +----- + +### `nn_mse_loss` + +Computes the **Mean Squared Error** loss. + +```c +Tensor nn_mse_loss(Tensor y_true, Tensor y_pred); +``` + +----- + +### `nn_mae_loss` + +Computes the **Mean Absolute Error** loss. + +```c +Tensor nn_mae_loss(Tensor y_true, Tensor y_pred); +``` + +----- + +### `nn_huber_loss` + +Computes the **Huber loss** (a smooth L1 loss). + +```c +Tensor nn_huber_loss(Tensor y_true, Tensor y_pred, float delta); +``` + +----- + +### Mathematical Functions + +| Function | Description | +|---|---| +| `nn_log(self)` | Element-wise natural logarithm. | +| `nn_exp(self)` | Element-wise exponential function (`e^x`). | +| `nn_sin(self)` | Element-wise sine. | +| `nn_cos(self)` | Element-wise cosine. 
| +| `nn_tan(self)` | Element-wise tangent. | + +----- + +## Automatic Differentiation + +### `Tensor_backward` + +Performs the **backward pass (backpropagation)** from this tensor, computing gradients for all tensors in its computation graph that have `requires_grad=true`. + +```c +void Tensor_backward(Tensor self, Tensor grad); +``` + +**Parameters:** + + * `self`: The tensor to start the backpropagation from (often the final loss). + * `grad`: The initial gradient to propagate. For a scalar loss, this is typically a tensor containing the value `1.0`. + +----- + +### `Tensor_backward_apply` + +Applies a function to all tensors visited during a backward pass. + +```c +int Tensor_backward_apply(Tensor self, void (*f)(Tensor, void*), void* ctx); +``` + +----- + +## Optimizers + +### SGD (Stochastic Gradient Descent) + +```c +// Create a new SGD optimizer +optim_sgd* optim_sgd_new(int n_params, Tensor* params, float weight_decay); + +// Configure learning rate and momentum +void optim_sgd_config(optim_sgd* self, float lr, float momentum); + +// Zero out the gradients of all managed parameters +void optim_sgd_zerograd(optim_sgd* self); + +// Perform one optimization step +void optim_sgd_step(optim_sgd* self); +``` + +----- + +### AdaGrad + +```c +optim_adagrad* optim_adagrad_new(int n_params, Tensor* params, float lr, float ε, float weight_decay); +void optim_adagrad_zerograd(optim_adagrad* self); +void optim_adagrad_step(optim_adagrad* self); +``` + +----- + +### RMSprop + +```c +optim_rmsprop* optim_rmsprop_new(int n_params, Tensor* params, float lr, float β, float ε, float weight_decay); +void optim_rmsprop_zerograd(optim_rmsprop* self); +void optim_rmsprop_step(optim_rmsprop* self); +``` + +----- + +### Adam + +```c +optim_adam* optim_adam_new(int n_params, Tensor* params, float lr, float β1, float β2, float ε, float weight_decay); +void optim_adam_zerograd(optim_adam* self); +void optim_adam_step(optim_adam* self); +``` + +----- + +## Gradient Clipping + +Functions to prevent exploding gradients during training. + +### `cten_clip_grad_norm` + +Clips the gradients of a set of parameters by their **global L2 norm**. + +```c +void cten_clip_grad_norm(Tensor* params, int n_params, float max_norm); +``` + +----- + +### `cten_clip_grad_value` + +Clips gradients element-wise to a **maximum absolute value**. + +```c +void cten_clip_grad_value(Tensor* params, int n_params, float max_value); +``` + +----- + +### `cten_clip_grad_value_range` + +Clips gradients element-wise to be within `[min_value, max_value]`. + +```c +void cten_clip_grad_value_range(Tensor* params, int n_params, float min_value, float max_value); +``` + +----- + +## Memory Management + +cTensor uses a pool-based memory allocator to manage tensor memory, which is especially useful for controlling memory usage during different phases like training epochs. + +### `cten_begin_malloc` + +Begins a new memory allocation pool. All subsequent tensor allocations will be associated with this pool ID. + +```c +void cten_begin_malloc(PoolId id); +``` + +----- + +### `cten_end_malloc` + +Ends the current memory allocation pool, returning to the previous one in the stack. + +```c +void cten_end_malloc(); +``` + +----- + +### `cten_free` + +Frees **all** tensors that were allocated in the specified pool. + +```c +void cten_free(PoolId id); +``` + +----- + +## Utilities & Miscellaneous + +### Evaluation Mode + +Disables gradient computation globally, useful for inference or validation. + +### `cten_begin_eval` + +Enters evaluation mode. 
+ +```c +void cten_begin_eval(); +``` + +----- + +### `cten_is_eval` + +Checks if the library is currently in evaluation mode. + +```c +bool cten_is_eval(); +``` + +----- + +### `cten_end_eval` + +Exits evaluation mode, re-enabling gradient computation. + +```c +void cten_end_eval(); +``` + +----- + +### Dataset Helpers + +### `load_iris_dataset` + +Loads the built-in Iris dataset. + +```c +int load_iris_dataset(const float (**X)[4], const int** y); +``` + +----- + +### `Tensor_normalize_dataset` + +Normalizes a dataset using the mean and standard deviation from its training split. + +```c +void Tensor_normalize_dataset(const float (*X)[4], float (*X_norm)[4], int n_samples, int n_train_samples, int n_features); +``` + +----- + +### `Tensor_shuffle_dataset` + +Randomly shuffles a dataset (features and labels together). + +```c +void Tensor_shuffle_dataset(const float (*X)[4], const int* y, float (*X_shuffled)[4], int* y_shuffled, int n_samples, int n_features); +``` + +----- + +### Assertions & Broadcasting + +### `cten_assert` + +Asserts that a condition is true, otherwise prints a formatted error message and exits. + +```c +void cten_assert(bool cond, const char* fmt, ...); +``` + +----- + +### `cten_assert_shape` + +Asserts that two tensor shapes are equal. + +```c +void cten_assert_shape(const char* title, TensorShape a, TensorShape b); +``` + +----- + +### `cten_assert_dim` + +Asserts that two dimension sizes are equal. + +```c +void cten_assert_dim(const char* title, int a, int b); +``` + +----- + +### `cten_elemwise_broadcast` + +Internal function to perform broadcasting on two tensors for element-wise operations. + +```c +bool cten_elemwise_broadcast(Tensor* a, Tensor* b); +``` \ No newline at end of file diff --git a/README.md b/README.md index 4161bc2..0baeb33 100644 --- a/README.md +++ b/README.md @@ -317,6 +317,9 @@ cTensor/ │ └── main.c # Iris dataset example └── tests/ # Test suite ``` +## API Reference + +For a detailed API reference, refer to [API Documentation](API.md). ## Contributing From 7753f79e2a0c81aee55c737f6d82366c887c996c Mon Sep 17 00:00:00 2001 From: Advaitgaur004 Date: Wed, 20 Aug 2025 18:33:08 +0530 Subject: [PATCH 4/5] minor change in heading --- API.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/API.md b/API.md index 6e0bd73..e6e9bfe 100644 --- a/API.md +++ b/API.md @@ -346,7 +346,7 @@ Tensor Tensor_reciprocal(Tensor self); These operations reduce a tensor to a single value or along a specified dimension. They are exposed via macros for a simpler API. 
-#### Sum +### Sum **Usage:** @@ -358,9 +358,9 @@ Tensor sum_all = Tensor_sum(my_tensor); Tensor sum_dim = Tensor_sum(my_tensor, 1); ``` -## **Underlying Functions:** `Tensor_sum_all(Tensor self)`, `Tensor_sum_dim(Tensor self, int dim)` +#### **Underlying Functions:** `Tensor_sum_all(Tensor self)`, `Tensor_sum_dim(Tensor self, int dim)` -#### Mean +### Mean **Usage:** @@ -372,9 +372,9 @@ Tensor mean_all = Tensor_mean(my_tensor); Tensor mean_dim = Tensor_mean(my_tensor, 1); ``` -## **Underlying Functions:** `Tensor_mean_all(Tensor self)`, `Tensor_mean_dim(Tensor self, int dim)` +#### **Underlying Functions:** `Tensor_mean_all(Tensor self)`, `Tensor_mean_dim(Tensor self, int dim)` -#### Max +### Max **Usage:** @@ -388,9 +388,9 @@ Tensor max_vals = max_res.values; Tensor max_indices = max_res.indices; ``` -## **Underlying Functions:** `Tensor_max_all(Tensor self)`, `TensorMaxMinResult Tensor_max_dim(Tensor self, int dim)` +#### **Underlying Functions:** `Tensor_max_all(Tensor self)`, `TensorMaxMinResult Tensor_max_dim(Tensor self, int dim)` -#### Min +### Min **Usage:** @@ -402,9 +402,9 @@ Tensor min_val = Tensor_min(my_tensor); TensorMaxMinResult min_res = Tensor_min(my_tensor, 1); ``` -## **Underlying Functions:** `Tensor_min_all(Tensor self)`, `TensorMaxMinResult Tensor_min_dim(Tensor self, int dim)` +#### **Underlying Functions:** `Tensor_min_all(Tensor self)`, `TensorMaxMinResult Tensor_min_dim(Tensor self, int dim)` -#### Argmax +### Argmax ### `Tensor_argmax` From ba6f4698eed3b00efdf5c1dd0f09e8a2f0485bf3 Mon Sep 17 00:00:00 2001 From: Advaitgaur004 Date: Wed, 20 Aug 2025 18:35:16 +0530 Subject: [PATCH 5/5] better representation of max,min,sum and mean --- API.md | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/API.md b/API.md index e6e9bfe..de69dca 100644 --- a/API.md +++ b/API.md @@ -346,7 +346,7 @@ Tensor Tensor_reciprocal(Tensor self); These operations reduce a tensor to a single value or along a specified dimension. They are exposed via macros for a simpler API. -### Sum +### `Tensor_sum` **Usage:** @@ -360,7 +360,7 @@ Tensor sum_dim = Tensor_sum(my_tensor, 1); #### **Underlying Functions:** `Tensor_sum_all(Tensor self)`, `Tensor_sum_dim(Tensor self, int dim)` -### Mean +### `Tensor_mean` **Usage:** @@ -374,7 +374,7 @@ Tensor mean_dim = Tensor_mean(my_tensor, 1); #### **Underlying Functions:** `Tensor_mean_all(Tensor self)`, `Tensor_mean_dim(Tensor self, int dim)` -### Max +### `Tensor_max` **Usage:** @@ -390,7 +390,7 @@ Tensor max_indices = max_res.indices; #### **Underlying Functions:** `Tensor_max_all(Tensor self)`, `TensorMaxMinResult Tensor_max_dim(Tensor self, int dim)` -### Min +### `Tensor_min` **Usage:** @@ -404,8 +404,6 @@ TensorMaxMinResult min_res = Tensor_min(my_tensor, 1); #### **Underlying Functions:** `Tensor_min_all(Tensor self)`, `TensorMaxMinResult Tensor_min_dim(Tensor self, int dim)` -### Argmax - ### `Tensor_argmax` Finds the indices of the maximum values along the last dimension.
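+
+A usage sketch tying the pieces above together (the `out` buffer sizing
+— one index per row of a `[batch, classes]` tensor — and the pool id are
+assumptions for illustration, not part of the documented contract):
+
+```c
+#include "cten.h"
+
+int main(void) {
+    cten_initilize();
+    cten_begin_malloc((PoolId)0);      /* arbitrary pool id */
+
+    /* assumed layout: [batch=4, classes=3]; trailing dims left 0 */
+    Tensor logits = Tensor_ones((TensorShape){4, 3, 0, 0}, false);
+    int pred[4];                       /* assumed: one index per row */
+    Tensor_argmax(logits, pred);       /* pred[i] = argmax of row i */
+
+    cten_end_malloc();
+    cten_free((PoolId)0);
+    cten_finalize();
+    return 0;
+}
+```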