From 41492cab68fbebd8e2bb86e57ba9df1a41296253 Mon Sep 17 00:00:00 2001
From: a-parida12
Date: Mon, 8 May 2023 13:38:02 -0400
Subject: [PATCH 1/2] fix(unetr,vitautoenc): access the attn mat

---
 monai/networks/nets/unetr.py      | 31 ++++++++++++++++-------------
 monai/networks/nets/vitautoenc.py | 33 +++++++++++++++++++------------
 2 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/monai/networks/nets/unetr.py b/monai/networks/nets/unetr.py
index 7ad12daa89..ed47119ee6 100644
--- a/monai/networks/nets/unetr.py
+++ b/monai/networks/nets/unetr.py
@@ -43,23 +43,25 @@ def __init__(
         dropout_rate: float = 0.0,
         spatial_dims: int = 3,
         qkv_bias: bool = False,
+        save_attn: bool = False,
     ) -> None:
         """
         Args:
-            in_channels: dimension of input channels.
-            out_channels: dimension of output channels.
-            img_size: dimension of input image.
-            feature_size: dimension of network feature size.
-            hidden_size: dimension of hidden layer.
-            mlp_dim: dimension of feedforward layer.
-            num_heads: number of attention heads.
-            pos_embed: position embedding layer type.
-            norm_name: feature normalization type and arguments.
-            conv_block: bool argument to determine if convolutional block is used.
-            res_block: bool argument to determine if residual block is used.
-            dropout_rate: faction of the input units to drop.
-            spatial_dims: number of spatial dims.
-            qkv_bias: apply the bias term for the qkv linear layer in self attention block
+            in_channels (int): dimension of input channels.
+            out_channels (int): dimension of output channels.
+            img_size (Sequence[int] | int): dimension of input image.
+            feature_size (int, optional): dimension of network feature size. Defaults to 16.
+            hidden_size (int, optional): dimension of hidden layer. Defaults to 768.
+            mlp_dim (int, optional): dimension of feedforward layer. Defaults to 3072.
+            num_heads (int, optional): number of attention heads. Defaults to 12.
+            pos_embed (str, optional): position embedding layer type. Defaults to "conv".
+            norm_name (tuple | str, optional): feature normalization type and arguments. Defaults to "instance".
+            conv_block (bool, optional): if convolutional block is used. Defaults to True.
+            res_block (bool, optional): if residual block is used. Defaults to True.
+            dropout_rate (float, optional): fraction of the input units to drop. Defaults to 0.0.
+            spatial_dims (int, optional): number of spatial dims. Defaults to 3.
+            qkv_bias (bool, optional): apply the bias term for the qkv linear layer in self attention block. Defaults to False.
+            save_attn (bool, optional): to make accessible the attention in self attention block. Defaults to False.

         Examples::

@@ -101,6 +103,7 @@ def __init__(
             dropout_rate=dropout_rate,
             spatial_dims=spatial_dims,
             qkv_bias=qkv_bias,
+            save_attn=save_attn,
         )
         self.encoder1 = UnetrBasicBlock(
             spatial_dims=spatial_dims,
diff --git a/monai/networks/nets/vitautoenc.py b/monai/networks/nets/vitautoenc.py
index ff6f637118..e364a4a5ab 100644
--- a/monai/networks/nets/vitautoenc.py
+++ b/monai/networks/nets/vitautoenc.py
@@ -46,21 +46,25 @@ def __init__(
         pos_embed: str = "conv",
         dropout_rate: float = 0.0,
         spatial_dims: int = 3,
+        qkv_bias: bool = False,
+        save_attn: bool = False,
     ) -> None:
         """
         Args:
-            in_channels: dimension of input channels or the number of channels for input
-            img_size: dimension of input image.
-            patch_size: dimension of patch size.
-            hidden_size: dimension of hidden layer.
-            out_channels: number of output channels.
-            deconv_chns: number of channels for the deconvolution layers.
-            mlp_dim: dimension of feedforward layer.
-            num_layers: number of transformer blocks.
-            num_heads: number of attention heads.
-            pos_embed: position embedding layer type.
-            dropout_rate: faction of the input units to drop.
-            spatial_dims: number of spatial dimensions.
+            in_channels (int): dimension of input channels or the number of channels for input.
+            img_size (Sequence[int] | int): dimension of input image.
+            patch_size (Sequence[int] | int): dimension of patch size
+            out_channels (int, optional): number of output channels. Defaults to 1.
+            deconv_chns (int, optional): number of channels for the deconvolution layers. Defaults to 16.
+            hidden_size (int, optional): dimension of hidden layer. Defaults to 768.
+            mlp_dim (int, optional): dimension of feedforward layer. Defaults to 3072.
+            num_layers (int, optional): number of transformer blocks. Defaults to 12.
+            num_heads (int, optional): number of attention heads. Defaults to 12.
+            pos_embed (str, optional): position embedding layer type. Defaults to "conv".
+            dropout_rate (float, optional): faction of the input units to drop. Defaults to 0.0.
+            spatial_dims (int, optional): number of spatial dimensions. Defaults to 3.
+            qkv_bias (bool, optional) : apply bias to the qkv linear layer in self attention block. Defaults to False.
+            save_attn (bool, optional): to make accessible the attention in self attention block. Defaults to False. Defaults to False.

         Examples::

@@ -89,7 +93,10 @@ def __init__(
             spatial_dims=self.spatial_dims,
         )
         self.blocks = nn.ModuleList(
-            [TransformerBlock(hidden_size, mlp_dim, num_heads, dropout_rate) for i in range(num_layers)]
+            [
+                TransformerBlock(hidden_size, mlp_dim, num_heads, dropout_rate, qkv_bias, save_attn)
+                for i in range(num_layers)
+            ]
         )
         self.norm = nn.LayerNorm(hidden_size)

From 13aea0675c763eb9320813f336ab58eb25bd11af Mon Sep 17 00:00:00 2001
From: a-parida12
Date: Thu, 11 May 2023 14:30:02 -0400
Subject: [PATCH 2/2] docs(vitautoenc,unetr): remove types in docstrings

Signed-off-by: a-parida12
---
 monai/networks/nets/unetr.py      | 30 +++++++++++++++---------------
 monai/networks/nets/vitautoenc.py | 28 ++++++++++++++--------------
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/monai/networks/nets/unetr.py b/monai/networks/nets/unetr.py
index ed47119ee6..4cdcd73c4d 100644
--- a/monai/networks/nets/unetr.py
+++ b/monai/networks/nets/unetr.py
@@ -47,21 +47,21 @@ def __init__(
     ) -> None:
         """
         Args:
-            in_channels (int): dimension of input channels.
-            out_channels (int): dimension of output channels.
-            img_size (Sequence[int] | int): dimension of input image.
-            feature_size (int, optional): dimension of network feature size. Defaults to 16.
-            hidden_size (int, optional): dimension of hidden layer. Defaults to 768.
-            mlp_dim (int, optional): dimension of feedforward layer. Defaults to 3072.
-            num_heads (int, optional): number of attention heads. Defaults to 12.
-            pos_embed (str, optional): position embedding layer type. Defaults to "conv".
-            norm_name (tuple | str, optional): feature normalization type and arguments. Defaults to "instance".
-            conv_block (bool, optional): if convolutional block is used. Defaults to True.
-            res_block (bool, optional): if residual block is used. Defaults to True.
-            dropout_rate (float, optional): fraction of the input units to drop. Defaults to 0.0.
-            spatial_dims (int, optional): number of spatial dims. Defaults to 3.
-            qkv_bias (bool, optional): apply the bias term for the qkv linear layer in self attention block. Defaults to False.
-            save_attn (bool, optional): to make accessible the attention in self attention block. Defaults to False.
+            in_channels: dimension of input channels.
+            out_channels: dimension of output channels.
+            img_size: dimension of input image.
+            feature_size: dimension of network feature size. Defaults to 16.
+            hidden_size: dimension of hidden layer. Defaults to 768.
+            mlp_dim: dimension of feedforward layer. Defaults to 3072.
+            num_heads: number of attention heads. Defaults to 12.
+            pos_embed: position embedding layer type. Defaults to "conv".
+            norm_name: feature normalization type and arguments. Defaults to "instance".
+            conv_block: if convolutional block is used. Defaults to True.
+            res_block: if residual block is used. Defaults to True.
+            dropout_rate: fraction of the input units to drop. Defaults to 0.0.
+            spatial_dims: number of spatial dims. Defaults to 3.
+            qkv_bias: apply the bias term for the qkv linear layer in self attention block. Defaults to False.
+            save_attn: whether to make the attention in the self attention block accessible. Defaults to False.

         Examples::

diff --git a/monai/networks/nets/vitautoenc.py b/monai/networks/nets/vitautoenc.py
index e364a4a5ab..6ae8135438 100644
--- a/monai/networks/nets/vitautoenc.py
+++ b/monai/networks/nets/vitautoenc.py
@@ -51,20 +51,20 @@ def __init__(
     ) -> None:
         """
         Args:
-            in_channels (int): dimension of input channels or the number of channels for input.
-            img_size (Sequence[int] | int): dimension of input image.
-            patch_size (Sequence[int] | int): dimension of patch size
-            out_channels (int, optional): number of output channels. Defaults to 1.
-            deconv_chns (int, optional): number of channels for the deconvolution layers. Defaults to 16.
-            hidden_size (int, optional): dimension of hidden layer. Defaults to 768.
-            mlp_dim (int, optional): dimension of feedforward layer. Defaults to 3072.
-            num_layers (int, optional): number of transformer blocks. Defaults to 12.
-            num_heads (int, optional): number of attention heads. Defaults to 12.
-            pos_embed (str, optional): position embedding layer type. Defaults to "conv".
-            dropout_rate (float, optional): faction of the input units to drop. Defaults to 0.0.
-            spatial_dims (int, optional): number of spatial dimensions. Defaults to 3.
-            qkv_bias (bool, optional) : apply bias to the qkv linear layer in self attention block. Defaults to False.
-            save_attn (bool, optional): to make accessible the attention in self attention block. Defaults to False. Defaults to False.
+            in_channels: dimension of input channels or the number of channels for input.
+            img_size: dimension of input image.
+            patch_size: dimension of patch size.
+            out_channels: number of output channels. Defaults to 1.
+            deconv_chns: number of channels for the deconvolution layers. Defaults to 16.
+            hidden_size: dimension of hidden layer. Defaults to 768.
+            mlp_dim: dimension of feedforward layer. Defaults to 3072.
+            num_layers: number of transformer blocks. Defaults to 12.
+            num_heads: number of attention heads. Defaults to 12.
+            pos_embed: position embedding layer type. Defaults to "conv".
+            dropout_rate: fraction of the input units to drop. Defaults to 0.0.
+            spatial_dims: number of spatial dimensions. Defaults to 3.
+            qkv_bias: apply bias to the qkv linear layer in self attention block. Defaults to False.
+            save_attn: whether to make the attention in the self attention block accessible. Defaults to False.

         Examples::
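
Reviewer note (not part of the patches): a minimal sketch of how the new flag might be exercised locally. It assumes the ViT backbone is reachable as model.vit with its blocks ModuleList, and that each block's self-attention module stores its last attention weights in an att_mat attribute when save_attn=True; if those internals differ in your checkout, adjust the attribute access accordingly.

    # Hypothetical usage sketch for the save_attn flag introduced by this series.
    import torch

    from monai.networks.nets import UNETR

    model = UNETR(
        in_channels=1,
        out_channels=4,
        img_size=(96, 96, 96),
        save_attn=True,  # new argument added in PATCH 1/2
    )
    model.eval()

    with torch.no_grad():
        # a forward pass is needed before any attention matrices are populated
        _ = model(torch.randn(1, 1, 96, 96, 96))

    # one attention matrix per transformer block of the ViT backbone
    # (assumes SABlock keeps it in `att_mat` when save_attn=True)
    attn_maps = [block.attn.att_mat for block in model.vit.blocks]
    print(len(attn_maps), tuple(attn_maps[0].shape))

Since ViTAutoEnc gains the same qkv_bias/save_attn arguments, the same pattern should apply there, except the transformer blocks hang directly off model.blocks rather than model.vit.blocks.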