diff --git a/monai/networks/nets/unetr.py b/monai/networks/nets/unetr.py
index 7ad12daa89..4cdcd73c4d 100644
--- a/monai/networks/nets/unetr.py
+++ b/monai/networks/nets/unetr.py
@@ -43,23 +43,25 @@ def __init__(
         dropout_rate: float = 0.0,
         spatial_dims: int = 3,
         qkv_bias: bool = False,
+        save_attn: bool = False,
     ) -> None:
         """
         Args:
            in_channels: dimension of input channels.
            out_channels: dimension of output channels.
            img_size: dimension of input image.
-            feature_size: dimension of network feature size.
-            hidden_size: dimension of hidden layer.
-            mlp_dim: dimension of feedforward layer.
-            num_heads: number of attention heads.
-            pos_embed: position embedding layer type.
-            norm_name: feature normalization type and arguments.
-            conv_block: bool argument to determine if convolutional block is used.
-            res_block: bool argument to determine if residual block is used.
-            dropout_rate: faction of the input units to drop.
-            spatial_dims: number of spatial dims.
-            qkv_bias: apply the bias term for the qkv linear layer in self attention block
+            feature_size: dimension of network feature size. Defaults to 16.
+            hidden_size: dimension of hidden layer. Defaults to 768.
+            mlp_dim: dimension of feedforward layer. Defaults to 3072.
+            num_heads: number of attention heads. Defaults to 12.
+            pos_embed: position embedding layer type. Defaults to "conv".
+            norm_name: feature normalization type and arguments. Defaults to "instance".
+            conv_block: if convolutional block is used. Defaults to True.
+            res_block: if residual block is used. Defaults to True.
+            dropout_rate: fraction of the input units to drop. Defaults to 0.0.
+            spatial_dims: number of spatial dims. Defaults to 3.
+            qkv_bias: apply the bias term for the qkv linear layer in self attention block. Defaults to False.
+            save_attn: to make the attention accessible in the self attention block. Defaults to False.

         Examples::
@@ -101,6 +103,7 @@ def __init__(
             dropout_rate=dropout_rate,
             spatial_dims=spatial_dims,
             qkv_bias=qkv_bias,
+            save_attn=save_attn,
         )
         self.encoder1 = UnetrBasicBlock(
             spatial_dims=spatial_dims,
diff --git a/monai/networks/nets/vitautoenc.py b/monai/networks/nets/vitautoenc.py
index ff6f637118..6ae8135438 100644
--- a/monai/networks/nets/vitautoenc.py
+++ b/monai/networks/nets/vitautoenc.py
@@ -46,21 +46,25 @@ def __init__(
         pos_embed: str = "conv",
         dropout_rate: float = 0.0,
         spatial_dims: int = 3,
+        qkv_bias: bool = False,
+        save_attn: bool = False,
     ) -> None:
         """
         Args:
-            in_channels: dimension of input channels or the number of channels for input
+            in_channels: dimension of input channels or the number of channels for input.
            img_size: dimension of input image.
-            patch_size: dimension of patch size.
-            hidden_size: dimension of hidden layer.
-            out_channels: number of output channels.
-            deconv_chns: number of channels for the deconvolution layers.
-            mlp_dim: dimension of feedforward layer.
-            num_layers: number of transformer blocks.
-            num_heads: number of attention heads.
-            pos_embed: position embedding layer type.
-            dropout_rate: faction of the input units to drop.
-            spatial_dims: number of spatial dimensions.
+            patch_size: dimension of patch size.
+            out_channels: number of output channels. Defaults to 1.
+            deconv_chns: number of channels for the deconvolution layers. Defaults to 16.
+            hidden_size: dimension of hidden layer. Defaults to 768.
+            mlp_dim: dimension of feedforward layer. Defaults to 3072.
+            num_layers: number of transformer blocks. Defaults to 12.
+            num_heads: number of attention heads. Defaults to 12.
+            pos_embed: position embedding layer type. Defaults to "conv".
+            dropout_rate: fraction of the input units to drop. Defaults to 0.0.
+            spatial_dims: number of spatial dimensions. Defaults to 3.
+            qkv_bias: apply bias to the qkv linear layer in self attention block. Defaults to False.
+            save_attn: to make the attention accessible in the self attention block. Defaults to False.

         Examples::
@@ -89,7 +93,10 @@ def __init__(
             spatial_dims=self.spatial_dims,
         )
         self.blocks = nn.ModuleList(
-            [TransformerBlock(hidden_size, mlp_dim, num_heads, dropout_rate) for i in range(num_layers)]
+            [
+                TransformerBlock(hidden_size, mlp_dim, num_heads, dropout_rate, qkv_bias, save_attn)
+                for i in range(num_layers)
+            ]
        )
        self.norm = nn.LayerNorm(hidden_size)
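For reviewers, a minimal sketch (not part of the patch) of how the two new flags could be exercised end to end. It assumes a MONAI build that includes this change, and that the self attention block keeps its attention weights in an att_mat attribute when save_attn=True, as SABlock does; the attribute path block.attn.att_mat should be re-checked against the installed version.

import torch

from monai.networks.nets import UNETR, ViTAutoEnc

# UNETR: save_attn/qkv_bias are forwarded to the internal ViT backbone.
unetr = UNETR(
    in_channels=1,
    out_channels=2,
    img_size=(96, 96, 96),
    qkv_bias=True,   # new flag: bias on the qkv projection
    save_attn=True,  # new flag: keep attention weights after forward
)

# ViTAutoEnc: the flags now reach every TransformerBlock in self.blocks.
autoenc = ViTAutoEnc(
    in_channels=1,
    img_size=(96, 96, 96),
    patch_size=(16, 16, 16),
    qkv_bias=True,
    save_attn=True,
)

x = torch.randn(1, 1, 96, 96, 96)
_ = unetr(x)
_ = autoenc(x)

# After a forward pass, each block's attention matrix should be retrievable
# (assumed attribute path: block.attn.att_mat, as stored by SABlock).
print(unetr.vit.blocks[0].attn.att_mat.shape)
print(autoenc.blocks[0].attn.att_mat.shape)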