Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/transformers/core_model_loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

from __future__ import annotations

import math
import os
import re
import traceback
Expand All @@ -33,7 +32,7 @@

from .integrations.accelerate import get_device, offload_weight
from .integrations.tensor_parallel import ALL_PARALLEL_STYLES
from .utils import is_env_variable_true
from .utils import int_div_ceil, is_env_variable_true
from .utils.loading_report import LoadStateDictInfo
from .utils.logging import get_logger, tqdm

Expand Down Expand Up @@ -354,7 +353,7 @@ def __init__(self, stack_dim: int = 0, concat_dim: int = 1):
self.concat_dim = concat_dim

def split_list_into_chunks(self, tensor_list: list[torch.Tensor], chunks: int = 2):
split_size = math.ceil(len(tensor_list) / chunks) # best effort split size
split_size = int_div_ceil(len(tensor_list), chunks) # best effort split size
return [tensor_list[i * split_size : (i + 1) * split_size] for i in range(chunks)]

@torch.no_grad()
Expand Down
11 changes: 5 additions & 6 deletions src/transformers/integrations/tensor_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,13 @@
# limitations under the License.
from __future__ import annotations

import math
import operator
import os
import re
from functools import reduce

from ..distributed import DistributedConfig
from ..utils import is_torch_greater_or_equal, logging
from ..utils import int_div_ceil, is_torch_greater_or_equal, logging
from ..utils.generic import GeneralInterface
from ..utils.import_utils import is_torch_available

Expand Down Expand Up @@ -374,7 +373,7 @@ def get_tensor_shard(param, empty_param, device_mesh, rank, dim, tensor_idx: int
elif empty_param.dim() == 3 and dim == 2 and len(param_shape) == 2:
dim = 1

shard_size = math.ceil(param_shape[dim] / world_size)
shard_size = int_div_ceil(param_shape[dim], world_size)
start = rank * shard_size
end = min(start + shard_size, param_shape[dim])

Expand Down Expand Up @@ -723,7 +722,7 @@ def get_expected_sharded_shape(self, full_shape: tuple[int, ...] | torch.Size) -
# Colwise shards dim -2, but 1D tensors (bias) shard on dim -1
dim = -1 if len(shape) == 1 else -2
dim = len(shape) + dim if dim < 0 else dim
shard_size = math.ceil(shape[dim] / world_size)
shard_size = int_div_ceil(shape[dim], world_size)
start = self.rank * shard_size
end = min(start + shard_size, shape[dim])
shape[dim] = end - start
Expand Down Expand Up @@ -866,7 +865,7 @@ def get_expected_sharded_shape(self, full_shape: tuple[int, ...] | torch.Size) -
shape = list(full_shape)
dim = -1
dim = len(shape) + dim if dim < 0 else dim
shard_size = math.ceil(shape[dim] / world_size)
shard_size = int_div_ceil(shape[dim], world_size)
start = self.rank * shard_size
end = min(start + shard_size, shape[dim])
shape[dim] = end - start
Expand Down Expand Up @@ -996,7 +995,7 @@ def get_expected_sharded_shape(self, full_shape: tuple[int, ...] | torch.Size) -
# 1D tensors (bias) shard on dim -1
dim = -1 if len(shape) == 1 else self.embedding_dim_sharding
dim = len(shape) + dim if dim < 0 else dim
shard_size = math.ceil(shape[dim] / world_size)
shard_size = int_div_ceil(shape[dim], world_size)
start = self.rank * shard_size
end = min(start + shard_size, shape[dim])
shape[dim] = end - start
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/bit/modeling_bit.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
ImageClassifierOutputWithNoAttention,
)
from ...modeling_utils import PreTrainedModel
from ...utils import auto_docstring, logging
from ...utils import auto_docstring, int_div_ceil, logging
from ...utils.generic import can_return_tuple
from .configuration_bit import BitConfig

Expand Down Expand Up @@ -169,7 +169,7 @@ def __init__(self, kernel_size, stride, dilation, value=0):
self.value = value

def compute_padding(x, kernel_size, stride, dilation):
return max((math.ceil(x / stride) - 1) * stride + (kernel_size - 1) * dilation + 1 - x, 0)
return max((int_div_ceil(x, stride) - 1) * stride + (kernel_size - 1) * dilation + 1 - x, 0)

self.compute_padding = compute_padding

Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/chmv2/image_processing_chmv2_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from ...image_utils import ChannelDimension, ImageInput, PILImageResampling, SizeDict, is_torch_tensor
from ...modeling_outputs import DepthEstimatorOutput
from ...processing_utils import Unpack
from ...utils import TensorType, auto_docstring, requires_backends
from ...utils import TensorType, auto_docstring, int_div_ceil, requires_backends
from .image_processing_chmv2 import CHMv2ImageProcessorKwargs


Expand All @@ -48,7 +48,7 @@ def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None):
x = math.floor(val / multiple) * multiple

if x < min_val:
x = math.ceil(val / multiple) * multiple
x = int_div_ceil(val, multiple) * multiple

return x

Expand Down Expand Up @@ -313,7 +313,7 @@ def pad_image(
height, width = image.shape[-2:]

def _get_pad(size, size_divisor):
new_size = math.ceil(size / size_divisor) * size_divisor
new_size = int_div_ceil(size, size_divisor) * size_divisor
pad_size = new_size - size
pad_size_left = pad_size // 2
pad_size_right = pad_size - pad_size_left
Expand Down
6 changes: 2 additions & 4 deletions src/transformers/models/dac/configuration_dac.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@
# limitations under the License.
"""Dac model configuration"""

import math

import numpy as np

from ...configuration_utils import PreTrainedConfig
from ...utils import auto_docstring, logging
from ...utils import auto_docstring, int_div_ceil, logging


logger = logging.get_logger(__name__)
Expand Down Expand Up @@ -88,7 +86,7 @@ def __init__(
@property
def frame_rate(self) -> int:
hop_length = np.prod(self.upsampling_ratios)
return math.ceil(self.sampling_rate / hop_length)
return int_div_ceil(self.sampling_rate, int(hop_length))


__all__ = ["DacConfig"]
6 changes: 3 additions & 3 deletions src/transformers/models/dac/modeling_dac.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

from ... import initialization as init
from ...modeling_utils import PreTrainedAudioTokenizerBase
from ...utils import ModelOutput, auto_docstring
from ...utils import ModelOutput, auto_docstring, int_div_ceil
from .configuration_dac import DacConfig


Expand Down Expand Up @@ -219,7 +219,7 @@ def __init__(self, config: DacConfig, stride: int = 1, stride_index: int = 1):
self.res_unit3 = DacResidualUnit(dimension // 2, dilation=9)
self.snake1 = Snake1d(dimension // 2)
self.conv1 = nn.Conv1d(
dimension // 2, dimension, kernel_size=2 * stride, stride=stride, padding=math.ceil(stride / 2)
dimension // 2, dimension, kernel_size=2 * stride, stride=stride, padding=int_div_ceil(stride, 2)
)

def forward(self, hidden_state):
Expand All @@ -245,7 +245,7 @@ def __init__(self, config: DacConfig, stride: int = 1, stride_index: int = 1):
output_dim,
kernel_size=2 * stride,
stride=stride,
padding=math.ceil(stride / 2),
padding=int_div_ceil(stride, 2),
)

self.res_unit1 = DacResidualUnit(output_dim, dilation=1)
Expand Down
8 changes: 4 additions & 4 deletions src/transformers/models/depth_pro/modeling_depth_pro.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

from ... import initialization as init
from ...modeling_utils import PreTrainedModel
from ...utils import ModelOutput, auto_docstring, logging, torch_int
from ...utils import ModelOutput, auto_docstring, int_div_ceil, logging, torch_int
from ..auto import AutoModel
from .configuration_depth_pro import DepthProConfig

Expand Down Expand Up @@ -895,16 +895,16 @@ def __init__(self, config: DepthProConfig):
for i in range(config.num_fov_head_layers):
self.layers.append(
nn.Conv2d(
math.ceil(self.fusion_hidden_size / 2 ** (i + 1)),
math.ceil(self.fusion_hidden_size / 2 ** (i + 2)),
int_div_ceil(self.fusion_hidden_size, 2 ** (i + 1)),
int_div_ceil(self.fusion_hidden_size, 2 ** (i + 2)),
kernel_size=3,
stride=2,
padding=1,
)
)
self.layers.append(nn.ReLU(True))
# calculate expected shapes to finally generate a scalar output from final head layer
final_in_channels = math.ceil(self.fusion_hidden_size / 2 ** (config.num_fov_head_layers + 1))
final_in_channels = int_div_ceil(self.fusion_hidden_size, 2 ** (config.num_fov_head_layers + 1))
final_kernel_size = torch_int((self.out_size - 1) / 2**config.num_fov_head_layers + 1)
self.layers.append(
nn.Conv2d(
Expand Down
5 changes: 2 additions & 3 deletions src/transformers/models/dpt/convert_dinov2_depth_to_hf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@

import argparse
import itertools
import math
from io import BytesIO
from pathlib import Path

Expand All @@ -26,7 +25,7 @@
from torchvision import transforms

from transformers import Dinov2Config, DPTConfig, DPTForDepthEstimation, DPTImageProcessor
from transformers.utils import logging
from transformers.utils import int_div_ceil, logging


logging.set_verbosity_info()
Expand Down Expand Up @@ -207,7 +206,7 @@ def __init__(self, multiple):
self.multiple = multiple

def _get_pad(self, size):
new_size = math.ceil(size / self.multiple) * self.multiple
new_size = int_div_ceil(size, self.multiple) * self.multiple
pad_size = new_size - size
pad_size_left = pad_size // 2
pad_size_right = pad_size - pad_size_left
Expand Down
5 changes: 3 additions & 2 deletions src/transformers/models/dpt/image_processing_dpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
from ...utils import (
TensorType,
filter_out_non_signature_kwargs,
int_div_ceil,
is_vision_available,
logging,
requires_backends,
Expand Down Expand Up @@ -97,7 +98,7 @@ def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None):
x = math.floor(val / multiple) * multiple

if x < min_val:
x = math.ceil(val / multiple) * multiple
x = int_div_ceil(val, multiple) * multiple

return x

Expand Down Expand Up @@ -289,7 +290,7 @@ def pad_image(
"""

def _get_pad(size, size_divisor):
new_size = math.ceil(size / size_divisor) * size_divisor
new_size = int_div_ceil(size, size_divisor) * size_divisor
pad_size = new_size - size
pad_size_left = pad_size // 2
pad_size_right = pad_size - pad_size_left
Expand Down
6 changes: 3 additions & 3 deletions src/transformers/models/dpt/image_processing_dpt_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
is_torch_tensor,
)
from ...processing_utils import Unpack
from ...utils import TensorType, auto_docstring, requires_backends
from ...utils import TensorType, auto_docstring, int_div_ceil, requires_backends
from .image_processing_dpt import DPTImageProcessorKwargs


Expand All @@ -60,7 +60,7 @@ def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None):
x = math.floor(val / multiple) * multiple

if x < min_val:
x = math.ceil(val / multiple) * multiple
x = int_div_ceil(val, multiple) * multiple

return x

Expand Down Expand Up @@ -325,7 +325,7 @@ def pad_image(
height, width = image.shape[-2:]

def _get_pad(size, size_divisor):
new_size = math.ceil(size / size_divisor) * size_divisor
new_size = int_div_ceil(size, size_divisor) * size_divisor
pad_size = new_size - size
pad_size_left = pad_size // 2
pad_size_right = pad_size - pad_size_left
Expand Down
5 changes: 3 additions & 2 deletions src/transformers/models/dpt/modular_dpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
from ...utils import (
TensorType,
auto_docstring,
int_div_ceil,
requires_backends,
)
from ..beit.image_processing_beit_fast import BeitImageProcessorFast
Expand All @@ -56,7 +57,7 @@ def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None):
x = math.floor(val / multiple) * multiple

if x < min_val:
x = math.ceil(val / multiple) * multiple
x = int_div_ceil(val, multiple) * multiple

return x

Expand Down Expand Up @@ -160,7 +161,7 @@ def pad_image(
height, width = image.shape[-2:]

def _get_pad(size, size_divisor):
new_size = math.ceil(size / size_divisor) * size_divisor
new_size = int_div_ceil(size, size_divisor) * size_divisor
pad_size = new_size - size
pad_size_left = pad_size // 2
pad_size_right = pad_size - pad_size_left
Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/encodec/configuration_encodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import numpy as np

from ...configuration_utils import PreTrainedConfig
from ...utils import auto_docstring, logging
from ...utils import auto_docstring, int_div_ceil, logging


logger = logging.get_logger(__name__)
Expand Down Expand Up @@ -169,7 +169,7 @@ def codebook_nbits(self) -> int:

@property
def frame_rate(self) -> int:
return math.ceil(self.sampling_rate / self.hop_length)
return int_div_ceil(self.sampling_rate, self.hop_length)

@property
def num_quantizers(self) -> int:
Expand Down
5 changes: 2 additions & 3 deletions src/transformers/models/eomt/image_processing_eomt.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
# limitations under the License.
"""Image processor class for EoMT."""

import math

import numpy as np

from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict
Expand All @@ -41,6 +39,7 @@
IMAGENET_DEFAULT_STD,
TensorType,
filter_out_non_signature_kwargs,
int_div_ceil,
is_torch_available,
logging,
)
Expand Down Expand Up @@ -347,7 +346,7 @@ def _split_image(self, image: ImageInput, size: dict, image_index: int) -> tuple
patch_size = size["shortest_edge"]

longer_side = max(image_size)
num_patches = math.ceil(longer_side / patch_size)
num_patches = int_div_ceil(longer_side, patch_size)
total_overlap = num_patches * patch_size - longer_side
overlap_per_patch = total_overlap / (num_patches - 1) if num_patches > 1 else 0

Expand Down
4 changes: 2 additions & 2 deletions src/transformers/models/eomt/image_processing_eomt_fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.
"""Fast Image processor class for EoMT."""

import math
from typing import Optional, Union

import numpy as np
Expand All @@ -39,6 +38,7 @@
TensorType,
auto_docstring,
filter_out_non_signature_kwargs,
int_div_ceil,
)
from .image_processing_eomt import (
EomtImageProcessorKwargs,
Expand Down Expand Up @@ -127,7 +127,7 @@ def _split_image(self, images: torch.Tensor, size: dict, image_indices: int) ->
patch_size = size["shortest_edge"]

longer_side = max(height, width)
num_patches = math.ceil(longer_side / patch_size)
num_patches = int_div_ceil(longer_side, patch_size)
total_overlap = num_patches * patch_size - longer_side
overlap_per_patch = total_overlap / (num_patches - 1) if num_patches > 1 else 0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

from ...configuration_utils import PreTrainedConfig
from ...utils import auto_docstring
from ...utils import auto_docstring, int_div_ceil


@auto_docstring(checkpoint="tiiuae/falcon-mamba-7b")
Expand Down Expand Up @@ -117,7 +115,7 @@ def __init__(
self.use_conv_bias = use_conv_bias
self.hidden_act = hidden_act
self.initializer_range = initializer_range
self.time_step_rank = math.ceil(self.hidden_size / 16) if time_step_rank == "auto" else time_step_rank
self.time_step_rank = int_div_ceil(self.hidden_size, 16) if time_step_rank == "auto" else time_step_rank
self.time_step_scale = time_step_scale
self.time_step_min = time_step_min
self.time_step_max = time_step_max
Expand Down
Loading
Loading