From 69ac401c42da8cde87e7247fc9a100aa7502434d Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Sun, 15 Mar 2026 15:52:01 -0400 Subject: [PATCH 1/5] Create int_div_ceil helper function --- src/transformers/utils/__init__.py | 1 + src/transformers/utils/generic.py | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 699d28c7ff04..c6b34a22edb5 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -55,6 +55,7 @@ filter_out_non_signature_kwargs, find_labels, flatten_dict, + int_div_ceil, is_numpy_array, is_tensor, is_timm_config_dict, diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index 379b23b58de6..2dd6af34eb8a 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -665,6 +665,19 @@ def torch_float(x): return x.to(torch.float32) if torch.jit.is_tracing() and isinstance(x, torch.Tensor) else int(x) +def int_div_ceil(a: int | "torch.Tensor", b: int | "torch.Tensor") -> int: + """ + Perform integer ceiling division without intermediate floating-point conversion. + + Equivalent to `math.ceil(a / b)` for non-negative integers, but avoids casting operands + to floats, which can result in runtime-specific precision issues. + + For example, `math.ceil(torch.tensor(300).cuda() / 30) == 11`, + while `int_div_ceil(torch.tensor(300).cuda(), 30) == 10` as expected. + """ + return (a + b - 1) // b + + def filter_out_non_signature_kwargs(extra: list | None = None): """ Decorator to filter out named arguments that are not in the function signature. From e75137f59d80b87d439d0bcd8502d1e920b6b6e8 Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Sun, 15 Mar 2026 16:16:21 -0400 Subject: [PATCH 2/5] Avoid floating point math for ceil operations --- src/transformers/core_model_loading.py | 5 ++- .../integrations/tensor_parallel.py | 11 +++---- src/transformers/models/bit/modeling_bit.py | 4 +-- .../chmv2/image_processing_chmv2_fast.py | 6 ++-- .../models/dac/configuration_dac.py | 6 ++-- src/transformers/models/dac/modeling_dac.py | 6 ++-- .../models/depth_pro/modeling_depth_pro.py | 8 ++--- .../models/dpt/convert_dinov2_depth_to_hf.py | 5 ++- .../models/dpt/image_processing_dpt.py | 5 +-- .../models/dpt/image_processing_dpt_fast.py | 6 ++-- src/transformers/models/dpt/modular_dpt.py | 5 +-- .../models/encodec/configuration_encodec.py | 4 +-- .../models/eomt/image_processing_eomt.py | 5 ++- .../models/eomt/image_processing_eomt_fast.py | 4 +-- .../configuration_falcon_mamba.py | 6 ++-- .../models/fuyu/image_processing_fuyu.py | 14 ++------- .../models/fuyu/image_processing_fuyu_fast.py | 13 ++------ .../models/gemma3/image_processing_gemma3.py | 6 ++-- .../gemma3/image_processing_gemma3_fast.py | 5 +-- .../feature_extraction_granite_speech.py | 5 ++- .../granite_speech/modeling_granite_speech.py | 6 ++-- .../configuration_higgs_audio_v2_tokenizer.py | 4 +-- .../idefics3/image_processing_idefics3.py | 31 +++++++++---------- .../image_processing_idefics3_fast.py | 28 ++++++++--------- .../models/jamba/configuration_jamba.py | 5 ++- .../models/layoutlmv2/modeling_layoutlmv2.py | 6 ++-- .../models/lfm2_vl/processing_lfm2_vl.py | 9 +++--- .../modeling_llava_onevision.py | 4 +-- .../modular_llava_onevision.py | 4 +-- .../processing_llava_onevision.py | 4 +-- .../models/mamba/configuration_mamba.py | 5 ++- ...convert_mamba_ssm_checkpoint_to_pytorch.py | 5 ++- .../models/mamba2/configuration_mamba2.py | 5 ++- .../image_processing_mask2former.py | 6 ++-- .../image_processing_mask2former_fast.py | 7 ++--- .../maskformer/image_processing_maskformer.py | 6 ++-- .../image_processing_maskformer_fast.py | 6 ++-- .../metaclip_2/convert_metaclip_2_to_hf.py | 1 - .../models/mimi/configuration_mimi.py | 4 +-- .../mllama/convert_mllama_weights_to_hf.py | 4 +-- .../models/mobilevit/modeling_mobilevit.py | 6 ++-- .../modeling_musicgen_melody.py | 6 ++-- .../models/nllb_moe/modeling_nllb_moe.py | 4 +-- .../models/pe_audio/modeling_pe_audio.py | 5 ++- .../models/perceiver/modeling_perceiver.py | 4 +-- .../modeling_phi4_multimodal.py | 4 +-- .../modular_phi4_multimodal.py | 4 +-- .../qwen3_vl/video_processing_qwen3_vl.py | 4 +-- .../smolvlm/image_processing_smolvlm.py | 31 +++++++++---------- .../smolvlm/image_processing_smolvlm_fast.py | 27 ++++++++-------- .../modeling_voxtral_realtime.py | 11 +++++-- .../modular_voxtral_realtime.py | 11 +++++-- .../models/xcodec/configuration_xcodec.py | 4 +-- .../models/zamba/configuration_zamba.py | 5 ++- src/transformers/trainer.py | 5 +-- src/transformers/trainer_pt_utils.py | 10 +++--- src/transformers/utils/generic.py | 2 +- 57 files changed, 201 insertions(+), 221 deletions(-) diff --git a/src/transformers/core_model_loading.py b/src/transformers/core_model_loading.py index b62fc90d4cdf..3caf538c4c48 100644 --- a/src/transformers/core_model_loading.py +++ b/src/transformers/core_model_loading.py @@ -15,7 +15,6 @@ from __future__ import annotations -import math import os import re import traceback @@ -33,7 +32,7 @@ from .integrations.accelerate import get_device, offload_weight from .integrations.tensor_parallel import ALL_PARALLEL_STYLES -from .utils import is_env_variable_true +from .utils import int_div_ceil, is_env_variable_true from .utils.loading_report import LoadStateDictInfo from .utils.logging import get_logger, tqdm @@ -354,7 +353,7 @@ def __init__(self, stack_dim: int = 0, concat_dim: int = 1): self.concat_dim = concat_dim def split_list_into_chunks(self, tensor_list: list[torch.Tensor], chunks: int = 2): - split_size = math.ceil(len(tensor_list) / chunks) # best effort split size + split_size = int_div_ceil(len(tensor_list), chunks) # best effort split size return [tensor_list[i * split_size : (i + 1) * split_size] for i in range(chunks)] @torch.no_grad() diff --git a/src/transformers/integrations/tensor_parallel.py b/src/transformers/integrations/tensor_parallel.py index f9a6d5233b0e..5b561fd41f9d 100644 --- a/src/transformers/integrations/tensor_parallel.py +++ b/src/transformers/integrations/tensor_parallel.py @@ -13,14 +13,13 @@ # limitations under the License. from __future__ import annotations -import math import operator import os import re from functools import reduce from ..distributed import DistributedConfig -from ..utils import is_torch_greater_or_equal, logging +from ..utils import int_div_ceil, is_torch_greater_or_equal, logging from ..utils.generic import GeneralInterface from ..utils.import_utils import is_torch_available @@ -374,7 +373,7 @@ def get_tensor_shard(param, empty_param, device_mesh, rank, dim, tensor_idx: int elif empty_param.dim() == 3 and dim == 2 and len(param_shape) == 2: dim = 1 - shard_size = math.ceil(param_shape[dim] / world_size) + shard_size = int_div_ceil(param_shape[dim], world_size) start = rank * shard_size end = min(start + shard_size, param_shape[dim]) @@ -723,7 +722,7 @@ def get_expected_sharded_shape(self, full_shape: tuple[int, ...] | torch.Size) - # Colwise shards dim -2, but 1D tensors (bias) shard on dim -1 dim = -1 if len(shape) == 1 else -2 dim = len(shape) + dim if dim < 0 else dim - shard_size = math.ceil(shape[dim] / world_size) + shard_size = int_div_ceil(shape[dim], world_size) start = self.rank * shard_size end = min(start + shard_size, shape[dim]) shape[dim] = end - start @@ -866,7 +865,7 @@ def get_expected_sharded_shape(self, full_shape: tuple[int, ...] | torch.Size) - shape = list(full_shape) dim = -1 dim = len(shape) + dim if dim < 0 else dim - shard_size = math.ceil(shape[dim] / world_size) + shard_size = int_div_ceil(shape[dim], world_size) start = self.rank * shard_size end = min(start + shard_size, shape[dim]) shape[dim] = end - start @@ -996,7 +995,7 @@ def get_expected_sharded_shape(self, full_shape: tuple[int, ...] | torch.Size) - # 1D tensors (bias) shard on dim -1 dim = -1 if len(shape) == 1 else self.embedding_dim_sharding dim = len(shape) + dim if dim < 0 else dim - shard_size = math.ceil(shape[dim] / world_size) + shard_size = int_div_ceil(shape[dim], world_size) start = self.rank * shard_size end = min(start + shard_size, shape[dim]) shape[dim] = end - start diff --git a/src/transformers/models/bit/modeling_bit.py b/src/transformers/models/bit/modeling_bit.py index 66b5a62234a5..2295b44db12b 100644 --- a/src/transformers/models/bit/modeling_bit.py +++ b/src/transformers/models/bit/modeling_bit.py @@ -30,7 +30,7 @@ ImageClassifierOutputWithNoAttention, ) from ...modeling_utils import PreTrainedModel -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging from ...utils.generic import can_return_tuple from .configuration_bit import BitConfig @@ -169,7 +169,7 @@ def __init__(self, kernel_size, stride, dilation, value=0): self.value = value def compute_padding(x, kernel_size, stride, dilation): - return max((math.ceil(x / stride) - 1) * stride + (kernel_size - 1) * dilation + 1 - x, 0) + return max((int_div_ceil(x, stride) - 1) * stride + (kernel_size - 1) * dilation + 1 - x, 0) self.compute_padding = compute_padding diff --git a/src/transformers/models/chmv2/image_processing_chmv2_fast.py b/src/transformers/models/chmv2/image_processing_chmv2_fast.py index 1a80e1eb4cc9..77afdf463613 100644 --- a/src/transformers/models/chmv2/image_processing_chmv2_fast.py +++ b/src/transformers/models/chmv2/image_processing_chmv2_fast.py @@ -31,7 +31,7 @@ from ...image_utils import ChannelDimension, ImageInput, PILImageResampling, SizeDict, is_torch_tensor from ...modeling_outputs import DepthEstimatorOutput from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, requires_backends +from ...utils import TensorType, auto_docstring, int_div_ceil, requires_backends from .image_processing_chmv2 import CHMv2ImageProcessorKwargs @@ -48,7 +48,7 @@ def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None): x = math.floor(val / multiple) * multiple if x < min_val: - x = math.ceil(val / multiple) * multiple + x = int_div_ceil(val, multiple) * multiple return x @@ -313,7 +313,7 @@ def pad_image( height, width = image.shape[-2:] def _get_pad(size, size_divisor): - new_size = math.ceil(size / size_divisor) * size_divisor + new_size = int_div_ceil(size, size_divisor) * size_divisor pad_size = new_size - size pad_size_left = pad_size // 2 pad_size_right = pad_size - pad_size_left diff --git a/src/transformers/models/dac/configuration_dac.py b/src/transformers/models/dac/configuration_dac.py index 551c1964bebc..88e8d9701574 100644 --- a/src/transformers/models/dac/configuration_dac.py +++ b/src/transformers/models/dac/configuration_dac.py @@ -13,12 +13,10 @@ # limitations under the License. """Dac model configuration""" -import math - import numpy as np from ...configuration_utils import PreTrainedConfig -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging logger = logging.get_logger(__name__) @@ -88,7 +86,7 @@ def __init__( @property def frame_rate(self) -> int: hop_length = np.prod(self.upsampling_ratios) - return math.ceil(self.sampling_rate / hop_length) + return int_div_ceil(self.sampling_rate, int(hop_length)) __all__ = ["DacConfig"] diff --git a/src/transformers/models/dac/modeling_dac.py b/src/transformers/models/dac/modeling_dac.py index 6ac46f78a4a6..d0be39982a2e 100644 --- a/src/transformers/models/dac/modeling_dac.py +++ b/src/transformers/models/dac/modeling_dac.py @@ -23,7 +23,7 @@ from ... import initialization as init from ...modeling_utils import PreTrainedAudioTokenizerBase -from ...utils import ModelOutput, auto_docstring +from ...utils import ModelOutput, auto_docstring, int_div_ceil from .configuration_dac import DacConfig @@ -219,7 +219,7 @@ def __init__(self, config: DacConfig, stride: int = 1, stride_index: int = 1): self.res_unit3 = DacResidualUnit(dimension // 2, dilation=9) self.snake1 = Snake1d(dimension // 2) self.conv1 = nn.Conv1d( - dimension // 2, dimension, kernel_size=2 * stride, stride=stride, padding=math.ceil(stride / 2) + dimension // 2, dimension, kernel_size=2 * stride, stride=stride, padding=int_div_ceil(stride, 2) ) def forward(self, hidden_state): @@ -245,7 +245,7 @@ def __init__(self, config: DacConfig, stride: int = 1, stride_index: int = 1): output_dim, kernel_size=2 * stride, stride=stride, - padding=math.ceil(stride / 2), + padding=int_div_ceil(stride, 2), ) self.res_unit1 = DacResidualUnit(output_dim, dilation=1) diff --git a/src/transformers/models/depth_pro/modeling_depth_pro.py b/src/transformers/models/depth_pro/modeling_depth_pro.py index dfa9fb5d5f79..d0b227bc805b 100644 --- a/src/transformers/models/depth_pro/modeling_depth_pro.py +++ b/src/transformers/models/depth_pro/modeling_depth_pro.py @@ -22,7 +22,7 @@ from ... import initialization as init from ...modeling_utils import PreTrainedModel -from ...utils import ModelOutput, auto_docstring, logging, torch_int +from ...utils import ModelOutput, auto_docstring, int_div_ceil, logging, torch_int from ..auto import AutoModel from .configuration_depth_pro import DepthProConfig @@ -895,8 +895,8 @@ def __init__(self, config: DepthProConfig): for i in range(config.num_fov_head_layers): self.layers.append( nn.Conv2d( - math.ceil(self.fusion_hidden_size / 2 ** (i + 1)), - math.ceil(self.fusion_hidden_size / 2 ** (i + 2)), + int_div_ceil(self.fusion_hidden_size, 2 ** (i + 1)), + int_div_ceil(self.fusion_hidden_size, 2 ** (i + 2)), kernel_size=3, stride=2, padding=1, @@ -904,7 +904,7 @@ def __init__(self, config: DepthProConfig): ) self.layers.append(nn.ReLU(True)) # calculate expected shapes to finally generate a scalar output from final head layer - final_in_channels = math.ceil(self.fusion_hidden_size / 2 ** (config.num_fov_head_layers + 1)) + final_in_channels = int_div_ceil(self.fusion_hidden_size, 2 ** (config.num_fov_head_layers + 1)) final_kernel_size = torch_int((self.out_size - 1) / 2**config.num_fov_head_layers + 1) self.layers.append( nn.Conv2d( diff --git a/src/transformers/models/dpt/convert_dinov2_depth_to_hf.py b/src/transformers/models/dpt/convert_dinov2_depth_to_hf.py index cb3ce6ec869e..94dd70944ce2 100644 --- a/src/transformers/models/dpt/convert_dinov2_depth_to_hf.py +++ b/src/transformers/models/dpt/convert_dinov2_depth_to_hf.py @@ -16,7 +16,6 @@ import argparse import itertools -import math from io import BytesIO from pathlib import Path @@ -26,7 +25,7 @@ from torchvision import transforms from transformers import Dinov2Config, DPTConfig, DPTForDepthEstimation, DPTImageProcessor -from transformers.utils import logging +from transformers.utils import int_div_ceil, logging logging.set_verbosity_info() @@ -207,7 +206,7 @@ def __init__(self, multiple): self.multiple = multiple def _get_pad(self, size): - new_size = math.ceil(size / self.multiple) * self.multiple + new_size = int_div_ceil(size, self.multiple) * self.multiple pad_size = new_size - size pad_size_left = pad_size // 2 pad_size_right = pad_size - pad_size_left diff --git a/src/transformers/models/dpt/image_processing_dpt.py b/src/transformers/models/dpt/image_processing_dpt.py index 1584d23849fd..b15cdca8f59b 100644 --- a/src/transformers/models/dpt/image_processing_dpt.py +++ b/src/transformers/models/dpt/image_processing_dpt.py @@ -47,6 +47,7 @@ from ...utils import ( TensorType, filter_out_non_signature_kwargs, + int_div_ceil, is_vision_available, logging, requires_backends, @@ -97,7 +98,7 @@ def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None): x = math.floor(val / multiple) * multiple if x < min_val: - x = math.ceil(val / multiple) * multiple + x = int_div_ceil(val, multiple) * multiple return x @@ -289,7 +290,7 @@ def pad_image( """ def _get_pad(size, size_divisor): - new_size = math.ceil(size / size_divisor) * size_divisor + new_size = int_div_ceil(size, size_divisor) * size_divisor pad_size = new_size - size pad_size_left = pad_size // 2 pad_size_right = pad_size - pad_size_left diff --git a/src/transformers/models/dpt/image_processing_dpt_fast.py b/src/transformers/models/dpt/image_processing_dpt_fast.py index ccc41d950bce..6250c2309892 100644 --- a/src/transformers/models/dpt/image_processing_dpt_fast.py +++ b/src/transformers/models/dpt/image_processing_dpt_fast.py @@ -39,7 +39,7 @@ is_torch_tensor, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, requires_backends +from ...utils import TensorType, auto_docstring, int_div_ceil, requires_backends from .image_processing_dpt import DPTImageProcessorKwargs @@ -60,7 +60,7 @@ def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None): x = math.floor(val / multiple) * multiple if x < min_val: - x = math.ceil(val / multiple) * multiple + x = int_div_ceil(val, multiple) * multiple return x @@ -325,7 +325,7 @@ def pad_image( height, width = image.shape[-2:] def _get_pad(size, size_divisor): - new_size = math.ceil(size / size_divisor) * size_divisor + new_size = int_div_ceil(size, size_divisor) * size_divisor pad_size = new_size - size pad_size_left = pad_size // 2 pad_size_right = pad_size - pad_size_left diff --git a/src/transformers/models/dpt/modular_dpt.py b/src/transformers/models/dpt/modular_dpt.py index b0e0c8858ec7..e335f6b79e23 100644 --- a/src/transformers/models/dpt/modular_dpt.py +++ b/src/transformers/models/dpt/modular_dpt.py @@ -31,6 +31,7 @@ from ...utils import ( TensorType, auto_docstring, + int_div_ceil, requires_backends, ) from ..beit.image_processing_beit_fast import BeitImageProcessorFast @@ -56,7 +57,7 @@ def constrain_to_multiple_of(val, multiple, min_val=0, max_val=None): x = math.floor(val / multiple) * multiple if x < min_val: - x = math.ceil(val / multiple) * multiple + x = int_div_ceil(val, multiple) * multiple return x @@ -160,7 +161,7 @@ def pad_image( height, width = image.shape[-2:] def _get_pad(size, size_divisor): - new_size = math.ceil(size / size_divisor) * size_divisor + new_size = int_div_ceil(size, size_divisor) * size_divisor pad_size = new_size - size pad_size_left = pad_size // 2 pad_size_right = pad_size - pad_size_left diff --git a/src/transformers/models/encodec/configuration_encodec.py b/src/transformers/models/encodec/configuration_encodec.py index 9c5c0a10093d..b10f201c5c0c 100644 --- a/src/transformers/models/encodec/configuration_encodec.py +++ b/src/transformers/models/encodec/configuration_encodec.py @@ -18,7 +18,7 @@ import numpy as np from ...configuration_utils import PreTrainedConfig -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging logger = logging.get_logger(__name__) @@ -169,7 +169,7 @@ def codebook_nbits(self) -> int: @property def frame_rate(self) -> int: - return math.ceil(self.sampling_rate / self.hop_length) + return int_div_ceil(self.sampling_rate, self.hop_length) @property def num_quantizers(self) -> int: diff --git a/src/transformers/models/eomt/image_processing_eomt.py b/src/transformers/models/eomt/image_processing_eomt.py index 9c41b78a6e0d..e62673bc6b04 100644 --- a/src/transformers/models/eomt/image_processing_eomt.py +++ b/src/transformers/models/eomt/image_processing_eomt.py @@ -13,8 +13,6 @@ # limitations under the License. """Image processor class for EoMT.""" -import math - import numpy as np from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict @@ -41,6 +39,7 @@ IMAGENET_DEFAULT_STD, TensorType, filter_out_non_signature_kwargs, + int_div_ceil, is_torch_available, logging, ) @@ -347,7 +346,7 @@ def _split_image(self, image: ImageInput, size: dict, image_index: int) -> tuple patch_size = size["shortest_edge"] longer_side = max(image_size) - num_patches = math.ceil(longer_side / patch_size) + num_patches = int_div_ceil(longer_side, patch_size) total_overlap = num_patches * patch_size - longer_side overlap_per_patch = total_overlap / (num_patches - 1) if num_patches > 1 else 0 diff --git a/src/transformers/models/eomt/image_processing_eomt_fast.py b/src/transformers/models/eomt/image_processing_eomt_fast.py index 69e8f1dadf42..cd3d3704f8ce 100644 --- a/src/transformers/models/eomt/image_processing_eomt_fast.py +++ b/src/transformers/models/eomt/image_processing_eomt_fast.py @@ -13,7 +13,6 @@ # limitations under the License. """Fast Image processor class for EoMT.""" -import math from typing import Optional, Union import numpy as np @@ -39,6 +38,7 @@ TensorType, auto_docstring, filter_out_non_signature_kwargs, + int_div_ceil, ) from .image_processing_eomt import ( EomtImageProcessorKwargs, @@ -127,7 +127,7 @@ def _split_image(self, images: torch.Tensor, size: dict, image_indices: int) -> patch_size = size["shortest_edge"] longer_side = max(height, width) - num_patches = math.ceil(longer_side / patch_size) + num_patches = int_div_ceil(longer_side, patch_size) total_overlap = num_patches * patch_size - longer_side overlap_per_patch = total_overlap / (num_patches - 1) if num_patches > 1 else 0 diff --git a/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py b/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py index cc7583fcbbd8..8aafb7b27e06 100644 --- a/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py +++ b/src/transformers/models/falcon_mamba/configuration_falcon_mamba.py @@ -17,10 +17,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import math - from ...configuration_utils import PreTrainedConfig -from ...utils import auto_docstring +from ...utils import auto_docstring, int_div_ceil @auto_docstring(checkpoint="tiiuae/falcon-mamba-7b") @@ -117,7 +115,7 @@ def __init__( self.use_conv_bias = use_conv_bias self.hidden_act = hidden_act self.initializer_range = initializer_range - self.time_step_rank = math.ceil(self.hidden_size / 16) if time_step_rank == "auto" else time_step_rank + self.time_step_rank = int_div_ceil(self.hidden_size, 16) if time_step_rank == "auto" else time_step_rank self.time_step_scale = time_step_scale self.time_step_min = time_step_min self.time_step_max = time_step_max diff --git a/src/transformers/models/fuyu/image_processing_fuyu.py b/src/transformers/models/fuyu/image_processing_fuyu.py index c8024210ed9c..3b0aeb6a9865 100644 --- a/src/transformers/models/fuyu/image_processing_fuyu.py +++ b/src/transformers/models/fuyu/image_processing_fuyu.py @@ -13,7 +13,6 @@ # limitations under the License. """Image processor class for Fuyu.""" -import math import numpy as np @@ -40,6 +39,7 @@ from ...utils import ( TensorType, filter_out_non_signature_kwargs, + int_div_ceil, is_torch_available, is_torch_device, is_torch_dtype, @@ -655,16 +655,8 @@ def preprocess_with_tokenizer_info( image = image_input[batch_index, subseq_index] image_height, image_width = image.shape[1], image.shape[2] if variable_sized: - # The min() is required here due to floating point issues: - # math.ceil(torch.tensor(300).cuda() / 30) == 11 - new_h = min( - image_height, - math.ceil(image_unpadded_h[batch_index, subseq_index] / patch_height) * patch_height, - ) - new_w = min( - image_width, - math.ceil(image_unpadded_w[batch_index, subseq_index] / patch_width) * patch_width, - ) + new_h = int_div_ceil(image_unpadded_h[batch_index, subseq_index], patch_height) * patch_height + new_w = int_div_ceil(image_unpadded_w[batch_index, subseq_index], patch_width) * patch_width image = image[:, :new_h, :new_w] image_height, image_width = new_h, new_w diff --git a/src/transformers/models/fuyu/image_processing_fuyu_fast.py b/src/transformers/models/fuyu/image_processing_fuyu_fast.py index 633d65dd1b55..f9cb5aa032ed 100644 --- a/src/transformers/models/fuyu/image_processing_fuyu_fast.py +++ b/src/transformers/models/fuyu/image_processing_fuyu_fast.py @@ -13,7 +13,6 @@ # limitations under the License. """Fast Image processor class for Fuyu.""" -import math from typing import Optional import torch @@ -32,6 +31,7 @@ from ...utils import ( TensorType, auto_docstring, + int_div_ceil, is_torchvision_available, logging, requires_backends, @@ -286,15 +286,8 @@ def preprocess_with_tokenizer_info( image_height, image_width = image.shape[1], image.shape[2] if variable_sized: # Calculate new dimensions based on unpadded size - # The min() is required here due to floating point issues - new_h = min( - image_height, - math.ceil(image_unpadded_h[batch_index, subseq_index] / patch_height) * patch_height, - ) - new_w = min( - image_width, - math.ceil(image_unpadded_w[batch_index, subseq_index] / patch_width) * patch_width, - ) + new_h = int_div_ceil(image_unpadded_h[batch_index, subseq_index], patch_height) * patch_height + new_w = int_div_ceil(image_unpadded_w[batch_index, subseq_index], patch_width) * patch_width image = image[:, :new_h, :new_w] image_height, image_width = new_h, new_w num_patches = self.get_num_patches( diff --git a/src/transformers/models/gemma3/image_processing_gemma3.py b/src/transformers/models/gemma3/image_processing_gemma3.py index 8a185eef8cd3..87713b17aab9 100644 --- a/src/transformers/models/gemma3/image_processing_gemma3.py +++ b/src/transformers/models/gemma3/image_processing_gemma3.py @@ -39,7 +39,7 @@ validate_preprocess_arguments, ) from ...processing_utils import ImagesKwargs -from ...utils import TensorType, filter_out_non_signature_kwargs, is_vision_available, logging +from ...utils import TensorType, filter_out_non_signature_kwargs, int_div_ceil, is_vision_available, logging logger = logging.get_logger(__name__) @@ -206,8 +206,8 @@ def pan_and_scan( num_crops_h = min(pan_and_scan_max_num_crops, num_crops_h) num_crops_w = 1 - crop_size_w = int(math.ceil(width / num_crops_w)) - crop_size_h = int(math.ceil(height / num_crops_h)) + crop_size_w = int_div_ceil(width, num_crops_w) + crop_size_h = int_div_ceil(height, num_crops_h) # Don't apply PaS if crop size is too small. if min(crop_size_w, crop_size_h) < pan_and_scan_min_crop_size: diff --git a/src/transformers/models/gemma3/image_processing_gemma3_fast.py b/src/transformers/models/gemma3/image_processing_gemma3_fast.py index ca40bc945ceb..e674fdfb3589 100644 --- a/src/transformers/models/gemma3/image_processing_gemma3_fast.py +++ b/src/transformers/models/gemma3/image_processing_gemma3_fast.py @@ -31,6 +31,7 @@ from ...utils import ( TensorType, auto_docstring, + int_div_ceil, logging, ) from .image_processing_gemma3 import Gemma3ImageProcessorKwargs @@ -112,8 +113,8 @@ def pan_and_scan_batched( num_crops_h = min(pan_and_scan_max_num_crops, num_crops_h) num_crops_w = 1 - crop_size_w = int(math.ceil(width / num_crops_w)) - crop_size_h = int(math.ceil(height / num_crops_h)) + crop_size_w = int_div_ceil(width, num_crops_w) + crop_size_h = int_div_ceil(height, num_crops_h) # Don't apply PaS if crop size is too small. if min(crop_size_w, crop_size_h) < pan_and_scan_min_crop_size: diff --git a/src/transformers/models/granite_speech/feature_extraction_granite_speech.py b/src/transformers/models/granite_speech/feature_extraction_granite_speech.py index cd32d0433bae..9c239ba57727 100644 --- a/src/transformers/models/granite_speech/feature_extraction_granite_speech.py +++ b/src/transformers/models/granite_speech/feature_extraction_granite_speech.py @@ -13,14 +13,13 @@ # limitations under the License. """Feature extractor class for Granite Speech.""" -import math from collections.abc import Sequence import numpy as np from ...feature_extraction_utils import BatchFeature, FeatureExtractionMixin from ...tokenization_utils_base import AudioInput -from ...utils import is_torch_available, is_torchaudio_available, logging +from ...utils import int_div_ceil, is_torch_available, is_torchaudio_available, logging from ...utils.import_utils import requires_backends @@ -135,7 +134,7 @@ def _get_num_audio_features(self, audio_lengths: Sequence[int]) -> Sequence[int] mel_length = raw_length // hop_length + 1 # encoder frame takes two mel features encoder_length = mel_length // 2 - nblocks = math.ceil(encoder_length / self.projector_window_size) + nblocks = int_div_ceil(encoder_length, self.projector_window_size) # projector output length projector_length = nblocks * effective_window_size projector_lengths.append(projector_length) diff --git a/src/transformers/models/granite_speech/modeling_granite_speech.py b/src/transformers/models/granite_speech/modeling_granite_speech.py index 43359ec98b7e..0ae7ec6c7220 100644 --- a/src/transformers/models/granite_speech/modeling_granite_speech.py +++ b/src/transformers/models/granite_speech/modeling_granite_speech.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math from dataclasses import dataclass import torch @@ -29,6 +28,7 @@ TransformersKwargs, auto_docstring, can_return_tuple, + int_div_ceil, is_peft_available, logging, torch_compilable_check, @@ -86,7 +86,7 @@ def __init__(self, config: GraniteSpeechConfig): def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: batch_size, seq_len, dim = hidden_states.size() - nblocks = math.ceil(seq_len / self.window_size) + nblocks = int_div_ceil(seq_len, self.window_size) pad = nblocks * self.window_size - seq_len hidden_states = nn.functional.pad(hidden_states, (0, 0, 0, pad), "constant", 0) hidden_states = hidden_states.view(batch_size * nblocks, self.window_size, dim) @@ -152,7 +152,7 @@ def forward(self, hidden_states: torch.Tensor, attention_dists: torch.Tensor) -> hidden_states = self.pre_norm(hidden_states) bsz, num_features, _ = hidden_states.shape - num_blocks = math.ceil(num_features / self.context_size) + num_blocks = int_div_ceil(num_features, self.context_size) remainder = num_features % self.context_size if remainder > 0: # right padding to reach block size diff --git a/src/transformers/models/higgs_audio_v2_tokenizer/configuration_higgs_audio_v2_tokenizer.py b/src/transformers/models/higgs_audio_v2_tokenizer/configuration_higgs_audio_v2_tokenizer.py index 423241c83306..9dcc617d50f9 100644 --- a/src/transformers/models/higgs_audio_v2_tokenizer/configuration_higgs_audio_v2_tokenizer.py +++ b/src/transformers/models/higgs_audio_v2_tokenizer/configuration_higgs_audio_v2_tokenizer.py @@ -24,7 +24,7 @@ import numpy as np from ...configuration_utils import PreTrainedConfig -from ...utils import auto_docstring +from ...utils import auto_docstring, int_div_ceil from ..auto import CONFIG_MAPPING, AutoConfig @@ -147,7 +147,7 @@ def __init__( @property def frame_rate(self) -> int: - return math.ceil(self.sample_rate / self.hop_length) + return int_div_ceil(self.sample_rate, self.hop_length) @property def semantic_hidden_size(self) -> int: diff --git a/src/transformers/models/idefics3/image_processing_idefics3.py b/src/transformers/models/idefics3/image_processing_idefics3.py index 8539fcbef903..4a59b2eb23b1 100644 --- a/src/transformers/models/idefics3/image_processing_idefics3.py +++ b/src/transformers/models/idefics3/image_processing_idefics3.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math from collections.abc import Iterable from typing import Any @@ -35,7 +34,7 @@ validate_preprocess_arguments, ) from ...processing_utils import ImagesKwargs -from ...utils import TensorType, is_vision_available, logging +from ...utils import TensorType, int_div_ceil, is_vision_available, logging logger = logging.get_logger(__name__) @@ -431,11 +430,11 @@ def split_image( frames = [] if height > max_height or width > max_width: # Calculate the number of splits - num_splits_h = math.ceil(height / max_height) - num_splits_w = math.ceil(width / max_width) + num_splits_h = int_div_ceil(height, max_height) + num_splits_w = int_div_ceil(width, max_width) # Calculate the optimal width and height for the sub-images - optimal_height = math.ceil(height / num_splits_h) - optimal_width = math.ceil(width / num_splits_w) + optimal_height = int_div_ceil(height, num_splits_h) + optimal_width = int_div_ceil(width, num_splits_w) # Iterate through each row and column for r in range(num_splits_h): @@ -502,13 +501,13 @@ def resize_for_vision_encoder( aspect_ratio = width / height if width >= height: - width = math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size + width = int_div_ceil(width, vision_encoder_max_size) * vision_encoder_max_size height = int(width / aspect_ratio) - height = math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size + height = int_div_ceil(height, vision_encoder_max_size) * vision_encoder_max_size elif height > width: - height = math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size + height = int_div_ceil(height, vision_encoder_max_size) * vision_encoder_max_size width = int(height * aspect_ratio) - width = math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size + width = int_div_ceil(width, vision_encoder_max_size) * vision_encoder_max_size new_size = {"height": height, "width": width} return self.resize( image, size=new_size, resample=resample, input_data_format=input_data_format, data_format=data_format @@ -893,19 +892,19 @@ def get_number_of_image_patches(self, height: int, width: int, images_kwargs: di aspect_ratio = width / height if width >= height: - resized_width = math.ceil(width / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_width = int_div_ceil(width, max_image_size["longest_edge"]) * max_image_size["longest_edge"] resized_height = int(width / aspect_ratio) - resized_height = math.ceil(height / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_height = int_div_ceil(height, max_image_size["longest_edge"]) * max_image_size["longest_edge"] elif height > width: - resized_height = math.ceil(height / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_height = int_div_ceil(height, max_image_size["longest_edge"]) * max_image_size["longest_edge"] resized_width = int(height * aspect_ratio) - resized_width = math.ceil(width / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_width = int_div_ceil(width, max_image_size["longest_edge"]) * max_image_size["longest_edge"] max_height = max_width = max_image_size["longest_edge"] if resized_height > max_height or resized_width > max_width: # Calculate the number of splits - num_rows = math.ceil(resized_height / max_height) - num_cols = math.ceil(resized_width / max_width) + num_rows = int_div_ceil(resized_height, max_height) + num_cols = int_div_ceil(resized_width, max_width) num_patches = num_rows * num_cols + 1 return num_patches, num_rows, num_cols diff --git a/src/transformers/models/idefics3/image_processing_idefics3_fast.py b/src/transformers/models/idefics3/image_processing_idefics3_fast.py index f2795ebfd64d..2f22d6ac7fdb 100644 --- a/src/transformers/models/idefics3/image_processing_idefics3_fast.py +++ b/src/transformers/models/idefics3/image_processing_idefics3_fast.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. - -import math from typing import Optional import torch @@ -33,7 +31,7 @@ make_nested_list_of_images, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_available, logging +from ...utils import TensorType, auto_docstring, int_div_ceil, is_torchvision_available, logging from .image_processing_idefics3 import Idefics3ImageProcessorKwargs @@ -284,8 +282,8 @@ def split_images( frames = [] if height > max_height or width > max_width: # Calculate the number of splits - num_splits_h = math.ceil(height / max_height) - num_splits_w = math.ceil(width / max_width) + num_splits_h = int_div_ceil(height, max_height) + num_splits_w = int_div_ceil(width, max_width) # Split the images by height, then by width frames = ( @@ -333,13 +331,13 @@ def resize_for_vision_encoder( aspect_ratio = width / height if width >= height: - width = math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size + width = int_div_ceil(width, vision_encoder_max_size) * vision_encoder_max_size height = int(width / aspect_ratio) - height = math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size + height = int_div_ceil(height, vision_encoder_max_size) * vision_encoder_max_size elif height > width: - height = math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size + height = int_div_ceil(height, vision_encoder_max_size) * vision_encoder_max_size width = int(height * aspect_ratio) - width = math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size + width = int_div_ceil(width, vision_encoder_max_size) * vision_encoder_max_size new_size = SizeDict(height=height, width=width) return self.resize(image, size=new_size, interpolation=interpolation) @@ -530,19 +528,19 @@ def get_number_of_image_patches(self, height: int, width: int, images_kwargs: di aspect_ratio = width / height if width >= height: - resized_width = math.ceil(width / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_width = int_div_ceil(width, max_image_size["longest_edge"]) * max_image_size["longest_edge"] resized_height = int(width / aspect_ratio) - resized_height = math.ceil(height / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_height = int_div_ceil(height, max_image_size["longest_edge"]) * max_image_size["longest_edge"] elif height > width: - resized_height = math.ceil(height / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_height = int_div_ceil(height, max_image_size["longest_edge"]) * max_image_size["longest_edge"] resized_width = int(height * aspect_ratio) - resized_width = math.ceil(width / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_width = int_div_ceil(width, max_image_size["longest_edge"]) * max_image_size["longest_edge"] max_height = max_width = max_image_size["longest_edge"] if resized_height > max_height or resized_width > max_width: # Calculate the number of splits - num_rows = math.ceil(resized_height / max_height) - num_cols = math.ceil(resized_width / max_width) + num_rows = int_div_ceil(resized_height, max_height) + num_cols = int_div_ceil(resized_width, max_width) num_patches = num_rows * num_cols + 1 return num_patches, num_rows, num_cols diff --git a/src/transformers/models/jamba/configuration_jamba.py b/src/transformers/models/jamba/configuration_jamba.py index 2ea28bfed57a..a97f92ff4bdb 100644 --- a/src/transformers/models/jamba/configuration_jamba.py +++ b/src/transformers/models/jamba/configuration_jamba.py @@ -13,10 +13,9 @@ # limitations under the License. """Jamba model configuration""" -import math from ...configuration_utils import PreTrainedConfig -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging logger = logging.get_logger(__name__) @@ -118,7 +117,7 @@ def __init__( self.mamba_d_state = mamba_d_state self.mamba_d_conv = mamba_d_conv self.mamba_expand = mamba_expand - self.mamba_dt_rank = math.ceil(self.hidden_size / 16) if mamba_dt_rank == "auto" else mamba_dt_rank + self.mamba_dt_rank = int_div_ceil(self.hidden_size, 16) if mamba_dt_rank == "auto" else mamba_dt_rank self.mamba_conv_bias = mamba_conv_bias self.mamba_proj_bias = mamba_proj_bias diff --git a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py index 5b8811d615ce..1e1e2cf534a7 100755 --- a/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py +++ b/src/transformers/models/layoutlmv2/modeling_layoutlmv2.py @@ -32,7 +32,7 @@ from ...modeling_utils import PreTrainedModel from ...processing_utils import Unpack from ...pytorch_utils import apply_chunking_to_forward -from ...utils import auto_docstring, is_detectron2_available, logging, requires_backends +from ...utils import auto_docstring, int_div_ceil, is_detectron2_available, logging, requires_backends from ...utils.generic import TransformersKwargs, can_return_tuple, merge_with_config_defaults from ...utils.output_capturing import capture_outputs from .configuration_layoutlmv2 import LayoutLMv2Config @@ -500,8 +500,8 @@ def __init__(self, config): backbone_stride = self.backbone.output_shape()[self.out_feature_key].stride self.pool = nn.AvgPool2d( ( - math.ceil(math.ceil(input_shape[0] / backbone_stride) / config.image_feature_pool_shape[0]), - math.ceil(math.ceil(input_shape[1] / backbone_stride) / config.image_feature_pool_shape[1]), + int_div_ceil(int_div_ceil(input_shape[0], backbone_stride), config.image_feature_pool_shape[0]), + int_div_ceil(int_div_ceil(input_shape[1], backbone_stride), config.image_feature_pool_shape[1]), ) ) else: diff --git a/src/transformers/models/lfm2_vl/processing_lfm2_vl.py b/src/transformers/models/lfm2_vl/processing_lfm2_vl.py index bf654310d0d3..5322270b288c 100755 --- a/src/transformers/models/lfm2_vl/processing_lfm2_vl.py +++ b/src/transformers/models/lfm2_vl/processing_lfm2_vl.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import math from ...feature_extraction_utils import BatchFeature from ...image_utils import ImageInput, make_nested_list_of_images @@ -22,7 +21,7 @@ Unpack, ) from ...tokenization_utils_base import BatchEncoding, TextInput -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging logger = logging.get_logger(__name__) @@ -215,14 +214,14 @@ def _build_image_tokens( def _compute_tokens_per_tile(self, tile_size: int, encoder_patch_size: int, downsample_factor: int) -> int: """Compute the number of tokens for a single tile.""" num_patches = tile_size // encoder_patch_size - downsampled_patches = math.ceil(num_patches / downsample_factor) + downsampled_patches = int_div_ceil(num_patches, downsample_factor) return downsampled_patches * downsampled_patches def _compute_tokens_for_image(self, image_size: list[int], encoder_patch_size: int, downsample_factor: int) -> int: """Compute the number of tokens for a resized image (used for single-tile or thumbnail).""" image_height, image_width = image_size - patches_h = math.ceil((image_height // encoder_patch_size) / downsample_factor) - patches_w = math.ceil((image_width // encoder_patch_size) / downsample_factor) + patches_h = int_div_ceil(image_height // encoder_patch_size, downsample_factor) + patches_w = int_div_ceil(image_width // encoder_patch_size, downsample_factor) return patches_h * patches_w def _get_image_num_tokens(self, image_size: list[int], **images_kwargs) -> tuple[int, int]: diff --git a/src/transformers/models/llava_onevision/modeling_llava_onevision.py b/src/transformers/models/llava_onevision/modeling_llava_onevision.py index b01d3d00edf2..7e4b168c3253 100644 --- a/src/transformers/models/llava_onevision/modeling_llava_onevision.py +++ b/src/transformers/models/llava_onevision/modeling_llava_onevision.py @@ -34,7 +34,7 @@ from ...modeling_outputs import BaseModelOutputWithPast, BaseModelOutputWithPooling, ModelOutput from ...modeling_utils import PreTrainedModel from ...processing_utils import Unpack -from ...utils import TransformersKwargs, auto_docstring, torch_compilable_check +from ...utils import TransformersKwargs, auto_docstring, int_div_ceil, torch_compilable_check from ...utils.generic import can_return_tuple, merge_with_config_defaults from ..auto import AutoModel from .configuration_llava_onevision import LlavaOnevisionConfig @@ -621,7 +621,7 @@ def apply_pooling(self, image_features): image_features = image_features.permute(0, 3, 1, 2).contiguous() height, width = image_features.shape[2:] - scaled_shape = [math.ceil(height / 2), math.ceil(width / 2)] + scaled_shape = [int_div_ceil(height, 2), int_div_ceil(width, 2)] image_features = nn.functional.interpolate(image_features, size=scaled_shape, mode="bilinear") image_features = image_features.permute(0, 2, 3, 1) diff --git a/src/transformers/models/llava_onevision/modular_llava_onevision.py b/src/transformers/models/llava_onevision/modular_llava_onevision.py index cb535b276ca2..d4874f65dd60 100644 --- a/src/transformers/models/llava_onevision/modular_llava_onevision.py +++ b/src/transformers/models/llava_onevision/modular_llava_onevision.py @@ -47,7 +47,7 @@ from ...modeling_flash_attention_utils import FlashAttentionKwargs from ...modeling_outputs import BaseModelOutputWithPooling from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, logging +from ...utils import TensorType, auto_docstring, int_div_ceil, logging from ...utils.generic import can_return_tuple, merge_with_config_defaults from .image_processing_llava_onevision import LlavaOnevisionImageProcessorKwargs @@ -303,7 +303,7 @@ def apply_pooling(self, image_features): image_features = image_features.permute(0, 3, 1, 2).contiguous() height, width = image_features.shape[2:] - scaled_shape = [math.ceil(height / 2), math.ceil(width / 2)] + scaled_shape = [int_div_ceil(height, 2), int_div_ceil(width, 2)] image_features = nn.functional.interpolate(image_features, size=scaled_shape, mode="bilinear") image_features = image_features.permute(0, 2, 3, 1) diff --git a/src/transformers/models/llava_onevision/processing_llava_onevision.py b/src/transformers/models/llava_onevision/processing_llava_onevision.py index 3bd407123864..f4670167d077 100644 --- a/src/transformers/models/llava_onevision/processing_llava_onevision.py +++ b/src/transformers/models/llava_onevision/processing_llava_onevision.py @@ -25,7 +25,7 @@ from ...image_utils import ImageInput, get_image_size, to_numpy_array from ...processing_utils import MultiModalData, ProcessingKwargs, ProcessorMixin, Unpack from ...tokenization_utils_base import PreTokenizedInput, TextInput -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging from ...video_utils import VideoInput @@ -146,7 +146,7 @@ def __call__( height, width = get_image_size(one_video[0], channel_dim=output_kwargs["images_kwargs"].get("data_format")) num_frames = one_video.shape[0] # frame dim is always after batch dim patches_height_width = int(math.sqrt(self.num_image_tokens)) - pooled_height_width = math.ceil(patches_height_width / 2) + pooled_height_width = int_div_ceil(patches_height_width, 2) num_video_tokens = (num_frames * pooled_height_width * pooled_height_width) + 1 # +1 for newline token text = [sample.replace(self.video_token, self.video_token * num_video_tokens) for sample in text] diff --git a/src/transformers/models/mamba/configuration_mamba.py b/src/transformers/models/mamba/configuration_mamba.py index 68533fe90c0b..c3997924b5fd 100644 --- a/src/transformers/models/mamba/configuration_mamba.py +++ b/src/transformers/models/mamba/configuration_mamba.py @@ -13,10 +13,9 @@ # limitations under the License. """MAMBA configuration""" -import math from ...configuration_utils import PreTrainedConfig -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging logger = logging.get_logger(__name__) @@ -109,7 +108,7 @@ def __init__( self.use_conv_bias = use_conv_bias self.hidden_act = hidden_act self.initializer_range = initializer_range - self.time_step_rank = math.ceil(self.hidden_size / 16) if time_step_rank == "auto" else time_step_rank + self.time_step_rank = int_div_ceil(self.hidden_size, 16) if time_step_rank == "auto" else time_step_rank self.time_step_scale = time_step_scale self.time_step_min = time_step_min self.time_step_max = time_step_max diff --git a/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py b/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py index 96dfbbc1d4a7..3f4461b29eef 100644 --- a/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py +++ b/src/transformers/models/mamba/convert_mamba_ssm_checkpoint_to_pytorch.py @@ -15,12 +15,11 @@ import argparse import json -import math import torch from transformers import AutoTokenizer, MambaConfig, MambaForCausalLM -from transformers.utils import logging +from transformers.utils import int_div_ceil, logging from transformers.utils.import_utils import is_mamba_ssm_available @@ -34,7 +33,7 @@ def convert_ssm_config_to_hf_config(config_ssm: MambaConfigSSM) -> MambaConfig: # Set config hidden size, num hidden layers, and vocab size directly from the original config hf_config.hidden_size = config_ssm.d_model hf_config.intermediate_size = config_ssm.d_model * 2 - hf_config.time_step_rank = math.ceil(config_ssm.d_model / 16) + hf_config.time_step_rank = int_div_ceil(config_ssm.d_model, 16) hf_config.num_hidden_layers = config_ssm.n_layer vocab_size = config_ssm.vocab_size diff --git a/src/transformers/models/mamba2/configuration_mamba2.py b/src/transformers/models/mamba2/configuration_mamba2.py index 575d61520393..96bfe4f23f7b 100644 --- a/src/transformers/models/mamba2/configuration_mamba2.py +++ b/src/transformers/models/mamba2/configuration_mamba2.py @@ -13,10 +13,9 @@ # limitations under the License. """MAMBA2 configuration""" -import math from ...configuration_utils import PreTrainedConfig -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging logger = logging.get_logger(__name__) @@ -113,7 +112,7 @@ def __init__( self.use_conv_bias = use_conv_bias self.hidden_act = hidden_act self.initializer_range = initializer_range - self.time_step_rank = math.ceil(self.hidden_size / 16) if time_step_rank == "auto" else time_step_rank + self.time_step_rank = int_div_ceil(self.hidden_size, 16) if time_step_rank == "auto" else time_step_rank self.time_step_min = time_step_min self.time_step_max = time_step_max self.time_step_floor = time_step_floor diff --git a/src/transformers/models/mask2former/image_processing_mask2former.py b/src/transformers/models/mask2former/image_processing_mask2former.py index b22bddf8d044..0a15a55e6ee4 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former.py +++ b/src/transformers/models/mask2former/image_processing_mask2former.py @@ -13,7 +13,6 @@ # limitations under the License. """Image processor class for Mask2Former.""" -import math from collections.abc import Iterable from typing import Any @@ -46,6 +45,7 @@ IMAGENET_DEFAULT_STD, TensorType, filter_out_non_signature_kwargs, + int_div_ceil, is_torch_available, is_torch_tensor, logging, @@ -360,8 +360,8 @@ def get_mask2former_resize_output_image_size( if size_divisor > 0: height, width = output_size - height = int(math.ceil(height / size_divisor) * size_divisor) - width = int(math.ceil(width / size_divisor) * size_divisor) + height = int_div_ceil(height, size_divisor) * size_divisor + width = int_div_ceil(width, size_divisor) * size_divisor output_size = (height, width) return output_size diff --git a/src/transformers/models/mask2former/image_processing_mask2former_fast.py b/src/transformers/models/mask2former/image_processing_mask2former_fast.py index 194b2aff9ea3..966ca32b5fd2 100644 --- a/src/transformers/models/mask2former/image_processing_mask2former_fast.py +++ b/src/transformers/models/mask2former/image_processing_mask2former_fast.py @@ -18,7 +18,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math from typing import Any, Optional, Union import torch @@ -44,7 +43,7 @@ PILImageResampling, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, logging +from ...utils import TensorType, auto_docstring, int_div_ceil, logging from .image_processing_mask2former import ( Mask2FormerImageProcessorKwargs, compute_segments, @@ -189,8 +188,8 @@ def resize( ) if size_divisor > 0: height, width = new_size - height = int(math.ceil(height / size_divisor) * size_divisor) - width = int(math.ceil(width / size_divisor) * size_divisor) + height = int_div_ceil(height, size_divisor) * size_divisor + width = int_div_ceil(width, size_divisor) * size_divisor new_size = (height, width) image = tvF.resize( diff --git a/src/transformers/models/maskformer/image_processing_maskformer.py b/src/transformers/models/maskformer/image_processing_maskformer.py index 44f5aa24e96e..4cf0ea051116 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer.py +++ b/src/transformers/models/maskformer/image_processing_maskformer.py @@ -13,7 +13,6 @@ # limitations under the License. """Image processor class for MaskFormer.""" -import math from collections.abc import Iterable from typing import Any @@ -46,6 +45,7 @@ IMAGENET_DEFAULT_STD, TensorType, filter_out_non_signature_kwargs, + int_div_ceil, is_torch_available, is_torch_tensor, logging, @@ -359,8 +359,8 @@ def get_maskformer_resize_output_image_size( if size_divisor > 0: height, width = output_size - height = int(math.ceil(height / size_divisor) * size_divisor) - width = int(math.ceil(width / size_divisor) * size_divisor) + height = int_div_ceil(height, size_divisor) * size_divisor + width = int_div_ceil(width, size_divisor) * size_divisor output_size = (height, width) return output_size diff --git a/src/transformers/models/maskformer/image_processing_maskformer_fast.py b/src/transformers/models/maskformer/image_processing_maskformer_fast.py index 33abc9b8f38b..9b62b8301d6a 100644 --- a/src/transformers/models/maskformer/image_processing_maskformer_fast.py +++ b/src/transformers/models/maskformer/image_processing_maskformer_fast.py @@ -13,7 +13,6 @@ # limitations under the License. """Fast Image processor class for MaskFormer.""" -import math from typing import TYPE_CHECKING, Any, Optional, Union import torch @@ -42,6 +41,7 @@ from ...utils import ( TensorType, auto_docstring, + int_div_ceil, logging, ) from .image_processing_maskformer import ( @@ -192,8 +192,8 @@ def resize( ) if size_divisor > 0: height, width = new_size - height = int(math.ceil(height / size_divisor) * size_divisor) - width = int(math.ceil(width / size_divisor) * size_divisor) + height = int_div_ceil(height, size_divisor) * size_divisor + width = int_div_ceil(width, size_divisor) * size_divisor new_size = (height, width) image = tvF.resize( diff --git a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py index ae3a682fdb58..6db36199dca3 100644 --- a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py +++ b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py @@ -25,7 +25,6 @@ # Import MetaCLIP modules from src.mini_clip.factory import create_model_and_transforms - from transformers import ( AutoTokenizer, CLIPImageProcessor, diff --git a/src/transformers/models/mimi/configuration_mimi.py b/src/transformers/models/mimi/configuration_mimi.py index 8614bb5da840..ecb60eab0f77 100644 --- a/src/transformers/models/mimi/configuration_mimi.py +++ b/src/transformers/models/mimi/configuration_mimi.py @@ -19,7 +19,7 @@ from ...configuration_utils import PreTrainedConfig from ...modeling_rope_utils import RopeParameters -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging logger = logging.get_logger(__name__) @@ -185,7 +185,7 @@ def __init__( @property def encodec_frame_rate(self) -> int: hop_length = np.prod(self.upsampling_ratios) - return math.ceil(self.sampling_rate / hop_length) + return int_div_ceil(self.sampling_rate, int(hop_length)) @property def num_codebooks(self) -> int: diff --git a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py index 857987f65ac9..970b5bdd38d0 100644 --- a/src/transformers/models/mllama/convert_mllama_weights_to_hf.py +++ b/src/transformers/models/mllama/convert_mllama_weights_to_hf.py @@ -15,7 +15,6 @@ import argparse import gc import json -import math import os import regex as re @@ -32,6 +31,7 @@ from transformers.convert_slow_tokenizer import TikTokenConverter from transformers.models.mllama.configuration_mllama import MllamaTextConfig, MllamaVisionConfig from transformers.models.mllama.image_processing_mllama import get_all_supported_aspect_ratios +from transformers.utils import int_div_ceil # fmt: off @@ -257,7 +257,7 @@ def write_model( text_key_value_dim = text_dim # cross-attention layers: 20 for 90B, 8 for 11B - cross_attention_frequency = math.ceil(text_num_layers / cross_attention_num_layers) + cross_attention_frequency = int_div_ceil(text_num_layers, cross_attention_num_layers) text_num_total_layers = text_num_layers + cross_attention_num_layers cross_attention_layers_shift = list( range(cross_attention_frequency - 1, text_num_total_layers, cross_attention_frequency + 1) diff --git a/src/transformers/models/mobilevit/modeling_mobilevit.py b/src/transformers/models/mobilevit/modeling_mobilevit.py index bdb4953da3e0..d5c94c86d5b6 100755 --- a/src/transformers/models/mobilevit/modeling_mobilevit.py +++ b/src/transformers/models/mobilevit/modeling_mobilevit.py @@ -31,7 +31,7 @@ SemanticSegmenterOutput, ) from ...modeling_utils import PreTrainedModel -from ...utils import auto_docstring, logging, torch_int +from ...utils import auto_docstring, int_div_ceil, logging, torch_int from .configuration_mobilevit import MobileViTConfig @@ -405,12 +405,12 @@ def unfolding(self, features: torch.Tensor) -> tuple[torch.Tensor, dict]: new_height = ( torch_int(torch.ceil(orig_height / patch_height) * patch_height) if torch.jit.is_tracing() - else int(math.ceil(orig_height / patch_height) * patch_height) + else int_div_ceil(orig_height, patch_height) * patch_height ) new_width = ( torch_int(torch.ceil(orig_width / patch_width) * patch_width) if torch.jit.is_tracing() - else int(math.ceil(orig_width / patch_width) * patch_width) + else int_div_ceil(orig_width, patch_width) * patch_width ) interpolate = False diff --git a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py index 084d0ac93a0e..d0fe13b57635 100644 --- a/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py +++ b/src/transformers/models/musicgen_melody/modeling_musicgen_melody.py @@ -43,7 +43,7 @@ from ...modeling_outputs import BaseModelOutputWithPast, ModelOutput from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel from ...processing_utils import Unpack -from ...utils import TransformersKwargs, auto_docstring, logging +from ...utils import TransformersKwargs, auto_docstring, int_div_ceil, logging from ...utils.generic import merge_with_config_defaults from ...utils.output_capturing import OutputRecorder, capture_outputs from ..auto.configuration_auto import AutoConfig @@ -1539,7 +1539,7 @@ def forward( # pad or truncate to config.chroma_length if audio_hidden_states.shape[1] < self.config.chroma_length: - n_repeat = int(math.ceil(self.config.chroma_length / audio_hidden_states.shape[1])) + n_repeat = int_div_ceil(self.config.chroma_length, audio_hidden_states.shape[1]) audio_hidden_states = audio_hidden_states.repeat(1, n_repeat, 1) else: logger.warning( @@ -1771,7 +1771,7 @@ def _prepare_encoder_hidden_states_kwargs_for_generation( # pad or truncate to config.chroma_length if audio_hidden_states.shape[1] < self.config.chroma_length: - n_repeat = int(math.ceil(self.config.chroma_length / audio_hidden_states.shape[1])) + n_repeat = int_div_ceil(self.config.chroma_length, audio_hidden_states.shape[1]) audio_hidden_states = audio_hidden_states.repeat(1, n_repeat, 1) audio_hidden_states = audio_hidden_states[:, : self.config.chroma_length] diff --git a/src/transformers/models/nllb_moe/modeling_nllb_moe.py b/src/transformers/models/nllb_moe/modeling_nllb_moe.py index 313b574518f4..548f5eef41b1 100644 --- a/src/transformers/models/nllb_moe/modeling_nllb_moe.py +++ b/src/transformers/models/nllb_moe/modeling_nllb_moe.py @@ -37,7 +37,7 @@ ) from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel from ...processing_utils import Unpack -from ...utils import TransformersKwargs, auto_docstring, logging +from ...utils import TransformersKwargs, auto_docstring, int_div_ceil, logging from ...utils.generic import can_return_tuple, merge_with_config_defaults from ...utils.output_capturing import OutputRecorder, capture_outputs from .configuration_nllb_moe import NllbMoeConfig @@ -269,7 +269,7 @@ def route_tokens( if not self.training and self.moe_eval_capacity_token_fraction > 0: self.expert_capacity = math.ceil(self.moe_eval_capacity_token_fraction * nb_tokens) else: - capacity = 2 * math.ceil(nb_tokens / self.num_experts) + capacity = 2 * int_div_ceil(nb_tokens, self.num_experts) self.expert_capacity = capacity if self.expert_capacity is None else self.expert_capacity # Remove locations outside capacity from ( cumsum < capacity = False will not be routed) diff --git a/src/transformers/models/pe_audio/modeling_pe_audio.py b/src/transformers/models/pe_audio/modeling_pe_audio.py index e502073a95c7..ed4bf33b50a7 100644 --- a/src/transformers/models/pe_audio/modeling_pe_audio.py +++ b/src/transformers/models/pe_audio/modeling_pe_audio.py @@ -17,7 +17,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import math from collections.abc import Callable from dataclasses import dataclass from typing import Any, Optional @@ -37,7 +36,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel from ...processing_utils import Unpack -from ...utils import ModelOutput, TransformersKwargs, auto_docstring, can_return_tuple +from ...utils import ModelOutput, TransformersKwargs, auto_docstring, can_return_tuple, int_div_ceil from ...utils.generic import maybe_autocast, merge_with_config_defaults from ...utils.output_capturing import capture_outputs from ..auto import AutoModel @@ -110,7 +109,7 @@ def __init__(self, config: PreTrainedConfig, stride: int = 1, stride_index: int self.res_unit3 = PeAudioDacResidualUnit(dimension // 2, dilation=9) self.snake1 = Snake1d(dimension // 2) self.conv1 = nn.Conv1d( - dimension // 2, dimension, kernel_size=2 * stride, stride=stride, padding=math.ceil(stride / 2) + dimension // 2, dimension, kernel_size=2 * stride, stride=stride, padding=int_div_ceil(stride, 2) ) def forward(self, hidden_state): diff --git a/src/transformers/models/perceiver/modeling_perceiver.py b/src/transformers/models/perceiver/modeling_perceiver.py index 02f67aca7908..200c14a637cc 100755 --- a/src/transformers/models/perceiver/modeling_perceiver.py +++ b/src/transformers/models/perceiver/modeling_perceiver.py @@ -31,7 +31,7 @@ from ...modeling_outputs import BaseModelOutputWithCrossAttentions from ...modeling_utils import PreTrainedModel from ...pytorch_utils import apply_chunking_to_forward -from ...utils import ModelOutput, auto_docstring, logging, torch_int +from ...utils import ModelOutput, auto_docstring, int_div_ceil, logging, torch_int from .configuration_perceiver import PerceiverConfig @@ -2965,7 +2965,7 @@ def num_channels(self) -> int: elif self.prep_type == "pixels": inp_dim = self.in_channels if not is_temporal: - inp_dim = math.ceil(inp_dim / self.spatial_downsample) + inp_dim = int_div_ceil(inp_dim, self.spatial_downsample) elif self.prep_type == "patches": if self.conv_after_patching: inp_dim = self.out_channels diff --git a/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py b/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py index 27d02b9ada7f..15ec606245a7 100644 --- a/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py +++ b/src/transformers/models/phi4_multimodal/modeling_phi4_multimodal.py @@ -44,7 +44,7 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel from ...processing_utils import Unpack -from ...utils import auto_docstring, torch_int +from ...utils import auto_docstring, int_div_ceil, torch_int from ...utils.generic import TransformersKwargs, can_return_tuple, maybe_autocast, merge_with_config_defaults from ...utils.output_capturing import capture_outputs from .configuration_phi4_multimodal import Phi4MultimodalAudioConfig, Phi4MultimodalConfig, Phi4MultimodalVisionConfig @@ -954,7 +954,7 @@ def _streaming_mask(self, seq_len, batch_size, chunk_size, left_chunk): def forward_embeddings(self, hidden_states, masks): """Forwarding the inputs through the top embedding layers""" - seq_len = math.ceil(hidden_states.shape[1] / self.config.time_reduction) + seq_len = int_div_ceil(hidden_states.shape[1], self.config.time_reduction) if seq_len <= 0: raise ValueError( f"The sequence length after time reduction is invalid: {seq_len}. Your input feature is too short." diff --git a/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py b/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py index ef6bf1588c47..c01fa3fb3396 100644 --- a/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py +++ b/src/transformers/models/phi4_multimodal/modular_phi4_multimodal.py @@ -34,7 +34,7 @@ from ...modeling_rope_utils import RopeParameters from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel from ...processing_utils import Unpack -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging from ...utils.generic import ( TransformersKwargs, can_return_tuple, @@ -1063,7 +1063,7 @@ def _streaming_mask(self, seq_len, batch_size, chunk_size, left_chunk): def forward_embeddings(self, hidden_states, masks): """Forwarding the inputs through the top embedding layers""" - seq_len = math.ceil(hidden_states.shape[1] / self.config.time_reduction) + seq_len = int_div_ceil(hidden_states.shape[1], self.config.time_reduction) if seq_len <= 0: raise ValueError( f"The sequence length after time reduction is invalid: {seq_len}. Your input feature is too short." diff --git a/src/transformers/models/qwen3_vl/video_processing_qwen3_vl.py b/src/transformers/models/qwen3_vl/video_processing_qwen3_vl.py index 9f545d272891..bc19a4abbbcc 100644 --- a/src/transformers/models/qwen3_vl/video_processing_qwen3_vl.py +++ b/src/transformers/models/qwen3_vl/video_processing_qwen3_vl.py @@ -21,7 +21,7 @@ from ...feature_extraction_utils import BatchFeature from ...image_utils import ChannelDimension, PILImageResampling, SizeDict, get_image_size from ...processing_utils import Unpack, VideosKwargs -from ...utils import TensorType, add_start_docstrings, logging +from ...utils import TensorType, add_start_docstrings, int_div_ceil, logging from ...video_processing_utils import BASE_VIDEO_PROCESSOR_DOCSTRING, BaseVideoProcessor from ...video_utils import VideoMetadata, group_videos_by_shape, reorder_videos @@ -46,7 +46,7 @@ def smart_resize( ) h_bar = round(height / factor) * factor w_bar = round(width / factor) * factor - t_bar = math.ceil(num_frames / temporal_factor) * temporal_factor + t_bar = int_div_ceil(num_frames, temporal_factor) * temporal_factor if t_bar * h_bar * w_bar > max_pixels: beta = math.sqrt((num_frames * height * width) / max_pixels) diff --git a/src/transformers/models/smolvlm/image_processing_smolvlm.py b/src/transformers/models/smolvlm/image_processing_smolvlm.py index c86beab858a2..a576bb884993 100644 --- a/src/transformers/models/smolvlm/image_processing_smolvlm.py +++ b/src/transformers/models/smolvlm/image_processing_smolvlm.py @@ -19,7 +19,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math from collections.abc import Iterable import numpy as np @@ -41,7 +40,7 @@ validate_preprocess_arguments, ) from ...processing_utils import ImagesKwargs -from ...utils import TensorType, is_vision_available, logging +from ...utils import TensorType, int_div_ceil, is_vision_available, logging if is_vision_available(): @@ -430,11 +429,11 @@ def split_image( frames = [] if height > max_height or width > max_width: # Calculate the number of splits - num_splits_h = math.ceil(height / max_height) - num_splits_w = math.ceil(width / max_width) + num_splits_h = int_div_ceil(height, max_height) + num_splits_w = int_div_ceil(width, max_width) # Calculate the optimal width and height for the sub-images - optimal_height = math.ceil(height / num_splits_h) - optimal_width = math.ceil(width / num_splits_w) + optimal_height = int_div_ceil(height, num_splits_h) + optimal_width = int_div_ceil(width, num_splits_w) # Iterate through each row and column for r in range(num_splits_h): @@ -501,13 +500,13 @@ def resize_for_vision_encoder( aspect_ratio = width / height if width >= height: - width = math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size + width = int_div_ceil(width, vision_encoder_max_size) * vision_encoder_max_size height = int(width / aspect_ratio) - height = math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size + height = int_div_ceil(height, vision_encoder_max_size) * vision_encoder_max_size elif height > width: - height = math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size + height = int_div_ceil(height, vision_encoder_max_size) * vision_encoder_max_size width = int(height * aspect_ratio) - width = math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size + width = int_div_ceil(width, vision_encoder_max_size) * vision_encoder_max_size new_size = {"height": height, "width": width} return self.resize( image, size=new_size, resample=resample, input_data_format=input_data_format, data_format=data_format @@ -892,19 +891,19 @@ def get_number_of_image_patches(self, height: int, width: int, images_kwargs: di aspect_ratio = width / height if width >= height: - resized_width = math.ceil(width / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_width = int_div_ceil(width, max_image_size["longest_edge"]) * max_image_size["longest_edge"] resized_height = int(width / aspect_ratio) - resized_height = math.ceil(height / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_height = int_div_ceil(height, max_image_size["longest_edge"]) * max_image_size["longest_edge"] elif height > width: - resized_height = math.ceil(height / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_height = int_div_ceil(height, max_image_size["longest_edge"]) * max_image_size["longest_edge"] resized_width = int(height * aspect_ratio) - resized_width = math.ceil(width / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_width = int_div_ceil(width, max_image_size["longest_edge"]) * max_image_size["longest_edge"] max_height = max_width = max_image_size["longest_edge"] if resized_height > max_height or resized_width > max_width: # Calculate the number of splits - num_rows = math.ceil(resized_height / max_height) - num_cols = math.ceil(resized_width / max_width) + num_rows = int_div_ceil(resized_height, max_height) + num_cols = int_div_ceil(resized_width, max_width) num_patches = num_rows * num_cols + 1 return num_patches, num_rows, num_cols diff --git a/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py b/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py index 59ba2fc1f154..af9aaa486f4d 100644 --- a/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py +++ b/src/transformers/models/smolvlm/image_processing_smolvlm_fast.py @@ -19,7 +19,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import math from typing import Optional import torch @@ -34,7 +33,7 @@ make_nested_list_of_images, ) from ...processing_utils import Unpack -from ...utils import TensorType, auto_docstring, is_torchvision_available, logging +from ...utils import TensorType, auto_docstring, int_div_ceil, is_torchvision_available, logging from .image_processing_smolvlm import SmolVLMImageProcessorKwargs @@ -269,8 +268,8 @@ def split_images( frames = [] if height > max_height or width > max_width: # Calculate the number of splits - num_splits_h = math.ceil(height / max_height) - num_splits_w = math.ceil(width / max_width) + num_splits_h = int_div_ceil(height, max_height) + num_splits_w = int_div_ceil(width, max_width) # Split the images by height, then by width frames = ( @@ -318,13 +317,13 @@ def resize_for_vision_encoder( aspect_ratio = width / height if width >= height: - width = math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size + width = int_div_ceil(width, vision_encoder_max_size) * vision_encoder_max_size height = int(width / aspect_ratio) - height = math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size + height = int_div_ceil(height, vision_encoder_max_size) * vision_encoder_max_size elif height > width: - height = math.ceil(height / vision_encoder_max_size) * vision_encoder_max_size + height = int_div_ceil(height, vision_encoder_max_size) * vision_encoder_max_size width = int(height * aspect_ratio) - width = math.ceil(width / vision_encoder_max_size) * vision_encoder_max_size + width = int_div_ceil(width, vision_encoder_max_size) * vision_encoder_max_size new_size = SizeDict(height=height, width=width) return self.resize(image, size=new_size, interpolation=interpolation) @@ -515,19 +514,19 @@ def get_number_of_image_patches(self, height: int, width: int, images_kwargs: di aspect_ratio = width / height if width >= height: - resized_width = math.ceil(width / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_width = int_div_ceil(width, max_image_size["longest_edge"]) * max_image_size["longest_edge"] resized_height = int(width / aspect_ratio) - resized_height = math.ceil(height / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_height = int_div_ceil(height, max_image_size["longest_edge"]) * max_image_size["longest_edge"] elif height > width: - resized_height = math.ceil(height / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_height = int_div_ceil(height, max_image_size["longest_edge"]) * max_image_size["longest_edge"] resized_width = int(height * aspect_ratio) - resized_width = math.ceil(width / max_image_size["longest_edge"]) * max_image_size["longest_edge"] + resized_width = int_div_ceil(width, max_image_size["longest_edge"]) * max_image_size["longest_edge"] max_height = max_width = max_image_size["longest_edge"] if resized_height > max_height or resized_width > max_width: # Calculate the number of splits - num_rows = math.ceil(resized_height / max_height) - num_cols = math.ceil(resized_width / max_width) + num_rows = int_div_ceil(resized_height, max_height) + num_cols = int_div_ceil(resized_width, max_width) num_patches = num_rows * num_cols + 1 return num_patches, num_rows, num_cols diff --git a/src/transformers/models/voxtral_realtime/modeling_voxtral_realtime.py b/src/transformers/models/voxtral_realtime/modeling_voxtral_realtime.py index 78b120f96a03..61b082d71a9c 100644 --- a/src/transformers/models/voxtral_realtime/modeling_voxtral_realtime.py +++ b/src/transformers/models/voxtral_realtime/modeling_voxtral_realtime.py @@ -39,7 +39,14 @@ from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update from ...modeling_utils import ALL_ATTENTION_FUNCTIONS, PreTrainedModel from ...processing_utils import Unpack -from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torchdynamo_compiling, logging +from ...utils import ( + TransformersKwargs, + auto_docstring, + can_return_tuple, + int_div_ceil, + is_torchdynamo_compiling, + logging, +) from ...utils.generic import maybe_autocast, merge_with_config_defaults from ...utils.output_capturing import capture_outputs from ..auto import AutoModel @@ -1251,7 +1258,7 @@ def _prepare_generation_config( input_features = model_kwargs.get("input_features") if input_features is not None and not isinstance(input_features, GeneratorType): audio_length = input_features.shape[-1] - num_audio_tokens = math.ceil(audio_length / self.config.audio_length_per_tok) + num_audio_tokens = int_div_ceil(audio_length, self.config.audio_length_per_tok) # Stash for use in _prepare_generated_length generation_config._num_audio_tokens = num_audio_tokens diff --git a/src/transformers/models/voxtral_realtime/modular_voxtral_realtime.py b/src/transformers/models/voxtral_realtime/modular_voxtral_realtime.py index 85c6007a3edc..be96ed793e3f 100644 --- a/src/transformers/models/voxtral_realtime/modular_voxtral_realtime.py +++ b/src/transformers/models/voxtral_realtime/modular_voxtral_realtime.py @@ -42,7 +42,14 @@ VoxtralPreTrainedModel, ) from ...processing_utils import Unpack -from ...utils import TransformersKwargs, auto_docstring, can_return_tuple, is_torchdynamo_compiling, logging +from ...utils import ( + TransformersKwargs, + auto_docstring, + can_return_tuple, + int_div_ceil, + is_torchdynamo_compiling, + logging, +) from ...utils.generic import merge_with_config_defaults from ...utils.output_capturing import capture_outputs from .configuration_voxtral_realtime import VoxtralRealtimeEncoderConfig @@ -816,7 +823,7 @@ def _prepare_generation_config( input_features = model_kwargs.get("input_features") if input_features is not None and not isinstance(input_features, GeneratorType): audio_length = input_features.shape[-1] - num_audio_tokens = math.ceil(audio_length / self.config.audio_length_per_tok) + num_audio_tokens = int_div_ceil(audio_length, self.config.audio_length_per_tok) # Stash for use in _prepare_generated_length generation_config._num_audio_tokens = num_audio_tokens diff --git a/src/transformers/models/xcodec/configuration_xcodec.py b/src/transformers/models/xcodec/configuration_xcodec.py index 6c0479d425a6..8edea22df1c6 100644 --- a/src/transformers/models/xcodec/configuration_xcodec.py +++ b/src/transformers/models/xcodec/configuration_xcodec.py @@ -18,7 +18,7 @@ import numpy as np from ...configuration_utils import PreTrainedConfig -from ...utils import auto_docstring +from ...utils import auto_docstring, int_div_ceil from ..auto import CONFIG_MAPPING, AutoConfig @@ -128,7 +128,7 @@ def __init__( @property def frame_rate(self) -> int: - return math.ceil(self.sample_rate / self.hop_length) + return int_div_ceil(self.sample_rate, self.hop_length) @property def semantic_hidden_size(self) -> int: diff --git a/src/transformers/models/zamba/configuration_zamba.py b/src/transformers/models/zamba/configuration_zamba.py index eeb652d47c70..54dfe8b29d38 100644 --- a/src/transformers/models/zamba/configuration_zamba.py +++ b/src/transformers/models/zamba/configuration_zamba.py @@ -13,10 +13,9 @@ # limitations under the License. """Zamba model configuration""" -import math from ...configuration_utils import PreTrainedConfig -from ...utils import auto_docstring, logging +from ...utils import auto_docstring, int_div_ceil, logging logger = logging.get_logger(__name__) @@ -125,7 +124,7 @@ def __init__( self.mamba_d_state = mamba_d_state self.mamba_d_conv = mamba_d_conv self.mamba_expand = mamba_expand - self.mamba_dt_rank = math.ceil(self.hidden_size / 16) if mamba_dt_rank == "auto" else mamba_dt_rank + self.mamba_dt_rank = int_div_ceil(self.hidden_size, 16) if mamba_dt_rank == "auto" else mamba_dt_rank self.time_step_min = time_step_min self.time_step_max = time_step_max self.time_step_floor = time_step_floor diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 10d1938f8732..5214ae0c4afb 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -161,6 +161,7 @@ can_return_loss, check_torch_load_is_safe, find_labels, + int_div_ceil, is_accelerate_available, is_datasets_available, is_in_notebook, @@ -2590,7 +2591,7 @@ def evaluate( metric_key_prefix, start_time, num_samples=output.num_samples, - num_steps=math.ceil(output.num_samples / total_batch_size), + num_steps=int_div_ceil(output.num_samples, total_batch_size), ) ) @@ -2864,7 +2865,7 @@ def predict( metric_key_prefix, start_time, num_samples=output.num_samples, - num_steps=math.ceil(output.num_samples / total_batch_size), + num_steps=int_div_ceil(output.num_samples, total_batch_size), ) ) diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py index 30377f5f5a61..d107a5941b69 100644 --- a/src/transformers/trainer_pt_utils.py +++ b/src/transformers/trainer_pt_utils.py @@ -20,7 +20,6 @@ import datetime import io import json -import math import os import re import sys @@ -43,6 +42,7 @@ from .integrations.deepspeed import is_deepspeed_zero3_enabled from .tokenization_utils_base import BatchEncoding from .utils import ( + int_div_ceil, is_sagemaker_mp_enabled, is_torch_available, is_torch_xla_available, @@ -612,9 +612,9 @@ def __init__( # Split to nearest available length that is evenly divisible. # This is to ensure each rank receives the same amount of data when # using this Sampler. - self.num_samples = math.ceil((len(self.lengths) - self.num_replicas) / self.num_replicas) + self.num_samples = int_div_ceil(len(self.lengths) - self.num_replicas, self.num_replicas) else: - self.num_samples = math.ceil(len(self.lengths) / self.num_replicas) + self.num_samples = int_div_ceil(len(self.lengths), self.num_replicas) self.total_size = self.num_samples * self.num_replicas self.seed = seed @@ -664,7 +664,7 @@ def __init__( self.total_batch_size = total_batch_size = batch_size * num_processes - num_batches = len(dataset) // total_batch_size if drop_last else math.ceil(len(dataset) / total_batch_size) + num_batches = len(dataset) // total_batch_size if drop_last else int_div_ceil(len(dataset), total_batch_size) self.total_num_samples = num_batches * total_batch_size def __iter__(self): @@ -788,7 +788,7 @@ def __len__(self): if self.drop_last: return (len(self.dataset) // (self.batch_size * self.num_processes)) * self.batch_size else: - return math.ceil(len(self.dataset) / (self.batch_size * self.num_processes)) * self.batch_size + return int_div_ceil(len(self.dataset), self.batch_size * self.num_processes) * self.batch_size def _secs2timedelta(secs): diff --git a/src/transformers/utils/generic.py b/src/transformers/utils/generic.py index 2dd6af34eb8a..971f1967c460 100644 --- a/src/transformers/utils/generic.py +++ b/src/transformers/utils/generic.py @@ -665,7 +665,7 @@ def torch_float(x): return x.to(torch.float32) if torch.jit.is_tracing() and isinstance(x, torch.Tensor) else int(x) -def int_div_ceil(a: int | "torch.Tensor", b: int | "torch.Tensor") -> int: +def int_div_ceil(a: int | torch.Tensor, b: int | torch.Tensor) -> int: """ Perform integer ceiling division without intermediate floating-point conversion. From 319220d8ea172094460c5786ad7d30462ec17ccb Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Sun, 15 Mar 2026 16:32:00 -0400 Subject: [PATCH 3/5] formatting --- src/transformers/models/fuyu/image_processing_fuyu.py | 1 - src/transformers/models/jamba/configuration_jamba.py | 1 - src/transformers/models/mamba/configuration_mamba.py | 1 - src/transformers/models/mamba2/configuration_mamba2.py | 1 - src/transformers/models/zamba/configuration_zamba.py | 1 - 5 files changed, 5 deletions(-) diff --git a/src/transformers/models/fuyu/image_processing_fuyu.py b/src/transformers/models/fuyu/image_processing_fuyu.py index 3b0aeb6a9865..c63f1b2e54ac 100644 --- a/src/transformers/models/fuyu/image_processing_fuyu.py +++ b/src/transformers/models/fuyu/image_processing_fuyu.py @@ -13,7 +13,6 @@ # limitations under the License. """Image processor class for Fuyu.""" - import numpy as np from ...image_processing_utils import BaseImageProcessor, BatchFeature, get_size_dict diff --git a/src/transformers/models/jamba/configuration_jamba.py b/src/transformers/models/jamba/configuration_jamba.py index a97f92ff4bdb..e536cc1d75d1 100644 --- a/src/transformers/models/jamba/configuration_jamba.py +++ b/src/transformers/models/jamba/configuration_jamba.py @@ -13,7 +13,6 @@ # limitations under the License. """Jamba model configuration""" - from ...configuration_utils import PreTrainedConfig from ...utils import auto_docstring, int_div_ceil, logging diff --git a/src/transformers/models/mamba/configuration_mamba.py b/src/transformers/models/mamba/configuration_mamba.py index c3997924b5fd..cc90e05cd7fc 100644 --- a/src/transformers/models/mamba/configuration_mamba.py +++ b/src/transformers/models/mamba/configuration_mamba.py @@ -13,7 +13,6 @@ # limitations under the License. """MAMBA configuration""" - from ...configuration_utils import PreTrainedConfig from ...utils import auto_docstring, int_div_ceil, logging diff --git a/src/transformers/models/mamba2/configuration_mamba2.py b/src/transformers/models/mamba2/configuration_mamba2.py index 96bfe4f23f7b..981f592e9687 100644 --- a/src/transformers/models/mamba2/configuration_mamba2.py +++ b/src/transformers/models/mamba2/configuration_mamba2.py @@ -13,7 +13,6 @@ # limitations under the License. """MAMBA2 configuration""" - from ...configuration_utils import PreTrainedConfig from ...utils import auto_docstring, int_div_ceil, logging diff --git a/src/transformers/models/zamba/configuration_zamba.py b/src/transformers/models/zamba/configuration_zamba.py index 54dfe8b29d38..16534bc4d4d5 100644 --- a/src/transformers/models/zamba/configuration_zamba.py +++ b/src/transformers/models/zamba/configuration_zamba.py @@ -13,7 +13,6 @@ # limitations under the License. """Zamba model configuration""" - from ...configuration_utils import PreTrainedConfig from ...utils import auto_docstring, int_div_ceil, logging From a54b03e84dc9554a2f69c3a68770c2d1c0ca946e Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Sun, 15 Mar 2026 16:36:38 -0400 Subject: [PATCH 4/5] formatting? ci mad --- src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py index 6db36199dca3..543666f8750e 100644 --- a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py +++ b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py @@ -33,7 +33,6 @@ MetaClip2Model, ) - def load_metaclip2_checkpoint(checkpoint_path: str, model_name: str) -> torch.nn.Module: """Load MetaCLIP 2 model from checkpoint.""" print(f"Loading MetaCLIP 2 model: {model_name}") From 2888c627e9987686537f95946e7d82cdfe5729fd Mon Sep 17 00:00:00 2001 From: Joshua Lochner <26504141+xenova@users.noreply.github.com> Date: Sun, 15 Mar 2026 16:39:38 -0400 Subject: [PATCH 5/5] undo --- src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py index 543666f8750e..6db36199dca3 100644 --- a/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py +++ b/src/transformers/models/metaclip_2/convert_metaclip_2_to_hf.py @@ -33,6 +33,7 @@ MetaClip2Model, ) + def load_metaclip2_checkpoint(checkpoint_path: str, model_name: str) -> torch.nn.Module: """Load MetaCLIP 2 model from checkpoint.""" print(f"Loading MetaCLIP 2 model: {model_name}")