Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
282 commits
Select commit Hold shift + click to select a range
f539965
initial documentation
kashif Nov 28, 2024
f95d6ee
rename mask to attention_mask
kashif Nov 28, 2024
d475529
smaller tests
kashif Nov 28, 2024
f7c1fe0
fixup
kashif Nov 28, 2024
5a808be
fix copies
kashif Nov 28, 2024
6dbbd80
move to time series section
kashif Nov 29, 2024
bfa9302
sort docs
kashif Nov 29, 2024
eb5807e
isort fix
kashif Nov 29, 2024
be32365
batch_size is not a configuration
kashif Nov 29, 2024
c4a3610
rename to TimesFMModelForPrediction
kashif Nov 30, 2024
56a5606
initial script
kashif Dec 4, 2024
01756ae
add check_outputs
kashif Dec 5, 2024
942c23c
remove dropout_rate
kashif Dec 5, 2024
e7650bd
works with torch.Tensor inputs
kashif Dec 5, 2024
c523f64
rename script
kashif Dec 5, 2024
e64f562
fix docstrings
kashif Dec 5, 2024
f5dbab9
fix freq when window_size is given
kashif Dec 5, 2024
a8dcfa9
add loss
kashif Dec 5, 2024
5fb1fe0
fix _quantile_loss
kashif Dec 5, 2024
1445fe5
formatting
kashif Dec 5, 2024
b84c188
Merge branch 'main' into timesfm
kashif Dec 5, 2024
1c1804e
fix isort
kashif Dec 5, 2024
35a7e9f
add weight init
kashif Dec 5, 2024
4bfc95c
Merge branch 'main' into timesfm
kashif Dec 5, 2024
6c4dded
add support for sdpa and flash_attention_2
kashif Dec 8, 2024
82c697c
fixes for flash_attention
kashif Dec 8, 2024
3bedf98
formatting
kashif Dec 8, 2024
2b4f55c
remove flash_attention
kashif Dec 9, 2024
5c4c591
fix tests
kashif Dec 9, 2024
f924a31
fix file name
kashif Dec 9, 2024
84f763e
fix quantile loss
kashif Dec 9, 2024
ee1e289
added initial TimesFMModelIntegrationTests
kashif Dec 9, 2024
b9e9633
fix formatting
kashif Dec 10, 2024
d3753ff
Merge branch 'main' into timesfm
kashif Dec 10, 2024
fce6cf4
Merge branch 'main' into timesfm
kashif Dec 22, 2024
c9dede6
fix import order
kashif Dec 22, 2024
bc67797
fix _quantile_loss
kashif Dec 22, 2024
61d5e89
add doc for SDPA
kashif Dec 22, 2024
5ea1698
Merge branch 'main' into timesfm
kashif Jan 3, 2025
ece0896
use timesfm 2.0
kashif Jan 3, 2025
21e3236
bug fix in timesfm decode function.
rajatsen91 Jan 3, 2025
f173a8e
compare mean forecasts
kashif Jan 6, 2025
4e5196a
Merge branch 'main' into timesfm
kashif Jan 8, 2025
b83023c
refactor type hints, use CamelCase
jinan-zhou Jan 10, 2025
b21ec50
consolidate decode func
jinan-zhou Jan 14, 2025
591874f
Merge branch 'main' into timesfm
kashif Jan 15, 2025
e8dfab0
Merge branch 'main' into timesfm
kashif Feb 3, 2025
e162102
more readable code for weight conversion
jinan-zhou Feb 6, 2025
8d614ae
Merge branch 'main' into timesfm
kashif Feb 14, 2025
e7531e1
fix-copies
kashif Feb 14, 2025
0cfb2c3
simpler init
kashif Feb 14, 2025
2e29e5f
renaem TimesFmMLP
kashif Feb 14, 2025
5dc2927
use T5LayerNorm
kashif Feb 14, 2025
7180f79
fix tests
kashif Feb 14, 2025
cdb4239
use initializer_range
kashif Feb 14, 2025
c48d673
TimesFmModel instead of TimesFmDecoder
kashif Feb 14, 2025
ce5f216
TimesFmPositionalEmbedding takes config for its init
kashif Feb 14, 2025
9453ed9
2.0-500m-pytorch default configs
kashif Feb 14, 2025
61c96fd
use TimesFmModel
kashif Feb 14, 2025
9538c1d
fix formatting
kashif Feb 16, 2025
bfa69e7
ignore TimesFmModel for testing
kashif Feb 17, 2025
80d8809
Merge branch 'main' into timesfm
kashif Feb 18, 2025
c34286f
fix docstring
kashif Feb 27, 2025
72ae8f5
Merge branch 'main' into timesfm
kashif Feb 27, 2025
e401b33
override generate as its not needed
kashif Feb 27, 2025
85446e3
add doc strings
kashif Feb 27, 2025
c410cde
fix logging
kashif Feb 27, 2025
8d5a210
add docstrings to output data classes
kashif Feb 27, 2025
c2625e0
initial copy from t5
kashif Aug 27, 2024
f43a0df
added config and attention layers
kashif Aug 27, 2024
8bbda06
add TimesFMPositionalEmbedding
kashif Aug 27, 2024
5178c11
calcuate scale_factor once
kashif Aug 31, 2024
95a06a9
add more configs and TimesFMResidualBlock
kashif Aug 31, 2024
3be5893
fix input_dims
kashif Aug 31, 2024
9fb8bf8
standardize code format with black
jinan-zhou Sep 5, 2024
f79803c
remove unneeded modules
jinan-zhou Sep 5, 2024
a81e99b
TimesFM Model
jinan-zhou Sep 11, 2024
1ec48c7
order of imports
kashif Sep 15, 2024
8abfc2e
copy from Google official implementation
jinan-zhou Sep 19, 2024
7e0305a
remove covariate forecasting
jinan-zhou Sep 23, 2024
c042a9d
Adapting TimesFM to HF format
jinan-zhou Sep 30, 2024
a52eeca
restructing in progress
jinan-zhou Oct 2, 2024
c7f760e
adapted to HF convention
jinan-zhou Oct 3, 2024
d717132
timesfm test
jinan-zhou Oct 10, 2024
72ffaaf
the model runs
jinan-zhou Oct 11, 2024
3818ee4
fixing unit tests
jinan-zhou Oct 24, 2024
0013655
fixing unit tests in progress
jinan-zhou Nov 6, 2024
6419285
add post_init
kashif Nov 7, 2024
7cd2e41
do not change TimesFMOutput
kashif Nov 7, 2024
47affe8
fixing unit tests
jinan-zhou Nov 14, 2024
bbf738c
all unit tests passed
jinan-zhou Nov 15, 2024
bb2a850
remove timesfm_layers
kashif Nov 24, 2024
c55088d
add intermediate_size and initialize with config
kashif Nov 28, 2024
fd270d9
initial documentation
kashif Nov 28, 2024
9bb5a49
rename mask to attention_mask
kashif Nov 28, 2024
5376dd7
smaller tests
kashif Nov 28, 2024
8edb51e
fixup
kashif Nov 28, 2024
e8e31cd
fix copies
kashif Nov 28, 2024
5b18440
move to time series section
kashif Nov 29, 2024
5ebeec2
sort docs
kashif Nov 29, 2024
f810125
isort fix
kashif Nov 29, 2024
7e5921c
batch_size is not a configuration
kashif Nov 29, 2024
906d6a8
rename to TimesFMModelForPrediction
kashif Nov 30, 2024
c30e748
initial script
kashif Dec 4, 2024
d7d3a13
add check_outputs
kashif Dec 5, 2024
c3fbff2
remove dropout_rate
kashif Dec 5, 2024
9e6750c
works with torch.Tensor inputs
kashif Dec 5, 2024
b437e87
rename script
kashif Dec 5, 2024
9f0f086
fix docstrings
kashif Dec 5, 2024
f9e5db8
fix freq when window_size is given
kashif Dec 5, 2024
c8703ff
add loss
kashif Dec 5, 2024
8f6c2e1
fix _quantile_loss
kashif Dec 5, 2024
b319873
formatting
kashif Dec 5, 2024
3bd0827
fix isort
kashif Dec 5, 2024
0d4325e
add weight init
kashif Dec 5, 2024
4212ef8
add support for sdpa and flash_attention_2
kashif Dec 8, 2024
9739e4b
fixes for flash_attention
kashif Dec 8, 2024
33cee01
formatting
kashif Dec 8, 2024
bce6405
remove flash_attention
kashif Dec 9, 2024
fb33f35
fix tests
kashif Dec 9, 2024
b41c368
fix file name
kashif Dec 9, 2024
9aad101
fix quantile loss
kashif Dec 9, 2024
be8922f
added initial TimesFMModelIntegrationTests
kashif Dec 9, 2024
c468644
fix formatting
kashif Dec 10, 2024
689d2a4
fix import order
kashif Dec 22, 2024
abb1c0a
fix _quantile_loss
kashif Dec 22, 2024
686c71b
add doc for SDPA
kashif Dec 22, 2024
91c50a4
use timesfm 2.0
kashif Jan 3, 2025
cef8510
bug fix in timesfm decode function.
rajatsen91 Jan 3, 2025
7c7e56f
compare mean forecasts
kashif Jan 6, 2025
22bb7cf
refactor type hints, use CamelCase
jinan-zhou Jan 10, 2025
53b290a
consolidate decode func
jinan-zhou Jan 14, 2025
c65e4b4
more readable code for weight conversion
jinan-zhou Feb 6, 2025
b428972
fix-copies
kashif Feb 14, 2025
ea05e27
simpler init
kashif Feb 14, 2025
038859d
renaem TimesFmMLP
kashif Feb 14, 2025
ef59621
use T5LayerNorm
kashif Feb 14, 2025
d8c2e0d
fix tests
kashif Feb 14, 2025
a75b8e7
use initializer_range
kashif Feb 14, 2025
5352cda
TimesFmModel instead of TimesFmDecoder
kashif Feb 14, 2025
f460370
TimesFmPositionalEmbedding takes config for its init
kashif Feb 14, 2025
913f360
2.0-500m-pytorch default configs
kashif Feb 14, 2025
02e62c6
use TimesFmModel
kashif Feb 14, 2025
4466315
fix formatting
kashif Feb 16, 2025
df7bbb0
ignore TimesFmModel for testing
kashif Feb 17, 2025
c0a4f48
fix docstring
kashif Feb 27, 2025
71bda44
override generate as its not needed
kashif Feb 27, 2025
b7e75e9
add doc strings
kashif Feb 27, 2025
f76116b
fix logging
kashif Feb 27, 2025
0026ba6
add docstrings to output data classes
kashif Feb 27, 2025
909fd6c
Merge branch 'timesfm' of https://github.com/kashif/transformers into…
kashif Feb 28, 2025
380e6bf
add _CHECKPOINT_FOR_DOC
kashif Feb 28, 2025
8deeb3e
fix comments
jinan-zhou Feb 28, 2025
92e0b41
Revert "fix comments"
kashif Mar 1, 2025
33fde14
add _prepare_4d_attention_mask
kashif Mar 1, 2025
5f7bffb
Merge branch 'main' into timesfm
kashif Mar 1, 2025
ca21a2b
we do not have generative model classes
kashif Mar 1, 2025
bac7f24
use Cache
kashif Mar 1, 2025
f5a3570
return past_key_values
kashif Mar 1, 2025
a53195c
Merge branch 'main' into timesfm
kashif Mar 3, 2025
7b00789
modules initialized with config only
jinan-zhou Mar 3, 2025
8342c11
Merge branch 'main' into timesfm
jinan-zhou Mar 3, 2025
921c0bd
Merge branch 'main' into timesfm
kashif Mar 4, 2025
019c6a2
update year
kashif Mar 4, 2025
32065cc
Update docs/source/en/model_doc/timesfm.md
kashif Mar 4, 2025
4a1687b
add layer_idx to cache
kashif Mar 5, 2025
e6d77dd
modular timesfm
kashif Mar 5, 2025
c236313
fix test
kashif Mar 5, 2025
e383fcb
Merge branch 'huggingface:main' into timesfm
jinan-zhou Mar 5, 2025
b0354f0
unwrap sequential class
jinan-zhou Mar 5, 2025
ace1363
fix toctree
jinan-zhou Mar 5, 2025
df91360
remove TimesFmOnnxConfig
kashif Mar 5, 2025
5dc0c38
Merge branch 'timesfm' of https://github.com/kashif/transformers into…
kashif Mar 5, 2025
fce6d1f
fix modular
kashif Mar 5, 2025
9da15fd
remove TimesFmStackedDecoder
kashif Mar 5, 2025
94126c6
split qkv layer into individual layers
kashif Mar 5, 2025
006e97a
rename projection layers
kashif Mar 6, 2025
9f0b4c4
use ALL_ATTENTION_FUNCTIONS
kashif Mar 6, 2025
b587467
is_causal is True
kashif Mar 6, 2025
270d99b
Merge branch 'main' into timesfm
kashif Mar 6, 2025
156051d
rename config
kashif Mar 6, 2025
cf733d0
Merge branch 'timesfm' of https://github.com/kashif/transformers into…
kashif Mar 6, 2025
dc46013
does not support flash_attn_2
kashif Mar 7, 2025
d9b1cca
formatting
kashif Mar 7, 2025
6cc2ea6
fix typo in docsstring
kashif Mar 7, 2025
66b0af6
Merge branch 'main' into timesfm
kashif Mar 7, 2025
3c036f6
rename inputs
kashif Mar 7, 2025
63dba1c
add time series mapping
kashif Mar 7, 2025
768b5f5
Update src/transformers/models/olmo2/modeling_olmo2.py
kashif Mar 7, 2025
1d20534
Update src/transformers/models/moonshine/modeling_moonshine.py
kashif Mar 7, 2025
620dcd0
use updated arguments
kashif Mar 7, 2025
8da0298
fix class name
kashif Mar 7, 2025
36f0298
add MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING
kashif Mar 7, 2025
28671d9
isort
kashif Mar 7, 2025
28c881b
Merge branch 'main' into timesfm
kashif Mar 12, 2025
347e7e7
consolidate _preprocess into forward
jinan-zhou Mar 12, 2025
41f6f55
fix a typo
jinan-zhou Mar 12, 2025
8b46515
fix a typo
jinan-zhou Mar 12, 2025
c646a84
fix toc
kashif Mar 13, 2025
fc95e56
fix modular
kashif Mar 13, 2025
4e2ad7e
Merge branch 'main' into timesfm
kashif Mar 13, 2025
466e88c
Merge branch 'main' into timesfm
kashif Mar 19, 2025
e0d163c
remove aaserts
kashif Mar 19, 2025
2acc0ac
use self.config._attn_implementation
kashif Mar 19, 2025
567a45d
move to _postprocess_output
kashif Mar 23, 2025
cf90818
remove timesfm_get_large_negative_number
kashif Mar 23, 2025
666fc92
use view unstead of multiple unsqueeze
kashif Mar 23, 2025
d7429e9
make helpers static methods of the Model
kashif Mar 23, 2025
9d9d6f2
use to_tuple
kashif Mar 23, 2025
a6d049f
use to_tuple if not return_dict
kashif Mar 23, 2025
942b607
remove unused intitialization block as its incorporated in nn.Linear
kashif Mar 23, 2025
d6314c8
remove unused num_key_value_groups
kashif Mar 23, 2025
ca68d43
use the same convention as the masking method
kashif Mar 23, 2025
8d158f3
Merge branch 'main' into timesfm
kashif Mar 23, 2025
5191573
update modular
kashif Mar 23, 2025
f47a1e7
do not use unsqueeze
kashif Mar 23, 2025
936a2d6
use view instead of unsqueeze
kashif Mar 23, 2025
a88dae7
Merge branch 'main' into timesfm
kashif Mar 24, 2025
271b169
Merge branch 'main' into timesfm
kashif Mar 25, 2025
5b40f25
use buffer for inv_timescales
kashif Mar 27, 2025
a7f85ce
formatting
kashif Mar 27, 2025
9685037
modular conversion
kashif Mar 27, 2025
b88a984
remove unneeded intialization
kashif Mar 27, 2025
49eed00
add missing docstrings
kashif Mar 27, 2025
649f2a6
remove cache
kashif Mar 27, 2025
a2e3f05
Merge branch 'main' into timesfm
kashif Mar 27, 2025
07669d2
use simple_eager_attention_forward
kashif Mar 27, 2025
08df212
support tp_plan
kashif Mar 27, 2025
def36c4
support for flex and flash attention masks
kashif Mar 31, 2025
5cc47cd
Revert "support for flex and flash attention masks"
kashif Mar 31, 2025
5e3a5e2
fix device
kashif Mar 31, 2025
7da546f
fix tests on gpu
kashif Mar 31, 2025
debb032
remove unsued large model test
kashif Mar 31, 2025
2a0c209
removed unneeded comments
kashif Mar 31, 2025
b1c3c49
Merge branch 'main' into timesfm
kashif Mar 31, 2025
87e8b12
add example usage
kashif Mar 31, 2025
0493f61
Merge branch 'main' into timesfm
kashif Apr 11, 2025
70c3cb5
fix style
kashif Apr 11, 2025
76f72fb
add import
kashif Apr 11, 2025
aa721d4
Merge branch 'main' into timesfm
kashif Apr 13, 2025
e7882d7
Update docs/source/en/model_doc/timesfm.md
kashif Apr 15, 2025
a86136d
inherit from LlamaRMSNorm
kashif Apr 15, 2025
60e7e65
use can_return_tuple decorator
kashif Apr 15, 2025
a5b9010
remvoe return_dict
kashif Apr 15, 2025
ca86584
fix year
kashif Apr 15, 2025
0b711f1
Merge branch 'main' into timesfm
kashif Apr 15, 2025
531f8e3
Merge branch 'main' into timesfm
Cyrilvallez Apr 16, 2025
9b76c0b
Update docs/source/en/model_doc/timesfm.md
kashif Apr 16, 2025
0161ca9
pretrained does not inherit from GenerationMixin
kashif Apr 16, 2025
fa53c52
use model for integration test
kashif Apr 16, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/source/en/_toctree.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,8 @@
title: PatchTST
- local: model_doc/time_series_transformer
title: Time Series Transformer
- local: model_doc/timesfm
title: TimesFM
title: Time series models
- sections:
- local: model_doc/graphormer
Expand Down
88 changes: 88 additions & 0 deletions docs/source/en/model_doc/timesfm.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
<!--Copyright 2025 The HuggingFace Team. All rights reserved.

Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
specific language governing permissions and limitations under the License.

⚠️ Note that this file is in Markdown but contains specific syntax for our doc-builder (similar to MDX) that may not be
rendered properly in your Markdown viewer.

-->

# TimesFM
Comment thread
kashif marked this conversation as resolved.

<div class="flex flex-wrap space-x-1">
<img alt="PyTorch" src="https://img.shields.io/badge/PyTorch-DE3412?style=flat&logo=pytorch&logoColor=white">
</div>

## Overview

TimesFM (Time Series Foundation Model) is a pretrained time-series foundation model proposed in [A decoder-only foundation model for time-series forecasting](https://huggingface.co/papers/2310.10688) by Abhimanyu Das, Weihao Kong, Rajat Sen, and Yichen Zhou. It is a decoder only model that uses non-overlapping patches of time-series data as input and outputs some output patch length prediction in an autoregressive fashion.


The abstract from the paper is the following:

*Motivated by recent advances in large language models for Natural Language Processing (NLP), we design a time-series foundation model for forecasting whose out-of-the-box zero-shot performance on a variety of public datasets comes close to the accuracy of state-of-the-art supervised forecasting models for each individual dataset. Our model is based on pretraining a patched-decoder style attention model on a large time-series corpus, and can work well across different forecasting history lengths, prediction lengths and temporal granularities.*


This model was contributed by [kashif](https://huggingface.co/kashif).
The original code can be found [here](https://github.com/google-research/timesfm).
Comment thread
kashif marked this conversation as resolved.


To use the model:

```python
import numpy as np
import torch
from transformers import TimesFmModelForPrediction


model = TimesFmModelForPrediction.from_pretrained(
"google/timesfm-2.0-500m-pytorch",
torch_dtype=torch.bfloat16,
attn_implementation="sdpa",
device_map="cuda" if torch.cuda.is_available() else None
)


# Create dummy inputs
forecast_input = [
np.sin(np.linspace(0, 20, 100)),
np.sin(np.linspace(0, 20, 200)),
np.sin(np.linspace(0, 20, 400)),
]
frequency_input = [0, 1, 2]

# Convert inputs to sequence of tensors
forecast_input_tensor = [
torch.tensor(ts, dtype=torch.bfloat16).to("cuda" if torch.cuda.is_available() else "cpu")
for ts in forecast_input
]
frequency_input_tensor = torch.tensor(frequency_input, dtype=torch.long).to(
"cuda" if torch.cuda.is_available() else "cpu"
)

# Get predictions from the pre-trained model
with torch.no_grad():
outputs = model(past_values=forecast_input_tensor, freq=frequency_input_tensor, return_dict=True)
point_forecast_conv = outputs.mean_predictions.float().cpu().numpy()
quantile_forecast_conv = outputs.full_predictions.float().cpu().numpy()
```

## TimesFmConfig

[[autodoc]] TimesFmConfig

## TimesFmModel

[[autodoc]] TimesFmModel
- forward

## TimesFmModelForPrediction

[[autodoc]] TimesFmModelForPrediction
- forward
1 change: 1 addition & 0 deletions src/transformers/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@
from .tapas import *
from .textnet import *
from .time_series_transformer import *
from .timesfm import *
from .timesformer import *
from .timm_backbone import *
from .timm_wrapper import *
Expand Down
2 changes: 2 additions & 0 deletions src/transformers/models/auto/configuration_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,7 @@
("tapas", "TapasConfig"),
("textnet", "TextNetConfig"),
("time_series_transformer", "TimeSeriesTransformerConfig"),
("timesfm", "TimesFmConfig"),
("timesformer", "TimesformerConfig"),
("timm_backbone", "TimmBackboneConfig"),
("timm_wrapper", "TimmWrapperConfig"),
Expand Down Expand Up @@ -681,6 +682,7 @@
("tapex", "TAPEX"),
("textnet", "TextNet"),
("time_series_transformer", "Time Series Transformer"),
("timesfm", "TimesFm"),
("timesformer", "TimeSformer"),
("timm_backbone", "TimmBackbone"),
("timm_wrapper", "TimmWrapperModel"),
Expand Down
21 changes: 21 additions & 0 deletions src/transformers/models/auto/modeling_auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@
("tapas", "TapasModel"),
("textnet", "TextNetModel"),
("time_series_transformer", "TimeSeriesTransformerModel"),
("timesfm", "TimesFmModel"),
("timesformer", "TimesformerModel"),
("timm_backbone", "TimmBackbone"),
("timm_wrapper", "TimmWrapperModel"),
Expand Down Expand Up @@ -1542,6 +1543,12 @@
]
)

MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES = OrderedDict(
[
("timesfm", "TimesFmModelForPrediction"),
]
)

MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES = OrderedDict(
[
("swin2sr", "Swin2SRForImageSuperResolution"),
Expand Down Expand Up @@ -1650,6 +1657,10 @@
CONFIG_MAPPING_NAMES, MODEL_FOR_TIME_SERIES_REGRESSION_MAPPING_NAMES
)

MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING = _LazyAutoMapping(
CONFIG_MAPPING_NAMES, MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES
)

MODEL_FOR_IMAGE_TO_IMAGE_MAPPING = _LazyAutoMapping(CONFIG_MAPPING_NAMES, MODEL_FOR_IMAGE_TO_IMAGE_MAPPING_NAMES)


Expand Down Expand Up @@ -1820,6 +1831,15 @@ class AutoModelForSemanticSegmentation(_BaseAutoModelClass):
)


class AutoModelForTimeSeriesPrediction(_BaseAutoModelClass):
    # Maps config classes to their time-series prediction head classes
    # (populated from MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING_NAMES; currently TimesFM only).
    _model_mapping = MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING


# Rewrites the class docstring/from_pretrained docs for the "time-series prediction" head,
# following the same convention as the other AutoModelFor* classes in this file.
AutoModelForTimeSeriesPrediction = auto_class_update(
    AutoModelForTimeSeriesPrediction, head_doc="time-series prediction"
)


class AutoModelForUniversalSegmentation(_BaseAutoModelClass):
_model_mapping = MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING

Expand Down Expand Up @@ -1994,6 +2014,7 @@ def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
"MODEL_FOR_TEXT_ENCODING_MAPPING",
"MODEL_FOR_TEXT_TO_WAVEFORM_MAPPING",
"MODEL_FOR_TEXT_TO_SPECTROGRAM_MAPPING",
"MODEL_FOR_TIME_SERIES_PREDICTION_MAPPING",
"MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING",
"MODEL_FOR_UNIVERSAL_SEGMENTATION_MAPPING",
"MODEL_FOR_VIDEO_CLASSIFICATION_MAPPING",
Expand Down
27 changes: 27 additions & 0 deletions src/transformers/models/timesfm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright 2025 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import TYPE_CHECKING

from ...utils import _LazyModule
from ...utils.import_utils import define_import_structure


if TYPE_CHECKING:
    # For static type checkers and IDEs: expose the public symbols eagerly.
    from .configuration_timesfm import *
    from .modeling_timesfm import *
else:
    import sys

    # At runtime, replace this module with a lazy proxy so heavy submodules
    # (e.g. the torch-based modeling file) are only imported when one of
    # their attributes is first accessed.
    _file = globals()["__file__"]
    sys.modules[__name__] = _LazyModule(__name__, _file, define_import_structure(_file), module_spec=__spec__)
129 changes: 129 additions & 0 deletions src/transformers/models/timesfm/configuration_timesfm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# coding=utf-8
# Copyright 2025 Google LLC and HuggingFace Inc. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""TimesFM model configuration"""

from typing import List, Optional

from ...configuration_utils import PretrainedConfig
from ...utils import logging


logger = logging.get_logger(__name__)


class TimesFmConfig(PretrainedConfig):
    r"""
    This is the configuration class to store the configuration of a [`TimesFmModel`] or a
    [`TimesFmModelForPrediction`]. It is used to instantiate a TimesFM model according to the specified arguments,
    defining the model architecture. Instantiating a configuration with the defaults will yield a similar
    configuration to that of the TimesFM
    [google/timesfm-2.0-500m-pytorch](https://huggingface.co/google/timesfm-2.0-500m-pytorch) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Arguments:
        patch_length (`int`, *optional*, defaults to 32):
            The length of one patch in the input sequence.
        context_length (`int`, *optional*, defaults to 512):
            The length of the input context.
        horizon_length (`int`, *optional*, defaults to 128):
            The length of the prediction horizon.
        freq_size (`int`, *optional*, defaults to 3):
            The number of frequency embeddings.
        num_hidden_layers (`int`, *optional*, defaults to 50):
            Number of Transformer layers.
        hidden_size (`int`, *optional*, defaults to 1280):
            Size of the hidden layers in the feed-forward networks.
        intermediate_size (`int`, *optional*, defaults to 1280):
            Dimension of the MLP representations.
        head_dim (`int`, *optional*, defaults to 80):
            Size of the key, query, value projections per attention head. The `inner_dim` of the projection layer will
            be defined as `num_attention_heads * head_dim`.
        num_attention_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer encoder.
        tolerance (`float`, *optional*, defaults to 1e-06):
            The tolerance for the quantile loss.
        rms_norm_eps (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the RMS normalization layers.
        quantiles (`List[float]`, *optional*, defaults to `[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]`):
            The quantiles to predict.
        pad_val (`float`, *optional*, defaults to 1123581321.0):
            The value used to pad the predictions.
        attention_dropout (`float`, *optional*, defaults to 0.0):
            The dropout probability for the attention scores.
        use_positional_embedding (`bool`, *optional*, defaults to `False`):
            Whether to add positional embeddings.
        initializer_range (`float`, *optional*, defaults to 0.02):
            The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
        min_timescale (`int`, *optional*, defaults to 1):
            The start of the geometric positional index. Determines the periodicity of
            the added signal.
        max_timescale (`int`, *optional*, defaults to 10000):
            The end of the geometric positional index. Determines the frequency of the
            added signal.
    """

    model_type = "timesfm"
    keys_to_ignore_at_inference = []
    is_encoder_decoder = False

    def __init__(
        self,
        patch_length: int = 32,
        context_length: int = 512,
        horizon_length: int = 128,
        freq_size: int = 3,
        num_hidden_layers: int = 50,
        hidden_size: int = 1280,
        intermediate_size: int = 1280,
        head_dim: int = 80,
        num_attention_heads: int = 16,
        tolerance: float = 1e-6,
        rms_norm_eps: float = 1e-6,
        quantiles: Optional[List[float]] = None,
        pad_val: float = 1123581321.0,
        attention_dropout: float = 0.0,
        use_positional_embedding: bool = False,
        initializer_range: float = 0.02,
        min_timescale: int = 1,
        max_timescale: int = 10_000,
        **kwargs,
    ):
        self.patch_length = patch_length
        self.context_length = context_length
        self.horizon_length = horizon_length
        # `None` sentinel instead of a mutable default argument: a shared list
        # default would be aliased across every config instance.
        self.quantiles = quantiles if quantiles is not None else [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
        self.pad_val = pad_val
        self.freq_size = freq_size
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.head_dim = head_dim
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.tolerance = tolerance
        self.rms_norm_eps = rms_norm_eps
        self.attention_dropout = attention_dropout
        self.use_positional_embedding = use_positional_embedding
        self.initializer_range = initializer_range
        self.min_timescale = min_timescale
        self.max_timescale = max_timescale

        super().__init__(
            is_encoder_decoder=self.is_encoder_decoder,
            **kwargs,
        )


__all__ = ["TimesFmConfig"]
Loading