diff --git a/docker/common/uv-pytorch.lock b/docker/common/uv-pytorch.lock index b083e471cd..3a8580c711 100644 --- a/docker/common/uv-pytorch.lock +++ b/docker/common/uv-pytorch.lock @@ -1872,6 +1872,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7d/ed/6bfa4109fcb23a58819600392564fea69cdc6551ffd5e69ccf1d52a40cbc/greenlet-3.2.4-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:8c68325b0d0acf8d91dde4e6f930967dd52a5302cd4062932a6b2e7c2969f47c", size = 271061, upload-time = "2025-08-07T13:17:15.373Z" }, { url = "https://files.pythonhosted.org/packages/2a/fc/102ec1a2fc015b3a7652abab7acf3541d58c04d3d17a8d3d6a44adae1eb1/greenlet-3.2.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:94385f101946790ae13da500603491f04a76b6e4c059dab271b3ce2e283b2590", size = 629475, upload-time = "2025-08-07T13:42:54.009Z" }, { url = "https://files.pythonhosted.org/packages/c5/26/80383131d55a4ac0fb08d71660fd77e7660b9db6bdb4e8884f46d9f2cc04/greenlet-3.2.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f10fd42b5ee276335863712fa3da6608e93f70629c631bf77145021600abc23c", size = 640802, upload-time = "2025-08-07T13:45:25.52Z" }, + { url = "https://files.pythonhosted.org/packages/9f/7c/e7833dbcd8f376f3326bd728c845d31dcde4c84268d3921afcae77d90d08/greenlet-3.2.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c8c9e331e58180d0d83c5b7999255721b725913ff6bc6cf39fa2a45841a4fd4b", size = 636703, upload-time = "2025-08-07T13:53:12.622Z" }, { url = "https://files.pythonhosted.org/packages/e9/49/547b93b7c0428ede7b3f309bc965986874759f7d89e4e04aeddbc9699acb/greenlet-3.2.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:58b97143c9cc7b86fc458f215bd0932f1757ce649e05b640fea2e79b54cedb31", size = 635417, upload-time = "2025-08-07T13:18:25.189Z" }, { url = "https://files.pythonhosted.org/packages/7f/91/ae2eb6b7979e2f9b035a9f612cf70f1bf54aad4e1d125129bef1eae96f19/greenlet-3.2.4-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c2ca18a03a8cfb5b25bc1cbe20f3d9a4c80d8c3b13ba3df49ac3961af0b1018d", size = 584358, upload-time = "2025-08-07T13:18:23.708Z" }, { url = "https://files.pythonhosted.org/packages/f7/85/433de0c9c0252b22b16d413c9407e6cb3b41df7389afc366ca204dbc1393/greenlet-3.2.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:9fe0a28a7b952a21e2c062cd5756d34354117796c6d9215a87f55e38d15402c5", size = 1113550, upload-time = "2025-08-07T13:42:37.467Z" }, @@ -1882,6 +1883,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a4/de/f28ced0a67749cac23fecb02b694f6473f47686dff6afaa211d186e2ef9c/greenlet-3.2.4-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:96378df1de302bc38e99c3a9aa311967b7dc80ced1dcc6f171e99842987882a2", size = 272305, upload-time = "2025-08-07T13:15:41.288Z" }, { url = "https://files.pythonhosted.org/packages/09/16/2c3792cba130000bf2a31c5272999113f4764fd9d874fb257ff588ac779a/greenlet-3.2.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1ee8fae0519a337f2329cb78bd7a8e128ec0f881073d43f023c7b8d4831d5246", size = 632472, upload-time = "2025-08-07T13:42:55.044Z" }, { url = "https://files.pythonhosted.org/packages/ae/8f/95d48d7e3d433e6dae5b1682e4292242a53f22df82e6d3dda81b1701a960/greenlet-3.2.4-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:94abf90142c2a18151632371140b3dba4dee031633fe614cb592dbb6c9e17bc3", size = 644646, upload-time = "2025-08-07T13:45:26.523Z" }, + { url = "https://files.pythonhosted.org/packages/d5/5e/405965351aef8c76b8ef7ad370e5da58d57ef6068df197548b015464001a/greenlet-3.2.4-cp311-cp311-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:4d1378601b85e2e5171b99be8d2dc85f594c79967599328f95c1dc1a40f1c633", size = 640519, upload-time = "2025-08-07T13:53:13.928Z" }, { url = "https://files.pythonhosted.org/packages/25/5d/382753b52006ce0218297ec1b628e048c4e64b155379331f25a7316eb749/greenlet-3.2.4-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:0db5594dce18db94f7d1650d7489909b57afde4c580806b8d9203b6e79cdc079", size = 639707, upload-time = "2025-08-07T13:18:27.146Z" }, { url = "https://files.pythonhosted.org/packages/1f/8e/abdd3f14d735b2929290a018ecf133c901be4874b858dd1c604b9319f064/greenlet-3.2.4-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2523e5246274f54fdadbce8494458a2ebdcdbc7b802318466ac5606d3cded1f8", size = 587684, upload-time = "2025-08-07T13:18:25.164Z" }, { url = "https://files.pythonhosted.org/packages/5d/65/deb2a69c3e5996439b0176f6651e0052542bb6c8f8ec2e3fba97c9768805/greenlet-3.2.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1987de92fec508535687fb807a5cea1560f6196285a4cde35c100b8cd632cc52", size = 1116647, upload-time = "2025-08-07T13:42:38.655Z" }, @@ -1892,6 +1894,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/44/69/9b804adb5fd0671f367781560eb5eb586c4d495277c93bde4307b9e28068/greenlet-3.2.4-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:3b67ca49f54cede0186854a008109d6ee71f66bd57bb36abd6d0a0267b540cdd", size = 274079, upload-time = "2025-08-07T13:15:45.033Z" }, { url = "https://files.pythonhosted.org/packages/46/e9/d2a80c99f19a153eff70bc451ab78615583b8dac0754cfb942223d2c1a0d/greenlet-3.2.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddf9164e7a5b08e9d22511526865780a576f19ddd00d62f8a665949327fde8bb", size = 640997, upload-time = "2025-08-07T13:42:56.234Z" }, { url = "https://files.pythonhosted.org/packages/3b/16/035dcfcc48715ccd345f3a93183267167cdd162ad123cd93067d86f27ce4/greenlet-3.2.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:f28588772bb5fb869a8eb331374ec06f24a83a9c25bfa1f38b6993afe9c1e968", size = 655185, upload-time = "2025-08-07T13:45:27.624Z" }, + { url = "https://files.pythonhosted.org/packages/31/da/0386695eef69ffae1ad726881571dfe28b41970173947e7c558d9998de0f/greenlet-3.2.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:5c9320971821a7cb77cfab8d956fa8e39cd07ca44b6070db358ceb7f8797c8c9", size = 649926, upload-time = "2025-08-07T13:53:15.251Z" }, { url = "https://files.pythonhosted.org/packages/68/88/69bf19fd4dc19981928ceacbc5fd4bb6bc2215d53199e367832e98d1d8fe/greenlet-3.2.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c60a6d84229b271d44b70fb6e5fa23781abb5d742af7b808ae3f6efd7c9c60f6", size = 651839, upload-time = "2025-08-07T13:18:30.281Z" }, { url = "https://files.pythonhosted.org/packages/19/0d/6660d55f7373b2ff8152401a83e02084956da23ae58cddbfb0b330978fe9/greenlet-3.2.4-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3b3812d8d0c9579967815af437d96623f45c0f2ae5f04e366de62a12d83a8fb0", size = 607586, upload-time = "2025-08-07T13:18:28.544Z" }, { url = "https://files.pythonhosted.org/packages/8e/1a/c953fdedd22d81ee4629afbb38d2f9d71e37d23caace44775a3a969147d4/greenlet-3.2.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:abbf57b5a870d30c4675928c37278493044d7c14378350b3aa5d484fa65575f0", size = 1123281, upload-time = "2025-08-07T13:42:39.858Z" }, @@ -1902,6 +1905,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/e8/58c7f85958bda41dafea50497cbd59738c5c43dbbea5ee83d651234398f4/greenlet-3.2.4-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:1a921e542453fe531144e91e1feedf12e07351b1cf6c9e8a3325ea600a715a31", size = 272814, upload-time = "2025-08-07T13:15:50.011Z" }, { url = "https://files.pythonhosted.org/packages/62/dd/b9f59862e9e257a16e4e610480cfffd29e3fae018a68c2332090b53aac3d/greenlet-3.2.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd3c8e693bff0fff6ba55f140bf390fa92c994083f838fece0f63be121334945", size = 641073, upload-time = "2025-08-07T13:42:57.23Z" }, { url = "https://files.pythonhosted.org/packages/f7/0b/bc13f787394920b23073ca3b6c4a7a21396301ed75a655bcb47196b50e6e/greenlet-3.2.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:710638eb93b1fa52823aa91bf75326f9ecdfd5e0466f00789246a5280f4ba0fc", size = 655191, upload-time = "2025-08-07T13:45:29.752Z" }, + { url = "https://files.pythonhosted.org/packages/f2/d6/6adde57d1345a8d0f14d31e4ab9c23cfe8e2cd39c3baf7674b4b0338d266/greenlet-3.2.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:c5111ccdc9c88f423426df3fd1811bfc40ed66264d35aa373420a34377efc98a", size = 649516, upload-time = "2025-08-07T13:53:16.314Z" }, { url = "https://files.pythonhosted.org/packages/7f/3b/3a3328a788d4a473889a2d403199932be55b1b0060f4ddd96ee7cdfcad10/greenlet-3.2.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d76383238584e9711e20ebe14db6c88ddcedc1829a9ad31a584389463b5aa504", size = 652169, upload-time = "2025-08-07T13:18:32.861Z" }, { url = "https://files.pythonhosted.org/packages/ee/43/3cecdc0349359e1a527cbf2e3e28e5f8f06d3343aaf82ca13437a9aa290f/greenlet-3.2.4-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:23768528f2911bcd7e475210822ffb5254ed10d71f4028387e5a99b4c6699671", size = 610497, upload-time = "2025-08-07T13:18:31.636Z" }, { url = "https://files.pythonhosted.org/packages/b8/19/06b6cf5d604e2c382a6f31cafafd6f33d5dea706f4db7bdab184bad2b21d/greenlet-3.2.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:00fadb3fedccc447f517ee0d3fd8fe49eae949e1cd0f6a611818f4f6fb7dc83b", size = 1121662, upload-time = "2025-08-07T13:42:41.117Z" }, @@ -1912,6 +1916,7 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/22/5c/85273fd7cc388285632b0498dbbab97596e04b154933dfe0f3e68156c68c/greenlet-3.2.4-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:49a30d5fda2507ae77be16479bdb62a660fa51b1eb4928b524975b3bde77b3c0", size = 273586, upload-time = "2025-08-07T13:16:08.004Z" }, { url = "https://files.pythonhosted.org/packages/d1/75/10aeeaa3da9332c2e761e4c50d4c3556c21113ee3f0afa2cf5769946f7a3/greenlet-3.2.4-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:299fd615cd8fc86267b47597123e3f43ad79c9d8a22bebdce535e53550763e2f", size = 686346, upload-time = "2025-08-07T13:42:59.944Z" }, { url = "https://files.pythonhosted.org/packages/c0/aa/687d6b12ffb505a4447567d1f3abea23bd20e73a5bed63871178e0831b7a/greenlet-3.2.4-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:c17b6b34111ea72fc5a4e4beec9711d2226285f0386ea83477cbb97c30a3f3a5", size = 699218, upload-time = "2025-08-07T13:45:30.969Z" }, + { url = "https://files.pythonhosted.org/packages/dc/8b/29aae55436521f1d6f8ff4e12fb676f3400de7fcf27fccd1d4d17fd8fecd/greenlet-3.2.4-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:b4a1870c51720687af7fa3e7cda6d08d801dae660f75a76f3845b642b4da6ee1", size = 694659, upload-time = "2025-08-07T13:53:17.759Z" }, { url = "https://files.pythonhosted.org/packages/92/2e/ea25914b1ebfde93b6fc4ff46d6864564fba59024e928bdc7de475affc25/greenlet-3.2.4-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:061dc4cf2c34852b052a8620d40f36324554bc192be474b9e9770e8c042fd735", size = 695355, upload-time = "2025-08-07T13:18:34.517Z" }, { url = "https://files.pythonhosted.org/packages/72/60/fc56c62046ec17f6b0d3060564562c64c862948c9d4bc8aa807cf5bd74f4/greenlet-3.2.4-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:44358b9bf66c8576a9f57a590d5f5d6e72fa4228b763d0e43fee6d3b06d3a337", size = 657512, upload-time = "2025-08-07T13:18:33.969Z" }, { url = "https://files.pythonhosted.org/packages/23/6e/74407aed965a4ab6ddd93a7ded3180b730d281c77b765788419484cdfeef/greenlet-3.2.4-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:2917bdf657f5859fbf3386b12d68ede4cf1f04c90c3a6bc1f013dd68a22e2269", size = 1612508, upload-time = "2025-11-04T12:42:23.427Z" }, @@ -2054,31 +2059,34 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.3.0b0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/41/4b/59c9a123813f1db5441f037d9a0e9171bd480c4ff3a9562976a8bf8e49ad/hf_xet-1.3.0b0.tar.gz", hash = "sha256:ece497f54c80992e1b145a89065443f6acf9a6b51d8e4648e53e3ad650fbec06", size = 615265, upload-time = "2026-01-28T20:37:21.892Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/57/2a21a3ef6b560768bb38d6ae944df6e5a1bd6be620aff5efc38e7bfdaa70/hf_xet-1.3.0b0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:5a88ed07d48c05ac4d54dc2ae0ce2df2f1967c982e5d9d06c7022299e8dc8256", size = 5013414, upload-time = "2026-01-28T20:36:44.248Z" }, - { url = "https://files.pythonhosted.org/packages/4e/da/c72939de146f589de58fb122616962e4a78c1d62e68beeb0dd554e6428f5/hf_xet-1.3.0b0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9e6d0e63148b7fdbcc0615a47108b3516f0905d6c4862e9ed57ea34fa4a14264", size = 4811901, upload-time = "2026-01-28T20:36:42.398Z" }, - { url = "https://files.pythonhosted.org/packages/e7/11/57017d7117360438c01df22bb72f39c95eb59042761052882a7686528f58/hf_xet-1.3.0b0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03ae40626d72dd345bf41a1fac4b412147f5aa8c82f3f15b08c1a4c70c02bb9d", size = 58059681, upload-time = "2026-01-28T20:36:21.36Z" }, - { url = "https://files.pythonhosted.org/packages/ae/40/64b2fb5801a3408c06674b9ff8dd954e31c1bd251959c27b161306e5ab21/hf_xet-1.3.0b0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2f2c17a00b9ce759657e8783de298576764309cb86a3aa6d598cf89ed61952a4", size = 53088704, upload-time = "2026-01-28T20:36:16.831Z" }, - { url = "https://files.pythonhosted.org/packages/d4/74/2705d733206051937ada8ceda50a3f3ce6f327bf0ac3807551ea324564ab/hf_xet-1.3.0b0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f7d17918f64f7ae8422ff3c0581f24023a7bfba52bf5063d0f1de6088467916e", size = 53469124, upload-time = "2026-01-28T20:36:55.469Z" }, - { url = "https://files.pythonhosted.org/packages/ce/29/14087f9a54bde49804787126c42c58902110126ae78eb62a346b0f1b3757/hf_xet-1.3.0b0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:576286ff35bfbc04a38fe01088770c14073c88bdb37c60e7c372ba2604b3e34f", size = 55100152, upload-time = "2026-01-28T20:37:00.58Z" }, - { url = "https://files.pythonhosted.org/packages/15/c3/27ff3bac95a2ac1fec61e566ae04502aa959aa9bf2e607bec9f684cc0430/hf_xet-1.3.0b0-cp313-cp313t-win_amd64.whl", hash = "sha256:8257478bc5b5493b2b6257db9c474ea0fb7116deeb6d8c794eeb4c52eb923e9f", size = 3072409, upload-time = "2026-01-28T20:37:23.896Z" }, - { url = "https://files.pythonhosted.org/packages/6b/bb/d7ba51576dc518a6eae6866a18841399737b3a3179ee2bce6e4faac2d001/hf_xet-1.3.0b0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:001692e42e749026b008d6d65f44117a9f9406fada19097f8f3b0ab53bb992c9", size = 5078621, upload-time = "2026-01-28T20:36:52.032Z" }, - { url = "https://files.pythonhosted.org/packages/85/ac/4cf0cf082062de08fe6cdb2f5ae6c3194247f9c079df83e28db904470394/hf_xet-1.3.0b0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1922d17a155eed02face0c03990aec4205e17db9baab8a8dae25720b44c008ce", size = 4811154, upload-time = "2026-01-28T20:36:50.11Z" }, - { url = "https://files.pythonhosted.org/packages/7a/b6/2cca7e576f6aec326d58b4942692b688de24c9fa5c87d1c9a040ae0f2013/hf_xet-1.3.0b0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0011a815d97671f3a84d9633588eef43c36cc61345d8a1d0027a1c56df66aef", size = 58048599, upload-time = "2026-01-28T20:36:39.517Z" }, - { url = "https://files.pythonhosted.org/packages/63/4d/5ef001738e05f39b4e0c088d1ffeb57d771c5beeb8ef58a1e4900b6b9bdd/hf_xet-1.3.0b0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:bd4087cd8fb858744df4de4271c8afcb4b66accd9060b4d3c7091561f7f80e32", size = 53086767, upload-time = "2026-01-28T20:36:35.119Z" }, - { url = "https://files.pythonhosted.org/packages/fd/f1/102b0f5a227feafbe49f9934f80c6bebf123aa7eb99aaa82ea947a2a9719/hf_xet-1.3.0b0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:11b6b5bd5412aafa5bc1fa2f6981db44014b535800ce742941861b32de9ae6fd", size = 53469541, upload-time = "2026-01-28T20:37:13.698Z" }, - { url = "https://files.pythonhosted.org/packages/fe/6f/32d36c0748a5caf05d417927ed842cca3b373f20b5a9eec66ab729a2eb96/hf_xet-1.3.0b0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:adaf846c27778e9455f9016ee7bbb6a00b509717a2e05896f930357eed750c80", size = 55096171, upload-time = "2026-01-28T20:37:18.943Z" }, - { url = "https://files.pythonhosted.org/packages/19/94/8322a56c1c51880f5c114022eca06126aee107ecf34e42c44081ade94bc1/hf_xet-1.3.0b0-cp314-cp314t-win_amd64.whl", hash = "sha256:3b1966c653f9d6ef20af98817888d610f6a2054f77d62416226c510a7b54d810", size = 3099533, upload-time = "2026-01-28T20:37:28.675Z" }, - { url = "https://files.pythonhosted.org/packages/24/ca/b797f7de882de667648b48c7ddbc311f6e9c6e61ce75a087478af7da1c33/hf_xet-1.3.0b0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b43fdfcc7960769ba239758bc744d0fc96e968a91078f4a086d36304a7fe0548", size = 5095272, upload-time = "2026-01-28T20:36:48.093Z" }, - { url = "https://files.pythonhosted.org/packages/1a/c0/204bc663015711ca04b75008871ecbd29c38312e3ba7839e0d1eafa0fa29/hf_xet-1.3.0b0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:efeac315421dd8b0a0d9f35cfc0929b22bbadd984d7eb3c95298f806398a3f15", size = 4826205, upload-time = "2026-01-28T20:36:46.124Z" }, - { url = "https://files.pythonhosted.org/packages/7e/34/a16aa436c3e59007678cee07f5cf3929ba053b14ae16dffd3be1270d3927/hf_xet-1.3.0b0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa63330e14196071fafc0e369a8e9d3f847335f10d33ca152537fb47bf263440", size = 58044866, upload-time = "2026-01-28T20:36:31.13Z" }, - { url = "https://files.pythonhosted.org/packages/d0/74/2202cc67e82a6eb64e42314e92ff2ee798e6dd5ee394967880b1370e878e/hf_xet-1.3.0b0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1f8a48df4e67ab695ae802f0d4d07c3d28fed64ea12decef13f8a8550783a42d", size = 53103717, upload-time = "2026-01-28T20:36:26.633Z" }, - { url = "https://files.pythonhosted.org/packages/8d/eb/9cbf85387377adaef317918318d1921b456625fa2535f39e642ed77076e4/hf_xet-1.3.0b0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ae20bc5405c06538ba820e6a3f818df793fee554f83cf071caa641d0b36f08f8", size = 53485235, upload-time = "2026-01-28T20:37:05.554Z" }, - { url = "https://files.pythonhosted.org/packages/0d/28/302fae85503e423e356042a3332e3b2b714b30ce27db2fe415260973bf0e/hf_xet-1.3.0b0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a566da3478ae73ccd6bca8cb8d1ef85bcd4c36e79912cbfafb5b33890a0f1301", size = 55093706, upload-time = "2026-01-28T20:37:09.561Z" }, - { url = "https://files.pythonhosted.org/packages/7f/df/45e30a11fcf8023b62b15c8f0addfbb82233bdbc2834fcd4681d7a07c335/hf_xet-1.3.0b0-cp37-abi3-win_amd64.whl", hash = "sha256:9c9787d60df869e66307cbd9fedb57ff85f38930bffb3f1f04856ccc12cf91b6", size = 3079075, upload-time = "2026-01-28T20:37:25.663Z" }, +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" }, + { url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" }, + { url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" }, + { url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" }, + { url = "https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" }, + { url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" }, + { url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" }, + { url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" }, + { url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" }, + { url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" }, + { url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" }, ] [[package]] @@ -2134,7 +2142,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "1.3.4" +version = "1.9.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -2143,14 +2151,13 @@ dependencies = [ { name = "httpx" }, { name = "packaging" }, { name = "pyyaml" }, - { name = "shellingham" }, { name = "tqdm" }, - { name = "typer-slim" }, + { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/af/25/74af9d16cd59ae15b12467a79a84aa0fe24be4aba68fc4da0c1864d49c17/huggingface_hub-1.3.4.tar.gz", hash = "sha256:c20d5484a611b7b7891d272e8fc9f77d5de025b0480bdacfa858efb3780b455f", size = 627683, upload-time = "2026-01-26T14:05:10.656Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/65/fb800d327bf25bf31b798dd08935d326d064ecb9b359059fecd91b3a98e8/huggingface_hub-1.9.2.tar.gz", hash = "sha256:8d09d080a186bd950a361bfc04b862dfb04d6a2b41d48e9ba1b37507cfd3f1e1", size = 750284, upload-time = "2026-04-08T08:43:11.127Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/55/07/3d0c34c345043c6a398a5882e196b2220dc5861adfa18322448b90908f26/huggingface_hub-1.3.4-py3-none-any.whl", hash = "sha256:a0c526e76eb316e96a91e8a1a7a93cf66b0dd210be1a17bd5fc5ae53cba76bfd", size = 536611, upload-time = "2026-01-26T14:05:08.549Z" }, + { url = "https://files.pythonhosted.org/packages/57/d4/e33bf0b362810a9b96c5923e38908950d58ecb512db42e3730320c7f4a3a/huggingface_hub-1.9.2-py3-none-any.whl", hash = "sha256:e1e62ce237d4fbeca9f970aeb15176fbd503e04c25577bfd22f44aa7aa2b5243", size = 637349, upload-time = "2026-04-08T08:43:09.114Z" }, ] [[package]] @@ -2858,7 +2865,7 @@ wheels = [ [[package]] name = "mistral-common" -version = "1.9.0" +version = "1.11.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonschema" }, @@ -2871,9 +2878,9 @@ dependencies = [ { name = "tiktoken" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/41/5b/60bb9f8c424c9ec7708396096f92e34f77eca94d55f99326b72b5a322482/mistral_common-1.9.0.tar.gz", hash = "sha256:5f90ec606d1826a20a97d24aefb9bfff7f4cd4cd576b622d4857708c0577e6c2", size = 6337103, upload-time = "2026-01-29T00:28:07.982Z" } +sdist = { url = "https://files.pythonhosted.org/packages/61/97/753c85b5c0a19f4331ac99e0300ac8da06d4b29b629c9cb03064b38561bd/mistral_common-1.11.0.tar.gz", hash = "sha256:439b7fa38f9c3f020154af51bdf30eb81def507643017d8ce9f798384ec47ec3", size = 6355512, upload-time = "2026-04-01T13:54:12.36Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/64/12/8a3c9aaf58b49383d24f533edb2f81f073b59822317bd56bd66d0850caae/mistral_common-1.9.0-py3-none-any.whl", hash = "sha256:e25ed2f8c73f66cf3b1a48b2ddd649e044a0db7b9d9dd1af819eeb20ee1a6d94", size = 6517668, upload-time = "2026-01-29T00:28:04.96Z" }, + { url = "https://files.pythonhosted.org/packages/60/e4/73ad3c27e3fb613c3ce0953c928202c46cddebac3989b87be1b6f305a9f6/mistral_common-1.11.0-py3-none-any.whl", hash = "sha256:1d3ecaf7c3aa7338cb37b596fd0fb294485753958ee8e7254a6cc23eb30b249b", size = 6531513, upload-time = "2026-04-01T13:54:16.536Z" }, ] [package.optional-dependencies] @@ -3432,7 +3439,7 @@ requires-dist = [ { name = "mamba-ssm", marker = "extra == 'cuda'" }, { name = "megatron-fsdp", specifier = ">=0.2.3" }, { name = "mistral-common", extras = ["audio", "hf-hub", "image", "sentencepiece"] }, - { name = "mistral-common", extras = ["opencv"], marker = "extra == 'vlm'", specifier = ">=1.9.0" }, + { name = "mistral-common", extras = ["opencv"], marker = "extra == 'vlm'", specifier = ">=1.11.0" }, { name = "mlflow" }, { name = "nemo-automodel", extras = ["cuda"], marker = "extra == 'all'" }, { name = "nemo-automodel", extras = ["cuda"], marker = "extra == 'moe'" }, @@ -3466,7 +3473,7 @@ requires-dist = [ { name = "torchvision", marker = "sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'diffusion'", index = "https://download.pytorch.org/whl/cpu" }, { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'diffusion'", index = "https://download.pytorch.org/whl/cu129" }, { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'cuda'", specifier = "<=2.11.0" }, - { name = "transformers", specifier = ">=5.3.0,<5.4.0" }, + { name = "transformers", specifier = "==5.5.0" }, { name = "wandb" }, ] provides-extras = ["diffusion", "cuda", "cuda-source", "extra", "fa", "delta-databricks", "moe", "vlm", "cli", "all"] @@ -6610,7 +6617,7 @@ wheels = [ [[package]] name = "transformers" -version = "5.3.0" +version = "5.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -6624,9 +6631,9 @@ dependencies = [ { name = "tqdm" }, { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ff/9d/fb46e729b461985f41a5740167688b924a4019141e5c164bea77548d3d9e/transformers-5.5.0.tar.gz", hash = "sha256:c8db656cf51c600cd8c75f06b20ef85c72e8b8ff9abc880c5d3e8bc70e0ddcbd", size = 8237745, upload-time = "2026-04-02T16:13:08.113Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" }, + { url = "https://files.pythonhosted.org/packages/e7/28/35f7411ff80a3640c1f4fc907dcbb6a65061ebb82f66950e38bfc9f7f740/transformers-5.5.0-py3-none-any.whl", hash = "sha256:821a9ff0961abbb29eb1eb686d78df1c85929fdf213a3fe49dc6bd94f9efa944", size = 10245591, upload-time = "2026-04-02T16:13:03.462Z" }, ] [[package]] @@ -6639,26 +6646,26 @@ dependencies = [ [[package]] name = "ty" -version = "0.0.21" +version = "0.0.20" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/ee/20/2ba8fd9493c89c41dfe9dbb73bc70a28b28028463bc0d2897ba8be36230a/ty-0.0.21.tar.gz", hash = "sha256:a4c2ba5d67d64df8fcdefd8b280ac1149d24a73dbda82fa953a0dff9d21400ed", size = 5297967, upload-time = "2026-03-06T01:57:13.809Z" } +sdist = { url = "https://files.pythonhosted.org/packages/56/95/8de69bb98417227b01f1b1d743c819d6456c9fd140255b6124b05b17dfd6/ty-0.0.20.tar.gz", hash = "sha256:ebba6be7974c14efbb2a9adda6ac59848f880d7259f089dfa72a093039f1dcc6", size = 5262529, upload-time = "2026-03-02T15:51:36.587Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/36/70/edf38bb37517531681d1c37f5df64744e5ad02673c02eb48447eae4bea08/ty-0.0.21-py3-none-linux_armv6l.whl", hash = "sha256:7bdf2f572378de78e1f388d24691c89db51b7caf07cf90f2bfcc1d6b18b70a76", size = 10299222, upload-time = "2026-03-06T01:57:16.64Z" }, - { url = "https://files.pythonhosted.org/packages/72/62/0047b0bd19afeefbc7286f20a5f78a2aa39f92b4d89853f0d7185ab89edc/ty-0.0.21-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7e9613994610431ab8625025bd2880dbcb77c5c9fabdd21134cda12d840a529d", size = 10130513, upload-time = "2026-03-06T01:57:29.93Z" }, - { url = "https://files.pythonhosted.org/packages/a2/20/0b93a9e91aaed23155780258cdfdb4726ef68b6985378ac069bc427291a0/ty-0.0.21-py3-none-macosx_11_0_arm64.whl", hash = "sha256:56d3b198b64dd0a19b2b66e257deaed2ecea568e722ae5352f3c6fb62027f89d", size = 9605425, upload-time = "2026-03-06T01:57:27.115Z" }, - { url = "https://files.pythonhosted.org/packages/ea/fd/9945e2fa2996a1287b1e1d7ce050e97e1f420233b271e770934bfa0880a0/ty-0.0.21-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d23d2c34f7a77d974bb08f0860ef700addc8a683d81a0319f71c08f87506cfd0", size = 10108298, upload-time = "2026-03-06T01:57:35.429Z" }, - { url = "https://files.pythonhosted.org/packages/52/e7/4ec52fcb15f3200826c9f048472c062549a05b0d1ef0b51f32d527b513c4/ty-0.0.21-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:56b01fd2519637a4ca88344f61c96225f540c98ff18bca321d4eaa7bb0f7aa2f", size = 10121556, upload-time = "2026-03-06T01:57:03.242Z" }, - { url = "https://files.pythonhosted.org/packages/ee/c0/ad457be2a8abea0f25549598bd098554540ced66229488daa0d558dad3c8/ty-0.0.21-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e9de7e11c63c6afc40f3e9ba716374add171aee7fabc70b5146a510705c6d41b", size = 10603264, upload-time = "2026-03-06T01:56:52.134Z" }, - { url = "https://files.pythonhosted.org/packages/f8/5b/2ecc7a2175243a4bcb72f5298ae41feabbb93b764bb0dc45722f3752c2c2/ty-0.0.21-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:62f7f5b235c4f7876db305c36997aea07b7af29b1a068f373d0e2547e25f32ff", size = 11196428, upload-time = "2026-03-06T01:57:32.94Z" }, - { url = "https://files.pythonhosted.org/packages/37/f5/aff507d6a901f328ef96a298032b0c11aaaf950a146ed7dd3b5bf2cd3acf/ty-0.0.21-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ee8399f7c453a425291e6688efe430cfae7ab0ac4ffd50eba9f872bf878b54f6", size = 10866355, upload-time = "2026-03-06T01:56:57.831Z" }, - { url = "https://files.pythonhosted.org/packages/be/30/822bbcb92d55b65989aa7ed06d9585f28ade9c9447369194ed4b0fb3b5b9/ty-0.0.21-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:210e7568c9f886c4d01308d751949ee714ad7ad9d7d928d2ba90d329dd880367", size = 10738177, upload-time = "2026-03-06T01:57:11.256Z" }, - { url = "https://files.pythonhosted.org/packages/57/cc/46e7991b6469e93ac2c7e533a028983e402485580150ac864c56352a3a82/ty-0.0.21-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:53508e345b11569f78b21ba8e2b4e61df38a9754947fb3cd9f2ef574367338fb", size = 10079158, upload-time = "2026-03-06T01:57:00.516Z" }, - { url = "https://files.pythonhosted.org/packages/15/c2/0bbdadfbd008240f8f1a87dc877433cb3884436097926107ccf06e618199/ty-0.0.21-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:553e43571f4a35604c36cfd07d8b61a5eb7a714e3c67f8c4ff2cf674fefbaef9", size = 10150535, upload-time = "2026-03-06T01:57:08.815Z" }, - { url = "https://files.pythonhosted.org/packages/c5/b5/2dbdb7b57b5362200ef0a39738ebd31331726328336def0143ac097ee59d/ty-0.0.21-py3-none-musllinux_1_2_i686.whl", hash = "sha256:666f6822e3b9200abfa7e95eb0ddd576460adb8d66b550c0ad2c70abc84a2048", size = 10319803, upload-time = "2026-03-06T01:57:19.106Z" }, - { url = "https://files.pythonhosted.org/packages/72/84/70e52c0b7abc7c2086f9876ef454a73b161d3125315536d8d7e911c94ca4/ty-0.0.21-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:a0854d008347ce4a5fb351af132f660a390ab2a1163444d075251d43e6f74b9b", size = 10826239, upload-time = "2026-03-06T01:57:21.727Z" }, - { url = "https://files.pythonhosted.org/packages/a1/8a/1f72480fd013bbc6cd1929002abbbcde9a0b08ead6a15154de9d7f7fa37e/ty-0.0.21-py3-none-win32.whl", hash = "sha256:bef3ab4c7b966bcc276a8ac6c11b63ba222d21355b48d471ea782c4104eee4e0", size = 9693196, upload-time = "2026-03-06T01:57:24.126Z" }, - { url = "https://files.pythonhosted.org/packages/8d/f8/1104808b875c26c640e536945753a78562d606bef4e241d9dbf3d92477f6/ty-0.0.21-py3-none-win_amd64.whl", hash = "sha256:a709d576e5bea84b745d43058d8b9cd4f27f74a0b24acb4b0cbb7d3d41e0d050", size = 10668660, upload-time = "2026-03-06T01:56:55.06Z" }, - { url = "https://files.pythonhosted.org/packages/1b/b8/25e0adc404bbf986977657b25318991f93097b49f8aea640d93c0b0db68e/ty-0.0.21-py3-none-win_arm64.whl", hash = "sha256:f72047996598ac20553fb7e21ba5741e3c82dee4e9eadf10d954551a5fe09391", size = 10104161, upload-time = "2026-03-06T01:57:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/0b/2c/718abe48393e521bf852cd6b0f984766869b09c258d6e38a118768a91731/ty-0.0.20-py3-none-linux_armv6l.whl", hash = "sha256:7cc12769c169c9709a829c2248ee2826b7aae82e92caeac813d856f07c021eae", size = 10333656, upload-time = "2026-03-02T15:51:56.461Z" }, + { url = "https://files.pythonhosted.org/packages/41/0e/eb1c4cc4a12862e2327b72657bcebb10b7d9f17046f1bdcd6457a0211615/ty-0.0.20-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:3b777c1bf13bc0a95985ebb8a324b8668a4a9b2e514dde5ccf09e4d55d2ff232", size = 10168505, upload-time = "2026-03-02T15:51:51.895Z" }, + { url = "https://files.pythonhosted.org/packages/89/7f/10230798e673f0dd3094dfd16e43bfd90e9494e7af6e8e7db516fb431ddf/ty-0.0.20-py3-none-macosx_11_0_arm64.whl", hash = "sha256:b2a4a7db48bf8cba30365001bc2cad7fd13c1a5aacdd704cc4b7925de8ca5eb3", size = 9678510, upload-time = "2026-03-02T15:51:48.451Z" }, + { url = "https://files.pythonhosted.org/packages/7a/3d/59d9159577494edd1728f7db77b51bb07884bd21384f517963114e3ab5f6/ty-0.0.20-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6846427b8b353a43483e9c19936dc6a25612573b44c8f7d983dfa317e7f00d4c", size = 10162926, upload-time = "2026-03-02T15:51:40.558Z" }, + { url = "https://files.pythonhosted.org/packages/9c/a8/b7273eec3e802f78eb913fbe0ce0c16ef263723173e06a5776a8359b2c66/ty-0.0.20-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:245ceef5bd88df366869385cf96411cb14696334f8daa75597cf7e41c3012eb8", size = 10171702, upload-time = "2026-03-02T15:51:44.069Z" }, + { url = "https://files.pythonhosted.org/packages/9f/32/5f1144f2f04a275109db06e3498450c4721554215b80ae73652ef412eeab/ty-0.0.20-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4d21d1cdf67a444d3c37583c17291ddba9382a9871021f3f5d5735e09e85efe", size = 10682552, upload-time = "2026-03-02T15:51:33.102Z" }, + { url = "https://files.pythonhosted.org/packages/6a/db/9f1f637310792f12bd6ed37d5fc8ab39ba1a9b0c6c55a33865e9f1cad840/ty-0.0.20-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bd4ffd907d1bd70e46af9e9a2f88622f215e1bf44658ea43b32c2c0b357299e4", size = 11242605, upload-time = "2026-03-02T15:51:34.895Z" }, + { url = "https://files.pythonhosted.org/packages/1a/68/cc9cae2e732fcfd20ccdffc508407905a023fc8493b8771c392d915528dc/ty-0.0.20-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b6594b58d8b0e9d16a22b3045fc1305db4b132c8d70c17784ab8c7a7cc986807", size = 10974655, upload-time = "2026-03-02T15:51:46.011Z" }, + { url = "https://files.pythonhosted.org/packages/1c/c1/b9e3e3f28fe63486331e653f6aeb4184af8b1fe80542fcf74d2dda40a93d/ty-0.0.20-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3662f890518ce6cf4d7568f57d03906912d2afbf948a01089a28e325b1ef198c", size = 10761325, upload-time = "2026-03-02T15:51:26.818Z" }, + { url = "https://files.pythonhosted.org/packages/39/9e/67db935bdedf219a00fb69ec5437ba24dab66e0f2e706dd54a4eca234b84/ty-0.0.20-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0e3ffbae58f9f0d17cdc4ac6d175ceae560b7ed7d54f9ddfb1c9f31054bcdc2c", size = 10145793, upload-time = "2026-03-02T15:51:38.562Z" }, + { url = "https://files.pythonhosted.org/packages/c7/de/b0eb815d4dc5a819c7e4faddc2a79058611169f7eef07ccc006531ce228c/ty-0.0.20-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:176e52bc8bb00b0e84efd34583962878a447a3a0e34ecc45fd7097a37554261b", size = 10189640, upload-time = "2026-03-02T15:51:50.202Z" }, + { url = "https://files.pythonhosted.org/packages/b8/71/63734923965cbb70df1da3e93e4b8875434e326b89e9f850611122f279bf/ty-0.0.20-py3-none-musllinux_1_2_i686.whl", hash = "sha256:b2bc73025418e976ca4143dde71fb9025a90754a08ac03e6aa9b80d4bed1294b", size = 10370568, upload-time = "2026-03-02T15:51:42.295Z" }, + { url = "https://files.pythonhosted.org/packages/32/a0/a532c2048533347dff48e9ca98bd86d2c224356e101688a8edaf8d6973fb/ty-0.0.20-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d52f7c9ec6e363e094b3c389c344d5a140401f14a77f0625e3f28c21918552f5", size = 10853999, upload-time = "2026-03-02T15:51:58.963Z" }, + { url = "https://files.pythonhosted.org/packages/48/88/36c652c658fe96658043e4abc8ea97801de6fb6e63ab50aaa82807bff1d8/ty-0.0.20-py3-none-win32.whl", hash = "sha256:c7d32bfe93f8fcaa52b6eef3f1b930fd7da410c2c94e96f7412c30cfbabf1d17", size = 9744206, upload-time = "2026-03-02T15:51:54.183Z" }, + { url = "https://files.pythonhosted.org/packages/ff/a7/a4a13bed1d7fd9d97aaa3c5bb5e6d3e9a689e6984806cbca2ab4c9233cac/ty-0.0.20-py3-none-win_amd64.whl", hash = "sha256:a5e10f40fc4a0a1cbcb740a4aad5c7ce35d79f030836ea3183b7a28f43170248", size = 10711999, upload-time = "2026-03-02T15:51:29.212Z" }, + { url = "https://files.pythonhosted.org/packages/8d/7e/6bfd748a9f4ff9267ed3329b86a0f02cdf6ab49f87bc36c8a164852f99fc/ty-0.0.20-py3-none-win_arm64.whl", hash = "sha256:53f7a5c12c960e71f160b734f328eff9a35d578af4b67a36b0bb5990ac5cdc27", size = 10150143, upload-time = "2026-03-02T15:51:31.283Z" }, ] [[package]] @@ -6676,19 +6683,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/64/7713ffe4b5983314e9d436a90d5bd4f63b6054e2aca783a3cfc44cb95bbf/typer-0.20.0-py3-none-any.whl", hash = "sha256:5b463df6793ec1dca6213a3cf4c0f03bc6e322ac5e16e13ddd622a889489784a", size = 47028, upload-time = "2025-10-20T17:03:47.617Z" }, ] -[[package]] -name = "typer-slim" -version = "0.21.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478, upload-time = "2026-01-06T11:21:11.176Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444, upload-time = "2026-01-06T11:21:12.441Z" }, -] - [[package]] name = "typing-extensions" version = "4.15.0" diff --git a/nemo_automodel/components/checkpoint/checkpointing.py b/nemo_automodel/components/checkpoint/checkpointing.py index 87a30a5bec..4a8b7edbdd 100644 --- a/nemo_automodel/components/checkpoint/checkpointing.py +++ b/nemo_automodel/components/checkpoint/checkpointing.py @@ -485,9 +485,15 @@ def initialize_model_weights( device: Target device for materialized parameters. peft_init_method: Initialization method for PEFT adapters (e.g. "xavier"). """ - to_empty_parameters_only(model, device=device) + # Only materialize parameters that are actually on the meta device. + # When the caller sets is_meta_device=True but the model was already + # constructed on a real device (e.g. ContextManagers was patched to + # a no-op), calling to_empty_parameters_only would replace valid + # weights with uninitialized CUDA memory. + has_meta_params = any(p.device.type == "meta" for p in model.parameters()) + if has_meta_params: + to_empty_parameters_only(model, device=device) - # to_empty_parameters_only only materializes parameters, not buffers. # Buffers (e.g. RoPE inv_freq) may still be on meta device. Move them # to *device* with uninitialized storage so that the subsequent # initialize_weights() call can overwrite them with proper values @@ -580,10 +586,11 @@ def load_base_model( model_name: Name of the model or an absolute path to a snapshot load_base_model: If True, restore from HF base checkpoint """ + model_type = getattr(getattr(model, "config", None), "model_type", None) + if load_base_model: assert model_name is not None, "model_name is required when loading base model" # Get combined key mapping from model attribute and model-type specific conversions - model_type = getattr(getattr(model, "config", None), "model_type", None) model_key_mapping = getattr(model, "_checkpoint_conversion_mapping", None) key_mapping = get_combined_key_mapping(model_type, model_key_mapping) # NemotronH remote code (trust_remote_code) uses backbone.* params matching checkpoint keys @@ -599,7 +606,7 @@ def load_base_model( key_mapping=key_mapping, ) - _reinit_rope_buffers(model, device) + _reinit_non_persistent_buffers(model, device, model_type=model_type) is_tied_lm_head = is_tied_word_embeddings(model) self.config.original_model_root_dir = root_dir @@ -1025,18 +1032,48 @@ def _init_peft_adapters(model: nn.Module, peft_init_method: str) -> None: logging.warning(f"Failed to initialize weights for PEFT adapter `{module.__class__.__name__}`: {e}") -def _reinit_rope_buffers(model: nn.Module, device: torch.device) -> None: +_MODELS_REQUIRING_BUFFER_REINIT: frozenset[str] = frozenset( + { + "gemma3", + "nemotron-nas", + } +) + + +def _reinit_non_persistent_buffers(model: nn.Module, device: torch.device, model_type: str | None = None) -> None: """ - Recompute non-persistent RoPE ``inv_freq`` buffers for Nemotron-NAS models. + Recompute non-persistent buffers that are not saved in checkpoints. + + Non-persistent buffers are not saved in checkpoints, so after meta-device + materialization they contain uninitialized CUDA memory. When + ``initialize_weights()`` is skipped (e.g. for Gemma3 to avoid DTensor + issues), these buffers must be recomputed explicitly. + + Only runs for models listed in ``_MODELS_REQUIRING_BUFFER_REINIT`` to + avoid unexpected side-effects on arbitrary HF Hub models. + + Handles four patterns: + + 1. **Standard RoPE** — single ``inv_freq`` buffer with ``rope_init_fn`` + + ``rope_kwargs`` (e.g. Nemotron-NAS). + 2. **Per-layer-type RoPE** — ``{layer_type}_inv_freq`` buffers via + ``compute_default_rope_parameters`` (e.g. Gemma3RotaryEmbedding). + 3. **Scaled embedding** — ``embed_scale`` buffer on ``ScaledWordEmbedding`` + modules (Gemma family), recomputed from ``scalar_embed_scale``. + 4. **Vision position IDs** — ``position_ids`` buffer on vision embedding + modules (SigLIP), recomputed from ``num_positions``. + Args: - model: Model to reinitialize RoPE buffers for. + model: Model to reinitialize non-persistent buffers for. device: Device to create the new buffers on. + model_type: The ``config.model_type`` string. If not in + ``_MODELS_REQUIRING_BUFFER_REINIT`` the function is a no-op. """ - model_type = getattr(getattr(model, "config", None), "model_type", None) - if model_type not in ("nemotron-nas",): + if model_type not in _MODELS_REQUIRING_BUFFER_REINIT: return for name, module in model.named_modules(): + # Pattern 1: standard RoPE with rope_init_fn + rope_kwargs (Nemotron-NAS) if hasattr(module, "rope_init_fn") and hasattr(module, "inv_freq") and hasattr(module, "rope_kwargs"): try: inv_freq, _ = module.rope_init_fn(module.config, device, **module.rope_kwargs) @@ -1047,6 +1084,51 @@ def _reinit_rope_buffers(model: nn.Module, device: torch.device) -> None: except Exception as e: logging.warning(f"Failed to reinitialize RoPE inv_freq for {name}: {e}") + # Pattern 2: per-layer-type RoPE (Gemma3RotaryEmbedding and similar) + elif hasattr(module, "layer_types") and hasattr(module, "rope_type") and hasattr(module, "config"): + rope_config = getattr(module, "config", None) + rope_parameters = getattr(rope_config, "rope_parameters", None) + if rope_parameters is None: + continue + for layer_type in getattr(module, "layer_types", []): + inv_freq_attr = f"{layer_type}_inv_freq" + if not hasattr(module, inv_freq_attr): + continue + try: + rope_init_fn = getattr(module, "compute_default_rope_parameters", None) + if rope_init_fn is None: + continue + rope_type = module.rope_type.get(layer_type, "default") + if rope_type != "default": + from transformers.modeling_rope_utils import ROPE_INIT_FUNCTIONS + + rope_init_fn = ROPE_INIT_FUNCTIONS[rope_type] + curr_inv_freq, curr_attention_scaling = rope_init_fn(rope_config, device, layer_type=layer_type) + setattr(module, inv_freq_attr, curr_inv_freq) + orig_attr = f"{layer_type}_original_inv_freq" + if hasattr(module, orig_attr): + setattr(module, orig_attr, curr_inv_freq.clone()) + setattr(module, f"{layer_type}_attention_scaling", curr_attention_scaling) + logging.debug(f"Reinitialized RoPE {inv_freq_attr} for {name} on device {device}") + except Exception as e: + logging.warning(f"Failed to reinitialize RoPE {inv_freq_attr} for {name}: {e}") + + # Pattern 3: ScaledWordEmbedding embed_scale (Gemma family) + if hasattr(module, "scalar_embed_scale") and "embed_scale" in getattr(module, "_buffers", {}): + try: + module.embed_scale = torch.tensor(module.scalar_embed_scale, device=device) + logging.debug(f"Reinitialized embed_scale={module.scalar_embed_scale} for {name} on device {device}") + except Exception as e: + logging.warning(f"Failed to reinitialize embed_scale for {name}: {e}") + + # Pattern 4: Vision embedding position_ids (SigLIP and similar) + if hasattr(module, "num_positions") and "position_ids" in getattr(module, "_buffers", {}): + try: + module.position_ids = torch.arange(module.num_positions, device=device).expand((1, -1)) + logging.debug(f"Reinitialized position_ids (num_positions={module.num_positions}) for {name}") + except Exception as e: + logging.warning(f"Failed to reinitialize position_ids for {name}: {e}") + def _apply(module, fn, recurse=True) -> nn.Module: """ diff --git a/nemo_automodel/components/checkpoint/conversion_mapping.py b/nemo_automodel/components/checkpoint/conversion_mapping.py index b2a23030b1..6fc41e5b08 100644 --- a/nemo_automodel/components/checkpoint/conversion_mapping.py +++ b/nemo_automodel/components/checkpoint/conversion_mapping.py @@ -165,6 +165,16 @@ def get_model_conversion_mapping( ) +_VLM_KEY_MAPPINGS: dict[str, dict[str, str]] = { + "gemma3": { + r"^language_model\.model\.": "model.language_model.", + r"^vision_tower\.": "model.vision_tower.", + r"^multi_modal_projector\.": "model.multi_modal_projector.", + r"^language_model\.lm_head\.": "lm_head.", + }, +} + + def get_combined_key_mapping( model_type: str, model_key_mapping: Optional[dict[str, str]] = None, @@ -188,6 +198,13 @@ def get_combined_key_mapping( Combined key mapping dictionary (regex pattern -> replacement), or None if no mappings are defined. """ + # VLM models with known restructured hierarchies get explicit mappings + # that override the generic transformers conversion (e.g. transformers 5.5.0 + # aliases gemma3→llava, but the llava mapping produces wrong FQNs for + # Gemma3's model.language_model.* hierarchy). + if model_type in _VLM_KEY_MAPPINGS: + return dict(_VLM_KEY_MAPPINGS[model_type]) + result = {} # First add model-specific key mapping (takes precedence) diff --git a/nemo_automodel/components/models/gemma4_moe/model.py b/nemo_automodel/components/models/gemma4_moe/model.py index ae659b23b9..0d32affa3c 100644 --- a/nemo_automodel/components/models/gemma4_moe/model.py +++ b/nemo_automodel/components/models/gemma4_moe/model.py @@ -262,7 +262,8 @@ def __init__( moe_defaults = dict( dim=config.hidden_size, inter_dim=config.intermediate_size, - moe_inter_dim=config.expert_intermediate_size or getattr(config, "moe_intermediate_size", None), + moe_inter_dim=getattr(config, "moe_intermediate_size", None) + or getattr(config, "expert_intermediate_size", None), n_routed_experts=config.num_experts, n_shared_experts=0, n_activated_experts=config.top_k_experts, @@ -440,8 +441,10 @@ def __init__( for k, v in text_config.items(): setattr(cfg_text, k, v) - # Compat: checkpoints renamed expert_intermediate_size → moe_intermediate_size. + # Compat: older checkpoints used expert_intermediate_size, v5.5+ uses moe_intermediate_size. cfg_text = config.text_config if hasattr(config, "text_config") else config + if not getattr(cfg_text, "moe_intermediate_size", None) and getattr(cfg_text, "expert_intermediate_size", None): + cfg_text.moe_intermediate_size = cfg_text.expert_intermediate_size if not getattr(cfg_text, "expert_intermediate_size", None) and getattr(cfg_text, "moe_intermediate_size", None): cfg_text.expert_intermediate_size = cfg_text.moe_intermediate_size diff --git a/nemo_automodel/components/models/nemotron_parse/model.py b/nemo_automodel/components/models/nemotron_parse/model.py index 841b7b5b34..ba61d1757f 100644 --- a/nemo_automodel/components/models/nemotron_parse/model.py +++ b/nemo_automodel/components/models/nemotron_parse/model.py @@ -323,7 +323,6 @@ def forward( encoder_hidden_states, encoder_attention_mask, None, # past_key_values - output_attentions, False, # use_cache ) else: @@ -333,15 +332,17 @@ def forward( encoder_hidden_states=encoder_hidden_states, encoder_attention_mask=encoder_attention_mask, past_key_values=None, - output_attentions=output_attentions, use_cache=False, ) - hidden_states = layer_outputs[0] - if output_attentions: - all_self_attns += (layer_outputs[1],) - if encoder_hidden_states is not None: - all_cross_attentions += (layer_outputs[2],) + if isinstance(layer_outputs, torch.Tensor): + hidden_states = layer_outputs + else: + hidden_states = layer_outputs[0] + if output_attentions: + all_self_attns += (layer_outputs[1],) + if encoder_hidden_states is not None: + all_cross_attentions += (layer_outputs[2],) hidden_states = self.layer_norm(hidden_states) diff --git a/nemo_automodel/components/models/nemotron_v3/model.py b/nemo_automodel/components/models/nemotron_v3/model.py index 8330e0fc41..d8aa86cf25 100644 --- a/nemo_automodel/components/models/nemotron_v3/model.py +++ b/nemo_automodel/components/models/nemotron_v3/model.py @@ -485,8 +485,9 @@ def prepare_inputs_for_generation( batch_size = input_ids.shape[0] - # Create cache on first call - if past_key_values is None: + # Create cache on first call, or replace non-NemotronHybridCache + # (transformers v5.5+ GenerationMixin may pre-create a DynamicCache) + if past_key_values is None or not isinstance(past_key_values, NemotronHybridCache): past_key_values = NemotronHybridCache(self.config, batch_size, self.dtype, self.device) # First call: cache_position covers the full prompt if cache_position is None: diff --git a/nemo_automodel/components/models/qwen3_5_moe/cp_linear_attn.py b/nemo_automodel/components/models/qwen3_5_moe/cp_linear_attn.py index 6fb3291051..6286cbbf19 100644 --- a/nemo_automodel/components/models/qwen3_5_moe/cp_linear_attn.py +++ b/nemo_automodel/components/models/qwen3_5_moe/cp_linear_attn.py @@ -93,7 +93,6 @@ def forward( return super().forward( hidden_states, cache_params=cache_params, - cache_position=cache_position, attention_mask=attention_mask, ) diff --git a/pyproject.toml b/pyproject.toml index 46c99cb4ad..dcafa835f5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -82,7 +82,7 @@ dependencies = [ "pyyaml", "torch>=2.6.0,<=2.10.0", "torchdata", - "transformers>=5.3.0,<5.4.0", + "transformers==5.5.0", "wandb", "torchao", "mlflow", @@ -128,7 +128,7 @@ moe = [ vlm = [ "albumentations", "backoff", - "mistral_common[opencv]>=1.9.0", + "mistral_common[opencv]>=1.11.0", "numpy", "numba", "open-clip-torch", diff --git a/tests/functional_tests/checkpoint/test_peft_vlm.py b/tests/functional_tests/checkpoint/test_peft_vlm.py index f8139c72ac..f66082b900 100644 --- a/tests/functional_tests/checkpoint/test_peft_vlm.py +++ b/tests/functional_tests/checkpoint/test_peft_vlm.py @@ -20,21 +20,21 @@ import shutil from pathlib import Path +import datasets import torch import torch.distributed.checkpoint as dcp import torch.distributed.tensor import torch.nn as nn +import yaml from peft import PeftModel from safetensors import safe_open from transformers import AutoModelForImageTextToText -import yaml from nemo_automodel.components.checkpoint._backports.hf_storage import _HuggingFaceStorageReader from nemo_automodel.components.checkpoint.stateful_wrappers import ModelState, OptimizerState from nemo_automodel.components.config._arg_parser import parse_args_and_load_config from nemo_automodel.recipes.vlm.finetune import FinetuneRecipeForVLM, calculate_loss -import datasets datasets.disable_caching() @@ -52,11 +52,11 @@ def get_validation_loss( with torch.no_grad(): out = model(**val_batch) loss = calculate_loss( - loss_fn, - logits=out.logits, - labels=labels, - mask=loss_mask, - ) + loss_fn, + logits=out.logits, + labels=labels, + mask=loss_mask, + ) return loss @@ -95,13 +95,15 @@ def load_dcp(ckpt_dir: Path | str) -> tuple[dict, dict]: def compare_configs(source_config: dict, restored_config: dict): - """ Recursively compare two configs.""" + """Recursively compare two configs.""" for k, v in source_config.items(): if k in restored_config: if isinstance(v, dict): compare_configs(v, restored_config[k]) else: - assert v == restored_config[k], f"Config mismatch for key {k}. Expected {v} but got {restored_config[k]}" + assert v == restored_config[k], ( + f"Config mismatch for key {k}. Expected {v} but got {restored_config[k]}" + ) def load_safetensors(ckpt_dir: Path | str) -> dict[str, torch.Tensor]: @@ -125,6 +127,7 @@ def to_cpu( """ return {k: v.cpu() for k, v in state_dict.items() if isinstance(v, torch.Tensor)} + def get_test_peft_vlm_checkpoint_expected_keys(): expected_model_keys = { "base_model.model.model.language_model.layers.0.self_attn.q_proj.lora_A.weight": ( @@ -953,8 +956,7 @@ def test_hf_peft_checkpoint(): def _rename_keys(d: dict, prepend: str): - """Rename the keys of *d* by prepending *prepend* to each key. - """ + """Rename the keys of *d* by prepending *prepend* to each key.""" flat: dict[str, torch.Tensor] = {} for k, v in d.items(): key = f"{prepend}{k}" diff --git a/tests/unit_tests/checkpoint/test_checkpointing.py b/tests/unit_tests/checkpoint/test_checkpointing.py index a0d05734a3..f3d69a6183 100644 --- a/tests/unit_tests/checkpoint/test_checkpointing.py +++ b/tests/unit_tests/checkpoint/test_checkpointing.py @@ -27,7 +27,7 @@ _equally_divide_layers, _is_custom_model, _model_has_dtensors, - _reinit_rope_buffers, + _reinit_non_persistent_buffers, ) from nemo_automodel.components.checkpoint.stateful_wrappers import _get_lm_head_weight_and_name @@ -137,12 +137,12 @@ def test_multiple_orig_mod_prefixes_all_stripped(self): # ============================================================================= -# Tests for _reinit_rope_buffers +# Tests for _reinit_non_persistent_buffers # ============================================================================= class TestReinitRopeBuffers: - """Test cases for _reinit_rope_buffers RoPE buffer reinitialization.""" + """Test cases for _reinit_non_persistent_buffers RoPE buffer reinitialization.""" def test_non_deci_model_returns_early(self): """Non-DeciLM model (e.g. llama) returns early without changes.""" @@ -157,7 +157,7 @@ def test_non_deci_model_returns_early(self): original_inv_freq = rope.inv_freq.clone() model.rope = rope - _reinit_rope_buffers(model, torch.device("cpu")) + _reinit_non_persistent_buffers(model, torch.device("cpu"), model_type="llama") assert torch.equal(model.rope.inv_freq, original_inv_freq) @@ -184,7 +184,7 @@ def test_deci_model_recomputes_inv_freq(self): real_model.config = config # We need to mock named_modules to return our mock rope with patch.object(real_model, "named_modules", return_value=[("", real_model), ("layers.0.rotary", rope)]): - _reinit_rope_buffers(real_model, torch.device("cpu")) + _reinit_non_persistent_buffers(real_model, torch.device("cpu"), model_type="nemotron-nas") rope.rope_init_fn.assert_called_once_with(rope.config, torch.device("cpu"), seq_len=128) assert rope.inv_freq is new_inv_freq @@ -206,7 +206,7 @@ def test_deci_model_updates_original_inv_freq(self): rope.original_inv_freq = torch.zeros(3) with patch.object(model, "named_modules", return_value=[("", model), ("layers.0.rotary", rope)]): - _reinit_rope_buffers(model, torch.device("cpu")) + _reinit_non_persistent_buffers(model, torch.device("cpu"), model_type="nemotron-nas") assert rope.inv_freq is new_inv_freq # original_inv_freq should be a clone of new_inv_freq @@ -223,14 +223,14 @@ def test_deci_model_without_rope_attributes_no_crash(self): model.layer = torch.nn.Linear(4, 4) # Should not raise - _reinit_rope_buffers(model, torch.device("cpu")) + _reinit_non_persistent_buffers(model, torch.device("cpu"), model_type="nemotron-nas") def test_no_config_returns_early(self): """Model without config attribute returns early.""" model = torch.nn.Module() - # Should not raise - _reinit_rope_buffers(model, torch.device("cpu")) + # Should not raise — model_type=None is not in the allowlist + _reinit_non_persistent_buffers(model, torch.device("cpu"), model_type=None) def test_rope_init_fn_failure_logs_warning(self): """If rope_init_fn raises, a warning is logged and other modules continue.""" @@ -247,7 +247,56 @@ def test_rope_init_fn_failure_logs_warning(self): with patch.object(model, "named_modules", return_value=[("", model), ("layers.0.rotary", rope)]): # Should not raise, just log a warning - _reinit_rope_buffers(model, torch.device("cpu")) + _reinit_non_persistent_buffers(model, torch.device("cpu"), model_type="nemotron-nas") + + def test_embed_scale_reinitialized_from_scalar(self): + """ScaledWordEmbedding embed_scale buffer is recomputed from scalar_embed_scale.""" + model = torch.nn.Module() + emb = torch.nn.Embedding(10, 8) + emb.scalar_embed_scale = 48.0 + emb.register_buffer("embed_scale", torch.tensor(float("nan")), persistent=False) + model.embed_tokens = emb + + _reinit_non_persistent_buffers(model, torch.device("cpu"), model_type="gemma3") + + assert emb.embed_scale.item() == 48.0 + + def test_embed_scale_without_scalar_attr_is_skipped(self): + """Modules without scalar_embed_scale are not touched.""" + model = torch.nn.Module() + emb = torch.nn.Embedding(10, 8) + emb.register_buffer("embed_scale", torch.tensor(float("nan")), persistent=False) + model.embed_tokens = emb + + _reinit_non_persistent_buffers(model, torch.device("cpu"), model_type="gemma3") + + # embed_scale should remain NaN because there's no scalar_embed_scale to recover from + assert torch.isnan(emb.embed_scale) + + def test_position_ids_reinitialized_from_num_positions(self): + """Vision embedding position_ids buffer is recomputed from num_positions.""" + model = torch.nn.Module() + vis_emb = torch.nn.Module() + vis_emb.num_positions = 16 + vis_emb.register_buffer("position_ids", torch.full((1, 16), 999999, dtype=torch.long), persistent=False) + model.vision_embeddings = vis_emb + + _reinit_non_persistent_buffers(model, torch.device("cpu"), model_type="gemma3") + + expected = torch.arange(16).expand((1, -1)) + assert torch.equal(vis_emb.position_ids, expected) + + def test_position_ids_without_num_positions_is_skipped(self): + """Modules with position_ids but no num_positions are not touched.""" + model = torch.nn.Module() + vis_emb = torch.nn.Module() + garbage = torch.full((1, 16), 999999, dtype=torch.long) + vis_emb.register_buffer("position_ids", garbage.clone(), persistent=False) + model.vision_embeddings = vis_emb + + _reinit_non_persistent_buffers(model, torch.device("cpu"), model_type="gemma3") + + assert torch.equal(vis_emb.position_ids, garbage) # ============================================================================= @@ -695,11 +744,12 @@ def _make_checkpointer(self, tmp_path, diffusers_compatible): return checkpointer @patch("nemo_automodel.components.checkpoint.checkpointing.consolidate_safetensors_files_on_every_rank") - @patch("nemo_automodel.components.checkpoint.checkpointing._maybe_adapt_state_dict_to_hf", side_effect=lambda *a, **kw: a[1]) + @patch( + "nemo_automodel.components.checkpoint.checkpointing._maybe_adapt_state_dict_to_hf", + side_effect=lambda *a, **kw: a[1], + ) @patch("torch.distributed.is_initialized", return_value=False) - def test_save_model_renames_index_on_all_ranks_path( - self, mock_dist_init, mock_adapt, mock_consolidate, tmp_path - ): + def test_save_model_renames_index_on_all_ranks_path(self, mock_dist_init, mock_adapt, mock_consolidate, tmp_path): weights_path = tmp_path / "step_100" consolidated_dir = weights_path / "model" / "consolidated" @@ -723,7 +773,10 @@ def _fake_consolidate(**kwargs): assert (consolidated_dir / _DIFFUSERS_INDEX_FN).exists() @patch("nemo_automodel.components.checkpoint.checkpointing.consolidate_safetensors_files_on_every_rank") - @patch("nemo_automodel.components.checkpoint.checkpointing._maybe_adapt_state_dict_to_hf", side_effect=lambda *a, **kw: a[1]) + @patch( + "nemo_automodel.components.checkpoint.checkpointing._maybe_adapt_state_dict_to_hf", + side_effect=lambda *a, **kw: a[1], + ) @patch("torch.distributed.is_initialized", return_value=False) def test_save_model_preserves_index_when_not_diffusers_compatible( self, mock_dist_init, mock_adapt, mock_consolidate, tmp_path diff --git a/tests/unit_tests/models/deepseek_v3/test_dsv3_layers.py b/tests/unit_tests/models/deepseek_v3/test_dsv3_layers.py index efab8fc71b..a7fc44ad0b 100644 --- a/tests/unit_tests/models/deepseek_v3/test_dsv3_layers.py +++ b/tests/unit_tests/models/deepseek_v3/test_dsv3_layers.py @@ -207,6 +207,8 @@ def create_mock_config(self, **overrides): for key, value in overrides.items(): setattr(config, key, value) + config.rope_parameters = config.rope_scaling + return config @skip_te @@ -364,6 +366,8 @@ def create_mock_config(self, **overrides): for key, value in overrides.items(): setattr(config, key, value) + config.rope_parameters = config.rope_scaling + return config def test_mla_forward_tensor_shapes(self): @@ -487,6 +491,8 @@ def create_mock_config(self, **overrides): for key, value in overrides.items(): setattr(config, key, value) + config.rope_parameters = config.rope_scaling + return config @patch("torch.nn.init.trunc_normal_") diff --git a/tests/unit_tests/models/gemma4/test_gemma4_model.py b/tests/unit_tests/models/gemma4/test_gemma4_model.py index 67787e1a9c..5f6ddfd24d 100644 --- a/tests/unit_tests/models/gemma4/test_gemma4_model.py +++ b/tests/unit_tests/models/gemma4/test_gemma4_model.py @@ -16,9 +16,6 @@ import pytest import torch - -pytest.importorskip("transformers.models.gemma4", reason="Gemma4 not available in this transformers version") - from transformers.models.gemma4.configuration_gemma4 import Gemma4Config, Gemma4TextConfig from nemo_automodel.components.models.common import BackendConfig @@ -47,12 +44,11 @@ def _make_text_config(**overrides): num_hidden_layers=4, intermediate_size=128, rms_norm_eps=1e-6, - rope_theta=10000.0, max_position_embeddings=256, enable_moe_block=True, num_experts=4, top_k_experts=2, - expert_intermediate_size=64, + moe_intermediate_size=64, layer_types=["full_attention", "sliding_attention"] * 2, sliding_window=128, hidden_activation="gelu_pytorch_tanh", @@ -74,7 +70,7 @@ def _make_moe_config(text_config=None): return MoEConfig( dim=tc.hidden_size, inter_dim=tc.intermediate_size, - moe_inter_dim=tc.expert_intermediate_size, + moe_inter_dim=tc.moe_intermediate_size, n_routed_experts=tc.num_experts, n_shared_experts=0, n_activated_experts=tc.top_k_experts, @@ -152,7 +148,7 @@ def test_proj_output_features_match_num_experts(self, text_config): def test_root_size_value(self, text_config): gate = Gemma4Gate(text_config) - expected = text_config.hidden_size ** -0.5 + expected = text_config.hidden_size**-0.5 torch.testing.assert_close(gate.root_size, torch.tensor(expected)) def test_scale_initialized_to_ones(self, text_config): @@ -317,7 +313,7 @@ def test_moe_config_auto_created(self, text_config, backend_config): assert model.moe_config.dim == text_config.hidden_size assert model.moe_config.n_routed_experts == text_config.num_experts assert model.moe_config.n_activated_experts == text_config.top_k_experts - assert model.moe_config.moe_inter_dim == text_config.expert_intermediate_size + assert model.moe_config.moe_inter_dim == text_config.moe_intermediate_size assert model.moe_config.expert_activation == "geglu" def test_moe_config_accepts_override(self, text_config, backend_config, moe_config): @@ -365,8 +361,11 @@ def test_moe_exposes_moe_config(self, gemma4_config, backend_config): def test_state_dict_adapter_created_when_enabled(self, gemma4_config): backend = BackendConfig( - linear="torch", attn="sdpa", rms_norm="torch", - experts="torch", dispatcher="torch", + linear="torch", + attn="sdpa", + rms_norm="torch", + experts="torch", + dispatcher="torch", enable_hf_state_dict_adapter=True, ) model = Gemma4ForConditionalGeneration(gemma4_config, backend=backend) @@ -382,8 +381,11 @@ def test_text_config_dict_override_applied(self): model = Gemma4ForConditionalGeneration( cfg, backend=BackendConfig( - linear="torch", attn="sdpa", rms_norm="torch", - experts="torch", dispatcher="torch", + linear="torch", + attn="sdpa", + rms_norm="torch", + experts="torch", + dispatcher="torch", enable_hf_state_dict_adapter=False, ), text_config=override, @@ -472,6 +474,7 @@ def make_tracker(orig): def tracker(buf_dev): init_calls.append(buf_dev) return orig(buf_dev) + return tracker layer.moe.init_weights = make_tracker(original_init) @@ -498,8 +501,11 @@ def test_from_config_creates_model(self, gemma4_config, backend_config): def test_from_pretrained_classmethod(self): cfg = _make_gemma4_config() backend = BackendConfig( - linear="torch", attn="sdpa", rms_norm="torch", - experts="torch", dispatcher="torch", + linear="torch", + attn="sdpa", + rms_norm="torch", + experts="torch", + dispatcher="torch", enable_hf_state_dict_adapter=False, ) @@ -509,11 +515,13 @@ def test_from_pretrained_classmethod(self): mock_from_pretrained.return_value = cfg with patch.object( - Gemma4ForConditionalGeneration, "from_config", + Gemma4ForConditionalGeneration, + "from_config", wraps=Gemma4ForConditionalGeneration.from_config, ) as mock_from_config: model = Gemma4ForConditionalGeneration.from_pretrained( - "gemma4/model", backend=backend, + "gemma4/model", + backend=backend, ) assert isinstance(model, Gemma4ForConditionalGeneration) mock_from_pretrained.assert_called_once_with("gemma4/model") diff --git a/tests/unit_tests/models/gemma4/test_gemma4_state_dict_adapter.py b/tests/unit_tests/models/gemma4/test_gemma4_state_dict_adapter.py index 09bb1cbb77..5f556b11d6 100644 --- a/tests/unit_tests/models/gemma4/test_gemma4_state_dict_adapter.py +++ b/tests/unit_tests/models/gemma4/test_gemma4_state_dict_adapter.py @@ -17,8 +17,6 @@ import pytest import torch -pytest.importorskip("transformers.models.gemma4", reason="Gemma4 not available in this transformers version") - from nemo_automodel.components.models.common import BackendConfig from nemo_automodel.components.models.gemma4_moe.state_dict_adapter import Gemma4MoEStateDictAdapter from nemo_automodel.components.moe.config import MoEConfig @@ -38,6 +36,7 @@ def config(): cfg.intermediate_size = 128 cfg.num_experts = N_EXPERTS cfg.top_k_experts = 2 + cfg.moe_intermediate_size = EXPERT_INTER cfg.expert_intermediate_size = EXPERT_INTER return cfg @@ -88,16 +87,15 @@ def adapter(config, moe_config, backend_config): def _make_hf_state_dict(layer_idx=0, with_model_prefix=True): - """Build a minimal HF-format Gemma4 MoE state dict for one layer.""" + """Build a minimal HF-format Gemma4 MoE state dict for one layer (v5.5 layout).""" prefix = "model.language_model." if with_model_prefix else "" layer = f"{prefix}layers.{layer_idx}" return { f"{layer}.router.proj.weight": torch.randn(N_EXPERTS, HIDDEN), f"{layer}.router.scale": torch.randn(HIDDEN), - f"{layer}.moe.gate_proj": torch.randn(N_EXPERTS, HIDDEN, EXPERT_INTER), - f"{layer}.moe.up_proj": torch.randn(N_EXPERTS, HIDDEN, EXPERT_INTER), - f"{layer}.moe.down_proj": torch.randn(N_EXPERTS, EXPERT_INTER, HIDDEN), - f"{layer}.moe.per_expert_scale": torch.ones(N_EXPERTS) * 2.0, + f"{layer}.router.per_expert_scale": torch.ones(N_EXPERTS) * 2.0, + f"{layer}.experts.gate_up_proj": torch.randn(N_EXPERTS, 2 * EXPERT_INTER, HIDDEN), + f"{layer}.experts.down_proj": torch.randn(N_EXPERTS, HIDDEN, EXPERT_INTER), f"{layer}.self_attn.q_proj.weight": torch.randn(HIDDEN, HIDDEN), } @@ -124,25 +122,25 @@ def test_router_original_keys_removed(self, adapter): def test_expert_gate_up_concatenated(self, adapter): hf_sd = _make_hf_state_dict() - gate_proj = hf_sd["model.language_model.layers.0.moe.gate_proj"] - up_proj = hf_sd["model.language_model.layers.0.moe.up_proj"] + gate_up_proj = hf_sd["model.language_model.layers.0.experts.gate_up_proj"] nemo_sd = adapter.from_hf(hf_sd) gate_and_up = nemo_sd["model.language_model.layers.0.moe.experts.gate_and_up_projs"] + # HF [E, 2*inter, hidden] transposed to NeMo [E, hidden, 2*inter] assert gate_and_up.shape == (N_EXPERTS, HIDDEN, 2 * EXPERT_INTER) - torch.testing.assert_close(gate_and_up[..., :EXPERT_INTER], gate_proj) - torch.testing.assert_close(gate_and_up[..., EXPERT_INTER:], up_proj) + torch.testing.assert_close(gate_and_up, gate_up_proj.transpose(-2, -1)) def test_per_expert_scale_absorbed_into_down_projs(self, adapter): hf_sd = _make_hf_state_dict() - down_proj = hf_sd["model.language_model.layers.0.moe.down_proj"] - per_expert_scale = hf_sd["model.language_model.layers.0.moe.per_expert_scale"] + down_proj = hf_sd["model.language_model.layers.0.experts.down_proj"] + per_expert_scale = hf_sd["model.language_model.layers.0.router.per_expert_scale"] nemo_sd = adapter.from_hf(hf_sd) down_projs = nemo_sd["model.language_model.layers.0.moe.experts.down_projs"] - expected = down_proj * per_expert_scale[:, None, None] + # HF [E, hidden, inter] transposed to NeMo [E, inter, hidden], scaled by per_expert_scale + expected = down_proj.transpose(-2, -1) * per_expert_scale[:, None, None] torch.testing.assert_close(down_projs, expected) def test_passthrough_keys_preserved(self, adapter): @@ -163,14 +161,13 @@ def test_hf_expert_keys_not_in_output(self, adapter): nemo_sd = adapter.from_hf(hf_sd) for key in nemo_sd: - assert ".moe.gate_proj" not in key - assert ".moe.up_proj" not in key - assert ".moe.down_proj" not in key or "experts.down_projs" in key - assert ".moe.per_expert_scale" not in key + assert ".experts.gate_up_proj" not in key or "gate_and_up_projs" in key + assert ".experts.down_proj" not in key or "experts.down_projs" in key + assert ".router.per_expert_scale" not in key def test_incomplete_expert_keys_raises(self, adapter): hf_sd = _make_hf_state_dict() - del hf_sd["model.language_model.layers.0.moe.per_expert_scale"] + del hf_sd["model.language_model.layers.0.experts.gate_up_proj"] with pytest.raises(RuntimeError, match="Incomplete expert weights"): adapter.from_hf(hf_sd) @@ -226,12 +223,10 @@ def test_gate_and_up_split_correctly(self, adapter): hf_sd = adapter.to_hf(nemo_sd) - gate_proj = hf_sd["model.language_model.layers.0.moe.gate_proj"] - up_proj = hf_sd["model.language_model.layers.0.moe.up_proj"] - assert gate_proj.shape == (N_EXPERTS, HIDDEN, EXPERT_INTER) - assert up_proj.shape == (N_EXPERTS, HIDDEN, EXPERT_INTER) - torch.testing.assert_close(gate_proj, gate_and_up[..., :EXPERT_INTER]) - torch.testing.assert_close(up_proj, gate_and_up[..., EXPERT_INTER:]) + gate_up_proj = hf_sd["model.language_model.layers.0.experts.gate_up_proj"] + # NeMo [E, hidden, 2*inter] transposed to HF [E, 2*inter, hidden] + assert gate_up_proj.shape == (N_EXPERTS, 2 * EXPERT_INTER, HIDDEN) + torch.testing.assert_close(gate_up_proj, gate_and_up.transpose(-2, -1)) def test_down_projs_output_and_per_expert_scale(self, adapter): nemo_sd = self._make_nemo_state_dict() @@ -239,10 +234,11 @@ def test_down_projs_output_and_per_expert_scale(self, adapter): hf_sd = adapter.to_hf(nemo_sd) - down_proj = hf_sd["model.language_model.layers.0.moe.down_proj"] - per_expert_scale = hf_sd["model.language_model.layers.0.moe.per_expert_scale"] + down_proj = hf_sd["model.language_model.layers.0.experts.down_proj"] + per_expert_scale = hf_sd["model.language_model.layers.0.router.per_expert_scale"] - torch.testing.assert_close(down_proj, original_down) + # NeMo [E, inter, hidden] transposed to HF [E, hidden, inter] + torch.testing.assert_close(down_proj, original_down.transpose(-2, -1)) torch.testing.assert_close(per_expert_scale, torch.ones(N_EXPERTS, dtype=torch.float32)) def test_passthrough_keys_preserved(self, adapter): @@ -283,10 +279,9 @@ def test_multiple_layers(self, adapter): for layer_idx in range(2): assert f"model.language_model.layers.{layer_idx}.router.proj.weight" in hf_sd - assert f"model.language_model.layers.{layer_idx}.moe.gate_proj" in hf_sd - assert f"model.language_model.layers.{layer_idx}.moe.up_proj" in hf_sd - assert f"model.language_model.layers.{layer_idx}.moe.down_proj" in hf_sd - assert f"model.language_model.layers.{layer_idx}.moe.per_expert_scale" in hf_sd + assert f"model.language_model.layers.{layer_idx}.experts.gate_up_proj" in hf_sd + assert f"model.language_model.layers.{layer_idx}.experts.down_proj" in hf_sd + assert f"model.language_model.layers.{layer_idx}.router.per_expert_scale" in hf_sd # --------------------------------------------------------------------------- @@ -299,42 +294,38 @@ def test_hf_to_nemo_to_hf_preserves_shapes(self, adapter): nemo_sd = adapter.from_hf(hf_sd) hf_sd_rt = adapter.to_hf(nemo_sd) - for key in ["model.language_model.layers.0.moe.gate_proj", - "model.language_model.layers.0.moe.up_proj", - "model.language_model.layers.0.moe.down_proj", - "model.language_model.layers.0.moe.per_expert_scale", - "model.language_model.layers.0.router.proj.weight", - "model.language_model.layers.0.router.scale"]: + for key in [ + "model.language_model.layers.0.experts.gate_up_proj", + "model.language_model.layers.0.experts.down_proj", + "model.language_model.layers.0.router.per_expert_scale", + "model.language_model.layers.0.router.proj.weight", + "model.language_model.layers.0.router.scale", + ]: assert key in hf_sd_rt, f"Missing key after round-trip: {key}" assert hf_sd[key].shape == hf_sd_rt[key].shape, f"Shape mismatch for {key}" def test_hf_to_nemo_to_hf_preserves_gate_up_values(self, adapter): hf_sd = _make_hf_state_dict() - # Use per_expert_scale=1 so round-trip is exact - hf_sd["model.language_model.layers.0.moe.per_expert_scale"] = torch.ones(N_EXPERTS) + hf_sd["model.language_model.layers.0.router.per_expert_scale"] = torch.ones(N_EXPERTS) nemo_sd = adapter.from_hf(hf_sd) hf_sd_rt = adapter.to_hf(nemo_sd) torch.testing.assert_close( - hf_sd_rt["model.language_model.layers.0.moe.gate_proj"], - hf_sd["model.language_model.layers.0.moe.gate_proj"], - ) - torch.testing.assert_close( - hf_sd_rt["model.language_model.layers.0.moe.up_proj"], - hf_sd["model.language_model.layers.0.moe.up_proj"], + hf_sd_rt["model.language_model.layers.0.experts.gate_up_proj"], + hf_sd["model.language_model.layers.0.experts.gate_up_proj"], ) def test_hf_to_nemo_to_hf_preserves_down_proj_with_unit_scale(self, adapter): hf_sd = _make_hf_state_dict() - hf_sd["model.language_model.layers.0.moe.per_expert_scale"] = torch.ones(N_EXPERTS) + hf_sd["model.language_model.layers.0.router.per_expert_scale"] = torch.ones(N_EXPERTS) nemo_sd = adapter.from_hf(hf_sd) hf_sd_rt = adapter.to_hf(nemo_sd) torch.testing.assert_close( - hf_sd_rt["model.language_model.layers.0.moe.down_proj"], - hf_sd["model.language_model.layers.0.moe.down_proj"], + hf_sd_rt["model.language_model.layers.0.experts.down_proj"], + hf_sd["model.language_model.layers.0.experts.down_proj"], ) def test_router_keys_round_trip(self, adapter): diff --git a/tests/unit_tests/models/qwen3_5_moe/test_cp_linear_attn.py b/tests/unit_tests/models/qwen3_5_moe/test_cp_linear_attn.py index 3d2376f2f1..5236f72506 100644 --- a/tests/unit_tests/models/qwen3_5_moe/test_cp_linear_attn.py +++ b/tests/unit_tests/models/qwen3_5_moe/test_cp_linear_attn.py @@ -282,6 +282,19 @@ def test_cp_mesh_size_1_delegates_to_super(self, module, device): module.forward(hidden) mock_super_fwd.assert_called_once() + def test_no_cp_does_not_forward_cache_position(self, module, device): + """cache_position should not be forwarded to super (removed in transformers>=5.5).""" + assert module._cp_mesh is None + B, S, D = 1, 8, module.hidden_size + hidden = torch.randn(B, S, D, device=device) + with patch.object( + type(module).__bases__[0], "forward", return_value=torch.randn(B, S, D, device=device) + ) as mock_super_fwd: + module.forward(hidden, cache_position=torch.arange(S, device=device)) + mock_super_fwd.assert_called_once() + _, kwargs = mock_super_fwd.call_args + assert "cache_position" not in kwargs + def test_cp_mesh_gt_1_calls_forward_with_cp(self, module, device): """When _cp_mesh.size() > 1, forward should call _forward_with_cp.""" mesh = MagicMock() diff --git a/uv.lock b/uv.lock index 0b866c01dc..2d6b44f69d 100644 --- a/uv.lock +++ b/uv.lock @@ -2057,31 +2057,34 @@ wheels = [ [[package]] name = "hf-xet" -version = "1.3.0b0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/41/4b/59c9a123813f1db5441f037d9a0e9171bd480c4ff3a9562976a8bf8e49ad/hf_xet-1.3.0b0.tar.gz", hash = "sha256:ece497f54c80992e1b145a89065443f6acf9a6b51d8e4648e53e3ad650fbec06", size = 615265, upload-time = "2026-01-28T20:37:21.892Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/32/57/2a21a3ef6b560768bb38d6ae944df6e5a1bd6be620aff5efc38e7bfdaa70/hf_xet-1.3.0b0-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:5a88ed07d48c05ac4d54dc2ae0ce2df2f1967c982e5d9d06c7022299e8dc8256", size = 5013414, upload-time = "2026-01-28T20:36:44.248Z" }, - { url = "https://files.pythonhosted.org/packages/4e/da/c72939de146f589de58fb122616962e4a78c1d62e68beeb0dd554e6428f5/hf_xet-1.3.0b0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:9e6d0e63148b7fdbcc0615a47108b3516f0905d6c4862e9ed57ea34fa4a14264", size = 4811901, upload-time = "2026-01-28T20:36:42.398Z" }, - { url = "https://files.pythonhosted.org/packages/e7/11/57017d7117360438c01df22bb72f39c95eb59042761052882a7686528f58/hf_xet-1.3.0b0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03ae40626d72dd345bf41a1fac4b412147f5aa8c82f3f15b08c1a4c70c02bb9d", size = 58059681, upload-time = "2026-01-28T20:36:21.36Z" }, - { url = "https://files.pythonhosted.org/packages/ae/40/64b2fb5801a3408c06674b9ff8dd954e31c1bd251959c27b161306e5ab21/hf_xet-1.3.0b0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2f2c17a00b9ce759657e8783de298576764309cb86a3aa6d598cf89ed61952a4", size = 53088704, upload-time = "2026-01-28T20:36:16.831Z" }, - { url = "https://files.pythonhosted.org/packages/d4/74/2705d733206051937ada8ceda50a3f3ce6f327bf0ac3807551ea324564ab/hf_xet-1.3.0b0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:f7d17918f64f7ae8422ff3c0581f24023a7bfba52bf5063d0f1de6088467916e", size = 53469124, upload-time = "2026-01-28T20:36:55.469Z" }, - { url = "https://files.pythonhosted.org/packages/ce/29/14087f9a54bde49804787126c42c58902110126ae78eb62a346b0f1b3757/hf_xet-1.3.0b0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:576286ff35bfbc04a38fe01088770c14073c88bdb37c60e7c372ba2604b3e34f", size = 55100152, upload-time = "2026-01-28T20:37:00.58Z" }, - { url = "https://files.pythonhosted.org/packages/15/c3/27ff3bac95a2ac1fec61e566ae04502aa959aa9bf2e607bec9f684cc0430/hf_xet-1.3.0b0-cp313-cp313t-win_amd64.whl", hash = "sha256:8257478bc5b5493b2b6257db9c474ea0fb7116deeb6d8c794eeb4c52eb923e9f", size = 3072409, upload-time = "2026-01-28T20:37:23.896Z" }, - { url = "https://files.pythonhosted.org/packages/6b/bb/d7ba51576dc518a6eae6866a18841399737b3a3179ee2bce6e4faac2d001/hf_xet-1.3.0b0-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:001692e42e749026b008d6d65f44117a9f9406fada19097f8f3b0ab53bb992c9", size = 5078621, upload-time = "2026-01-28T20:36:52.032Z" }, - { url = "https://files.pythonhosted.org/packages/85/ac/4cf0cf082062de08fe6cdb2f5ae6c3194247f9c079df83e28db904470394/hf_xet-1.3.0b0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1922d17a155eed02face0c03990aec4205e17db9baab8a8dae25720b44c008ce", size = 4811154, upload-time = "2026-01-28T20:36:50.11Z" }, - { url = "https://files.pythonhosted.org/packages/7a/b6/2cca7e576f6aec326d58b4942692b688de24c9fa5c87d1c9a040ae0f2013/hf_xet-1.3.0b0-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f0011a815d97671f3a84d9633588eef43c36cc61345d8a1d0027a1c56df66aef", size = 58048599, upload-time = "2026-01-28T20:36:39.517Z" }, - { url = "https://files.pythonhosted.org/packages/63/4d/5ef001738e05f39b4e0c088d1ffeb57d771c5beeb8ef58a1e4900b6b9bdd/hf_xet-1.3.0b0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:bd4087cd8fb858744df4de4271c8afcb4b66accd9060b4d3c7091561f7f80e32", size = 53086767, upload-time = "2026-01-28T20:36:35.119Z" }, - { url = "https://files.pythonhosted.org/packages/fd/f1/102b0f5a227feafbe49f9934f80c6bebf123aa7eb99aaa82ea947a2a9719/hf_xet-1.3.0b0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:11b6b5bd5412aafa5bc1fa2f6981db44014b535800ce742941861b32de9ae6fd", size = 53469541, upload-time = "2026-01-28T20:37:13.698Z" }, - { url = "https://files.pythonhosted.org/packages/fe/6f/32d36c0748a5caf05d417927ed842cca3b373f20b5a9eec66ab729a2eb96/hf_xet-1.3.0b0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:adaf846c27778e9455f9016ee7bbb6a00b509717a2e05896f930357eed750c80", size = 55096171, upload-time = "2026-01-28T20:37:18.943Z" }, - { url = "https://files.pythonhosted.org/packages/19/94/8322a56c1c51880f5c114022eca06126aee107ecf34e42c44081ade94bc1/hf_xet-1.3.0b0-cp314-cp314t-win_amd64.whl", hash = "sha256:3b1966c653f9d6ef20af98817888d610f6a2054f77d62416226c510a7b54d810", size = 3099533, upload-time = "2026-01-28T20:37:28.675Z" }, - { url = "https://files.pythonhosted.org/packages/24/ca/b797f7de882de667648b48c7ddbc311f6e9c6e61ce75a087478af7da1c33/hf_xet-1.3.0b0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:b43fdfcc7960769ba239758bc744d0fc96e968a91078f4a086d36304a7fe0548", size = 5095272, upload-time = "2026-01-28T20:36:48.093Z" }, - { url = "https://files.pythonhosted.org/packages/1a/c0/204bc663015711ca04b75008871ecbd29c38312e3ba7839e0d1eafa0fa29/hf_xet-1.3.0b0-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:efeac315421dd8b0a0d9f35cfc0929b22bbadd984d7eb3c95298f806398a3f15", size = 4826205, upload-time = "2026-01-28T20:36:46.124Z" }, - { url = "https://files.pythonhosted.org/packages/7e/34/a16aa436c3e59007678cee07f5cf3929ba053b14ae16dffd3be1270d3927/hf_xet-1.3.0b0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fa63330e14196071fafc0e369a8e9d3f847335f10d33ca152537fb47bf263440", size = 58044866, upload-time = "2026-01-28T20:36:31.13Z" }, - { url = "https://files.pythonhosted.org/packages/d0/74/2202cc67e82a6eb64e42314e92ff2ee798e6dd5ee394967880b1370e878e/hf_xet-1.3.0b0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1f8a48df4e67ab695ae802f0d4d07c3d28fed64ea12decef13f8a8550783a42d", size = 53103717, upload-time = "2026-01-28T20:36:26.633Z" }, - { url = "https://files.pythonhosted.org/packages/8d/eb/9cbf85387377adaef317918318d1921b456625fa2535f39e642ed77076e4/hf_xet-1.3.0b0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ae20bc5405c06538ba820e6a3f818df793fee554f83cf071caa641d0b36f08f8", size = 53485235, upload-time = "2026-01-28T20:37:05.554Z" }, - { url = "https://files.pythonhosted.org/packages/0d/28/302fae85503e423e356042a3332e3b2b714b30ce27db2fe415260973bf0e/hf_xet-1.3.0b0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a566da3478ae73ccd6bca8cb8d1ef85bcd4c36e79912cbfafb5b33890a0f1301", size = 55093706, upload-time = "2026-01-28T20:37:09.561Z" }, - { url = "https://files.pythonhosted.org/packages/7f/df/45e30a11fcf8023b62b15c8f0addfbb82233bdbc2834fcd4681d7a07c335/hf_xet-1.3.0b0-cp37-abi3-win_amd64.whl", hash = "sha256:9c9787d60df869e66307cbd9fedb57ff85f38930bffb3f1f04856ccc12cf91b6", size = 3079075, upload-time = "2026-01-28T20:37:25.663Z" }, +version = "1.4.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/53/92/ec9ad04d0b5728dca387a45af7bc98fbb0d73b2118759f5f6038b61a57e8/hf_xet-1.4.3.tar.gz", hash = "sha256:8ddedb73c8c08928c793df2f3401ec26f95be7f7e516a7bee2fbb546f6676113", size = 670477, upload-time = "2026-03-31T22:40:07.874Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/72/43/724d307b34e353da0abd476e02f72f735cdd2bc86082dee1b32ea0bfee1d/hf_xet-1.4.3-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:7551659ba4f1e1074e9623996f28c3873682530aee0a846b7f2f066239228144", size = 3800935, upload-time = "2026-03-31T22:39:49.618Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d2/8bee5996b699262edb87dbb54118d287c0e1b2fc78af7cdc41857ba5e3c4/hf_xet-1.4.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:bee693ada985e7045997f05f081d0e12c4c08bd7626dc397f8a7c487e6c04f7f", size = 3558942, upload-time = "2026-03-31T22:39:47.938Z" }, + { url = "https://files.pythonhosted.org/packages/c3/a1/e993d09cbe251196fb60812b09a58901c468127b7259d2bf0f68bf6088eb/hf_xet-1.4.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:21644b404bb0100fe3857892f752c4d09642586fd988e61501c95bbf44b393a3", size = 4207657, upload-time = "2026-03-31T22:39:39.69Z" }, + { url = "https://files.pythonhosted.org/packages/64/44/9eb6d21e5c34c63e5e399803a6932fa983cabdf47c0ecbcfe7ea97684b8c/hf_xet-1.4.3-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:987f09cfe418237812896a6736b81b1af02a3a6dcb4b4944425c4c4fca7a7cf8", size = 3986765, upload-time = "2026-03-31T22:39:37.936Z" }, + { url = "https://files.pythonhosted.org/packages/ea/7b/8ad6f16fdb82f5f7284a34b5ec48645bd575bdcd2f6f0d1644775909c486/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:60cf7fc43a99da0a853345cf86d23738c03983ee5249613a6305d3e57a5dca74", size = 4188162, upload-time = "2026-03-31T22:39:58.382Z" }, + { url = "https://files.pythonhosted.org/packages/1b/c4/39d6e136cbeea9ca5a23aad4b33024319222adbdc059ebcda5fc7d9d5ff4/hf_xet-1.4.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2815a49a7a59f3e2edf0cf113ae88e8cb2ca2a221bf353fb60c609584f4884d4", size = 4424525, upload-time = "2026-03-31T22:40:00.225Z" }, + { url = "https://files.pythonhosted.org/packages/46/f2/adc32dae6bdbc367853118b9878139ac869419a4ae7ba07185dc31251b76/hf_xet-1.4.3-cp313-cp313t-win_amd64.whl", hash = "sha256:42ee323265f1e6a81b0e11094564fb7f7e0ec75b5105ffd91ae63f403a11931b", size = 3671610, upload-time = "2026-03-31T22:40:10.42Z" }, + { url = "https://files.pythonhosted.org/packages/e2/19/25d897dcc3f81953e0c2cde9ec186c7a0fee413eb0c9a7a9130d87d94d3a/hf_xet-1.4.3-cp313-cp313t-win_arm64.whl", hash = "sha256:27c976ba60079fb8217f485b9c5c7fcd21c90b0367753805f87cb9f3cdc4418a", size = 3528529, upload-time = "2026-03-31T22:40:09.106Z" }, + { url = "https://files.pythonhosted.org/packages/ec/36/3e8f85ca9fe09b8de2b2e10c63b3b3353d7dda88a0b3d426dffbe7b8313b/hf_xet-1.4.3-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:5251d5ece3a81815bae9abab41cf7ddb7bcb8f56411bce0827f4a3071c92fdc6", size = 3801019, upload-time = "2026-03-31T22:39:56.651Z" }, + { url = "https://files.pythonhosted.org/packages/b5/9c/defb6cb1de28bccb7bd8d95f6e60f72a3d3fa4cb3d0329c26fb9a488bfe7/hf_xet-1.4.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1feb0f3abeacee143367c326a128a2e2b60868ec12a36c225afb1d6c5a05e6d2", size = 3558746, upload-time = "2026-03-31T22:39:54.766Z" }, + { url = "https://files.pythonhosted.org/packages/c1/bd/8d001191893178ff8e826e46ad5299446e62b93cd164e17b0ffea08832ec/hf_xet-1.4.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:8b301fc150290ca90b4fccd079829b84bb4786747584ae08b94b4577d82fb791", size = 4207692, upload-time = "2026-03-31T22:39:46.246Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/6790b402803250e9936435613d3a78b9aaeee7973439f0918848dde58309/hf_xet-1.4.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:d972fbe95ddc0d3c0fc49b31a8a69f47db35c1e3699bf316421705741aab6653", size = 3986281, upload-time = "2026-03-31T22:39:44.648Z" }, + { url = "https://files.pythonhosted.org/packages/51/56/ea62552fe53db652a9099eda600b032d75554d0e86c12a73824bfedef88b/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:c5b48db1ee344a805a1b9bd2cda9b6b65fe77ed3787bd6e87ad5521141d317cd", size = 4187414, upload-time = "2026-03-31T22:40:04.951Z" }, + { url = "https://files.pythonhosted.org/packages/7d/f5/bc1456d4638061bea997e6d2db60a1a613d7b200e0755965ec312dc1ef79/hf_xet-1.4.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:22bdc1f5fb8b15bf2831440b91d1c9bbceeb7e10c81a12e8d75889996a5c9da8", size = 4424368, upload-time = "2026-03-31T22:40:06.347Z" }, + { url = "https://files.pythonhosted.org/packages/e4/76/ab597bae87e1f06d18d3ecb8ed7f0d3c9a37037fc32ce76233d369273c64/hf_xet-1.4.3-cp314-cp314t-win_amd64.whl", hash = "sha256:0392c79b7cf48418cd61478c1a925246cf10639f4cd9d94368d8ca1e8df9ea07", size = 3672280, upload-time = "2026-03-31T22:40:16.401Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/2e462d34e23a09a74d73785dbed71cc5dbad82a72eee2ad60a72a554155d/hf_xet-1.4.3-cp314-cp314t-win_arm64.whl", hash = "sha256:681c92a07796325778a79d76c67011764ecc9042a8c3579332b61b63ae512075", size = 3528945, upload-time = "2026-03-31T22:40:14.995Z" }, + { url = "https://files.pythonhosted.org/packages/ac/9f/9c23e4a447b8f83120798f9279d0297a4d1360bdbf59ef49ebec78fe2545/hf_xet-1.4.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:d0da85329eaf196e03e90b84c2d0aca53bd4573d097a75f99609e80775f98025", size = 3805048, upload-time = "2026-03-31T22:39:53.105Z" }, + { url = "https://files.pythonhosted.org/packages/0b/f8/7aacb8e5f4a7899d39c787b5984e912e6c18b11be136ef13947d7a66d265/hf_xet-1.4.3-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:e23717ce4186b265f69afa66e6f0069fe7efbf331546f5c313d00e123dc84583", size = 3562178, upload-time = "2026-03-31T22:39:51.295Z" }, + { url = "https://files.pythonhosted.org/packages/df/9a/a24b26dc8a65f0ecc0fe5be981a19e61e7ca963b85e062c083f3a9100529/hf_xet-1.4.3-cp37-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc360b70c815bf340ed56c7b8c63aacf11762a4b099b2fe2c9bd6d6068668c08", size = 4212320, upload-time = "2026-03-31T22:39:42.922Z" }, + { url = "https://files.pythonhosted.org/packages/53/60/46d493db155d2ee2801b71fb1b0fd67696359047fdd8caee2c914cc50c79/hf_xet-1.4.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:39f2d2e9654cd9b4319885733993807aab6de9dfbd34c42f0b78338d6617421f", size = 3991546, upload-time = "2026-03-31T22:39:41.335Z" }, + { url = "https://files.pythonhosted.org/packages/bc/f5/067363e1c96c6b17256910830d1b54099d06287e10f4ec6ec4e7e08371fc/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:49ad8a8cead2b56051aa84d7fce3e1335efe68df3cf6c058f22a65513885baac", size = 4193200, upload-time = "2026-03-31T22:40:01.936Z" }, + { url = "https://files.pythonhosted.org/packages/42/4b/53951592882d9c23080c7644542fda34a3813104e9e11fa1a7d82d419cb8/hf_xet-1.4.3-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:7716d62015477a70ea272d2d68cd7cad140f61c52ee452e133e139abfe2c17ba", size = 4429392, upload-time = "2026-03-31T22:40:03.492Z" }, + { url = "https://files.pythonhosted.org/packages/8a/21/75a6c175b4e79662ad8e62f46a40ce341d8d6b206b06b4320d07d55b188c/hf_xet-1.4.3-cp37-abi3-win_amd64.whl", hash = "sha256:6b591fcad34e272a5b02607485e4f2a1334aebf1bc6d16ce8eb1eb8978ac2021", size = 3677359, upload-time = "2026-03-31T22:40:13.619Z" }, + { url = "https://files.pythonhosted.org/packages/8a/7c/44314ecd0e89f8b2b51c9d9e5e7a60a9c1c82024ac471d415860557d3cd8/hf_xet-1.4.3-cp37-abi3-win_arm64.whl", hash = "sha256:7c2c7e20bcfcc946dc67187c203463f5e932e395845d098cc2a93f5b67ca0b47", size = 3533664, upload-time = "2026-03-31T22:40:12.152Z" }, ] [[package]] @@ -2137,7 +2140,7 @@ wheels = [ [[package]] name = "huggingface-hub" -version = "1.3.4" +version = "1.9.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "filelock" }, @@ -2146,14 +2149,13 @@ dependencies = [ { name = "httpx" }, { name = "packaging" }, { name = "pyyaml" }, - { name = "shellingham" }, { name = "tqdm" }, - { name = "typer-slim" }, + { name = "typer" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/af/25/74af9d16cd59ae15b12467a79a84aa0fe24be4aba68fc4da0c1864d49c17/huggingface_hub-1.3.4.tar.gz", hash = "sha256:c20d5484a611b7b7891d272e8fc9f77d5de025b0480bdacfa858efb3780b455f", size = 627683, upload-time = "2026-01-26T14:05:10.656Z" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/65/fb800d327bf25bf31b798dd08935d326d064ecb9b359059fecd91b3a98e8/huggingface_hub-1.9.2.tar.gz", hash = "sha256:8d09d080a186bd950a361bfc04b862dfb04d6a2b41d48e9ba1b37507cfd3f1e1", size = 750284, upload-time = "2026-04-08T08:43:11.127Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/55/07/3d0c34c345043c6a398a5882e196b2220dc5861adfa18322448b90908f26/huggingface_hub-1.3.4-py3-none-any.whl", hash = "sha256:a0c526e76eb316e96a91e8a1a7a93cf66b0dd210be1a17bd5fc5ae53cba76bfd", size = 536611, upload-time = "2026-01-26T14:05:08.549Z" }, + { url = "https://files.pythonhosted.org/packages/57/d4/e33bf0b362810a9b96c5923e38908950d58ecb512db42e3730320c7f4a3a/huggingface_hub-1.9.2-py3-none-any.whl", hash = "sha256:e1e62ce237d4fbeca9f970aeb15176fbd503e04c25577bfd22f44aa7aa2b5243", size = 637349, upload-time = "2026-04-08T08:43:09.114Z" }, ] [[package]] @@ -2868,7 +2870,7 @@ wheels = [ [[package]] name = "mistral-common" -version = "1.9.0" +version = "1.11.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jsonschema" }, @@ -2881,9 +2883,9 @@ dependencies = [ { name = "tiktoken" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/41/5b/60bb9f8c424c9ec7708396096f92e34f77eca94d55f99326b72b5a322482/mistral_common-1.9.0.tar.gz", hash = "sha256:5f90ec606d1826a20a97d24aefb9bfff7f4cd4cd576b622d4857708c0577e6c2", size = 6337103, upload-time = "2026-01-29T00:28:07.982Z" } +sdist = { url = "https://files.pythonhosted.org/packages/61/97/753c85b5c0a19f4331ac99e0300ac8da06d4b29b629c9cb03064b38561bd/mistral_common-1.11.0.tar.gz", hash = "sha256:439b7fa38f9c3f020154af51bdf30eb81def507643017d8ce9f798384ec47ec3", size = 6355512, upload-time = "2026-04-01T13:54:12.36Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/64/12/8a3c9aaf58b49383d24f533edb2f81f073b59822317bd56bd66d0850caae/mistral_common-1.9.0-py3-none-any.whl", hash = "sha256:e25ed2f8c73f66cf3b1a48b2ddd649e044a0db7b9d9dd1af819eeb20ee1a6d94", size = 6517668, upload-time = "2026-01-29T00:28:04.96Z" }, + { url = "https://files.pythonhosted.org/packages/60/e4/73ad3c27e3fb613c3ce0953c928202c46cddebac3989b87be1b6f305a9f6/mistral_common-1.11.0-py3-none-any.whl", hash = "sha256:1d3ecaf7c3aa7338cb37b596fd0fb294485753958ee8e7254a6cc23eb30b249b", size = 6531513, upload-time = "2026-04-01T13:54:16.536Z" }, ] [package.optional-dependencies] @@ -3452,7 +3454,7 @@ requires-dist = [ { name = "mamba-ssm", marker = "extra == 'cuda'" }, { name = "megatron-fsdp", specifier = ">=0.2.3" }, { name = "mistral-common", extras = ["audio", "hf-hub", "image", "sentencepiece"] }, - { name = "mistral-common", extras = ["opencv"], marker = "extra == 'vlm'", specifier = ">=1.9.0" }, + { name = "mistral-common", extras = ["opencv"], marker = "extra == 'vlm'", specifier = ">=1.11.0" }, { name = "mlflow" }, { name = "nemo-automodel", extras = ["cuda"], marker = "extra == 'all'" }, { name = "nemo-automodel", extras = ["cuda"], marker = "extra == 'moe'" }, @@ -3486,7 +3488,7 @@ requires-dist = [ { name = "torchvision", marker = "sys_platform != 'darwin' and sys_platform != 'linux' and extra == 'diffusion'", index = "https://download.pytorch.org/whl/cpu" }, { name = "torchvision", marker = "sys_platform == 'linux' and extra == 'diffusion'", index = "https://download.pytorch.org/whl/cu129" }, { name = "transformer-engine", extras = ["pytorch"], marker = "extra == 'cuda'", specifier = "<=2.11.0" }, - { name = "transformers", specifier = ">=5.3.0,<5.4.0" }, + { name = "transformers", specifier = "==5.5.0" }, { name = "wandb" }, ] provides-extras = ["diffusion", "cuda", "cuda-source", "extra", "fa", "delta-databricks", "moe", "vlm", "cli", "all"] @@ -7132,7 +7134,7 @@ sdist = { url = "https://files.pythonhosted.org/packages/09/42/068a40f5b213a3a88 [[package]] name = "transformers" -version = "5.3.0" +version = "5.5.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "huggingface-hub" }, @@ -7146,9 +7148,9 @@ dependencies = [ { name = "tqdm" }, { name = "typer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/fc/1a/70e830d53ecc96ce69cfa8de38f163712d2b43ac52fbd743f39f56025c31/transformers-5.3.0.tar.gz", hash = "sha256:009555b364029da9e2946d41f1c5de9f15e6b1df46b189b7293f33a161b9c557", size = 8830831, upload-time = "2026-03-04T17:41:46.119Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ff/9d/fb46e729b461985f41a5740167688b924a4019141e5c164bea77548d3d9e/transformers-5.5.0.tar.gz", hash = "sha256:c8db656cf51c600cd8c75f06b20ef85c72e8b8ff9abc880c5d3e8bc70e0ddcbd", size = 8237745, upload-time = "2026-04-02T16:13:08.113Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/88/ae8320064e32679a5429a2c9ebbc05c2bf32cefb6e076f9b07f6d685a9b4/transformers-5.3.0-py3-none-any.whl", hash = "sha256:50ac8c89c3c7033444fb3f9f53138096b997ebb70d4b5e50a2e810bf12d3d29a", size = 10661827, upload-time = "2026-03-04T17:41:42.722Z" }, + { url = "https://files.pythonhosted.org/packages/e7/28/35f7411ff80a3640c1f4fc907dcbb6a65061ebb82f66950e38bfc9f7f740/transformers-5.5.0-py3-none-any.whl", hash = "sha256:821a9ff0961abbb29eb1eb686d78df1c85929fdf213a3fe49dc6bd94f9efa944", size = 10245591, upload-time = "2026-04-02T16:13:03.462Z" }, ] [[package]] @@ -7268,19 +7270,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/78/64/7713ffe4b5983314e9d436a90d5bd4f63b6054e2aca783a3cfc44cb95bbf/typer-0.20.0-py3-none-any.whl", hash = "sha256:5b463df6793ec1dca6213a3cf4c0f03bc6e322ac5e16e13ddd622a889489784a", size = 47028, upload-time = "2025-10-20T17:03:47.617Z" }, ] -[[package]] -name = "typer-slim" -version = "0.21.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "click" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/17/d4/064570dec6358aa9049d4708e4a10407d74c99258f8b2136bb8702303f1a/typer_slim-0.21.1.tar.gz", hash = "sha256:73495dd08c2d0940d611c5a8c04e91c2a0a98600cbd4ee19192255a233b6dbfd", size = 110478, upload-time = "2026-01-06T11:21:11.176Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/0a/4aca634faf693e33004796b6cee0ae2e1dba375a800c16ab8d3eff4bb800/typer_slim-0.21.1-py3-none-any.whl", hash = "sha256:6e6c31047f171ac93cc5a973c9e617dbc5ab2bddc4d0a3135dc161b4e2020e0d", size = 47444, upload-time = "2026-01-06T11:21:12.441Z" }, -] - [[package]] name = "typing-extensions" version = "4.15.0"