From 8d72b3f3766a257b6cf88aaa3a56f572afa8d307 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Sat, 8 Nov 2025 16:04:51 +0100 Subject: [PATCH 1/5] feat(services): allow configuring Huggingface endpoint --- core/src/services/huggingface/backend.rs | 32 ++++++++++++++++++++---- core/src/services/huggingface/config.rs | 4 +++ core/src/services/huggingface/core.rs | 23 +++++++++++------ 3 files changed, 46 insertions(+), 13 deletions(-) diff --git a/core/src/services/huggingface/backend.rs b/core/src/services/huggingface/backend.rs index c5b5e4c8b806..98f3fa7d6938 100644 --- a/core/src/services/huggingface/backend.rs +++ b/core/src/services/huggingface/backend.rs @@ -106,6 +106,17 @@ impl HuggingfaceBuilder { } self } + + /// configure the Hub base url. You might want to set this variable if your + /// organization is using a Private Hub https://huggingface.co/enterprise + /// + /// Default is "https://huggingface.co" + pub fn endpoint(mut self, endpoint: &str) -> Self { + if !endpoint.is_empty() { + self.config.endpoint = Some(endpoint.to_string()); + } + self + } } impl Builder for HuggingfaceBuilder { @@ -151,6 +162,20 @@ impl Builder for HuggingfaceBuilder { let token = self.config.token.as_ref().cloned(); + let endpoint = match &self.config.endpoint { + Some(endpoint) => endpoint.clone(), + None => { + // Try to read from HF_ENDPOINT env var which is used + // by the official huggingface clients. 
+ if let Ok(env_endpoint) = std::env::var("HF_ENDPOINT") { + env_endpoint + } else { + "https://huggingface.co".to_string() + } + } + }; + debug!("backend use endpoint: {}", &endpoint); + Ok(HuggingfaceBackend { core: Arc::new(HuggingfaceCore { info: { @@ -158,14 +183,10 @@ impl Builder for HuggingfaceBuilder { am.set_scheme(HUGGINGFACE_SCHEME) .set_native_capability(Capability { stat: true, - read: true, - list: true, list_with_recursive: true, - shared: true, - ..Default::default() }); am.into() @@ -175,6 +196,7 @@ impl Builder for HuggingfaceBuilder { revision, root, token, + endpoint, }), }) } @@ -267,4 +289,4 @@ impl Access for HuggingfaceBackend { pub enum RepoType { Model, Dataset, -} +} \ No newline at end of file diff --git a/core/src/services/huggingface/config.rs b/core/src/services/huggingface/config.rs index 096490560560..6b4034bfbfc8 100644 --- a/core/src/services/huggingface/config.rs +++ b/core/src/services/huggingface/config.rs @@ -50,6 +50,10 @@ pub struct HuggingfaceConfig { /// /// This is optional. pub token: Option<String>, + /// Endpoint of the Huggingface Hub. + /// + /// Default is "https://huggingface.co". 
+ pub endpoint: Option<String>, } impl Debug for HuggingfaceConfig { diff --git a/core/src/services/huggingface/core.rs b/core/src/services/huggingface/core.rs index 5f3a65dff399..47368811efb5 100644 --- a/core/src/services/huggingface/core.rs +++ b/core/src/services/huggingface/core.rs @@ -36,6 +36,7 @@ pub struct HuggingfaceCore { pub revision: String, pub root: String, pub token: Option<String>, + pub endpoint: String, } impl Debug for HuggingfaceCore { @@ -45,6 +46,7 @@ impl Debug for HuggingfaceCore { .field("repo_id", &self.repo_id) .field("revision", &self.revision) .field("root", &self.root) + .field("endpoint", &self.endpoint) .finish_non_exhaustive() } } @@ -57,12 +59,12 @@ impl HuggingfaceCore { let url = match self.repo_type { RepoType::Model => format!( - "https://huggingface.co/api/models/{}/paths-info/{}", - &self.repo_id, &self.revision + "{}/api/models/{}/paths-info/{}", + &self.endpoint, &self.repo_id, &self.revision ), RepoType::Dataset => format!( - "https://huggingface.co/api/datasets/{}/paths-info/{}", - &self.repo_id, &self.revision + "{}/api/datasets/{}/paths-info/{}", + &self.endpoint, &self.repo_id, &self.revision ), }; @@ -92,13 +94,15 @@ impl HuggingfaceCore { let mut url = match self.repo_type { RepoType::Model => format!( - "https://huggingface.co/api/models/{}/tree/{}/{}?expand=True", + "{}/api/models/{}/tree/{}/{}?expand=True", + &self.endpoint, &self.repo_id, &self.revision, percent_encode_path(&p) ), RepoType::Dataset => format!( - "https://huggingface.co/api/datasets/{}/tree/{}/{}?expand=True", + "{}/api/datasets/{}/tree/{}/{}?expand=True", + &self.endpoint, &self.repo_id, &self.revision, percent_encode_path(&p) @@ -134,13 +138,15 @@ impl HuggingfaceCore { let url = match self.repo_type { RepoType::Model => format!( - "https://huggingface.co/{}/resolve/{}/{}", + "{}/api/models/{}/resolve/{}/{}", + &self.endpoint, &self.repo_id, &self.revision, percent_encode_path(&p) ), RepoType::Dataset => format!( - 
"https://huggingface.co/datasets/{}/resolve/{}/{}", + "{}/api/datasets/{}/resolve/{}/{}", + &self.endpoint, &self.repo_id, &self.revision, percent_encode_path(&p) @@ -403,4 +409,5 @@ mod tests { Ok(()) } + } From 1ee28083ebdaf25a3a2ebc0457f5e3bb1a8efd6b Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Mon, 10 Nov 2025 16:53:22 +0100 Subject: [PATCH 2/5] chore(hugginface): fix hf_resolve urls --- core/src/services/huggingface/core.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/services/huggingface/core.rs b/core/src/services/huggingface/core.rs index 47368811efb5..25f96cb73688 100644 --- a/core/src/services/huggingface/core.rs +++ b/core/src/services/huggingface/core.rs @@ -138,14 +138,14 @@ impl HuggingfaceCore { let url = match self.repo_type { RepoType::Model => format!( - "{}/api/models/{}/resolve/{}/{}", + "{}/models/{}/resolve/{}/{}", &self.endpoint, &self.repo_id, &self.revision, percent_encode_path(&p) ), RepoType::Dataset => format!( - "{}/api/datasets/{}/resolve/{}/{}", + "{}/datasets/{}/resolve/{}/{}", &self.endpoint, &self.repo_id, &self.revision, From 64f10d6c1dd49e36c763a2cce2d0b59bda09ea2d Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Tue, 11 Nov 2025 10:25:57 +0100 Subject: [PATCH 3/5] fix(huggingface): percent encode revision --- core/src/services/huggingface/backend.rs | 2 +- core/src/services/huggingface/core.rs | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/core/src/services/huggingface/backend.rs b/core/src/services/huggingface/backend.rs index 98f3fa7d6938..96ab0716363c 100644 --- a/core/src/services/huggingface/backend.rs +++ b/core/src/services/huggingface/backend.rs @@ -289,4 +289,4 @@ impl Access for HuggingfaceBackend { pub enum RepoType { Model, Dataset, -} \ No newline at end of file +} diff --git a/core/src/services/huggingface/core.rs b/core/src/services/huggingface/core.rs index 25f96cb73688..77117f7baa0d 100644 --- 
a/core/src/services/huggingface/core.rs +++ b/core/src/services/huggingface/core.rs @@ -60,11 +60,15 @@ impl HuggingfaceCore { let url = match self.repo_type { RepoType::Model => format!( "{}/api/models/{}/paths-info/{}", - &self.endpoint, &self.repo_id, &self.revision + &self.endpoint, + &self.repo_id, + percent_encode_path(&self.revision) ), RepoType::Dataset => format!( "{}/api/datasets/{}/paths-info/{}", - &self.endpoint, &self.repo_id, &self.revision + &self.endpoint, + &self.repo_id, + percent_encode_path(&self.revision) ), }; @@ -97,14 +101,14 @@ impl HuggingfaceCore { "{}/api/models/{}/tree/{}/{}?expand=True", &self.endpoint, &self.repo_id, - &self.revision, + percent_encode_path(&self.revision), percent_encode_path(&p) ), RepoType::Dataset => format!( "{}/api/datasets/{}/tree/{}/{}?expand=True", &self.endpoint, &self.repo_id, - &self.revision, + percent_encode_path(&self.revision), percent_encode_path(&p) ), }; @@ -138,17 +142,17 @@ impl HuggingfaceCore { let url = match self.repo_type { RepoType::Model => format!( - "{}/models/{}/resolve/{}/{}", + "{}/{}/resolve/{}/{}", &self.endpoint, &self.repo_id, - &self.revision, + percent_encode_path(&self.revision), percent_encode_path(&p) ), RepoType::Dataset => format!( "{}/datasets/{}/resolve/{}/{}", &self.endpoint, &self.repo_id, - &self.revision, + percent_encode_path(&self.revision), percent_encode_path(&p) ), }; @@ -409,5 +413,4 @@ mod tests { Ok(()) } - } From 2faeaf44161fb2fb40daf0b767972543b940c2d0 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Tue, 11 Nov 2025 15:07:31 +0100 Subject: [PATCH 4/5] fix: percent encode slash in revision --- core/src/services/huggingface/core.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/core/src/services/huggingface/core.rs b/core/src/services/huggingface/core.rs index 77117f7baa0d..1ba46b2ba06a 100644 --- a/core/src/services/huggingface/core.rs +++ b/core/src/services/huggingface/core.rs @@ -23,11 +23,16 @@ use http::Request; 
use http::Response; use http::header; use serde::Deserialize; +use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use super::backend::RepoType; use crate::raw::*; use crate::*; +fn percent_encode_revision(revision: &str) -> String { + utf8_percent_encode(revision, NON_ALPHANUMERIC).to_string() +} + pub struct HuggingfaceCore { pub info: Arc, @@ -62,13 +67,13 @@ impl HuggingfaceCore { "{}/api/models/{}/paths-info/{}", &self.endpoint, &self.repo_id, - percent_encode_path(&self.revision) + percent_encode_revision(&self.revision) ), RepoType::Dataset => format!( "{}/api/datasets/{}/paths-info/{}", &self.endpoint, &self.repo_id, - percent_encode_path(&self.revision) + percent_encode_revision(&self.revision) ), }; @@ -101,14 +106,14 @@ impl HuggingfaceCore { "{}/api/models/{}/tree/{}/{}?expand=True", &self.endpoint, &self.repo_id, - percent_encode_path(&self.revision), + percent_encode_revision(&self.revision), percent_encode_path(&p) ), RepoType::Dataset => format!( "{}/api/datasets/{}/tree/{}/{}?expand=True", &self.endpoint, &self.repo_id, - percent_encode_path(&self.revision), + percent_encode_revision(&self.revision), percent_encode_path(&p) ), }; @@ -145,14 +150,14 @@ impl HuggingfaceCore { "{}/{}/resolve/{}/{}", &self.endpoint, &self.repo_id, - percent_encode_path(&self.revision), + percent_encode_revision(&self.revision), percent_encode_path(&p) ), RepoType::Dataset => format!( "{}/datasets/{}/resolve/{}/{}", &self.endpoint, &self.repo_id, - percent_encode_path(&self.revision), + percent_encode_revision(&self.revision), percent_encode_path(&p) ), }; From 904a327a6473cda575cca3ae37a795d10ae7d4f8 Mon Sep 17 00:00:00 2001 From: Krisztian Szucs Date: Wed, 19 Nov 2025 12:38:51 +0100 Subject: [PATCH 5/5] style: run cargo fmt --- core/src/services/huggingface/core.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/services/huggingface/core.rs b/core/src/services/huggingface/core.rs index 1ba46b2ba06a..e852d0a6ccfb 100644 --- 
a/core/src/services/huggingface/core.rs +++ b/core/src/services/huggingface/core.rs @@ -22,8 +22,8 @@ use bytes::Bytes; use http::Request; use http::Response; use http::header; +use percent_encoding::{NON_ALPHANUMERIC, utf8_percent_encode}; use serde::Deserialize; -use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use super::backend::RepoType; use crate::raw::*;