From 81e86c31f6952c337c17216ae4244f964d1e519a Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Thu, 27 Feb 2025 16:40:57 +0100 Subject: [PATCH 1/2] Draft to add deploy from catalog --- src/huggingface_hub/__init__.py | 6 +++ src/huggingface_hub/constants.py | 1 + src/huggingface_hub/hf_api.py | 92 ++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py index dc935329a2..5b429b4990 100644 --- a/src/huggingface_hub/__init__.py +++ b/src/huggingface_hub/__init__.py @@ -165,6 +165,7 @@ "create_commit", "create_discussion", "create_inference_endpoint", + "create_inference_endpoint_from_catalog", "create_pull_request", "create_repo", "create_tag", @@ -205,6 +206,7 @@ "list_accepted_access_requests", "list_collections", "list_datasets", + "list_inference_catalog", "list_inference_endpoints", "list_liked_repos", "list_models", @@ -769,6 +771,7 @@ "create_commit", "create_discussion", "create_inference_endpoint", + "create_inference_endpoint_from_catalog", "create_pull_request", "create_repo", "create_tag", @@ -823,6 +826,7 @@ "list_accepted_access_requests", "list_collections", "list_datasets", + "list_inference_catalog", "list_inference_endpoints", "list_liked_repos", "list_models", @@ -1107,6 +1111,7 @@ def __dir__(): create_commit, # noqa: F401 create_discussion, # noqa: F401 create_inference_endpoint, # noqa: F401 + create_inference_endpoint_from_catalog, # noqa: F401 create_pull_request, # noqa: F401 create_repo, # noqa: F401 create_tag, # noqa: F401 @@ -1147,6 +1152,7 @@ def __dir__(): list_accepted_access_requests, # noqa: F401 list_collections, # noqa: F401 list_datasets, # noqa: F401 + list_inference_catalog, # noqa: F401 list_inference_endpoints, # noqa: F401 list_liked_repos, # noqa: F401 list_models, # noqa: F401 diff --git a/src/huggingface_hub/constants.py b/src/huggingface_hub/constants.py index 6add6a8e4d..ff5d84aa21 100644 --- a/src/huggingface_hub/constants.py +++ 
b/src/huggingface_hub/constants.py @@ -78,6 +78,7 @@ def _as_int(value: Optional[str]) -> Optional[int]: # See https://huggingface.co/docs/inference-endpoints/index INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2" +INFERENCE_CATALOG_ENDPOINT = "https://endpoints.huggingface.co/api/catalog" # Proxy for third-party providers INFERENCE_PROXY_TEMPLATE = "https://router.huggingface.co/{provider}" diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index 49aa816110..fed692be52 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -16,7 +16,9 @@ import inspect import json +import random import re +import string import struct import warnings from collections import defaultdict @@ -112,6 +114,7 @@ SafetensorsRepoMetadata, TensorInfo, build_hf_headers, + experimental, filter_repo_objects, fix_hf_endpoint_in_url, get_session, @@ -7485,6 +7488,93 @@ def create_inference_endpoint( return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token) + @experimental + @validate_hf_hub_args + def create_inference_endpoint_from_catalog( + self, + repo_id: str, + *, + name: Optional[str] = None, + token: Union[bool, str, None] = None, + namespace: Optional[str] = None, + ) -> InferenceEndpoint: + """Create a new Inference Endpoint from a model in the Hugging Face Inference Catalog. + + The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference + and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list + of available models in the catalog. + + Args: + repo_id (`str`): + The ID of the model in the catalog to deploy as an Inference Endpoint. + name (`str`, *optional*): + The unique name for the new Inference Endpoint. If not provided, a random name will be generated. + token (Union[bool, str, None], optional): + A valid user access token (string). 
Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + namespace (`str`, *optional*): + The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace. + + Returns: + [`InferenceEndpoint`]: information about the new Inference Endpoint. + + + + `create_inference_endpoint_from_catalog` is experimental. Its API is subject to change in the future. Please provide feedback + if you have any suggestions or requests. + + + """ + payload: Dict = { + "accessToken": token or self.token or get_token(), + "namespace": namespace or self._get_namespace(token=token), + "repoId": repo_id, + } + if name is not None: + payload["endpointName"] = name + + response = get_session().post( + f"{constants.INFERENCE_CATALOG_ENDPOINT}/deploy", + headers=self._build_hf_headers(token=False), + json=payload, + ) + hf_raise_for_status(response) + return InferenceEndpoint.from_raw(response.json()["endpoint"], namespace=namespace, token=token) + + @experimental + @validate_hf_hub_args + def list_inference_catalog(self, *, token: Union[bool, str, None] = None) -> List[str]: + """List models available in the Hugging Face Inference Catalog. + + The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference + and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list + of available models in the catalog. + + Use [`create_inference_endpoint_from_catalog`] to deploy a model from the catalog. + + Args: + token (Union[bool, str, None], optional): + A valid user access token (string). Defaults to the locally saved + token, which is the recommended method for authentication (see + https://huggingface.co/docs/huggingface_hub/quick-start#authentication). + + Returns: + List[`str`]: A list of model IDs available in the catalog. 
+ + + `list_inference_catalog` is experimental. Its API is subject to change in the future. Please provide feedback + if you have any suggestions or requests. + + + """ + response = get_session().get( + f"{constants.INFERENCE_CATALOG_ENDPOINT}/repo-list", + headers=self._build_hf_headers(token=token), + ) + hf_raise_for_status(response) + return response.json()["models"] + def get_inference_endpoint( self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None ) -> InferenceEndpoint: @@ -9607,6 +9697,8 @@ def _parse_revision_from_pr_url(pr_url: str) -> str: pause_inference_endpoint = api.pause_inference_endpoint resume_inference_endpoint = api.resume_inference_endpoint scale_to_zero_inference_endpoint = api.scale_to_zero_inference_endpoint +create_inference_endpoint_from_catalog = api.create_inference_endpoint_from_catalog +list_inference_catalog = api.list_inference_catalog # Collections API get_collection = api.get_collection From 7e618343e86fb41dc0853379f195d615e30e151d Mon Sep 17 00:00:00 2001 From: Lucain Pouget Date: Thu, 27 Feb 2025 16:49:11 +0100 Subject: [PATCH 2/2] make style --- src/huggingface_hub/hf_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py index fed692be52..c5d1a5e8e2 100644 --- a/src/huggingface_hub/hf_api.py +++ b/src/huggingface_hub/hf_api.py @@ -16,9 +16,7 @@ import inspect import json -import random import re -import string import struct import warnings from collections import defaultdict @@ -7526,8 +7524,9 @@ def create_inference_endpoint_from_catalog( """ + token = token or self.token or get_token() payload: Dict = { - "accessToken": token or self.token or get_token(), + "accessToken": token, "namespace": namespace or self._get_namespace(token=token), "repoId": repo_id, } @@ -7540,7 +7539,8 @@ def create_inference_endpoint_from_catalog( json=payload, ) hf_raise_for_status(response) - return 
InferenceEndpoint.from_raw(response.json()["endpoint"], namespace=namespace, token=token) + data = response.json()["endpoint"] + return InferenceEndpoint.from_raw(data, namespace=data["name"], token=token) @experimental @validate_hf_hub_args