huggingface · Wauplin · Feb 27, 2025 · Feb 27, 2025
diff --git a/src/huggingface_hub/__init__.py b/src/huggingface_hub/__init__.py
@@ -165,6 +165,7 @@
         "create_commit",
         "create_discussion",
         "create_inference_endpoint",
+        "create_inference_endpoint_from_catalog",
         "create_pull_request",
         "create_repo",
         "create_tag",
@@ -205,6 +206,7 @@
         "list_accepted_access_requests",
         "list_collections",
         "list_datasets",
+        "list_inference_catalog",
         "list_inference_endpoints",
         "list_liked_repos",
         "list_models",
@@ -769,6 +771,7 @@
     "create_commit",
     "create_discussion",
     "create_inference_endpoint",
+    "create_inference_endpoint_from_catalog",
     "create_pull_request",
     "create_repo",
     "create_tag",
@@ -823,6 +826,7 @@
     "list_accepted_access_requests",
     "list_collections",
     "list_datasets",
+    "list_inference_catalog",
     "list_inference_endpoints",
     "list_liked_repos",
     "list_models",
@@ -1107,6 +1111,7 @@ def __dir__():
         create_commit,  # noqa: F401
         create_discussion,  # noqa: F401
         create_inference_endpoint,  # noqa: F401
+        create_inference_endpoint_from_catalog,  # noqa: F401
         create_pull_request,  # noqa: F401
         create_repo,  # noqa: F401
         create_tag,  # noqa: F401
@@ -1147,6 +1152,7 @@ def __dir__():
         list_accepted_access_requests,  # noqa: F401
         list_collections,  # noqa: F401
         list_datasets,  # noqa: F401
+        list_inference_catalog,  # noqa: F401
         list_inference_endpoints,  # noqa: F401
         list_liked_repos,  # noqa: F401
         list_models,  # noqa: F401

diff --git a/src/huggingface_hub/constants.py b/src/huggingface_hub/constants.py
@@ -78,6 +78,7 @@ def _as_int(value: Optional[str]) -> Optional[int]:
 
 # See https://huggingface.co/docs/inference-endpoints/index
 INFERENCE_ENDPOINTS_ENDPOINT = "https://api.endpoints.huggingface.cloud/v2"
+INFERENCE_CATALOG_ENDPOINT = "https://endpoints.huggingface.co/api/catalog"
 
 # Proxy for third-party providers
 INFERENCE_PROXY_TEMPLATE = "https://router.huggingface.co/{provider}"

diff --git a/src/huggingface_hub/hf_api.py b/src/huggingface_hub/hf_api.py
@@ -112,6 +112,7 @@
     SafetensorsRepoMetadata,
     TensorInfo,
     build_hf_headers,
+    experimental,
     filter_repo_objects,
     fix_hf_endpoint_in_url,
     get_session,
@@ -7485,6 +7486,95 @@ def create_inference_endpoint(
 
         return InferenceEndpoint.from_raw(response.json(), namespace=namespace, token=token)
 
+    @experimental
+    @validate_hf_hub_args
+    def create_inference_endpoint_from_catalog(
+        self,
+        repo_id: str,
+        *,
+        name: Optional[str] = None,
+        token: Union[bool, str, None] = None,
+        namespace: Optional[str] = None,
+    ) -> InferenceEndpoint:
+        """Create a new Inference Endpoint from a model in the Hugging Face Inference Catalog.
+
+        The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference
+        and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list
+        of available models in the catalog.
+
+        Args:
+            repo_id (`str`):
+                The ID of the model in the catalog to deploy as an Inference Endpoint.
+            name (`str`, *optional*):
+                The unique name for the new Inference Endpoint. If not provided, a random name will be generated.
+            token (Union[bool, str, None], optional):
+                A valid user access token (string). Defaults to the locally saved
+                token, which is the recommended method for authentication (see
+                https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
+            namespace (`str`, *optional*):
+                The namespace where the Inference Endpoint will be created. Defaults to the current user's namespace.
+
+        Returns:
+            [`InferenceEndpoint`]: information about the new Inference Endpoint.
+
+        <Tip warning={true}>
+
+        `create_inference_endpoint_from_catalog` is experimental. Its API is subject to change in the future. Please provide feedback
+        if you have any suggestions or requests.
+
+        </Tip>
+        """
+        token = token or self.token or get_token()
+        payload: Dict = {
+            "accessToken": token,
+            "namespace": namespace or self._get_namespace(token=token),
+            "repoId": repo_id,
+        }
+        if name is not None:
+            payload["endpointName"] = name
+
+        response = get_session().post(
+            f"{constants.INFERENCE_CATALOG_ENDPOINT}/deploy",
+            headers=self._build_hf_headers(token=False),
+            json=payload,
+        )
+        hf_raise_for_status(response)
+        data = response.json()["endpoint"]
+        return InferenceEndpoint.from_raw(data, namespace=data[name], token=token)
+
+    @experimental
+    @validate_hf_hub_args
+    def list_inference_catalog(self, *, token: Union[bool, str, None] = None) -> List[str]:
+        """List models available in the Hugging Face Inference Catalog.
+
+        The goal of the Inference Catalog is to provide a curated list of models that are optimized for inference
+        and for which default configurations have been tested. See https://endpoints.huggingface.co/catalog for a list
+        of available models in the catalog.
+
+        Use [`create_inference_endpoint_from_catalog`] to deploy a model from the catalog.
+
+        Args:
+            token (Union[bool, str, None], optional):
+                A valid user access token (string). Defaults to the locally saved
+                token, which is the recommended method for authentication (see
+                https://huggingface.co/docs/huggingface_hub/quick-start#authentication).
+
+        Returns:
+            List[`str`]: A list of model IDs available in the catalog.
+        <Tip warning={true}>
+
+        `list_inference_catalog` is experimental. Its API is subject to change in the future. Please provide feedback
+        if you have any suggestions or requests.
+
+        </Tip>
+        """
+        response = get_session().get(
+            f"{constants.INFERENCE_CATALOG_ENDPOINT}/repo-list",
+            headers=self._build_hf_headers(token=token),
+        )
+        hf_raise_for_status(response)
+        return response.json()["models"]
+
     def get_inference_endpoint(
         self, name: str, *, namespace: Optional[str] = None, token: Union[bool, str, None] = None
     ) -> InferenceEndpoint:
@@ -9607,6 +9697,8 @@ def _parse_revision_from_pr_url(pr_url: str) -> str:
 pause_inference_endpoint = api.pause_inference_endpoint
 resume_inference_endpoint = api.resume_inference_endpoint
 scale_to_zero_inference_endpoint = api.scale_to_zero_inference_endpoint
+create_inference_endpoint_from_catalog = api.create_inference_endpoint_from_catalog
+list_inference_catalog = api.list_inference_catalog
 
 # Collections API
 get_collection = api.get_collection