From f731a170a9e44716cdf4874d0b054a80abdd71fd Mon Sep 17 00:00:00 2001 From: Niels Warncke Date: Tue, 12 Nov 2024 11:35:35 +0100 Subject: [PATCH 1/2] Add allowed CUDA versions parameter to endpoint creation This commit introduces a new parameter `allowed_cuda_versions` to the `create_endpoint` function and the corresponding GraphQL mutation generator `generate_endpoint_mutation`. This parameter allows specifying a comma-separated list of allowed CUDA versions for the endpoint. The parameter is added to the GraphQL mutation as a new field and is expected to enhance control over the CUDA environment for endpoints. - The `allowed_cuda_versions` parameter is added with a default value of "12.1,12.2,12.3,12.4,12.5". - The parameter is also included in the GraphQL mutation string. - Redundant documentation and example code in `create_endpoint` have been removed to streamline the function's docstring. --- runpod/api/ctl_commands.py | 17 +++-------------- runpod/api/mutations/endpoints.py | 7 +++++++ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/runpod/api/ctl_commands.py b/runpod/api/ctl_commands.py index 3d0137ce..5b27c0fd 100644 --- a/runpod/api/ctl_commands.py +++ b/runpod/api/ctl_commands.py @@ -302,24 +302,12 @@ def create_endpoint( workers_min: int = 0, workers_max: int = 3, flashboot=False, + allowed_cuda_versions: str = "12.1,12.2,12.3,12.4,12.5", ): """ Create an endpoint - :param name: the name of the endpoint - :param template_id: the id of the template to use for the endpoint - :param gpu_ids: the ids of the GPUs to use for the endpoint - :param network_volume_id: the id of the network volume to use for the endpoint - :param locations: the locations to use for the endpoint - :param idle_timeout: the idle timeout for the endpoint - :param scaler_type: the scaler type for the endpoint - :param scaler_value: the scaler value for the endpoint - :param workers_min: the minimum number of workers for the endpoint - :param workers_max: the maximum 
number of workers for the endpoint - - :example: - - >>> endpoint_id = runpod.create_endpoint("test", "template_id") + :param allowed_cuda_versions: Comma-separated string of allowed CUDA versions (e.g., "12.4,12.5"). """ raw_response = run_graphql_query( endpoint_mutations.generate_endpoint_mutation( @@ -334,6 +322,7 @@ def create_endpoint( workers_min, workers_max, flashboot, + allowed_cuda_versions, ) ) diff --git a/runpod/api/mutations/endpoints.py b/runpod/api/mutations/endpoints.py index 61571caf..6c4694e4 100644 --- a/runpod/api/mutations/endpoints.py +++ b/runpod/api/mutations/endpoints.py @@ -15,6 +15,7 @@ def generate_endpoint_mutation( workers_min: int = 0, workers_max: int = 3, flashboot=False, + allowed_cuda_versions: str = "12.1,12.2,12.3,12.4,12.5", ): """Generate a string for a GraphQL mutation to create a new endpoint.""" input_fields = [] @@ -44,6 +45,10 @@ def generate_endpoint_mutation( input_fields.append(f"workersMin: {workers_min}") input_fields.append(f"workersMax: {workers_max}") + # New Field for allowed CUDA versions + if allowed_cuda_versions is not None: + input_fields.append(f'allowedCudaVersions: "{allowed_cuda_versions}"') + # Format the input fields into a string input_fields_string = ", ".join(input_fields) @@ -65,11 +70,13 @@ def generate_endpoint_mutation( scalerValue workersMin workersMax + allowedCudaVersions }} }} """ + def update_endpoint_template_mutation(endpoint_id: str, template_id: str): """Generate a string for a GraphQL mutation to update an existing endpoint's template.""" input_fields = [] From 9e440fbfa26b1abb1a4e9003941931852a6b681d Mon Sep 17 00:00:00 2001 From: Niels Warncke Date: Tue, 12 Nov 2024 14:09:24 +0100 Subject: [PATCH 2/2] Add gpu_count parameter to endpoint creation functions This commit introduces the `gpu_count` parameter to the `create_endpoint` function in `ctl_commands.py` and the `generate_endpoint_mutation` function in `mutations/endpoints.py`. 
The `gpu_count` parameter allows specifying the number of GPUs to be used by an endpoint. This change includes the necessary updates to the function signatures and the GraphQL mutation string construction to support the new parameter. --- runpod/api/ctl_commands.py | 2 ++ runpod/api/mutations/endpoints.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/runpod/api/ctl_commands.py b/runpod/api/ctl_commands.py index 5b27c0fd..57bcecd3 100644 --- a/runpod/api/ctl_commands.py +++ b/runpod/api/ctl_commands.py @@ -303,6 +303,7 @@ def create_endpoint( workers_max: int = 3, flashboot=False, allowed_cuda_versions: str = "12.1,12.2,12.3,12.4,12.5", + gpu_count: int = 1, ): """ Create an endpoint @@ -323,6 +324,7 @@ def create_endpoint( workers_max, flashboot, allowed_cuda_versions, + gpu_count ) ) diff --git a/runpod/api/mutations/endpoints.py b/runpod/api/mutations/endpoints.py index 6c4694e4..4f93d2c2 100644 --- a/runpod/api/mutations/endpoints.py +++ b/runpod/api/mutations/endpoints.py @@ -16,6 +16,7 @@ def generate_endpoint_mutation( workers_max: int = 3, flashboot=False, allowed_cuda_versions: str = "12.1,12.2,12.3,12.4,12.5", + gpu_count: int = None, ): """Generate a string for a GraphQL mutation to create a new endpoint.""" input_fields = [] @@ -45,9 +46,11 @@ def generate_endpoint_mutation( input_fields.append(f"workersMin: {workers_min}") input_fields.append(f"workersMax: {workers_max}") - # New Field for allowed CUDA versions if allowed_cuda_versions is not None: input_fields.append(f'allowedCudaVersions: "{allowed_cuda_versions}"') + + if gpu_count is not None: + input_fields.append(f"gpuCount: {gpu_count}") # Format the input fields into a string input_fields_string = ", ".join(input_fields) @@ -71,6 +74,7 @@ def generate_endpoint_mutation( workersMin workersMax allowedCudaVersions + gpuCount }} }} """