From f731a170a9e44716cdf4874d0b054a80abdd71fd Mon Sep 17 00:00:00 2001 From: Niels Warncke Date: Tue, 12 Nov 2024 11:35:35 +0100 Subject: [PATCH 1/2] Add allowed CUDA versions parameter to endpoint creation This commit introduces a new parameter `allowed_cuda_versions` to the `create_endpoint` function and the corresponding GraphQL mutation generator `generate_endpoint_mutation`. This parameter allows specifying a comma-separated list of allowed CUDA versions for the endpoint. The parameter is added to the GraphQL mutation as a new field and is expected to enhance control over the CUDA environment for endpoints. - The `allowed_cuda_versions` parameter is added with a default value of "12.1,12.2,12.3,12.4,12.5". - The parameter is also included in the GraphQL mutation string. - Redundant documentation and example code in `create_endpoint` have been removed to streamline the function's docstring. --- runpod/api/ctl_commands.py | 17 +++-------------- runpod/api/mutations/endpoints.py | 7 +++++++ 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/runpod/api/ctl_commands.py b/runpod/api/ctl_commands.py index 3d0137ce..5b27c0fd 100644 --- a/runpod/api/ctl_commands.py +++ b/runpod/api/ctl_commands.py @@ -302,24 +302,12 @@ def create_endpoint( workers_min: int = 0, workers_max: int = 3, flashboot=False, + allowed_cuda_versions: str = "12.1,12.2,12.3,12.4,12.5", ): """ Create an endpoint - :param name: the name of the endpoint - :param template_id: the id of the template to use for the endpoint - :param gpu_ids: the ids of the GPUs to use for the endpoint - :param network_volume_id: the id of the network volume to use for the endpoint - :param locations: the locations to use for the endpoint - :param idle_timeout: the idle timeout for the endpoint - :param scaler_type: the scaler type for the endpoint - :param scaler_value: the scaler value for the endpoint - :param workers_min: the minimum number of workers for the endpoint - :param workers_max: the maximum 
number of workers for the endpoint - - :example: - - >>> endpoint_id = runpod.create_endpoint("test", "template_id") + :param allowed_cuda_versions: Comma-separated string of allowed CUDA versions (e.g., "12.4,12.5"). """ raw_response = run_graphql_query( endpoint_mutations.generate_endpoint_mutation( @@ -334,6 +322,7 @@ def create_endpoint( workers_min, workers_max, flashboot, + allowed_cuda_versions, ) ) diff --git a/runpod/api/mutations/endpoints.py b/runpod/api/mutations/endpoints.py index 61571caf..6c4694e4 100644 --- a/runpod/api/mutations/endpoints.py +++ b/runpod/api/mutations/endpoints.py @@ -15,6 +15,7 @@ def generate_endpoint_mutation( workers_min: int = 0, workers_max: int = 3, flashboot=False, + allowed_cuda_versions: str = "12.1,12.2,12.3,12.4,12.5", ): """Generate a string for a GraphQL mutation to create a new endpoint.""" input_fields = [] @@ -44,6 +45,10 @@ def generate_endpoint_mutation( input_fields.append(f"workersMin: {workers_min}") input_fields.append(f"workersMax: {workers_max}") + # New Field for allowed CUDA versions + if allowed_cuda_versions is not None: + input_fields.append(f'allowedCudaVersions: "{allowed_cuda_versions}"') + # Format the input fields into a string input_fields_string = ", ".join(input_fields) @@ -65,11 +70,13 @@ def generate_endpoint_mutation( scalerValue workersMin workersMax + allowedCudaVersions }} }} """ + def update_endpoint_template_mutation(endpoint_id: str, template_id: str): """Generate a string for a GraphQL mutation to update an existing endpoint's template.""" input_fields = [] From 9e440fbfa26b1abb1a4e9003941931852a6b681d Mon Sep 17 00:00:00 2001 From: Niels Warncke Date: Tue, 12 Nov 2024 14:09:24 +0100 Subject: [PATCH 2/2] Add gpu_count parameter to endpoint creation functions This commit introduces the `gpu_count` parameter to the `create_endpoint` function in `ctl_commands.py` and the `generate_endpoint_mutation` function in `mutations/endpoints.py`. 
The `gpu_count` parameter allows specifying the number of GPUs to be used by an endpoint. This change includes the necessary updates to the function signatures and the GraphQL mutation string construction to support the new parameter. --- runpod/api/ctl_commands.py | 2 ++ runpod/api/mutations/endpoints.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/runpod/api/ctl_commands.py b/runpod/api/ctl_commands.py index 5b27c0fd..57bcecd3 100644 --- a/runpod/api/ctl_commands.py +++ b/runpod/api/ctl_commands.py @@ -303,6 +303,7 @@ def create_endpoint( workers_max: int = 3, flashboot=False, allowed_cuda_versions: str = "12.1,12.2,12.3,12.4,12.5", + gpu_count: int = 1, ): """ Create an endpoint @@ -323,6 +324,7 @@ def create_endpoint( workers_max, flashboot, allowed_cuda_versions, + gpu_count ) ) diff --git a/runpod/api/mutations/endpoints.py b/runpod/api/mutations/endpoints.py index 6c4694e4..4f93d2c2 100644 --- a/runpod/api/mutations/endpoints.py +++ b/runpod/api/mutations/endpoints.py @@ -16,6 +16,7 @@ def generate_endpoint_mutation( workers_max: int = 3, flashboot=False, allowed_cuda_versions: str = "12.1,12.2,12.3,12.4,12.5", + gpu_count: int = None, ): """Generate a string for a GraphQL mutation to create a new endpoint.""" input_fields = [] @@ -45,9 +46,11 @@ def generate_endpoint_mutation( input_fields.append(f"workersMin: {workers_min}") input_fields.append(f"workersMax: {workers_max}") - # New Field for allowed CUDA versions if allowed_cuda_versions is not None: input_fields.append(f'allowedCudaVersions: "{allowed_cuda_versions}"') + + if gpu_count is not None: + input_fields.append(f"gpuCount: {gpu_count}") # Format the input fields into a string input_fields_string = ", ".join(input_fields) @@ -71,6 +74,7 @@ def generate_endpoint_mutation( workersMin workersMax allowedCudaVersions + gpuCount }} }} """