Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
201 changes: 195 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
- [Authentication](#authentication)
- [API Keys](#api-keys)
- [Bearer Token](#bearer-token)
- [List Deployments](#list-deployments)
- [Deployments](#deployments)
- [List Deployments](#list-deployments)
- [Get Deployment by Id](#get-deployment-by-id)
- [Get Deployment Configuration](#get-deployment-configuration)
- [Make Completions Requests](#make-completions-requests)
- [Without Streaming](#without-streaming)
- [With Streaming](#with-streaming)
Expand All @@ -30,6 +33,12 @@
- [Applications](#applications)
- [List Applications](#list-applications)
- [Get Application by Id](#get-application-by-id)
- [Models](#models)
- [Get Model by Name](#get-model-by-name)
- [Toolsets](#toolsets)
- [Get Toolset by Id](#get-toolset-by-id)
- [Resource Permissions](#resource-permissions)
- [Grant Permissions](#grant-permissions)
- [Client Pool](#client-pool)
- [Synchronous Client Pool](#synchronous-client-pool)
- [Asynchronous Client Pool](#asynchronous-client-pool)
Expand Down Expand Up @@ -136,19 +145,94 @@ dial_client = Dial(
)
```

### List Deployments
### Deployments

If you want to get a list of available deployments, use `client.deployments.list()` or method:
#### List Deployments

To get a list of available deployments:

```python
# Sync
deployments = client.deployments.list()
# Async
deployments = await async_client.deployments.list()
```

```pycon
>>> client.deployments.list()
[
Deployment(id='gpt-35-turbo', model='gpt-35-turbo', owner='organization-owner', object='deployment', status='succeeded', created_at=1724760524, updated_at=1724760524, scale_settings=ScaleSettings(scale_type='standard'), features={'rate': False, 'tokenize': False, 'truncate_prompt': False, 'configuration': False, 'system_prompt': True, 'tools': False, 'seed': False, 'url_attachments': False, 'folder_attachments': False, 'allow_resume': True}),
Deployment(id='stable-diffusion-xl', model='stable-diffusion-xl', owner='organization-owner', object='deployment', status='succeeded', created_at=1724760524, updated_at=1724760524, scale_settings=ScaleSettings(scale_type='standard'), features={'rate': False, 'tokenize': False, 'truncate_prompt': False, 'configuration': False, 'system_prompt': True, 'tools': False, 'seed': False, 'url_attachments': False, 'folder_attachments': False, 'allow_resume': True}),
Deployment(id='gemini-pro-vision', model='gemini-pro-vision', owner='organization-owner', object='deployment', status='succeeded', created_at=1724760524, updated_at=1724760524, scale_settings=ScaleSettings(scale_type='standard'), features={'rate': False, 'tokenize': False, 'truncate_prompt': False, 'configuration': False, 'system_prompt': True, 'tools': False, 'seed': False, 'url_attachments': False, 'folder_attachments': False, 'allow_resume': True}),
Deployment(id='gpt-35-turbo', model='gpt-35-turbo', owner='organization-owner', object='deployment', status='succeeded', created_at=1724760524, updated_at=1724760524, scale_settings=ScaleSettings(scale_type='standard'), features=Features(rate=False, tokenize=False, truncate_prompt=False, configuration=False, system_prompt=True, tools=False, seed=False, url_attachments=False, folder_attachments=False, allow_resume=True)),
Deployment(id='stable-diffusion-xl', model='stable-diffusion-xl', owner='organization-owner', object='deployment', status='succeeded', created_at=1724760524, updated_at=1724760524, scale_settings=ScaleSettings(scale_type='standard'), features=Features(rate=False, tokenize=False, truncate_prompt=False, configuration=False, system_prompt=True, tools=False, seed=False, url_attachments=False, folder_attachments=False, allow_resume=True)),
...,
]
```

#### Get Deployment by Id

To fetch a single deployment by its identifier:

```python
# Sync
deployment = client.deployments.get("gpt-35-turbo")
# Async
deployment = await async_client.deployments.get("gpt-35-turbo")
```

As a result, you will receive a `Deployment` object:

```python
Deployment(
id="gpt-35-turbo",
model="gpt-35-turbo",
object="deployment",
owner="organization-owner",
status="succeeded",
created_at=1724760524,
updated_at=1724760524,
scale_settings=ScaleSettings(scale_type="standard"),
features=Features(
rate=False,
tokenize=False,
truncate_prompt=False,
configuration=True,
system_prompt=True,
tools=True,
seed=False,
url_attachments=False,
folder_attachments=False,
allow_resume=True,
),
defaults={},
)
```

#### Get Deployment Configuration

Some deployments expose a JSON Schema document describing their runtime configuration. Use `get_configuration_schema()` to retrieve it:

```python
# Sync
config = client.deployments.get_configuration_schema("gpt-35-turbo")
# Async
config = await async_client.deployments.get_configuration_schema("gpt-35-turbo")
```

The response is a plain `dict` whose shape is entirely deployment-specific:

```python
{
"type": "object",
"properties": {
"model_to_use": {
"type": "string",
"enum": ["gpt-4", "gpt-4o"],
"default": "gpt-4",
}
},
"additionalProperties": False,
}
```

### Make Completions Requests

#### Without Streaming
Expand Down Expand Up @@ -535,6 +619,111 @@ application = await async_client.application.get("app_id")

As a result, you will receive an `Application` object. Refer to the [previous example](#list-applications) for its structure.

### Models

#### Get Model by Name

To retrieve metadata, capabilities, and pricing for a specific model:

```python
# Sync
model_info = client.model.get("gpt-4")
# Async
model_info = await async_client.model.get("gpt-4")
```

As a result, you will receive a `ModelInfo` object:

```python
ModelInfo(
id="gpt-4",
model="gpt-4",
object="model",
owner="organization-owner",
status="succeeded",
created_at=1724760524,
updated_at=1724760524,
lifecycle_status="generally-available",
display_name="GPT-4",
description="OpenAI GPT-4 model.",
capabilities=ModelCapabilities(
scale_types=["standard"],
completion=False,
chat_completion=True,
embeddings=False,
fine_tune=False,
inference=False,
),
limits=ModelLimits(
max_prompt_tokens=8192,
max_completion_tokens=4096,
max_total_tokens=None,
),
pricing=ModelPricing(
unit="token",
prompt="0.00003",
completion="0.00006",
),
)
```

### Toolsets

#### Get Toolset by Id

To retrieve information about a specific MCP toolset:

```python
# Sync
toolset_info = client.toolset.get("my-toolset")
# Async
toolset_info = await async_client.toolset.get("my-toolset")
```

As a result, you will receive a `ToolsetInfo` object:

```python
ToolsetInfo(
id="my-toolset",
toolset="my-toolset",
display_name="My Toolset",
description="A collection of tools for data processing.",
transport="HTTP",
allowed_tools=["tool-a", "tool-b"],
owner="organization-owner",
status="succeeded",
created_at=1724760524,
updated_at=1724760524,
)
```

### Resource Permissions

#### Grant Permissions

Use `resource_permissions.grant()` to grant access to one or more files in DIAL storage to a specific deployment (receiver). This is typically used when a deployment needs to read files on behalf of a user.

```python
# Sync
client.resource_permissions.grant(
resources=["files/my-bucket/report.pdf"],
receiver="my-deployment",
permissions=["READ"],
)
# Async
await async_client.resource_permissions.grant(
resources=["files/my-bucket/report.pdf"],
receiver="my-deployment",
permissions=["READ"],
)
```

- `resources` — list of DIAL file URL strings to share.
- `receiver` — the deployment ID that should receive access.
- `permissions` — list of permission strings; defaults to `["READ"]`.

The method returns `None` on success and raises `DialException` on HTTP error.

### Client Pool

When you need to create multiple DIAL clients and wish to enhance performance by reusing the HTTP connection for the same DIAL instance, consider using synchronous and asynchronous **client pools**.
Expand Down
6 changes: 6 additions & 0 deletions aidial_client/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
ParsingDataError,
ResourceNotFoundError,
)
from aidial_client.types.model import ModelInfo, ModelLimits, ModelPricing
from aidial_client.types.toolset import ToolsetInfo

__all__ = [
"Dial",
Expand All @@ -24,4 +26,8 @@
"ParsingDataError",
"EtagMismatchError",
"ResourceNotFoundError",
"ToolsetInfo",
"ModelInfo",
"ModelPricing",
"ModelLimits",
]
10 changes: 10 additions & 0 deletions aidial_client/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,11 @@ def _init_resources(self) -> None:
)
self.deployments = resources.Deployments(http_client=self._http_client)
self.application = resources.Application(http_client=self._http_client)
self.toolset = resources.Toolset(http_client=self._http_client)
self.model = resources.Model(http_client=self._http_client)
self.resource_permissions = resources.ResourcePermissions(
http_client=self._http_client
)

def _create_http_client(self) -> SyncHTTPClient:
return SyncHTTPClient(
Expand Down Expand Up @@ -189,6 +194,11 @@ def _init_resources(self) -> None:
self.application = resources.AsyncApplication(
http_client=self._http_client
)
self.toolset = resources.AsyncToolset(http_client=self._http_client)
self.model = resources.AsyncModel(http_client=self._http_client)
self.resource_permissions = resources.AsyncResourcePermissions(
http_client=self._http_client
)

def _create_http_client(self) -> AsyncHTTPClient:
return AsyncHTTPClient(
Expand Down
1 change: 1 addition & 0 deletions aidial_client/_internal_types/_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
ExtraForbidModel,
bytes,
str,
dict,
httpx.Response,
FileDownloadResponse,
None,
Expand Down
7 changes: 7 additions & 0 deletions aidial_client/_utils/_response_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ def process_block_response(
return cast(ResponseT, response.text)
elif cast_to == NoneType:
return cast(ResponseT, None)
elif cast_to == dict:
try:
return cast(ResponseT, response.json())
except Exception as e:
raise ParsingDataError(
message=f"Error during parsing of response data: {str(e)}"
)
elif issubclass(cast_to, (ExtraForbidModel, ExtraAllowModel)):
try:
data = response.json()
Expand Down
12 changes: 12 additions & 0 deletions aidial_client/resources/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
from aidial_client.resources.deployments import AsyncDeployments, Deployments
from aidial_client.resources.metadata import AsyncMetadata, Metadata
from aidial_client.resources.model import AsyncModel, Model
from aidial_client.resources.resource_permissions import (
AsyncResourcePermissions,
ResourcePermissions,
)
from aidial_client.resources.toolset import AsyncToolset, Toolset

from .application import Application, AsyncApplication
from .bucket import AsyncBucket, Bucket
Expand All @@ -19,4 +25,10 @@
"Metadata",
"Application",
"AsyncApplication",
"Toolset",
Comment thread
korotaav48 marked this conversation as resolved.
"AsyncToolset",
"Model",
"AsyncModel",
"ResourcePermissions",
"AsyncResourcePermissions",
]
38 changes: 37 additions & 1 deletion aidial_client/resources/deployments.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import Any, Dict, List

from aidial_client._internal_types._http_request import FinalRequestOptions
from aidial_client.resources.base import AsyncResource, Resource
Expand All @@ -15,6 +15,23 @@ def _list_raw(self) -> DeploymentsResponse:
def list(self) -> List[Deployment]:
    """Return every deployment available to the caller."""
    response = self._list_raw()
    return response.data

def get(self, deployment_id: str) -> Deployment:
    """Fetch a single deployment by its identifier.

    Args:
        deployment_id: Identifier of the deployment to retrieve.

    Returns:
        The parsed ``Deployment`` object.
    """
    request_options = FinalRequestOptions(
        method="GET",
        url=f"openai/deployments/{deployment_id}",
    )
    return self.http_client.request(cast_to=Deployment, options=request_options)

def get_configuration_schema(self, deployment_id: str) -> Dict[str, Any]:
    """Retrieve the configuration JSON Schema exposed by a deployment.

    Args:
        deployment_id: Identifier of the deployment whose schema is requested.

    Returns:
        A plain ``dict`` whose shape is entirely deployment-specific.
    """
    request_options = FinalRequestOptions(
        method="GET",
        url=f"v1/deployments/{deployment_id}/configuration",
    )
    return self.http_client.request(cast_to=dict, options=request_options)


class AsyncDeployments(AsyncResource):
async def _list_raw(self) -> DeploymentsResponse:
Expand All @@ -25,3 +42,22 @@ async def _list_raw(self) -> DeploymentsResponse:

async def list(self) -> List[Deployment]:
    """Return every deployment available to the caller."""
    response = await self._list_raw()
    return response.data

async def get(self, deployment_id: str) -> Deployment:
    """Fetch a single deployment by its identifier.

    Args:
        deployment_id: Identifier of the deployment to retrieve.

    Returns:
        The parsed ``Deployment`` object.
    """
    request_options = FinalRequestOptions(
        method="GET",
        url=f"openai/deployments/{deployment_id}",
    )
    return await self.http_client.request(
        cast_to=Deployment, options=request_options
    )

async def get_configuration_schema(self, deployment_id: str) -> Dict[str, Any]:
    """Retrieve the configuration JSON Schema exposed by a deployment.

    Args:
        deployment_id: Identifier of the deployment whose schema is requested.

    Returns:
        A plain ``dict`` whose shape is entirely deployment-specific.
    """
    request_options = FinalRequestOptions(
        method="GET",
        url=f"v1/deployments/{deployment_id}/configuration",
    )
    return await self.http_client.request(cast_to=dict, options=request_options)
Loading
Loading