From 07fa2c0a2e8a7cd1546682c7abb3bb372eed96e5 Mon Sep 17 00:00:00 2001
From: snipe <72265661+notsniped@users.noreply.github.com>
Date: Sat, 20 Apr 2024 21:58:49 +0530
Subject: [PATCH 1/2] Remove locally-added `openai` Python library from client

---
 openai/__init__.py                            |  339 ---
 openai/__main__.py                            |    3 -
 openai/_base_client.py                        | 1972 ---------------
 openai/_client.py                             |  503 ----
 openai/_compat.py                             |  222 --
 openai/_constants.py                          |   14 -
 openai/_exceptions.py                         |  125 -
 openai/_extras/__init__.py                    |    2 -
 openai/_extras/_common.py                     |   21 -
 openai/_extras/numpy_proxy.py                 |   37 -
 openai/_extras/pandas_proxy.py                |   28 -
 openai/_files.py                              |  127 -
 openai/_legacy_response.py                    |  456 ----
 openai/_models.py                             |  654 -----
 openai/_module_client.py                      |   78 -
 openai/_qs.py                                 |  150 --
 openai/_resource.py                           |   43 -
 openai/_response.py                           |  824 ------
 openai/_streaming.py                          |  410 ---
 openai/_types.py                              |  222 --
 openai/_utils/__init__.py                     |   50 -
 openai/_utils/_logs.py                        |   25 -
 openai/_utils/_proxy.py                       |   63 -
 openai/_utils/_streams.py                     |   12 -
 openai/_utils/_sync.py                        |   64 -
 openai/_utils/_transform.py                   |  382 ---
 openai/_utils/_typing.py                      |  120 -
 openai/_utils/_utils.py                       |  391 ---
 openai/_version.py                            |    4 -
 openai/cli/__init__.py                        |    1 -
 openai/cli/_api/__init__.py                   |    1 -
 openai/cli/_api/_main.py                      |   16 -
 openai/cli/_api/audio.py                      |   94 -
 openai/cli/_api/chat/__init__.py              |   13 -
 openai/cli/_api/chat/completions.py           |  156 --
 openai/cli/_api/completions.py                |  173 --
 openai/cli/_api/files.py                      |   80 -
 openai/cli/_api/image.py                      |  139 --
 openai/cli/_api/models.py                     |   45 -
 openai/cli/_cli.py                            |  234 --
 openai/cli/_errors.py                         |   23 -
 openai/cli/_models.py                         |   17 -
 openai/cli/_progress.py                       |   59 -
 openai/cli/_tools/__init__.py                 |    1 -
 openai/cli/_tools/_main.py                    |   17 -
 openai/cli/_tools/fine_tunes.py               |   63 -
 openai/cli/_tools/migrate.py                  |  181 --
 openai/cli/_utils.py                          |   45 -
 openai/lib/.keep                              |    4 -
 openai/lib/_old_api.py                        |   72 -
 openai/lib/_validators.py                     |  805 ------
 openai/lib/azure.py                           |  529 ----
 openai/lib/streaming/__init__.py              |    8 -
 openai/lib/streaming/_assistants.py           | 1035 --------
 openai/pagination.py                          |  107 -
 openai/py.typed                               |    0
 openai/resources/__init__.py                  |  145 --
 openai/resources/audio/__init__.py            |   61 -
 openai/resources/audio/audio.py               |  144 --
 openai/resources/audio/speech.py              |  213 --
 openai/resources/audio/transcriptions.py      |  256 --
 openai/resources/audio/translations.py        |  226 --
 openai/resources/beta/__init__.py             |   47 -
 openai/resources/beta/assistants/__init__.py  |   33 -
 .../resources/beta/assistants/assistants.py   |  747 ------
 openai/resources/beta/assistants/files.py     |  483 ----
 openai/resources/beta/beta.py                 |  114 -
 openai/resources/beta/threads/__init__.py     |   47 -
 .../beta/threads/messages/__init__.py         |   33 -
 .../resources/beta/threads/messages/files.py  |  312 ---
 .../beta/threads/messages/messages.py         |  588 -----
 .../resources/beta/threads/runs/__init__.py   |   33 -
 openai/resources/beta/threads/runs/runs.py    | 2223 -----------------
 openai/resources/beta/threads/runs/steps.py   |  310 ---
 openai/resources/beta/threads/threads.py      | 1259 ----------
 openai/resources/chat/__init__.py             |   33 -
 openai/resources/chat/chat.py                 |   80 -
 openai/resources/chat/completions.py          | 1403 -----------
 openai/resources/completions.py               | 1102 --------
 openai/resources/embeddings.py                |  261 --
 openai/resources/files.py                     |  689 -----
 openai/resources/fine_tuning/__init__.py      |   33 -
 openai/resources/fine_tuning/fine_tuning.py   |   80 -
 openai/resources/fine_tuning/jobs.py          |  638 -----
 openai/resources/images.py                    |  587 -----
 openai/resources/models.py                    |  283 ---
 openai/resources/moderations.py               |  180 --
 openai/types/__init__.py                      |   31 -
 openai/types/audio/__init__.py                |    9 -
 openai/types/audio/speech_create_params.py    |   39 -
 openai/types/audio/transcription.py           |   10 -
 .../audio/transcription_create_params.py      |   65 -
 openai/types/audio/translation.py             |    9 -
 .../types/audio/translation_create_params.py  |   48 -
 openai/types/beta/__init__.py                 |   23 -
 openai/types/beta/assistant.py                |   64 -
 openai/types/beta/assistant_create_params.py  |   56 -
 openai/types/beta/assistant_deleted.py        |   15 -
 openai/types/beta/assistant_list_params.py    |   39 -
 openai/types/beta/assistant_stream_event.py   |  276 --
 openai/types/beta/assistant_tool.py           |   13 -
 openai/types/beta/assistant_tool_param.py     |   13 -
 openai/types/beta/assistant_update_params.py  |   58 -
 openai/types/beta/assistants/__init__.py      |    8 -
 .../types/beta/assistants/assistant_file.py   |   21 -
 .../beta/assistants/file_create_params.py     |   16 -
 .../beta/assistants/file_delete_response.py   |   15 -
 .../types/beta/assistants/file_list_params.py |   39 -
 openai/types/beta/chat/__init__.py            |    3 -
 openai/types/beta/code_interpreter_tool.py    |   12 -
 .../types/beta/code_interpreter_tool_param.py |   12 -
 openai/types/beta/function_tool.py            |   15 -
 openai/types/beta/function_tool_param.py      |   16 -
 openai/types/beta/retrieval_tool.py           |   12 -
 openai/types/beta/retrieval_tool_param.py     |   12 -
 openai/types/beta/thread.py                   |   27 -
 .../beta/thread_create_and_run_params.py      |  136 -
 openai/types/beta/thread_create_params.py     |   54 -
 openai/types/beta/thread_deleted.py           |   15 -
 openai/types/beta/thread_update_params.py     |   18 -
 openai/types/beta/threads/__init__.py         |   33 -
 openai/types/beta/threads/annotation.py       |   12 -
 openai/types/beta/threads/annotation_delta.py |   14 -
 .../beta/threads/file_citation_annotation.py  |   29 -
 .../threads/file_citation_delta_annotation.py |   33 -
 .../beta/threads/file_path_annotation.py      |   26 -
 .../threads/file_path_delta_annotation.py     |   30 -
 openai/types/beta/threads/image_file.py       |   13 -
 .../beta/threads/image_file_content_block.py  |   15 -
 openai/types/beta/threads/image_file_delta.py |   15 -
 .../beta/threads/image_file_delta_block.py    |   19 -
 openai/types/beta/threads/message.py          |   81 -
 openai/types/beta/threads/message_content.py  |   12 -
 .../beta/threads/message_content_delta.py     |   12 -
 .../beta/threads/message_create_params.py     |   38 -
 openai/types/beta/threads/message_delta.py    |   24 -
 .../types/beta/threads/message_delta_event.py |   19 -
 .../types/beta/threads/message_list_params.py |   42 -
 .../beta/threads/message_update_params.py     |   20 -
 .../types/beta/threads/messages/__init__.py   |    6 -
 .../beta/threads/messages/file_list_params.py |   41 -
 .../beta/threads/messages/message_file.py     |   25 -
 .../required_action_function_tool_call.py     |   34 -
 openai/types/beta/threads/run.py              |  144 --
 .../types/beta/threads/run_create_params.py   |   83 -
 openai/types/beta/threads/run_list_params.py  |   39 -
 openai/types/beta/threads/run_status.py       |    9 -
 .../threads/run_submit_tool_outputs_params.py |   52 -
 .../types/beta/threads/run_update_params.py   |   20 -
 openai/types/beta/threads/runs/__init__.py    |   22 -
 .../threads/runs/code_interpreter_logs.py     |   19 -
 .../runs/code_interpreter_output_image.py     |   26 -
 .../runs/code_interpreter_tool_call.py        |   70 -
 .../runs/code_interpreter_tool_call_delta.py  |   44 -
 .../beta/threads/runs/function_tool_call.py   |   38 -
 .../threads/runs/function_tool_call_delta.py  |   41 -
 .../runs/message_creation_step_details.py     |   19 -
 .../beta/threads/runs/retrieval_tool_call.py  |   21 -
 .../threads/runs/retrieval_tool_call_delta.py |   25 -
 openai/types/beta/threads/runs/run_step.py    |  110 -
 .../types/beta/threads/runs/run_step_delta.py |   18 -
 .../beta/threads/runs/run_step_delta_event.py |   19 -
 .../runs/run_step_delta_message_delta.py      |   20 -
 .../beta/threads/runs/step_list_params.py     |   41 -
 openai/types/beta/threads/runs/tool_call.py   |   15 -
 .../beta/threads/runs/tool_call_delta.py      |   16 -
 .../threads/runs/tool_call_delta_object.py    |   21 -
 .../threads/runs/tool_calls_step_details.py   |   21 -
 openai/types/beta/threads/text.py             |   15 -
 .../types/beta/threads/text_content_block.py  |   15 -
 openai/types/beta/threads/text_delta.py       |   15 -
 openai/types/beta/threads/text_delta_block.py |   19 -
 openai/types/chat/__init__.py                 |   41 -
 openai/types/chat/chat_completion.py          |   67 -
 ...chat_completion_assistant_message_param.py |   51 -
 openai/types/chat/chat_completion_chunk.py    |  128 -
 ...hat_completion_content_part_image_param.py |   26 -
 .../chat_completion_content_part_param.py     |   12 -
 ...chat_completion_content_part_text_param.py |   15 -
 ...t_completion_function_call_option_param.py |   12 -
 .../chat_completion_function_message_param.py |   19 -
 openai/types/chat/chat_completion_message.py  |   40 -
 .../chat/chat_completion_message_param.py     |   21 -
 .../chat/chat_completion_message_tool_call.py |   31 -
 ...chat_completion_message_tool_call_param.py |   31 -
 ...chat_completion_named_tool_choice_param.py |   19 -
 openai/types/chat/chat_completion_role.py     |    7 -
 .../chat_completion_system_message_param.py   |   22 -
 .../chat/chat_completion_token_logprob.py     |   57 -
 ...hat_completion_tool_choice_option_param.py |   12 -
 .../chat_completion_tool_message_param.py     |   18 -
 .../types/chat/chat_completion_tool_param.py  |   16 -
 .../chat_completion_user_message_param.py     |   25 -
 openai/types/chat/completion_create_params.py |  280 ---
 openai/types/completion.py                    |   37 -
 openai/types/completion_choice.py             |   35 -
 openai/types/completion_create_params.py      |  182 --
 openai/types/completion_usage.py              |   16 -
 openai/types/create_embedding_response.py     |   31 -
 openai/types/embedding.py                     |   23 -
 openai/types/embedding_create_params.py       |   50 -
 openai/types/file_content.py                  |    6 -
 openai/types/file_create_params.py            |   25 -
 openai/types/file_deleted.py                  |   15 -
 openai/types/file_list_params.py              |   12 -
 openai/types/file_object.py                   |   46 -
 openai/types/fine_tuning/__init__.py          |    9 -
 openai/types/fine_tuning/fine_tuning_job.py   |  107 -
 .../fine_tuning/fine_tuning_job_event.py      |   19 -
 openai/types/fine_tuning/job_create_params.py |   78 -
 .../fine_tuning/job_list_events_params.py     |   15 -
 openai/types/fine_tuning/job_list_params.py   |   15 -
 openai/types/image.py                         |   24 -
 openai/types/image_create_variation_params.py |   50 -
 openai/types/image_edit_params.py             |   61 -
 openai/types/image_generate_params.py         |   63 -
 openai/types/images_response.py               |   14 -
 openai/types/model.py                         |   21 -
 openai/types/model_deleted.py                 |   13 -
 openai/types/moderation.py                    |  117 -
 openai/types/moderation_create_params.py      |   25 -
 openai/types/moderation_create_response.py    |   19 -
 openai/types/shared/__init__.py               |    5 -
 openai/types/shared/error_object.py           |   17 -
 openai/types/shared/function_definition.py    |   35 -
 openai/types/shared/function_parameters.py    |    7 -
 openai/types/shared_params/__init__.py        |    4 -
 .../shared_params/function_definition.py      |   36 -
 .../shared_params/function_parameters.py      |    9 -
 openai/version.py                             |    3 -
 230 files changed, 28905 deletions(-)
 delete mode 100644 openai/__init__.py
 delete mode 100644 openai/__main__.py
 delete mode 100644 openai/_base_client.py
 delete mode 100644 openai/_client.py
 delete mode 100644 openai/_compat.py
 delete mode 100644 openai/_constants.py
 delete mode 100644 openai/_exceptions.py
 delete mode 100644 openai/_extras/__init__.py
 delete mode 100644 openai/_extras/_common.py
 delete mode 100644 openai/_extras/numpy_proxy.py
 delete mode 100644 openai/_extras/pandas_proxy.py
 delete mode 100644 openai/_files.py
 delete mode 100644 openai/_legacy_response.py
 delete mode 100644 openai/_models.py
 delete mode 100644 openai/_module_client.py
 delete mode 100644 openai/_qs.py
 delete mode 100644 openai/_resource.py
 delete mode 100644 openai/_response.py
 delete mode 100644 openai/_streaming.py
 delete mode 100644 openai/_types.py
 delete mode 100644 openai/_utils/__init__.py
 delete mode 100644 openai/_utils/_logs.py
 delete mode 100644 openai/_utils/_proxy.py
 delete mode 100644 openai/_utils/_streams.py
 delete mode 100644 openai/_utils/_sync.py
 delete mode 100644 openai/_utils/_transform.py
 delete mode 100644 openai/_utils/_typing.py
 delete mode 100644 openai/_utils/_utils.py
 delete mode 100644 openai/_version.py
 delete mode 100644 openai/cli/__init__.py
 delete mode 100644 openai/cli/_api/__init__.py
 delete mode 100644 openai/cli/_api/_main.py
 delete mode 100644 openai/cli/_api/audio.py
 delete mode 100644 openai/cli/_api/chat/__init__.py
 delete mode 100644 openai/cli/_api/chat/completions.py
 delete mode 100644 openai/cli/_api/completions.py
 delete mode 100644 openai/cli/_api/files.py
 delete mode 100644 openai/cli/_api/image.py
 delete mode 100644 openai/cli/_api/models.py
 delete mode 100644 openai/cli/_cli.py
 delete mode 100644 openai/cli/_errors.py
 delete mode 100644 openai/cli/_models.py
 delete mode 100644 openai/cli/_progress.py
 delete mode 100644 openai/cli/_tools/__init__.py
 delete mode 100644 openai/cli/_tools/_main.py
 delete mode 100644 openai/cli/_tools/fine_tunes.py
 delete mode 100644 openai/cli/_tools/migrate.py
 delete mode 100644 openai/cli/_utils.py
 delete mode 100644 openai/lib/.keep
 delete mode 100644 openai/lib/_old_api.py
 delete mode 100644 openai/lib/_validators.py
 delete mode 100644 openai/lib/azure.py
 delete mode 100644 openai/lib/streaming/__init__.py
 delete mode 100644 openai/lib/streaming/_assistants.py
 delete mode 100644 openai/pagination.py
 delete mode 100644 openai/py.typed
 delete mode 100644 openai/resources/__init__.py
 delete mode 100644 openai/resources/audio/__init__.py
 delete mode 100644 openai/resources/audio/audio.py
 delete mode 100644 openai/resources/audio/speech.py
 delete mode 100644 openai/resources/audio/transcriptions.py
 delete mode 100644 openai/resources/audio/translations.py
 delete mode 100644 openai/resources/beta/__init__.py
 delete mode 100644 openai/resources/beta/assistants/__init__.py
 delete mode 100644 openai/resources/beta/assistants/assistants.py
 delete mode 100644 openai/resources/beta/assistants/files.py
 delete mode 100644 openai/resources/beta/beta.py
 delete mode 100644 openai/resources/beta/threads/__init__.py
 delete mode 100644 openai/resources/beta/threads/messages/__init__.py
 delete mode 100644 openai/resources/beta/threads/messages/files.py
 delete mode 100644 openai/resources/beta/threads/messages/messages.py
 delete mode 100644 openai/resources/beta/threads/runs/__init__.py
 delete mode 100644 openai/resources/beta/threads/runs/runs.py
 delete mode 100644 openai/resources/beta/threads/runs/steps.py
 delete mode 100644 openai/resources/beta/threads/threads.py
 delete mode 100644 openai/resources/chat/__init__.py
 delete mode 100644 openai/resources/chat/chat.py
 delete mode 100644 openai/resources/chat/completions.py
 delete mode 100644 openai/resources/completions.py
 delete mode 100644 openai/resources/embeddings.py
 delete mode 100644 openai/resources/files.py
 delete mode 100644 openai/resources/fine_tuning/__init__.py
 delete mode 100644 openai/resources/fine_tuning/fine_tuning.py
 delete mode 100644 openai/resources/fine_tuning/jobs.py
 delete mode 100644 openai/resources/images.py
 delete mode 100644 openai/resources/models.py
 delete mode 100644 openai/resources/moderations.py
 delete mode 100644 openai/types/__init__.py
 delete mode 100644 openai/types/audio/__init__.py
 delete mode 100644 openai/types/audio/speech_create_params.py
 delete mode 100644 openai/types/audio/transcription.py
 delete mode 100644 openai/types/audio/transcription_create_params.py
 delete mode 100644 openai/types/audio/translation.py
 delete mode 100644 openai/types/audio/translation_create_params.py
 delete mode 100644 openai/types/beta/__init__.py
 delete mode 100644 openai/types/beta/assistant.py
 delete mode 100644 openai/types/beta/assistant_create_params.py
 delete mode 100644 openai/types/beta/assistant_deleted.py
 delete mode 100644 openai/types/beta/assistant_list_params.py
 delete mode 100644 openai/types/beta/assistant_stream_event.py
 delete mode 100644 openai/types/beta/assistant_tool.py
 delete mode 100644 openai/types/beta/assistant_tool_param.py
 delete mode 100644 openai/types/beta/assistant_update_params.py
 delete mode 100644 openai/types/beta/assistants/__init__.py
 delete mode 100644 openai/types/beta/assistants/assistant_file.py
 delete mode 100644 openai/types/beta/assistants/file_create_params.py
 delete mode 100644 openai/types/beta/assistants/file_delete_response.py
 delete mode 100644 openai/types/beta/assistants/file_list_params.py
 delete mode 100644 openai/types/beta/chat/__init__.py
 delete mode 100644 openai/types/beta/code_interpreter_tool.py
 delete mode 100644 openai/types/beta/code_interpreter_tool_param.py
 delete mode 100644 openai/types/beta/function_tool.py
 delete mode 100644 openai/types/beta/function_tool_param.py
 delete mode 100644 openai/types/beta/retrieval_tool.py
 delete mode 100644 openai/types/beta/retrieval_tool_param.py
 delete mode 100644 openai/types/beta/thread.py
 delete mode 100644 openai/types/beta/thread_create_and_run_params.py
 delete mode 100644 openai/types/beta/thread_create_params.py
 delete mode 100644 openai/types/beta/thread_deleted.py
 delete mode 100644 openai/types/beta/thread_update_params.py
 delete mode 100644 openai/types/beta/threads/__init__.py
 delete mode 100644 openai/types/beta/threads/annotation.py
 delete mode 100644 openai/types/beta/threads/annotation_delta.py
 delete mode 100644 openai/types/beta/threads/file_citation_annotation.py
 delete mode 100644 openai/types/beta/threads/file_citation_delta_annotation.py
 delete mode 100644 openai/types/beta/threads/file_path_annotation.py
 delete mode 100644 openai/types/beta/threads/file_path_delta_annotation.py
 delete mode 100644 openai/types/beta/threads/image_file.py
 delete mode 100644 openai/types/beta/threads/image_file_content_block.py
 delete mode 100644 openai/types/beta/threads/image_file_delta.py
 delete mode 100644 openai/types/beta/threads/image_file_delta_block.py
 delete mode 100644 openai/types/beta/threads/message.py
 delete mode 100644 openai/types/beta/threads/message_content.py
 delete mode 100644 openai/types/beta/threads/message_content_delta.py
 delete mode 100644 openai/types/beta/threads/message_create_params.py
 delete mode 100644 openai/types/beta/threads/message_delta.py
 delete mode 100644 openai/types/beta/threads/message_delta_event.py
 delete mode 100644 openai/types/beta/threads/message_list_params.py
 delete mode 100644 openai/types/beta/threads/message_update_params.py
 delete mode 100644 openai/types/beta/threads/messages/__init__.py
 delete mode 100644 openai/types/beta/threads/messages/file_list_params.py
 delete mode 100644 openai/types/beta/threads/messages/message_file.py
 delete mode 100644 openai/types/beta/threads/required_action_function_tool_call.py
 delete mode 100644 openai/types/beta/threads/run.py
 delete mode 100644 openai/types/beta/threads/run_create_params.py
 delete mode 100644 openai/types/beta/threads/run_list_params.py
 delete mode 100644 openai/types/beta/threads/run_status.py
 delete mode 100644 openai/types/beta/threads/run_submit_tool_outputs_params.py
 delete mode 100644 openai/types/beta/threads/run_update_params.py
 delete mode 100644 openai/types/beta/threads/runs/__init__.py
 delete mode 100644 openai/types/beta/threads/runs/code_interpreter_logs.py
 delete mode 100644 openai/types/beta/threads/runs/code_interpreter_output_image.py
 delete mode 100644 openai/types/beta/threads/runs/code_interpreter_tool_call.py
 delete mode 100644 openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py
 delete mode 100644 openai/types/beta/threads/runs/function_tool_call.py
 delete mode 100644 openai/types/beta/threads/runs/function_tool_call_delta.py
 delete mode 100644 openai/types/beta/threads/runs/message_creation_step_details.py
 delete mode 100644 openai/types/beta/threads/runs/retrieval_tool_call.py
 delete mode 100644 openai/types/beta/threads/runs/retrieval_tool_call_delta.py
 delete mode 100644 openai/types/beta/threads/runs/run_step.py
 delete mode 100644 openai/types/beta/threads/runs/run_step_delta.py
 delete mode 100644 openai/types/beta/threads/runs/run_step_delta_event.py
 delete mode 100644 openai/types/beta/threads/runs/run_step_delta_message_delta.py
 delete mode 100644 openai/types/beta/threads/runs/step_list_params.py
 delete mode 100644 openai/types/beta/threads/runs/tool_call.py
 delete mode 100644 openai/types/beta/threads/runs/tool_call_delta.py
 delete mode 100644 openai/types/beta/threads/runs/tool_call_delta_object.py
 delete mode 100644 openai/types/beta/threads/runs/tool_calls_step_details.py
 delete mode 100644 openai/types/beta/threads/text.py
 delete mode 100644 openai/types/beta/threads/text_content_block.py
 delete mode 100644 openai/types/beta/threads/text_delta.py
 delete mode 100644 openai/types/beta/threads/text_delta_block.py
 delete mode 100644 openai/types/chat/__init__.py
 delete mode 100644 openai/types/chat/chat_completion.py
 delete mode 100644 openai/types/chat/chat_completion_assistant_message_param.py
 delete mode 100644 openai/types/chat/chat_completion_chunk.py
 delete mode 100644 openai/types/chat/chat_completion_content_part_image_param.py
 delete mode 100644 openai/types/chat/chat_completion_content_part_param.py
 delete mode 100644 openai/types/chat/chat_completion_content_part_text_param.py
 delete mode 100644 openai/types/chat/chat_completion_function_call_option_param.py
 delete mode 100644 openai/types/chat/chat_completion_function_message_param.py
 delete mode 100644 openai/types/chat/chat_completion_message.py
 delete mode 100644 openai/types/chat/chat_completion_message_param.py
 delete mode 100644 openai/types/chat/chat_completion_message_tool_call.py
 delete mode 100644 openai/types/chat/chat_completion_message_tool_call_param.py
 delete mode 100644 openai/types/chat/chat_completion_named_tool_choice_param.py
 delete mode 100644 openai/types/chat/chat_completion_role.py
 delete mode 100644 openai/types/chat/chat_completion_system_message_param.py
 delete mode 100644 openai/types/chat/chat_completion_token_logprob.py
 delete mode 100644 openai/types/chat/chat_completion_tool_choice_option_param.py
 delete mode 100644 openai/types/chat/chat_completion_tool_message_param.py
 delete mode 100644 openai/types/chat/chat_completion_tool_param.py
 delete mode 100644 openai/types/chat/chat_completion_user_message_param.py
 delete mode 100644 openai/types/chat/completion_create_params.py
 delete mode 100644 openai/types/completion.py
 delete mode 100644 openai/types/completion_choice.py
 delete mode 100644 openai/types/completion_create_params.py
 delete mode 100644 openai/types/completion_usage.py
 delete mode 100644 openai/types/create_embedding_response.py
 delete mode 100644 openai/types/embedding.py
 delete mode 100644 openai/types/embedding_create_params.py
 delete mode 100644 openai/types/file_content.py
 delete mode 100644 openai/types/file_create_params.py
 delete mode 100644 openai/types/file_deleted.py
 delete mode 100644 openai/types/file_list_params.py
 delete mode 100644 openai/types/file_object.py
 delete mode 100644 openai/types/fine_tuning/__init__.py
 delete mode 100644 openai/types/fine_tuning/fine_tuning_job.py
 delete mode 100644 openai/types/fine_tuning/fine_tuning_job_event.py
 delete mode 100644 openai/types/fine_tuning/job_create_params.py
 delete mode 100644 openai/types/fine_tuning/job_list_events_params.py
 delete mode 100644 openai/types/fine_tuning/job_list_params.py
 delete mode 100644 openai/types/image.py
 delete mode 100644 openai/types/image_create_variation_params.py
 delete mode 100644 openai/types/image_edit_params.py
 delete mode 100644 openai/types/image_generate_params.py
 delete mode 100644 openai/types/images_response.py
 delete mode 100644 openai/types/model.py
 delete mode 100644 openai/types/model_deleted.py
 delete mode 100644 openai/types/moderation.py
 delete mode 100644 openai/types/moderation_create_params.py
 delete mode 100644 openai/types/moderation_create_response.py
 delete mode 100644 openai/types/shared/__init__.py
 delete mode 100644 openai/types/shared/error_object.py
 delete mode 100644 openai/types/shared/function_definition.py
 delete mode 100644 openai/types/shared/function_parameters.py
 delete mode 100644 openai/types/shared_params/__init__.py
 delete mode 100644 openai/types/shared_params/function_definition.py
 delete mode 100644 openai/types/shared_params/function_parameters.py
 delete mode 100644 openai/version.py

diff --git a/openai/__init__.py b/openai/__init__.py
deleted file mode 100644
index cd05a749..00000000
--- a/openai/__init__.py
+++ /dev/null
@@ -1,339 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os as _os
-from typing_extensions import override
-
-from . import types
-from ._types import NOT_GIVEN, NoneType, NotGiven, Transport, ProxiesTypes
-from ._utils import file_from_path
-from ._client import Client, OpenAI, Stream, Timeout, Transport, AsyncClient, AsyncOpenAI, AsyncStream, RequestOptions
-from ._models import BaseModel
-from ._version import __title__, __version__
-from ._response import APIResponse as APIResponse, AsyncAPIResponse as AsyncAPIResponse
-from ._constants import DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES, DEFAULT_CONNECTION_LIMITS
-from ._exceptions import (
-    APIError,
-    OpenAIError,
-    ConflictError,
-    NotFoundError,
-    APIStatusError,
-    RateLimitError,
-    APITimeoutError,
-    BadRequestError,
-    APIConnectionError,
-    AuthenticationError,
-    InternalServerError,
-    PermissionDeniedError,
-    UnprocessableEntityError,
-    APIResponseValidationError,
-)
-from ._utils._logs import setup_logging as _setup_logging
-
-__all__ = [
-    "types",
-    "__version__",
-    "__title__",
-    "NoneType",
-    "Transport",
-    "ProxiesTypes",
-    "NotGiven",
-    "NOT_GIVEN",
-    "OpenAIError",
-    "APIError",
-    "APIStatusError",
-    "APITimeoutError",
-    "APIConnectionError",
-    "APIResponseValidationError",
-    "BadRequestError",
-    "AuthenticationError",
-    "PermissionDeniedError",
-    "NotFoundError",
-    "ConflictError",
-    "UnprocessableEntityError",
-    "RateLimitError",
-    "InternalServerError",
-    "Timeout",
-    "RequestOptions",
-    "Client",
-    "AsyncClient",
-    "Stream",
-    "AsyncStream",
-    "OpenAI",
-    "AsyncOpenAI",
-    "file_from_path",
-    "BaseModel",
-    "DEFAULT_TIMEOUT",
-    "DEFAULT_MAX_RETRIES",
-    "DEFAULT_CONNECTION_LIMITS",
-]
-
-from .lib import azure as _azure
-from .version import VERSION as VERSION
-from .lib.azure import AzureOpenAI as AzureOpenAI, AsyncAzureOpenAI as AsyncAzureOpenAI
-from .lib._old_api import *
-from .lib.streaming import (
-    AssistantEventHandler as AssistantEventHandler,
-    AsyncAssistantEventHandler as AsyncAssistantEventHandler,
-)
-
-_setup_logging()
-
-# Update the __module__ attribute for exported symbols so that
-# error messages point to this module instead of the module
-# it was originally defined in, e.g.
-# openai._exceptions.NotFoundError -> openai.NotFoundError
-__locals = locals()
-for __name in __all__:
-    if not __name.startswith("__"):
-        try:
-            __locals[__name].__module__ = "openai"
-        except (TypeError, AttributeError):
-            # Some of our exported symbols are builtins which we can't set attributes for.
-            pass
-
-# ------ Module level client ------
-import typing as _t
-import typing_extensions as _te
-
-import httpx as _httpx
-
-from ._base_client import DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES
-
-api_key: str | None = None
-
-organization: str | None = None
-
-base_url: str | _httpx.URL | None = None
-
-timeout: float | Timeout | None = DEFAULT_TIMEOUT
-
-max_retries: int = DEFAULT_MAX_RETRIES
-
-default_headers: _t.Mapping[str, str] | None = None
-
-default_query: _t.Mapping[str, object] | None = None
-
-http_client: _httpx.Client | None = None
-
-_ApiType = _te.Literal["openai", "azure"]
-
-api_type: _ApiType | None = _t.cast(_ApiType, _os.environ.get("OPENAI_API_TYPE"))
-
-api_version: str | None = _os.environ.get("OPENAI_API_VERSION")
-
-azure_endpoint: str | None = _os.environ.get("AZURE_OPENAI_ENDPOINT")
-
-azure_ad_token: str | None = _os.environ.get("AZURE_OPENAI_AD_TOKEN")
-
-azure_ad_token_provider: _azure.AzureADTokenProvider | None = None
-
-
-class _ModuleClient(OpenAI):
-    # Note: we have to use type: ignores here as overriding class members
-    # with properties is technically unsafe but it is fine for our use case
-
-    @property  # type: ignore
-    @override
-    def api_key(self) -> str | None:
-        return api_key
-
-    @api_key.setter  # type: ignore
-    def api_key(self, value: str | None) -> None:  # type: ignore
-        global api_key
-
-        api_key = value
-
-    @property  # type: ignore
-    @override
-    def organization(self) -> str | None:
-        return organization
-
-    @organization.setter  # type: ignore
-    def organization(self, value: str | None) -> None:  # type: ignore
-        global organization
-
-        organization = value
-
-    @property
-    @override
-    def base_url(self) -> _httpx.URL:
-        if base_url is not None:
-            return _httpx.URL(base_url)
-
-        return super().base_url
-
-    @base_url.setter
-    def base_url(self, url: _httpx.URL | str) -> None:
-        super().base_url = url  # type: ignore[misc]
-
-    @property  # type: ignore
-    @override
-    def timeout(self) -> float | Timeout | None:
-        return timeout
-
-    @timeout.setter  # type: ignore
-    def timeout(self, value: float | Timeout | None) -> None:  # type: ignore
-        global timeout
-
-        timeout = value
-
-    @property  # type: ignore
-    @override
-    def max_retries(self) -> int:
-        return max_retries
-
-    @max_retries.setter  # type: ignore
-    def max_retries(self, value: int) -> None:  # type: ignore
-        global max_retries
-
-        max_retries = value
-
-    @property  # type: ignore
-    @override
-    def _custom_headers(self) -> _t.Mapping[str, str] | None:
-        return default_headers
-
-    @_custom_headers.setter  # type: ignore
-    def _custom_headers(self, value: _t.Mapping[str, str] | None) -> None:  # type: ignore
-        global default_headers
-
-        default_headers = value
-
-    @property  # type: ignore
-    @override
-    def _custom_query(self) -> _t.Mapping[str, object] | None:
-        return default_query
-
-    @_custom_query.setter  # type: ignore
-    def _custom_query(self, value: _t.Mapping[str, object] | None) -> None:  # type: ignore
-        global default_query
-
-        default_query = value
-
-    @property  # type: ignore
-    @override
-    def _client(self) -> _httpx.Client:
-        return http_client or super()._client
-
-    @_client.setter  # type: ignore
-    def _client(self, value: _httpx.Client) -> None:  # type: ignore
-        global http_client
-
-        http_client = value
-
-
-class _AzureModuleClient(_ModuleClient, AzureOpenAI):  # type: ignore
-    ...
-
-
-class _AmbiguousModuleClientUsageError(OpenAIError):
-    def __init__(self) -> None:
-        super().__init__(
-            "Ambiguous use of module client; please set `openai.api_type` or the `OPENAI_API_TYPE` environment variable to `openai` or `azure`"
-        )
-
-
-def _has_openai_credentials() -> bool:
-    return _os.environ.get("OPENAI_API_KEY") is not None
-
-
-def _has_azure_credentials() -> bool:
-    return azure_endpoint is not None or _os.environ.get("AZURE_OPENAI_API_KEY") is not None
-
-
-def _has_azure_ad_credentials() -> bool:
-    return (
-        _os.environ.get("AZURE_OPENAI_AD_TOKEN") is not None
-        or azure_ad_token is not None
-        or azure_ad_token_provider is not None
-    )
-
-
-_client: OpenAI | None = None
-
-
-def _load_client() -> OpenAI:  # type: ignore[reportUnusedFunction]
-    global _client
-
-    if _client is None:
-        global api_type, azure_endpoint, azure_ad_token, api_version
-
-        if azure_endpoint is None:
-            azure_endpoint = _os.environ.get("AZURE_OPENAI_ENDPOINT")
-
-        if azure_ad_token is None:
-            azure_ad_token = _os.environ.get("AZURE_OPENAI_AD_TOKEN")
-
-        if api_version is None:
-            api_version = _os.environ.get("OPENAI_API_VERSION")
-
-        if api_type is None:
-            has_openai = _has_openai_credentials()
-            has_azure = _has_azure_credentials()
-            has_azure_ad = _has_azure_ad_credentials()
-
-            if has_openai and (has_azure or has_azure_ad):
-                raise _AmbiguousModuleClientUsageError()
-
-            if (azure_ad_token is not None or azure_ad_token_provider is not None) and _os.environ.get(
-                "AZURE_OPENAI_API_KEY"
-            ) is not None:
-                raise _AmbiguousModuleClientUsageError()
-
-            if has_azure or has_azure_ad:
-                api_type = "azure"
-            else:
-                api_type = "openai"
-
-        if api_type == "azure":
-            _client = _AzureModuleClient(  # type: ignore
-                api_version=api_version,
-                azure_endpoint=azure_endpoint,
-                api_key=api_key,
-                azure_ad_token=azure_ad_token,
-                azure_ad_token_provider=azure_ad_token_provider,
-                organization=organization,
-                base_url=base_url,
-                timeout=timeout,
-                max_retries=max_retries,
-                default_headers=default_headers,
-                default_query=default_query,
-                http_client=http_client,
-            )
-            return _client
-
-        _client = _ModuleClient(
-            api_key=api_key,
-            organization=organization,
-            base_url=base_url,
-            timeout=timeout,
-            max_retries=max_retries,
-            default_headers=default_headers,
-            default_query=default_query,
-            http_client=http_client,
-        )
-        return _client
-
-    return _client
-
-
-def _reset_client() -> None:  # type: ignore[reportUnusedFunction]
-    global _client
-
-    _client = None
-
-
-from ._module_client import (
-    beta as beta,
-    chat as chat,
-    audio as audio,
-    files as files,
-    images as images,
-    models as models,
-    embeddings as embeddings,
-    completions as completions,
-    fine_tuning as fine_tuning,
-    moderations as moderations,
-)
diff --git a/openai/__main__.py b/openai/__main__.py
deleted file mode 100644
index 4e28416e..00000000
--- a/openai/__main__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .cli import main
-
-main()
diff --git a/openai/_base_client.py b/openai/_base_client.py
deleted file mode 100644
index 502ed7c7..00000000
--- a/openai/_base_client.py
+++ /dev/null
@@ -1,1972 +0,0 @@
-from __future__ import annotations
-
-import json
-import time
-import uuid
-import email
-import asyncio
-import inspect
-import logging
-import platform
-import warnings
-import email.utils
-from types import TracebackType
-from random import random
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    Type,
-    Union,
-    Generic,
-    Mapping,
-    TypeVar,
-    Iterable,
-    Iterator,
-    Optional,
-    Generator,
-    AsyncIterator,
-    cast,
-    overload,
-)
-from functools import lru_cache
-from typing_extensions import Literal, override, get_origin
-
-import anyio
-import httpx
-import distro
-import pydantic
-from httpx import URL, Limits
-from pydantic import PrivateAttr
-
-from . import _exceptions
-from ._qs import Querystring
-from ._files import to_httpx_files, async_to_httpx_files
-from ._types import (
-    NOT_GIVEN,
-    Body,
-    Omit,
-    Query,
-    Headers,
-    Timeout,
-    NotGiven,
-    ResponseT,
-    Transport,
-    AnyMapping,
-    PostParser,
-    ProxiesTypes,
-    RequestFiles,
-    HttpxSendArgs,
-    AsyncTransport,
-    RequestOptions,
-    ModelBuilderProtocol,
-)
-from ._utils import is_dict, is_list, is_given, is_mapping
-from ._compat import model_copy, model_dump
-from ._models import GenericModel, FinalRequestOptions, validate_type, construct_type
-from ._response import (
-    APIResponse,
-    BaseAPIResponse,
-    AsyncAPIResponse,
-    extract_response_type,
-)
-from ._constants import (
-    DEFAULT_TIMEOUT,
-    MAX_RETRY_DELAY,
-    DEFAULT_MAX_RETRIES,
-    INITIAL_RETRY_DELAY,
-    RAW_RESPONSE_HEADER,
-    OVERRIDE_CAST_TO_HEADER,
-    DEFAULT_CONNECTION_LIMITS,
-)
-from ._streaming import Stream, SSEDecoder, AsyncStream, SSEBytesDecoder
-from ._exceptions import (
-    APIStatusError,
-    APITimeoutError,
-    APIConnectionError,
-    APIResponseValidationError,
-)
-from ._legacy_response import LegacyAPIResponse
-
-log: logging.Logger = logging.getLogger(__name__)
-
-# TODO: make base page type vars covariant
-SyncPageT = TypeVar("SyncPageT", bound="BaseSyncPage[Any]")
-AsyncPageT = TypeVar("AsyncPageT", bound="BaseAsyncPage[Any]")
-
-
-_T = TypeVar("_T")
-_T_co = TypeVar("_T_co", covariant=True)
-
-_StreamT = TypeVar("_StreamT", bound=Stream[Any])
-_AsyncStreamT = TypeVar("_AsyncStreamT", bound=AsyncStream[Any])
-
-if TYPE_CHECKING:
-    from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
-else:
-    try:
-        from httpx._config import DEFAULT_TIMEOUT_CONFIG as HTTPX_DEFAULT_TIMEOUT
-    except ImportError:
-        # taken from https://github.com/encode/httpx/blob/3ba5fe0d7ac70222590e759c31442b1cab263791/httpx/_config.py#L366
-        HTTPX_DEFAULT_TIMEOUT = Timeout(5.0)
-
-
-class PageInfo:
-    """Stores the necessary information to build the request to retrieve the next page.
-
-    Either `url` or `params` must be set.
-    """
-
-    url: URL | NotGiven
-    params: Query | NotGiven
-
-    @overload
-    def __init__(
-        self,
-        *,
-        url: URL,
-    ) -> None:
-        ...
-
-    @overload
-    def __init__(
-        self,
-        *,
-        params: Query,
-    ) -> None:
-        ...
-
-    def __init__(
-        self,
-        *,
-        url: URL | NotGiven = NOT_GIVEN,
-        params: Query | NotGiven = NOT_GIVEN,
-    ) -> None:
-        self.url = url
-        self.params = params
-
-
-class BasePage(GenericModel, Generic[_T]):
-    """
-    Defines the core interface for pagination.
-
-    Type Args:
-        ModelT: The pydantic model that represents an item in the response.
-
-    Methods:
-        has_next_page(): Check if there is another page available
-        next_page_info(): Get the necessary information to make a request for the next page
-    """
-
-    _options: FinalRequestOptions = PrivateAttr()
-    _model: Type[_T] = PrivateAttr()
-
-    def has_next_page(self) -> bool:
-        items = self._get_page_items()
-        if not items:
-            return False
-        return self.next_page_info() is not None
-
-    def next_page_info(self) -> Optional[PageInfo]:
-        ...
-
-    def _get_page_items(self) -> Iterable[_T]:  # type: ignore[empty-body]
-        ...
-
-    def _params_from_url(self, url: URL) -> httpx.QueryParams:
-        # TODO: do we have to preprocess params here?
-        return httpx.QueryParams(cast(Any, self._options.params)).merge(url.params)
-
-    def _info_to_options(self, info: PageInfo) -> FinalRequestOptions:
-        options = model_copy(self._options)
-        options._strip_raw_response_header()
-
-        if not isinstance(info.params, NotGiven):
-            options.params = {**options.params, **info.params}
-            return options
-
-        if not isinstance(info.url, NotGiven):
-            params = self._params_from_url(info.url)
-            url = info.url.copy_with(params=params)
-            options.params = dict(url.params)
-            options.url = str(url)
-            return options
-
-        raise ValueError("Unexpected PageInfo state")
-
-
-class BaseSyncPage(BasePage[_T], Generic[_T]):
-    _client: SyncAPIClient = pydantic.PrivateAttr()
-
-    def _set_private_attributes(
-        self,
-        client: SyncAPIClient,
-        model: Type[_T],
-        options: FinalRequestOptions,
-    ) -> None:
-        self._model = model
-        self._client = client
-        self._options = options
-
-    # Pydantic uses a custom `__iter__` method to support casting BaseModels
-    # to dictionaries. e.g. dict(model).
-    # As we want to support `for item in page`, this is inherently incompatible
-    # with the default pydantic behaviour. It is not possible to support both
-    # use cases at once. Fortunately, this is not a big deal as all other pydantic
-    # methods should continue to work as expected as there is an alternative method
-    # to cast a model to a dictionary, model.dict(), which is used internally
-    # by pydantic.
-    def __iter__(self) -> Iterator[_T]:  # type: ignore
-        for page in self.iter_pages():
-            for item in page._get_page_items():
-                yield item
-
-    def iter_pages(self: SyncPageT) -> Iterator[SyncPageT]:
-        page = self
-        while True:
-            yield page
-            if page.has_next_page():
-                page = page.get_next_page()
-            else:
-                return
-
-    def get_next_page(self: SyncPageT) -> SyncPageT:
-        info = self.next_page_info()
-        if not info:
-            raise RuntimeError(
-                "No next page expected; please check `.has_next_page()` before calling `.get_next_page()`."
-            )
-
-        options = self._info_to_options(info)
-        return self._client._request_api_list(self._model, page=self.__class__, options=options)
-
-
-class AsyncPaginator(Generic[_T, AsyncPageT]):
-    def __init__(
-        self,
-        client: AsyncAPIClient,
-        options: FinalRequestOptions,
-        page_cls: Type[AsyncPageT],
-        model: Type[_T],
-    ) -> None:
-        self._model = model
-        self._client = client
-        self._options = options
-        self._page_cls = page_cls
-
-    def __await__(self) -> Generator[Any, None, AsyncPageT]:
-        return self._get_page().__await__()
-
-    async def _get_page(self) -> AsyncPageT:
-        def _parser(resp: AsyncPageT) -> AsyncPageT:
-            resp._set_private_attributes(
-                model=self._model,
-                options=self._options,
-                client=self._client,
-            )
-            return resp
-
-        self._options.post_parser = _parser
-
-        return await self._client.request(self._page_cls, self._options)
-
-    async def __aiter__(self) -> AsyncIterator[_T]:
-        # https://github.com/microsoft/pyright/issues/3464
-        page = cast(
-            AsyncPageT,
-            await self,  # type: ignore
-        )
-        async for item in page:
-            yield item
-
-
-class BaseAsyncPage(BasePage[_T], Generic[_T]):
-    _client: AsyncAPIClient = pydantic.PrivateAttr()
-
-    def _set_private_attributes(
-        self,
-        model: Type[_T],
-        client: AsyncAPIClient,
-        options: FinalRequestOptions,
-    ) -> None:
-        self._model = model
-        self._client = client
-        self._options = options
-
-    async def __aiter__(self) -> AsyncIterator[_T]:
-        async for page in self.iter_pages():
-            for item in page._get_page_items():
-                yield item
-
-    async def iter_pages(self: AsyncPageT) -> AsyncIterator[AsyncPageT]:
-        page = self
-        while True:
-            yield page
-            if page.has_next_page():
-                page = await page.get_next_page()
-            else:
-                return
-
-    async def get_next_page(self: AsyncPageT) -> AsyncPageT:
-        info = self.next_page_info()
-        if not info:
-            raise RuntimeError(
-                "No next page expected; please check `.has_next_page()` before calling `.get_next_page()`."
-            )
-
-        options = self._info_to_options(info)
-        return await self._client._request_api_list(self._model, page=self.__class__, options=options)
-
-
-_HttpxClientT = TypeVar("_HttpxClientT", bound=Union[httpx.Client, httpx.AsyncClient])
-_DefaultStreamT = TypeVar("_DefaultStreamT", bound=Union[Stream[Any], AsyncStream[Any]])
-
-
-class BaseClient(Generic[_HttpxClientT, _DefaultStreamT]):
-    _client: _HttpxClientT
-    _version: str
-    _base_url: URL
-    max_retries: int
-    timeout: Union[float, Timeout, None]
-    _limits: httpx.Limits
-    _proxies: ProxiesTypes | None
-    _transport: Transport | AsyncTransport | None
-    _strict_response_validation: bool
-    _idempotency_header: str | None
-    _default_stream_cls: type[_DefaultStreamT] | None = None
-
-    def __init__(
-        self,
-        *,
-        version: str,
-        base_url: str | URL,
-        _strict_response_validation: bool,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        timeout: float | Timeout | None = DEFAULT_TIMEOUT,
-        limits: httpx.Limits,
-        transport: Transport | AsyncTransport | None,
-        proxies: ProxiesTypes | None,
-        custom_headers: Mapping[str, str] | None = None,
-        custom_query: Mapping[str, object] | None = None,
-    ) -> None:
-        self._version = version
-        self._base_url = self._enforce_trailing_slash(URL(base_url))
-        self.max_retries = max_retries
-        self.timeout = timeout
-        self._limits = limits
-        self._proxies = proxies
-        self._transport = transport
-        self._custom_headers = custom_headers or {}
-        self._custom_query = custom_query or {}
-        self._strict_response_validation = _strict_response_validation
-        self._idempotency_header = None
-
-        if max_retries is None:  # pyright: ignore[reportUnnecessaryComparison]
-            raise TypeError(
-                "max_retries cannot be None. If you want to disable retries, pass `0`; if you want unlimited retries, pass `math.inf` or a very high number; if you want the default behavior, pass `openai.DEFAULT_MAX_RETRIES`"
-            )
-
-    def _enforce_trailing_slash(self, url: URL) -> URL:
-        if url.raw_path.endswith(b"/"):
-            return url
-        return url.copy_with(raw_path=url.raw_path + b"/")
-
-    def _make_status_error_from_response(
-        self,
-        response: httpx.Response,
-    ) -> APIStatusError:
-        if response.is_closed and not response.is_stream_consumed:
-            # We can't read the response body as it has been closed
-            # before it was read. This can happen if an event hook
-            # raises a status error.
-            body = None
-            err_msg = f"Error code: {response.status_code}"
-        else:
-            err_text = response.text.strip()
-            body = err_text
-
-            try:
-                body = json.loads(err_text)
-                err_msg = f"Error code: {response.status_code} - {body}"
-            except Exception:
-                err_msg = err_text or f"Error code: {response.status_code}"
-
-        return self._make_status_error(err_msg, body=body, response=response)
-
-    def _make_status_error(
-        self,
-        err_msg: str,
-        *,
-        body: object,
-        response: httpx.Response,
-    ) -> _exceptions.APIStatusError:
-        raise NotImplementedError()
-
-    def _remaining_retries(
-        self,
-        remaining_retries: Optional[int],
-        options: FinalRequestOptions,
-    ) -> int:
-        return remaining_retries if remaining_retries is not None else options.get_max_retries(self.max_retries)
-
-    def _build_headers(self, options: FinalRequestOptions) -> httpx.Headers:
-        custom_headers = options.headers or {}
-        headers_dict = _merge_mappings(self.default_headers, custom_headers)
-        self._validate_headers(headers_dict, custom_headers)
-
-        # headers are case-insensitive while dictionaries are not.
-        headers = httpx.Headers(headers_dict)
-
-        idempotency_header = self._idempotency_header
-        if idempotency_header and options.method.lower() != "get" and idempotency_header not in headers:
-            headers[idempotency_header] = options.idempotency_key or self._idempotency_key()
-
-        return headers
-
-    def _prepare_url(self, url: str) -> URL:
-        """
-        Merge a URL argument together with any 'base_url' on the client,
-        to create the URL used for the outgoing request.
-        """
-        # Copied from httpx's `_merge_url` method.
-        merge_url = URL(url)
-        if merge_url.is_relative_url:
-            merge_raw_path = self.base_url.raw_path + merge_url.raw_path.lstrip(b"/")
-            return self.base_url.copy_with(raw_path=merge_raw_path)
-
-        return merge_url
-
-    def _make_sse_decoder(self) -> SSEDecoder | SSEBytesDecoder:
-        return SSEDecoder()
-
-    def _build_request(
-        self,
-        options: FinalRequestOptions,
-    ) -> httpx.Request:
-        if log.isEnabledFor(logging.DEBUG):
-            log.debug("Request options: %s", model_dump(options, exclude_unset=True))
-
-        kwargs: dict[str, Any] = {}
-
-        json_data = options.json_data
-        if options.extra_json is not None:
-            if json_data is None:
-                json_data = cast(Body, options.extra_json)
-            elif is_mapping(json_data):
-                json_data = _merge_mappings(json_data, options.extra_json)
-            else:
-                raise RuntimeError(f"Unexpected JSON data type, {type(json_data)}, cannot merge with `extra_body`")
-
-        headers = self._build_headers(options)
-        params = _merge_mappings(self._custom_query, options.params)
-        content_type = headers.get("Content-Type")
-
-        # If the given Content-Type header is multipart/form-data then it
-        # has to be removed so that httpx can generate the header with
-        # additional information for us as it has to be in this form
-        # for the server to be able to correctly parse the request:
-        # multipart/form-data; boundary=---abc--
-        if content_type is not None and content_type.startswith("multipart/form-data"):
-            if "boundary" not in content_type:
-                # only remove the header if the boundary hasn't been explicitly set
-                # as the caller doesn't want httpx to come up with their own boundary
-                headers.pop("Content-Type")
-
-            # As we are now sending multipart/form-data instead of application/json
-            # we need to tell httpx to use it, https://www.python-httpx.org/advanced/#multipart-file-encoding
-            if json_data:
-                if not is_dict(json_data):
-                    raise TypeError(
-                        f"Expected query input to be a dictionary for multipart requests but got {type(json_data)} instead."
-                    )
-                kwargs["data"] = self._serialize_multipartform(json_data)
-
-        # TODO: report this error to httpx
-        return self._client.build_request(  # pyright: ignore[reportUnknownMemberType]
-            headers=headers,
-            timeout=self.timeout if isinstance(options.timeout, NotGiven) else options.timeout,
-            method=options.method,
-            url=self._prepare_url(options.url),
-            # the `Query` type that we use is incompatible with qs'
-            # `Params` type as it needs to be typed as `Mapping[str, object]`
-            # so that passing a `TypedDict` doesn't cause an error.
-            # https://github.com/microsoft/pyright/issues/3526#event-6715453066
-            params=self.qs.stringify(cast(Mapping[str, Any], params)) if params else None,
-            json=json_data,
-            files=options.files,
-            **kwargs,
-        )
-
-    def _serialize_multipartform(self, data: Mapping[object, object]) -> dict[str, object]:
-        items = self.qs.stringify_items(
-            # TODO: type ignore is required as stringify_items is well typed but we can't be
-            # well typed without heavy validation.
-            data,  # type: ignore
-            array_format="brackets",
-        )
-        serialized: dict[str, object] = {}
-        for key, value in items:
-            existing = serialized.get(key)
-
-            if not existing:
-                serialized[key] = value
-                continue
-
-            # If a value has already been set for this key then that
-            # means we're sending data like `array[]=[1, 2, 3]` and we
-            # need to tell httpx that we want to send multiple values with
-            # the same key which is done by using a list or a tuple.
-            #
-            # Note: 2d arrays should never result in the same key at both
-            # levels so it's safe to assume that if the value is a list,
-            # it was because we changed it to be a list.
-            if is_list(existing):
-                existing.append(value)
-            else:
-                serialized[key] = [existing, value]
-
-        return serialized
-
-    def _maybe_override_cast_to(self, cast_to: type[ResponseT], options: FinalRequestOptions) -> type[ResponseT]:
-        if not is_given(options.headers):
-            return cast_to
-
-        # make a copy of the headers so we don't mutate user-input
-        headers = dict(options.headers)
-
-        # we internally support defining a temporary header to override the
-        # default `cast_to` type for use with `.with_raw_response` and `.with_streaming_response`
-        # see _response.py for implementation details
-        override_cast_to = headers.pop(OVERRIDE_CAST_TO_HEADER, NOT_GIVEN)
-        if is_given(override_cast_to):
-            options.headers = headers
-            return cast(Type[ResponseT], override_cast_to)
-
-        return cast_to
-
-    def _should_stream_response_body(self, request: httpx.Request) -> bool:
-        return request.headers.get(RAW_RESPONSE_HEADER) == "stream"  # type: ignore[no-any-return]
-
-    def _process_response_data(
-        self,
-        *,
-        data: object,
-        cast_to: type[ResponseT],
-        response: httpx.Response,
-    ) -> ResponseT:
-        if data is None:
-            return cast(ResponseT, None)
-
-        if cast_to is object:
-            return cast(ResponseT, data)
-
-        try:
-            if inspect.isclass(cast_to) and issubclass(cast_to, ModelBuilderProtocol):
-                return cast(ResponseT, cast_to.build(response=response, data=data))
-
-            if self._strict_response_validation:
-                return cast(ResponseT, validate_type(type_=cast_to, value=data))
-
-            return cast(ResponseT, construct_type(type_=cast_to, value=data))
-        except pydantic.ValidationError as err:
-            raise APIResponseValidationError(response=response, body=data) from err
-
-    @property
-    def qs(self) -> Querystring:
-        return Querystring()
-
-    @property
-    def custom_auth(self) -> httpx.Auth | None:
-        return None
-
-    @property
-    def auth_headers(self) -> dict[str, str]:
-        return {}
-
-    @property
-    def default_headers(self) -> dict[str, str | Omit]:
-        return {
-            "Accept": "application/json",
-            "Content-Type": "application/json",
-            "User-Agent": self.user_agent,
-            **self.platform_headers(),
-            **self.auth_headers,
-            **self._custom_headers,
-        }
-
-    def _validate_headers(
-        self,
-        headers: Headers,  # noqa: ARG002
-        custom_headers: Headers,  # noqa: ARG002
-    ) -> None:
-        """Validate the given default headers and custom headers.
-
-        Does nothing by default.
-        """
-        return
-
-    @property
-    def user_agent(self) -> str:
-        return f"{self.__class__.__name__}/Python {self._version}"
-
-    @property
-    def base_url(self) -> URL:
-        return self._base_url
-
-    @base_url.setter
-    def base_url(self, url: URL | str) -> None:
-        self._base_url = self._enforce_trailing_slash(url if isinstance(url, URL) else URL(url))
-
-    def platform_headers(self) -> Dict[str, str]:
-        return platform_headers(self._version)
-
-    def _parse_retry_after_header(self, response_headers: Optional[httpx.Headers] = None) -> float | None:
-        """Returns a float of the number of seconds (not milliseconds) to wait after retrying, or None if unspecified.
-
-        About the Retry-After header: https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After
-        See also  https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After#syntax
-        """
-        if response_headers is None:
-            return None
-
-        # First, try the non-standard `retry-after-ms` header for milliseconds,
-        # which is more precise than integer-seconds `retry-after`
-        try:
-            retry_ms_header = response_headers.get("retry-after-ms", None)
-            return float(retry_ms_header) / 1000
-        except (TypeError, ValueError):
-            pass
-
-        # Next, try parsing `retry-after` header as seconds (allowing nonstandard floats).
-        retry_header = response_headers.get("retry-after")
-        try:
-            # note: the spec indicates that this should only ever be an integer
-            # but if someone sends a float there's no reason for us to not respect it
-            return float(retry_header)
-        except (TypeError, ValueError):
-            pass
-
-        # Last, try parsing `retry-after` as a date.
-        retry_date_tuple = email.utils.parsedate_tz(retry_header)
-        if retry_date_tuple is None:
-            return None
-
-        retry_date = email.utils.mktime_tz(retry_date_tuple)
-        return float(retry_date - time.time())
-
-    def _calculate_retry_timeout(
-        self,
-        remaining_retries: int,
-        options: FinalRequestOptions,
-        response_headers: Optional[httpx.Headers] = None,
-    ) -> float:
-        max_retries = options.get_max_retries(self.max_retries)
-
-        # If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says.
-        retry_after = self._parse_retry_after_header(response_headers)
-        if retry_after is not None and 0 < retry_after <= 60:
-            return retry_after
-
-        nb_retries = max_retries - remaining_retries
-
-        # Apply exponential backoff, but not more than the max.
-        sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
-
-        # Apply some jitter, plus-or-minus half a second.
-        jitter = 1 - 0.25 * random()
-        timeout = sleep_seconds * jitter
-        return timeout if timeout >= 0 else 0
-
-    def _should_retry(self, response: httpx.Response) -> bool:
-        # Note: this is not a standard header
-        should_retry_header = response.headers.get("x-should-retry")
-
-        # If the server explicitly says whether or not to retry, obey.
-        if should_retry_header == "true":
-            log.debug("Retrying as header `x-should-retry` is set to `true`")
-            return True
-        if should_retry_header == "false":
-            log.debug("Not retrying as header `x-should-retry` is set to `false`")
-            return False
-
-        # Retry on request timeouts.
-        if response.status_code == 408:
-            log.debug("Retrying due to status code %i", response.status_code)
-            return True
-
-        # Retry on lock timeouts.
-        if response.status_code == 409:
-            log.debug("Retrying due to status code %i", response.status_code)
-            return True
-
-        # Retry on rate limits.
-        if response.status_code == 429:
-            log.debug("Retrying due to status code %i", response.status_code)
-            return True
-
-        # Retry internal errors.
-        if response.status_code >= 500:
-            log.debug("Retrying due to status code %i", response.status_code)
-            return True
-
-        log.debug("Not retrying")
-        return False
-
-    def _idempotency_key(self) -> str:
-        return f"stainless-python-retry-{uuid.uuid4()}"
-
-
-class SyncHttpxClientWrapper(httpx.Client):
-    def __del__(self) -> None:
-        try:
-            self.close()
-        except Exception:
-            pass
-
-
-class SyncAPIClient(BaseClient[httpx.Client, Stream[Any]]):
-    _client: httpx.Client
-    _default_stream_cls: type[Stream[Any]] | None = None
-
-    def __init__(
-        self,
-        *,
-        version: str,
-        base_url: str | URL,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        transport: Transport | None = None,
-        proxies: ProxiesTypes | None = None,
-        limits: Limits | None = None,
-        http_client: httpx.Client | None = None,
-        custom_headers: Mapping[str, str] | None = None,
-        custom_query: Mapping[str, object] | None = None,
-        _strict_response_validation: bool,
-    ) -> None:
-        if limits is not None:
-            warnings.warn(
-                "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead",
-                category=DeprecationWarning,
-                stacklevel=3,
-            )
-            if http_client is not None:
-                raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`")
-        else:
-            limits = DEFAULT_CONNECTION_LIMITS
-
-        if transport is not None:
-            warnings.warn(
-                "The `transport` argument is deprecated. The `http_client` argument should be passed instead",
-                category=DeprecationWarning,
-                stacklevel=3,
-            )
-            if http_client is not None:
-                raise ValueError("The `http_client` argument is mutually exclusive with `transport`")
-
-        if proxies is not None:
-            warnings.warn(
-                "The `proxies` argument is deprecated. The `http_client` argument should be passed instead",
-                category=DeprecationWarning,
-                stacklevel=3,
-            )
-            if http_client is not None:
-                raise ValueError("The `http_client` argument is mutually exclusive with `proxies`")
-
-        if not is_given(timeout):
-            # if the user passed in a custom http client with a non-default
-            # timeout set then we use that timeout.
-            #
-            # note: there is an edge case here where the user passes in a client
-            # where they've explicitly set the timeout to match the default timeout
-            # as this check is structural, meaning that we'll think they didn't
-            # pass in a timeout and will ignore it
-            if http_client and http_client.timeout != HTTPX_DEFAULT_TIMEOUT:
-                timeout = http_client.timeout
-            else:
-                timeout = DEFAULT_TIMEOUT
-
-        if http_client is not None and not isinstance(http_client, httpx.Client):  # pyright: ignore[reportUnnecessaryIsInstance]
-            raise TypeError(
-                f"Invalid `http_client` argument; Expected an instance of `httpx.Client` but got {type(http_client)}"
-            )
-
-        super().__init__(
-            version=version,
-            limits=limits,
-            # cast to a valid type because mypy doesn't understand our type narrowing
-            timeout=cast(Timeout, timeout),
-            proxies=proxies,
-            base_url=base_url,
-            transport=transport,
-            max_retries=max_retries,
-            custom_query=custom_query,
-            custom_headers=custom_headers,
-            _strict_response_validation=_strict_response_validation,
-        )
-        self._client = http_client or SyncHttpxClientWrapper(
-            base_url=base_url,
-            # cast to a valid type because mypy doesn't understand our type narrowing
-            timeout=cast(Timeout, timeout),
-            proxies=proxies,
-            transport=transport,
-            limits=limits,
-            follow_redirects=True,
-        )
-
-    def is_closed(self) -> bool:
-        return self._client.is_closed
-
-    def close(self) -> None:
-        """Close the underlying HTTPX client.
-
-        The client will *not* be usable after this.
-        """
-        # If an error is thrown while constructing a client, self._client
-        # may not be present
-        if hasattr(self, "_client"):
-            self._client.close()
-
-    def __enter__(self: _T) -> _T:
-        return self
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        self.close()
-
-    def _prepare_options(
-        self,
-        options: FinalRequestOptions,  # noqa: ARG002
-    ) -> None:
-        """Hook for mutating the given options"""
-        return None
-
-    def _prepare_request(
-        self,
-        request: httpx.Request,  # noqa: ARG002
-    ) -> None:
-        """This method is used as a callback for mutating the `Request` object
-        after it has been constructed.
-        This is useful for cases where you want to add certain headers based off of
-        the request properties, e.g. `url`, `method` etc.
-        """
-        return None
-
-    @overload
-    def request(
-        self,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        remaining_retries: Optional[int] = None,
-        *,
-        stream: Literal[True],
-        stream_cls: Type[_StreamT],
-    ) -> _StreamT:
-        ...
-
-    @overload
-    def request(
-        self,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        remaining_retries: Optional[int] = None,
-        *,
-        stream: Literal[False] = False,
-    ) -> ResponseT:
-        ...
-
-    @overload
-    def request(
-        self,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        remaining_retries: Optional[int] = None,
-        *,
-        stream: bool = False,
-        stream_cls: Type[_StreamT] | None = None,
-    ) -> ResponseT | _StreamT:
-        ...
-
-    def request(
-        self,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        remaining_retries: Optional[int] = None,
-        *,
-        stream: bool = False,
-        stream_cls: type[_StreamT] | None = None,
-    ) -> ResponseT | _StreamT:
-        return self._request(
-            cast_to=cast_to,
-            options=options,
-            stream=stream,
-            stream_cls=stream_cls,
-            remaining_retries=remaining_retries,
-        )
-
-    def _request(
-        self,
-        *,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        remaining_retries: int | None,
-        stream: bool,
-        stream_cls: type[_StreamT] | None,
-    ) -> ResponseT | _StreamT:
-        cast_to = self._maybe_override_cast_to(cast_to, options)
-        self._prepare_options(options)
-
-        retries = self._remaining_retries(remaining_retries, options)
-        request = self._build_request(options)
-        self._prepare_request(request)
-
-        kwargs: HttpxSendArgs = {}
-        if self.custom_auth is not None:
-            kwargs["auth"] = self.custom_auth
-
-        try:
-            response = self._client.send(
-                request,
-                stream=stream or self._should_stream_response_body(request=request),
-                **kwargs,
-            )
-        except httpx.TimeoutException as err:
-            log.debug("Encountered httpx.TimeoutException", exc_info=True)
-
-            if retries > 0:
-                return self._retry_request(
-                    options,
-                    cast_to,
-                    retries,
-                    stream=stream,
-                    stream_cls=stream_cls,
-                    response_headers=None,
-                )
-
-            log.debug("Raising timeout error")
-            raise APITimeoutError(request=request) from err
-        except Exception as err:
-            log.debug("Encountered Exception", exc_info=True)
-
-            if retries > 0:
-                return self._retry_request(
-                    options,
-                    cast_to,
-                    retries,
-                    stream=stream,
-                    stream_cls=stream_cls,
-                    response_headers=None,
-                )
-
-            log.debug("Raising connection error")
-            raise APIConnectionError(request=request) from err
-
-        log.debug(
-            'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
-        )
-
-        try:
-            response.raise_for_status()
-        except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
-            log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
-            if retries > 0 and self._should_retry(err.response):
-                err.response.close()
-                return self._retry_request(
-                    options,
-                    cast_to,
-                    retries,
-                    err.response.headers,
-                    stream=stream,
-                    stream_cls=stream_cls,
-                )
-
-            # If the response is streamed then we need to explicitly read the response
-            # to completion before attempting to access the response text.
-            if not err.response.is_closed:
-                err.response.read()
-
-            log.debug("Re-raising status error")
-            raise self._make_status_error_from_response(err.response) from None
-
-        return self._process_response(
-            cast_to=cast_to,
-            options=options,
-            response=response,
-            stream=stream,
-            stream_cls=stream_cls,
-        )
-
-    def _retry_request(
-        self,
-        options: FinalRequestOptions,
-        cast_to: Type[ResponseT],
-        remaining_retries: int,
-        response_headers: httpx.Headers | None,
-        *,
-        stream: bool,
-        stream_cls: type[_StreamT] | None,
-    ) -> ResponseT | _StreamT:
-        remaining = remaining_retries - 1
-        if remaining == 1:
-            log.debug("1 retry left")
-        else:
-            log.debug("%i retries left", remaining)
-
-        timeout = self._calculate_retry_timeout(remaining, options, response_headers)
-        log.info("Retrying request to %s in %f seconds", options.url, timeout)
-
-        # In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a
-        # different thread if necessary.
-        time.sleep(timeout)
-
-        return self._request(
-            options=options,
-            cast_to=cast_to,
-            remaining_retries=remaining,
-            stream=stream,
-            stream_cls=stream_cls,
-        )
-
-    def _process_response(
-        self,
-        *,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        response: httpx.Response,
-        stream: bool,
-        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
-    ) -> ResponseT:
-        if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
-            return cast(
-                ResponseT,
-                LegacyAPIResponse(
-                    raw=response,
-                    client=self,
-                    cast_to=cast_to,
-                    stream=stream,
-                    stream_cls=stream_cls,
-                    options=options,
-                ),
-            )
-
-        origin = get_origin(cast_to) or cast_to
-
-        if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse):
-            if not issubclass(origin, APIResponse):
-                raise TypeError(f"API Response types must subclass {APIResponse}; Received {origin}")
-
-            response_cls = cast("type[BaseAPIResponse[Any]]", cast_to)
-            return cast(
-                ResponseT,
-                response_cls(
-                    raw=response,
-                    client=self,
-                    cast_to=extract_response_type(response_cls),
-                    stream=stream,
-                    stream_cls=stream_cls,
-                    options=options,
-                ),
-            )
-
-        if cast_to == httpx.Response:
-            return cast(ResponseT, response)
-
-        api_response = APIResponse(
-            raw=response,
-            client=self,
-            cast_to=cast("type[ResponseT]", cast_to),  # pyright: ignore[reportUnnecessaryCast]
-            stream=stream,
-            stream_cls=stream_cls,
-            options=options,
-        )
-        if bool(response.request.headers.get(RAW_RESPONSE_HEADER)):
-            return cast(ResponseT, api_response)
-
-        return api_response.parse()
-
-    def _request_api_list(
-        self,
-        model: Type[object],
-        page: Type[SyncPageT],
-        options: FinalRequestOptions,
-    ) -> SyncPageT:
-        def _parser(resp: SyncPageT) -> SyncPageT:
-            resp._set_private_attributes(
-                client=self,
-                model=model,
-                options=options,
-            )
-            return resp
-
-        options.post_parser = _parser
-
-        return self.request(page, options, stream=False)
-
-    @overload
-    def get(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        options: RequestOptions = {},
-        stream: Literal[False] = False,
-    ) -> ResponseT:
-        ...
-
-    @overload
-    def get(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        options: RequestOptions = {},
-        stream: Literal[True],
-        stream_cls: type[_StreamT],
-    ) -> _StreamT:
-        ...
-
-    @overload
-    def get(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        options: RequestOptions = {},
-        stream: bool,
-        stream_cls: type[_StreamT] | None = None,
-    ) -> ResponseT | _StreamT:
-        ...
-
-    def get(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        options: RequestOptions = {},
-        stream: bool = False,
-        stream_cls: type[_StreamT] | None = None,
-    ) -> ResponseT | _StreamT:
-        opts = FinalRequestOptions.construct(method="get", url=path, **options)
-        # cast is required because mypy complains about returning Any even though
-        # it understands the type variables
-        return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
-
-    @overload
-    def post(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-        files: RequestFiles | None = None,
-        stream: Literal[False] = False,
-    ) -> ResponseT:
-        ...
-
-    @overload
-    def post(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-        files: RequestFiles | None = None,
-        stream: Literal[True],
-        stream_cls: type[_StreamT],
-    ) -> _StreamT:
-        ...
-
-    @overload
-    def post(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-        files: RequestFiles | None = None,
-        stream: bool,
-        stream_cls: type[_StreamT] | None = None,
-    ) -> ResponseT | _StreamT:
-        ...
-
-    def post(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-        files: RequestFiles | None = None,
-        stream: bool = False,
-        stream_cls: type[_StreamT] | None = None,
-    ) -> ResponseT | _StreamT:
-        opts = FinalRequestOptions.construct(
-            method="post", url=path, json_data=body, files=to_httpx_files(files), **options
-        )
-        return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))
-
-    def patch(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-    ) -> ResponseT:
-        opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options)
-        return self.request(cast_to, opts)
-
-    def put(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        files: RequestFiles | None = None,
-        options: RequestOptions = {},
-    ) -> ResponseT:
-        opts = FinalRequestOptions.construct(
-            method="put", url=path, json_data=body, files=to_httpx_files(files), **options
-        )
-        return self.request(cast_to, opts)
-
-    def delete(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-    ) -> ResponseT:
-        opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, **options)
-        return self.request(cast_to, opts)
-
-    def get_api_list(
-        self,
-        path: str,
-        *,
-        model: Type[object],
-        page: Type[SyncPageT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-        method: str = "get",
-    ) -> SyncPageT:
-        opts = FinalRequestOptions.construct(method=method, url=path, json_data=body, **options)
-        return self._request_api_list(model, page, opts)
-
-
-class AsyncHttpxClientWrapper(httpx.AsyncClient):
-    def __del__(self) -> None:
-        try:
-            # TODO(someday): support non asyncio runtimes here
-            asyncio.get_running_loop().create_task(self.aclose())
-        except Exception:
-            pass
-
-
-class AsyncAPIClient(BaseClient[httpx.AsyncClient, AsyncStream[Any]]):
-    _client: httpx.AsyncClient
-    _default_stream_cls: type[AsyncStream[Any]] | None = None
-
-    def __init__(
-        self,
-        *,
-        version: str,
-        base_url: str | URL,
-        _strict_response_validation: bool,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        transport: AsyncTransport | None = None,
-        proxies: ProxiesTypes | None = None,
-        limits: Limits | None = None,
-        http_client: httpx.AsyncClient | None = None,
-        custom_headers: Mapping[str, str] | None = None,
-        custom_query: Mapping[str, object] | None = None,
-    ) -> None:
-        if limits is not None:
-            warnings.warn(
-                "The `connection_pool_limits` argument is deprecated. The `http_client` argument should be passed instead",
-                category=DeprecationWarning,
-                stacklevel=3,
-            )
-            if http_client is not None:
-                raise ValueError("The `http_client` argument is mutually exclusive with `connection_pool_limits`")
-        else:
-            limits = DEFAULT_CONNECTION_LIMITS
-
-        if transport is not None:
-            warnings.warn(
-                "The `transport` argument is deprecated. The `http_client` argument should be passed instead",
-                category=DeprecationWarning,
-                stacklevel=3,
-            )
-            if http_client is not None:
-                raise ValueError("The `http_client` argument is mutually exclusive with `transport`")
-
-        if proxies is not None:
-            warnings.warn(
-                "The `proxies` argument is deprecated. The `http_client` argument should be passed instead",
-                category=DeprecationWarning,
-                stacklevel=3,
-            )
-            if http_client is not None:
-                raise ValueError("The `http_client` argument is mutually exclusive with `proxies`")
-
-        if not is_given(timeout):
-            # if the user passed in a custom http client with a non-default
-            # timeout set then we use that timeout.
-            #
-            # note: there is an edge case here where the user passes in a client
-            # where they've explicitly set the timeout to match the default timeout
-            # as this check is structural, meaning that we'll think they didn't
-            # pass in a timeout and will ignore it
-            if http_client and http_client.timeout != HTTPX_DEFAULT_TIMEOUT:
-                timeout = http_client.timeout
-            else:
-                timeout = DEFAULT_TIMEOUT
-
-        if http_client is not None and not isinstance(http_client, httpx.AsyncClient):  # pyright: ignore[reportUnnecessaryIsInstance]
-            raise TypeError(
-                f"Invalid `http_client` argument; Expected an instance of `httpx.AsyncClient` but got {type(http_client)}"
-            )
-
-        super().__init__(
-            version=version,
-            base_url=base_url,
-            limits=limits,
-            # cast to a valid type because mypy doesn't understand our type narrowing
-            timeout=cast(Timeout, timeout),
-            proxies=proxies,
-            transport=transport,
-            max_retries=max_retries,
-            custom_query=custom_query,
-            custom_headers=custom_headers,
-            _strict_response_validation=_strict_response_validation,
-        )
-        self._client = http_client or AsyncHttpxClientWrapper(
-            base_url=base_url,
-            # cast to a valid type because mypy doesn't understand our type narrowing
-            timeout=cast(Timeout, timeout),
-            proxies=proxies,
-            transport=transport,
-            limits=limits,
-            follow_redirects=True,
-        )
-
-    def is_closed(self) -> bool:
-        return self._client.is_closed
-
-    async def close(self) -> None:
-        """Close the underlying HTTPX client.
-
-        The client will *not* be usable after this.
-        """
-        await self._client.aclose()
-
-    async def __aenter__(self: _T) -> _T:
-        return self
-
-    async def __aexit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        await self.close()
-
-    async def _prepare_options(
-        self,
-        options: FinalRequestOptions,  # noqa: ARG002
-    ) -> None:
-        """Hook for mutating the given options"""
-        return None
-
-    async def _prepare_request(
-        self,
-        request: httpx.Request,  # noqa: ARG002
-    ) -> None:
-        """This method is used as a callback for mutating the `Request` object
-        after it has been constructed.
-        This is useful for cases where you want to add certain headers based off of
-        the request properties, e.g. `url`, `method` etc.
-        """
-        return None
-
-    @overload
-    async def request(
-        self,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        *,
-        stream: Literal[False] = False,
-        remaining_retries: Optional[int] = None,
-    ) -> ResponseT:
-        ...
-
-    @overload
-    async def request(
-        self,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        *,
-        stream: Literal[True],
-        stream_cls: type[_AsyncStreamT],
-        remaining_retries: Optional[int] = None,
-    ) -> _AsyncStreamT:
-        ...
-
-    @overload
-    async def request(
-        self,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        *,
-        stream: bool,
-        stream_cls: type[_AsyncStreamT] | None = None,
-        remaining_retries: Optional[int] = None,
-    ) -> ResponseT | _AsyncStreamT:
-        ...
-
-    async def request(
-        self,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        *,
-        stream: bool = False,
-        stream_cls: type[_AsyncStreamT] | None = None,
-        remaining_retries: Optional[int] = None,
-    ) -> ResponseT | _AsyncStreamT:
-        return await self._request(
-            cast_to=cast_to,
-            options=options,
-            stream=stream,
-            stream_cls=stream_cls,
-            remaining_retries=remaining_retries,
-        )
-
-    async def _request(
-        self,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        *,
-        stream: bool,
-        stream_cls: type[_AsyncStreamT] | None,
-        remaining_retries: int | None,
-    ) -> ResponseT | _AsyncStreamT:
-        cast_to = self._maybe_override_cast_to(cast_to, options)
-        await self._prepare_options(options)
-
-        retries = self._remaining_retries(remaining_retries, options)
-        request = self._build_request(options)
-        await self._prepare_request(request)
-
-        kwargs: HttpxSendArgs = {}
-        if self.custom_auth is not None:
-            kwargs["auth"] = self.custom_auth
-
-        try:
-            response = await self._client.send(
-                request,
-                stream=stream or self._should_stream_response_body(request=request),
-                **kwargs,
-            )
-        except httpx.TimeoutException as err:
-            log.debug("Encountered httpx.TimeoutException", exc_info=True)
-
-            if retries > 0:
-                return await self._retry_request(
-                    options,
-                    cast_to,
-                    retries,
-                    stream=stream,
-                    stream_cls=stream_cls,
-                    response_headers=None,
-                )
-
-            log.debug("Raising timeout error")
-            raise APITimeoutError(request=request) from err
-        except Exception as err:
-            log.debug("Encountered Exception", exc_info=True)
-
-            if retries > 0:
-                return await self._retry_request(
-                    options,
-                    cast_to,
-                    retries,
-                    stream=stream,
-                    stream_cls=stream_cls,
-                    response_headers=None,
-                )
-
-            log.debug("Raising connection error")
-            raise APIConnectionError(request=request) from err
-
-        log.debug(
-            'HTTP Request: %s %s "%i %s"', request.method, request.url, response.status_code, response.reason_phrase
-        )
-
-        try:
-            response.raise_for_status()
-        except httpx.HTTPStatusError as err:  # thrown on 4xx and 5xx status code
-            log.debug("Encountered httpx.HTTPStatusError", exc_info=True)
-
-            if retries > 0 and self._should_retry(err.response):
-                await err.response.aclose()
-                return await self._retry_request(
-                    options,
-                    cast_to,
-                    retries,
-                    err.response.headers,
-                    stream=stream,
-                    stream_cls=stream_cls,
-                )
-
-            # If the response is streamed then we need to explicitly read the response
-            # to completion before attempting to access the response text.
-            if not err.response.is_closed:
-                await err.response.aread()
-
-            log.debug("Re-raising status error")
-            raise self._make_status_error_from_response(err.response) from None
-
-        return await self._process_response(
-            cast_to=cast_to,
-            options=options,
-            response=response,
-            stream=stream,
-            stream_cls=stream_cls,
-        )
-
-    async def _retry_request(
-        self,
-        options: FinalRequestOptions,
-        cast_to: Type[ResponseT],
-        remaining_retries: int,
-        response_headers: httpx.Headers | None,
-        *,
-        stream: bool,
-        stream_cls: type[_AsyncStreamT] | None,
-    ) -> ResponseT | _AsyncStreamT:
-        remaining = remaining_retries - 1
-        if remaining == 1:
-            log.debug("1 retry left")
-        else:
-            log.debug("%i retries left", remaining)
-
-        timeout = self._calculate_retry_timeout(remaining, options, response_headers)
-        log.info("Retrying request to %s in %f seconds", options.url, timeout)
-
-        await anyio.sleep(timeout)
-
-        return await self._request(
-            options=options,
-            cast_to=cast_to,
-            remaining_retries=remaining,
-            stream=stream,
-            stream_cls=stream_cls,
-        )
-
-    async def _process_response(
-        self,
-        *,
-        cast_to: Type[ResponseT],
-        options: FinalRequestOptions,
-        response: httpx.Response,
-        stream: bool,
-        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
-    ) -> ResponseT:
-        if response.request.headers.get(RAW_RESPONSE_HEADER) == "true":
-            return cast(
-                ResponseT,
-                LegacyAPIResponse(
-                    raw=response,
-                    client=self,
-                    cast_to=cast_to,
-                    stream=stream,
-                    stream_cls=stream_cls,
-                    options=options,
-                ),
-            )
-
-        origin = get_origin(cast_to) or cast_to
-
-        if inspect.isclass(origin) and issubclass(origin, BaseAPIResponse):
-            if not issubclass(origin, AsyncAPIResponse):
-                raise TypeError(f"API Response types must subclass {AsyncAPIResponse}; Received {origin}")
-
-            response_cls = cast("type[BaseAPIResponse[Any]]", cast_to)
-            return cast(
-                "ResponseT",
-                response_cls(
-                    raw=response,
-                    client=self,
-                    cast_to=extract_response_type(response_cls),
-                    stream=stream,
-                    stream_cls=stream_cls,
-                    options=options,
-                ),
-            )
-
-        if cast_to == httpx.Response:
-            return cast(ResponseT, response)
-
-        api_response = AsyncAPIResponse(
-            raw=response,
-            client=self,
-            cast_to=cast("type[ResponseT]", cast_to),  # pyright: ignore[reportUnnecessaryCast]
-            stream=stream,
-            stream_cls=stream_cls,
-            options=options,
-        )
-        if bool(response.request.headers.get(RAW_RESPONSE_HEADER)):
-            return cast(ResponseT, api_response)
-
-        return await api_response.parse()
-
-    def _request_api_list(
-        self,
-        model: Type[_T],
-        page: Type[AsyncPageT],
-        options: FinalRequestOptions,
-    ) -> AsyncPaginator[_T, AsyncPageT]:
-        return AsyncPaginator(client=self, options=options, page_cls=page, model=model)
-
-    @overload
-    async def get(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        options: RequestOptions = {},
-        stream: Literal[False] = False,
-    ) -> ResponseT:
-        ...
-
-    @overload
-    async def get(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        options: RequestOptions = {},
-        stream: Literal[True],
-        stream_cls: type[_AsyncStreamT],
-    ) -> _AsyncStreamT:
-        ...
-
-    @overload
-    async def get(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        options: RequestOptions = {},
-        stream: bool,
-        stream_cls: type[_AsyncStreamT] | None = None,
-    ) -> ResponseT | _AsyncStreamT:
-        ...
-
-    async def get(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        options: RequestOptions = {},
-        stream: bool = False,
-        stream_cls: type[_AsyncStreamT] | None = None,
-    ) -> ResponseT | _AsyncStreamT:
-        opts = FinalRequestOptions.construct(method="get", url=path, **options)
-        return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
-
-    @overload
-    async def post(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        files: RequestFiles | None = None,
-        options: RequestOptions = {},
-        stream: Literal[False] = False,
-    ) -> ResponseT:
-        ...
-
-    @overload
-    async def post(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        files: RequestFiles | None = None,
-        options: RequestOptions = {},
-        stream: Literal[True],
-        stream_cls: type[_AsyncStreamT],
-    ) -> _AsyncStreamT:
-        ...
-
-    @overload
-    async def post(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        files: RequestFiles | None = None,
-        options: RequestOptions = {},
-        stream: bool,
-        stream_cls: type[_AsyncStreamT] | None = None,
-    ) -> ResponseT | _AsyncStreamT:
-        ...
-
-    async def post(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        files: RequestFiles | None = None,
-        options: RequestOptions = {},
-        stream: bool = False,
-        stream_cls: type[_AsyncStreamT] | None = None,
-    ) -> ResponseT | _AsyncStreamT:
-        opts = FinalRequestOptions.construct(
-            method="post", url=path, json_data=body, files=await async_to_httpx_files(files), **options
-        )
-        return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls)
-
-    async def patch(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-    ) -> ResponseT:
-        opts = FinalRequestOptions.construct(method="patch", url=path, json_data=body, **options)
-        return await self.request(cast_to, opts)
-
-    async def put(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        files: RequestFiles | None = None,
-        options: RequestOptions = {},
-    ) -> ResponseT:
-        opts = FinalRequestOptions.construct(
-            method="put", url=path, json_data=body, files=await async_to_httpx_files(files), **options
-        )
-        return await self.request(cast_to, opts)
-
-    async def delete(
-        self,
-        path: str,
-        *,
-        cast_to: Type[ResponseT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-    ) -> ResponseT:
-        opts = FinalRequestOptions.construct(method="delete", url=path, json_data=body, **options)
-        return await self.request(cast_to, opts)
-
-    def get_api_list(
-        self,
-        path: str,
-        *,
-        model: Type[_T],
-        page: Type[AsyncPageT],
-        body: Body | None = None,
-        options: RequestOptions = {},
-        method: str = "get",
-    ) -> AsyncPaginator[_T, AsyncPageT]:
-        opts = FinalRequestOptions.construct(method=method, url=path, json_data=body, **options)
-        return self._request_api_list(model, page, opts)
-
-
-def make_request_options(
-    *,
-    query: Query | None = None,
-    extra_headers: Headers | None = None,
-    extra_query: Query | None = None,
-    extra_body: Body | None = None,
-    idempotency_key: str | None = None,
-    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    post_parser: PostParser | NotGiven = NOT_GIVEN,
-) -> RequestOptions:
-    """Create a dict of type RequestOptions without keys of NotGiven values."""
-    options: RequestOptions = {}
-    if extra_headers is not None:
-        options["headers"] = extra_headers
-
-    if extra_body is not None:
-        options["extra_json"] = cast(AnyMapping, extra_body)
-
-    if query is not None:
-        options["params"] = query
-
-    if extra_query is not None:
-        options["params"] = {**options.get("params", {}), **extra_query}
-
-    if not isinstance(timeout, NotGiven):
-        options["timeout"] = timeout
-
-    if idempotency_key is not None:
-        options["idempotency_key"] = idempotency_key
-
-    if is_given(post_parser):
-        # internal
-        options["post_parser"] = post_parser  # type: ignore
-
-    return options
-
-
-class OtherPlatform:
-    def __init__(self, name: str) -> None:
-        self.name = name
-
-    @override
-    def __str__(self) -> str:
-        return f"Other:{self.name}"
-
-
-Platform = Union[
-    OtherPlatform,
-    Literal[
-        "MacOS",
-        "Linux",
-        "Windows",
-        "FreeBSD",
-        "OpenBSD",
-        "iOS",
-        "Android",
-        "Unknown",
-    ],
-]
-
-
-def get_platform() -> Platform:
-    try:
-        system = platform.system().lower()
-        platform_name = platform.platform().lower()
-    except Exception:
-        return "Unknown"
-
-    if "iphone" in platform_name or "ipad" in platform_name:
-        # Tested using Python3IDE on an iPhone 11 and Pythonista on an iPad 7
-        # system is Darwin and platform_name is a string like:
-        # - Darwin-21.6.0-iPhone12,1-64bit
-        # - Darwin-21.6.0-iPad7,11-64bit
-        return "iOS"
-
-    if system == "darwin":
-        return "MacOS"
-
-    if system == "windows":
-        return "Windows"
-
-    if "android" in platform_name:
-        # Tested using Pydroid 3
-        # system is Linux and platform_name is a string like 'Linux-5.10.81-android12-9-00001-geba40aecb3b7-ab8534902-aarch64-with-libc'
-        return "Android"
-
-    if system == "linux":
-        # https://distro.readthedocs.io/en/latest/#distro.id
-        distro_id = distro.id()
-        if distro_id == "freebsd":
-            return "FreeBSD"
-
-        if distro_id == "openbsd":
-            return "OpenBSD"
-
-        return "Linux"
-
-    if platform_name:
-        return OtherPlatform(platform_name)
-
-    return "Unknown"
-
-
-@lru_cache(maxsize=None)
-def platform_headers(version: str) -> Dict[str, str]:
-    return {
-        "X-Stainless-Lang": "python",
-        "X-Stainless-Package-Version": version,
-        "X-Stainless-OS": str(get_platform()),
-        "X-Stainless-Arch": str(get_architecture()),
-        "X-Stainless-Runtime": get_python_runtime(),
-        "X-Stainless-Runtime-Version": get_python_version(),
-    }
-
-
-class OtherArch:
-    def __init__(self, name: str) -> None:
-        self.name = name
-
-    @override
-    def __str__(self) -> str:
-        return f"other:{self.name}"
-
-
-Arch = Union[OtherArch, Literal["x32", "x64", "arm", "arm64", "unknown"]]
-
-
-def get_python_runtime() -> str:
-    try:
-        return platform.python_implementation()
-    except Exception:
-        return "unknown"
-
-
-def get_python_version() -> str:
-    try:
-        return platform.python_version()
-    except Exception:
-        return "unknown"
-
-
-def get_architecture() -> Arch:
-    try:
-        python_bitness, _ = platform.architecture()
-        machine = platform.machine().lower()
-    except Exception:
-        return "unknown"
-
-    if machine in ("arm64", "aarch64"):
-        return "arm64"
-
-    # TODO: untested
-    if machine == "arm":
-        return "arm"
-
-    if machine == "x86_64":
-        return "x64"
-
-    # TODO: untested
-    if python_bitness == "32bit":
-        return "x32"
-
-    if machine:
-        return OtherArch(machine)
-
-    return "unknown"
-
-
-def _merge_mappings(
-    obj1: Mapping[_T_co, Union[_T, Omit]],
-    obj2: Mapping[_T_co, Union[_T, Omit]],
-) -> Dict[_T_co, _T]:
-    """Merge two mappings of the same type, removing any values that are instances of `Omit`.
-
-    In cases with duplicate keys the second mapping takes precedence.
-    """
-    merged = {**obj1, **obj2}
-    return {key: value for key, value in merged.items() if not isinstance(value, Omit)}
diff --git a/openai/_client.py b/openai/_client.py
deleted file mode 100644
index 7fe2c9af..00000000
--- a/openai/_client.py
+++ /dev/null
@@ -1,503 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import os
-from typing import Any, Union, Mapping
-from typing_extensions import Self, override
-
-import httpx
-
-from . import resources, _exceptions
-from ._qs import Querystring
-from ._types import (
-    NOT_GIVEN,
-    Omit,
-    Timeout,
-    NotGiven,
-    Transport,
-    ProxiesTypes,
-    RequestOptions,
-)
-from ._utils import (
-    is_given,
-    is_mapping,
-    get_async_library,
-)
-from ._version import __version__
-from ._streaming import Stream as Stream, AsyncStream as AsyncStream
-from ._exceptions import OpenAIError, APIStatusError
-from ._base_client import (
-    DEFAULT_MAX_RETRIES,
-    SyncAPIClient,
-    AsyncAPIClient,
-)
-
-__all__ = [
-    "Timeout",
-    "Transport",
-    "ProxiesTypes",
-    "RequestOptions",
-    "resources",
-    "OpenAI",
-    "AsyncOpenAI",
-    "Client",
-    "AsyncClient",
-]
-
-
-class OpenAI(SyncAPIClient):
-    completions: resources.Completions
-    chat: resources.Chat
-    embeddings: resources.Embeddings
-    files: resources.Files
-    images: resources.Images
-    audio: resources.Audio
-    moderations: resources.Moderations
-    models: resources.Models
-    fine_tuning: resources.FineTuning
-    beta: resources.Beta
-    with_raw_response: OpenAIWithRawResponse
-    with_streaming_response: OpenAIWithStreamedResponse
-
-    # client options
-    api_key: str
-    organization: str | None
-
-    def __init__(
-        self,
-        *,
-        api_key: str | None = None,
-        organization: str | None = None,
-        base_url: str | httpx.URL | None = None,
-        timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        # Configure a custom httpx client. See the [httpx documentation](https://www.python-httpx.org/api/#client) for more details.
-        http_client: httpx.Client | None = None,
-        # Enable or disable schema validation for data returned by the API.
-        # When enabled an error APIResponseValidationError is raised
-        # if the API responds with invalid data for the expected schema.
-        #
-        # This parameter may be removed or changed in the future.
-        # If you rely on this feature, please open a GitHub issue
-        # outlining your use-case to help us decide if it should be
-        # part of our public interface in the future.
-        _strict_response_validation: bool = False,
-    ) -> None:
-        """Construct a new synchronous openai client instance.
-
-        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
-        - `api_key` from `OPENAI_API_KEY`
-        - `organization` from `OPENAI_ORG_ID`
-        """
-        if api_key is None:
-            api_key = os.environ.get("OPENAI_API_KEY")
-        if api_key is None:
-            raise OpenAIError(
-                "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable"
-            )
-        self.api_key = api_key
-
-        if organization is None:
-            organization = os.environ.get("OPENAI_ORG_ID")
-        self.organization = organization
-
-        if base_url is None:
-            base_url = os.environ.get("OPENAI_BASE_URL")
-        if base_url is None:
-            base_url = f"https://api.openai.com/v1"
-
-        super().__init__(
-            version=__version__,
-            base_url=base_url,
-            max_retries=max_retries,
-            timeout=timeout,
-            http_client=http_client,
-            custom_headers=default_headers,
-            custom_query=default_query,
-            _strict_response_validation=_strict_response_validation,
-        )
-
-        self._default_stream_cls = Stream
-
-        self.completions = resources.Completions(self)
-        self.chat = resources.Chat(self)
-        self.embeddings = resources.Embeddings(self)
-        self.files = resources.Files(self)
-        self.images = resources.Images(self)
-        self.audio = resources.Audio(self)
-        self.moderations = resources.Moderations(self)
-        self.models = resources.Models(self)
-        self.fine_tuning = resources.FineTuning(self)
-        self.beta = resources.Beta(self)
-        self.with_raw_response = OpenAIWithRawResponse(self)
-        self.with_streaming_response = OpenAIWithStreamedResponse(self)
-
-    @property
-    @override
-    def qs(self) -> Querystring:
-        return Querystring(array_format="comma")
-
-    @property
-    @override
-    def auth_headers(self) -> dict[str, str]:
-        api_key = self.api_key
-        return {"Authorization": f"Bearer {api_key}"}
-
-    @property
-    @override
-    def default_headers(self) -> dict[str, str | Omit]:
-        return {
-            **super().default_headers,
-            "X-Stainless-Async": "false",
-            "OpenAI-Organization": self.organization if self.organization is not None else Omit(),
-            **self._custom_headers,
-        }
-
-    def copy(
-        self,
-        *,
-        api_key: str | None = None,
-        organization: str | None = None,
-        base_url: str | httpx.URL | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        http_client: httpx.Client | None = None,
-        max_retries: int | NotGiven = NOT_GIVEN,
-        default_headers: Mapping[str, str] | None = None,
-        set_default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        set_default_query: Mapping[str, object] | None = None,
-        _extra_kwargs: Mapping[str, Any] = {},
-    ) -> Self:
-        """
-        Create a new client instance re-using the same options given to the current client with optional overriding.
-        """
-        if default_headers is not None and set_default_headers is not None:
-            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
-
-        if default_query is not None and set_default_query is not None:
-            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
-
-        headers = self._custom_headers
-        if default_headers is not None:
-            headers = {**headers, **default_headers}
-        elif set_default_headers is not None:
-            headers = set_default_headers
-
-        params = self._custom_query
-        if default_query is not None:
-            params = {**params, **default_query}
-        elif set_default_query is not None:
-            params = set_default_query
-
-        http_client = http_client or self._client
-        return self.__class__(
-            api_key=api_key or self.api_key,
-            organization=organization or self.organization,
-            base_url=base_url or self.base_url,
-            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
-            http_client=http_client,
-            max_retries=max_retries if is_given(max_retries) else self.max_retries,
-            default_headers=headers,
-            default_query=params,
-            **_extra_kwargs,
-        )
-
-    # Alias for `copy` for nicer inline usage, e.g.
-    # client.with_options(timeout=10).foo.create(...)
-    with_options = copy
-
-    @override
-    def _make_status_error(
-        self,
-        err_msg: str,
-        *,
-        body: object,
-        response: httpx.Response,
-    ) -> APIStatusError:
-        data = body.get("error", body) if is_mapping(body) else body
-        if response.status_code == 400:
-            return _exceptions.BadRequestError(err_msg, response=response, body=data)
-
-        if response.status_code == 401:
-            return _exceptions.AuthenticationError(err_msg, response=response, body=data)
-
-        if response.status_code == 403:
-            return _exceptions.PermissionDeniedError(err_msg, response=response, body=data)
-
-        if response.status_code == 404:
-            return _exceptions.NotFoundError(err_msg, response=response, body=data)
-
-        if response.status_code == 409:
-            return _exceptions.ConflictError(err_msg, response=response, body=data)
-
-        if response.status_code == 422:
-            return _exceptions.UnprocessableEntityError(err_msg, response=response, body=data)
-
-        if response.status_code == 429:
-            return _exceptions.RateLimitError(err_msg, response=response, body=data)
-
-        if response.status_code >= 500:
-            return _exceptions.InternalServerError(err_msg, response=response, body=data)
-        return APIStatusError(err_msg, response=response, body=data)
-
-
-class AsyncOpenAI(AsyncAPIClient):
-    completions: resources.AsyncCompletions
-    chat: resources.AsyncChat
-    embeddings: resources.AsyncEmbeddings
-    files: resources.AsyncFiles
-    images: resources.AsyncImages
-    audio: resources.AsyncAudio
-    moderations: resources.AsyncModerations
-    models: resources.AsyncModels
-    fine_tuning: resources.AsyncFineTuning
-    beta: resources.AsyncBeta
-    with_raw_response: AsyncOpenAIWithRawResponse
-    with_streaming_response: AsyncOpenAIWithStreamedResponse
-
-    # client options
-    api_key: str
-    organization: str | None
-
-    def __init__(
-        self,
-        *,
-        api_key: str | None = None,
-        organization: str | None = None,
-        base_url: str | httpx.URL | None = None,
-        timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        # Configure a custom httpx client. See the [httpx documentation](https://www.python-httpx.org/api/#asyncclient) for more details.
-        http_client: httpx.AsyncClient | None = None,
-        # Enable or disable schema validation for data returned by the API.
-        # When enabled an error APIResponseValidationError is raised
-        # if the API responds with invalid data for the expected schema.
-        #
-        # This parameter may be removed or changed in the future.
-        # If you rely on this feature, please open a GitHub issue
-        # outlining your use-case to help us decide if it should be
-        # part of our public interface in the future.
-        _strict_response_validation: bool = False,
-    ) -> None:
-        """Construct a new async openai client instance.
-
-        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
-        - `api_key` from `OPENAI_API_KEY`
-        - `organization` from `OPENAI_ORG_ID`
-        """
-        if api_key is None:
-            api_key = os.environ.get("OPENAI_API_KEY")
-        if api_key is None:
-            raise OpenAIError(
-                "The api_key client option must be set either by passing api_key to the client or by setting the OPENAI_API_KEY environment variable"
-            )
-        self.api_key = api_key
-
-        if organization is None:
-            organization = os.environ.get("OPENAI_ORG_ID")
-        self.organization = organization
-
-        if base_url is None:
-            base_url = os.environ.get("OPENAI_BASE_URL")
-        if base_url is None:
-            base_url = f"https://api.openai.com/v1"
-
-        super().__init__(
-            version=__version__,
-            base_url=base_url,
-            max_retries=max_retries,
-            timeout=timeout,
-            http_client=http_client,
-            custom_headers=default_headers,
-            custom_query=default_query,
-            _strict_response_validation=_strict_response_validation,
-        )
-
-        self._default_stream_cls = AsyncStream
-
-        self.completions = resources.AsyncCompletions(self)
-        self.chat = resources.AsyncChat(self)
-        self.embeddings = resources.AsyncEmbeddings(self)
-        self.files = resources.AsyncFiles(self)
-        self.images = resources.AsyncImages(self)
-        self.audio = resources.AsyncAudio(self)
-        self.moderations = resources.AsyncModerations(self)
-        self.models = resources.AsyncModels(self)
-        self.fine_tuning = resources.AsyncFineTuning(self)
-        self.beta = resources.AsyncBeta(self)
-        self.with_raw_response = AsyncOpenAIWithRawResponse(self)
-        self.with_streaming_response = AsyncOpenAIWithStreamedResponse(self)
-
-    @property
-    @override
-    def qs(self) -> Querystring:
-        return Querystring(array_format="comma")
-
-    @property
-    @override
-    def auth_headers(self) -> dict[str, str]:
-        api_key = self.api_key
-        return {"Authorization": f"Bearer {api_key}"}
-
-    @property
-    @override
-    def default_headers(self) -> dict[str, str | Omit]:
-        return {
-            **super().default_headers,
-            "X-Stainless-Async": f"async:{get_async_library()}",
-            "OpenAI-Organization": self.organization if self.organization is not None else Omit(),
-            **self._custom_headers,
-        }
-
-    def copy(
-        self,
-        *,
-        api_key: str | None = None,
-        organization: str | None = None,
-        base_url: str | httpx.URL | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        http_client: httpx.AsyncClient | None = None,
-        max_retries: int | NotGiven = NOT_GIVEN,
-        default_headers: Mapping[str, str] | None = None,
-        set_default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        set_default_query: Mapping[str, object] | None = None,
-        _extra_kwargs: Mapping[str, Any] = {},
-    ) -> Self:
-        """
-        Create a new client instance re-using the same options given to the current client with optional overriding.
-        """
-        if default_headers is not None and set_default_headers is not None:
-            raise ValueError("The `default_headers` and `set_default_headers` arguments are mutually exclusive")
-
-        if default_query is not None and set_default_query is not None:
-            raise ValueError("The `default_query` and `set_default_query` arguments are mutually exclusive")
-
-        headers = self._custom_headers
-        if default_headers is not None:
-            headers = {**headers, **default_headers}
-        elif set_default_headers is not None:
-            headers = set_default_headers
-
-        params = self._custom_query
-        if default_query is not None:
-            params = {**params, **default_query}
-        elif set_default_query is not None:
-            params = set_default_query
-
-        http_client = http_client or self._client
-        return self.__class__(
-            api_key=api_key or self.api_key,
-            organization=organization or self.organization,
-            base_url=base_url or self.base_url,
-            timeout=self.timeout if isinstance(timeout, NotGiven) else timeout,
-            http_client=http_client,
-            max_retries=max_retries if is_given(max_retries) else self.max_retries,
-            default_headers=headers,
-            default_query=params,
-            **_extra_kwargs,
-        )
-
-    # Alias for `copy` for nicer inline usage, e.g.
-    # client.with_options(timeout=10).foo.create(...)
-    with_options = copy
-
-    @override
-    def _make_status_error(
-        self,
-        err_msg: str,
-        *,
-        body: object,
-        response: httpx.Response,
-    ) -> APIStatusError:
-        data = body.get("error", body) if is_mapping(body) else body
-        if response.status_code == 400:
-            return _exceptions.BadRequestError(err_msg, response=response, body=data)
-
-        if response.status_code == 401:
-            return _exceptions.AuthenticationError(err_msg, response=response, body=data)
-
-        if response.status_code == 403:
-            return _exceptions.PermissionDeniedError(err_msg, response=response, body=data)
-
-        if response.status_code == 404:
-            return _exceptions.NotFoundError(err_msg, response=response, body=data)
-
-        if response.status_code == 409:
-            return _exceptions.ConflictError(err_msg, response=response, body=data)
-
-        if response.status_code == 422:
-            return _exceptions.UnprocessableEntityError(err_msg, response=response, body=data)
-
-        if response.status_code == 429:
-            return _exceptions.RateLimitError(err_msg, response=response, body=data)
-
-        if response.status_code >= 500:
-            return _exceptions.InternalServerError(err_msg, response=response, body=data)
-        return APIStatusError(err_msg, response=response, body=data)
-
-
-class OpenAIWithRawResponse:
-    def __init__(self, client: OpenAI) -> None:
-        self.completions = resources.CompletionsWithRawResponse(client.completions)
-        self.chat = resources.ChatWithRawResponse(client.chat)
-        self.embeddings = resources.EmbeddingsWithRawResponse(client.embeddings)
-        self.files = resources.FilesWithRawResponse(client.files)
-        self.images = resources.ImagesWithRawResponse(client.images)
-        self.audio = resources.AudioWithRawResponse(client.audio)
-        self.moderations = resources.ModerationsWithRawResponse(client.moderations)
-        self.models = resources.ModelsWithRawResponse(client.models)
-        self.fine_tuning = resources.FineTuningWithRawResponse(client.fine_tuning)
-        self.beta = resources.BetaWithRawResponse(client.beta)
-
-
-class AsyncOpenAIWithRawResponse:
-    def __init__(self, client: AsyncOpenAI) -> None:
-        self.completions = resources.AsyncCompletionsWithRawResponse(client.completions)
-        self.chat = resources.AsyncChatWithRawResponse(client.chat)
-        self.embeddings = resources.AsyncEmbeddingsWithRawResponse(client.embeddings)
-        self.files = resources.AsyncFilesWithRawResponse(client.files)
-        self.images = resources.AsyncImagesWithRawResponse(client.images)
-        self.audio = resources.AsyncAudioWithRawResponse(client.audio)
-        self.moderations = resources.AsyncModerationsWithRawResponse(client.moderations)
-        self.models = resources.AsyncModelsWithRawResponse(client.models)
-        self.fine_tuning = resources.AsyncFineTuningWithRawResponse(client.fine_tuning)
-        self.beta = resources.AsyncBetaWithRawResponse(client.beta)
-
-
-class OpenAIWithStreamedResponse:
-    def __init__(self, client: OpenAI) -> None:
-        self.completions = resources.CompletionsWithStreamingResponse(client.completions)
-        self.chat = resources.ChatWithStreamingResponse(client.chat)
-        self.embeddings = resources.EmbeddingsWithStreamingResponse(client.embeddings)
-        self.files = resources.FilesWithStreamingResponse(client.files)
-        self.images = resources.ImagesWithStreamingResponse(client.images)
-        self.audio = resources.AudioWithStreamingResponse(client.audio)
-        self.moderations = resources.ModerationsWithStreamingResponse(client.moderations)
-        self.models = resources.ModelsWithStreamingResponse(client.models)
-        self.fine_tuning = resources.FineTuningWithStreamingResponse(client.fine_tuning)
-        self.beta = resources.BetaWithStreamingResponse(client.beta)
-
-
-class AsyncOpenAIWithStreamedResponse:
-    def __init__(self, client: AsyncOpenAI) -> None:
-        self.completions = resources.AsyncCompletionsWithStreamingResponse(client.completions)
-        self.chat = resources.AsyncChatWithStreamingResponse(client.chat)
-        self.embeddings = resources.AsyncEmbeddingsWithStreamingResponse(client.embeddings)
-        self.files = resources.AsyncFilesWithStreamingResponse(client.files)
-        self.images = resources.AsyncImagesWithStreamingResponse(client.images)
-        self.audio = resources.AsyncAudioWithStreamingResponse(client.audio)
-        self.moderations = resources.AsyncModerationsWithStreamingResponse(client.moderations)
-        self.models = resources.AsyncModelsWithStreamingResponse(client.models)
-        self.fine_tuning = resources.AsyncFineTuningWithStreamingResponse(client.fine_tuning)
-        self.beta = resources.AsyncBetaWithStreamingResponse(client.beta)
-
-
-Client = OpenAI
-
-AsyncClient = AsyncOpenAI
diff --git a/openai/_compat.py b/openai/_compat.py
deleted file mode 100644
index 74c7639b..00000000
--- a/openai/_compat.py
+++ /dev/null
@@ -1,222 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, cast, overload
-from datetime import date, datetime
-from typing_extensions import Self
-
-import pydantic
-from pydantic.fields import FieldInfo
-
-from ._types import StrBytesIntFloat
-
-_T = TypeVar("_T")
-_ModelT = TypeVar("_ModelT", bound=pydantic.BaseModel)
-
-# --------------- Pydantic v2 compatibility ---------------
-
-# Pyright incorrectly reports some of our functions as overriding a method when they don't
-# pyright: reportIncompatibleMethodOverride=false
-
-PYDANTIC_V2 = pydantic.VERSION.startswith("2.")
-
-# v1 re-exports
-if TYPE_CHECKING:
-
-    def parse_date(value: date | StrBytesIntFloat) -> date:  # noqa: ARG001
-        ...
-
-    def parse_datetime(value: Union[datetime, StrBytesIntFloat]) -> datetime:  # noqa: ARG001
-        ...
-
-    def get_args(t: type[Any]) -> tuple[Any, ...]:  # noqa: ARG001
-        ...
-
-    def is_union(tp: type[Any] | None) -> bool:  # noqa: ARG001
-        ...
-
-    def get_origin(t: type[Any]) -> type[Any] | None:  # noqa: ARG001
-        ...
-
-    def is_literal_type(type_: type[Any]) -> bool:  # noqa: ARG001
-        ...
-
-    def is_typeddict(type_: type[Any]) -> bool:  # noqa: ARG001
-        ...
-
-else:
-    if PYDANTIC_V2:
-        from pydantic.v1.typing import (
-            get_args as get_args,
-            is_union as is_union,
-            get_origin as get_origin,
-            is_typeddict as is_typeddict,
-            is_literal_type as is_literal_type,
-        )
-        from pydantic.v1.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime
-    else:
-        from pydantic.typing import (
-            get_args as get_args,
-            is_union as is_union,
-            get_origin as get_origin,
-            is_typeddict as is_typeddict,
-            is_literal_type as is_literal_type,
-        )
-        from pydantic.datetime_parse import parse_date as parse_date, parse_datetime as parse_datetime
-
-
-# refactored config
-if TYPE_CHECKING:
-    from pydantic import ConfigDict as ConfigDict
-else:
-    if PYDANTIC_V2:
-        from pydantic import ConfigDict
-    else:
-        # TODO: provide an error message here?
-        ConfigDict = None
-
-
-# renamed methods / properties
-def parse_obj(model: type[_ModelT], value: object) -> _ModelT:
-    if PYDANTIC_V2:
-        return model.model_validate(value)
-    else:
-        return cast(_ModelT, model.parse_obj(value))  # pyright: ignore[reportDeprecated, reportUnnecessaryCast]
-
-
-def field_is_required(field: FieldInfo) -> bool:
-    if PYDANTIC_V2:
-        return field.is_required()
-    return field.required  # type: ignore
-
-
-def field_get_default(field: FieldInfo) -> Any:
-    value = field.get_default()
-    if PYDANTIC_V2:
-        from pydantic_core import PydanticUndefined
-
-        if value == PydanticUndefined:
-            return None
-        return value
-    return value
-
-
-def field_outer_type(field: FieldInfo) -> Any:
-    if PYDANTIC_V2:
-        return field.annotation
-    return field.outer_type_  # type: ignore
-
-
-def get_model_config(model: type[pydantic.BaseModel]) -> Any:
-    if PYDANTIC_V2:
-        return model.model_config
-    return model.__config__  # type: ignore
-
-
-def get_model_fields(model: type[pydantic.BaseModel]) -> dict[str, FieldInfo]:
-    if PYDANTIC_V2:
-        return model.model_fields
-    return model.__fields__  # type: ignore
-
-
-def model_copy(model: _ModelT) -> _ModelT:
-    if PYDANTIC_V2:
-        return model.model_copy()
-    return model.copy()  # type: ignore
-
-
-def model_json(model: pydantic.BaseModel, *, indent: int | None = None) -> str:
-    if PYDANTIC_V2:
-        return model.model_dump_json(indent=indent)
-    return model.json(indent=indent)  # type: ignore
-
-
-def model_dump(
-    model: pydantic.BaseModel,
-    *,
-    exclude_unset: bool = False,
-    exclude_defaults: bool = False,
-) -> dict[str, Any]:
-    if PYDANTIC_V2:
-        return model.model_dump(
-            exclude_unset=exclude_unset,
-            exclude_defaults=exclude_defaults,
-        )
-    return cast(
-        "dict[str, Any]",
-        model.dict(  # pyright: ignore[reportDeprecated, reportUnnecessaryCast]
-            exclude_unset=exclude_unset,
-            exclude_defaults=exclude_defaults,
-        ),
-    )
-
-
-def model_parse(model: type[_ModelT], data: Any) -> _ModelT:
-    if PYDANTIC_V2:
-        return model.model_validate(data)
-    return model.parse_obj(data)  # pyright: ignore[reportDeprecated]
-
-
-# generic models
-if TYPE_CHECKING:
-
-    class GenericModel(pydantic.BaseModel):
-        ...
-
-else:
-    if PYDANTIC_V2:
-        # there no longer needs to be a distinction in v2 but
-        # we still have to create our own subclass to avoid
-        # inconsistent MRO ordering errors
-        class GenericModel(pydantic.BaseModel):
-            ...
-
-    else:
-        import pydantic.generics
-
-        class GenericModel(pydantic.generics.GenericModel, pydantic.BaseModel):
-            ...
-
-
-# cached properties
-if TYPE_CHECKING:
-    cached_property = property
-
-    # we define a separate type (copied from typeshed)
-    # that represents that `cached_property` is `set`able
-    # at runtime, which differs from `@property`.
-    #
-    # this is a separate type as editors likely special case
-    # `@property` and we don't want to cause issues just to have
-    # more helpful internal types.
-
-    class typed_cached_property(Generic[_T]):
-        func: Callable[[Any], _T]
-        attrname: str | None
-
-        def __init__(self, func: Callable[[Any], _T]) -> None:
-            ...
-
-        @overload
-        def __get__(self, instance: None, owner: type[Any] | None = None) -> Self:
-            ...
-
-        @overload
-        def __get__(self, instance: object, owner: type[Any] | None = None) -> _T:
-            ...
-
-        def __get__(self, instance: object, owner: type[Any] | None = None) -> _T | Self:
-            raise NotImplementedError()
-
-        def __set_name__(self, owner: type[Any], name: str) -> None:
-            ...
-
-        # __set__ is not defined at runtime, but @cached_property is designed to be settable
-        def __set__(self, instance: object, value: _T) -> None:
-            ...
-else:
-    try:
-        from functools import cached_property as cached_property
-    except ImportError:
-        from cached_property import cached_property as cached_property
-
-    typed_cached_property = cached_property
diff --git a/openai/_constants.py b/openai/_constants.py
deleted file mode 100644
index 3f82bed0..00000000
--- a/openai/_constants.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-import httpx
-
-RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response"
-OVERRIDE_CAST_TO_HEADER = "____stainless_override_cast_to"
-
-# default timeout is 10 minutes
-DEFAULT_TIMEOUT = httpx.Timeout(timeout=600.0, connect=5.0)
-DEFAULT_MAX_RETRIES = 2
-DEFAULT_CONNECTION_LIMITS = httpx.Limits(max_connections=1000, max_keepalive_connections=100)
-
-INITIAL_RETRY_DELAY = 0.5
-MAX_RETRY_DELAY = 8.0
diff --git a/openai/_exceptions.py b/openai/_exceptions.py
deleted file mode 100644
index 074752c8..00000000
--- a/openai/_exceptions.py
+++ /dev/null
@@ -1,125 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Any, Optional, cast
-from typing_extensions import Literal
-
-import httpx
-
-from ._utils import is_dict
-from ._models import construct_type
-
-__all__ = [
-    "BadRequestError",
-    "AuthenticationError",
-    "PermissionDeniedError",
-    "NotFoundError",
-    "ConflictError",
-    "UnprocessableEntityError",
-    "RateLimitError",
-    "InternalServerError",
-]
-
-
-class OpenAIError(Exception):
-    pass
-
-
-class APIError(OpenAIError):
-    message: str
-    request: httpx.Request
-
-    body: object | None
-    """The API response body.
-
-    If the API responded with a valid JSON structure then this property will be the
-    decoded result.
-
-    If it isn't a valid JSON structure then this will be the raw response.
-
-    If there was no response associated with this error then it will be `None`.
-    """
-
-    code: Optional[str] = None
-    param: Optional[str] = None
-    type: Optional[str]
-
-    def __init__(self, message: str, request: httpx.Request, *, body: object | None) -> None:
-        super().__init__(message)
-        self.request = request
-        self.message = message
-        self.body = body
-
-        if is_dict(body):
-            self.code = cast(Any, construct_type(type_=Optional[str], value=body.get("code")))
-            self.param = cast(Any, construct_type(type_=Optional[str], value=body.get("param")))
-            self.type = cast(Any, construct_type(type_=str, value=body.get("type")))
-        else:
-            self.code = None
-            self.param = None
-            self.type = None
-
-
-class APIResponseValidationError(APIError):
-    response: httpx.Response
-    status_code: int
-
-    def __init__(self, response: httpx.Response, body: object | None, *, message: str | None = None) -> None:
-        super().__init__(message or "Data returned by API invalid for expected schema.", response.request, body=body)
-        self.response = response
-        self.status_code = response.status_code
-
-
-class APIStatusError(APIError):
-    """Raised when an API response has a status code of 4xx or 5xx."""
-
-    response: httpx.Response
-    status_code: int
-
-    def __init__(self, message: str, *, response: httpx.Response, body: object | None) -> None:
-        super().__init__(message, response.request, body=body)
-        self.response = response
-        self.status_code = response.status_code
-
-
-class APIConnectionError(APIError):
-    def __init__(self, *, message: str = "Connection error.", request: httpx.Request) -> None:
-        super().__init__(message, request, body=None)
-
-
-class APITimeoutError(APIConnectionError):
-    def __init__(self, request: httpx.Request) -> None:
-        super().__init__(message="Request timed out.", request=request)
-
-
-class BadRequestError(APIStatusError):
-    status_code: Literal[400] = 400  # pyright: ignore[reportIncompatibleVariableOverride]
-
-
-class AuthenticationError(APIStatusError):
-    status_code: Literal[401] = 401  # pyright: ignore[reportIncompatibleVariableOverride]
-
-
-class PermissionDeniedError(APIStatusError):
-    status_code: Literal[403] = 403  # pyright: ignore[reportIncompatibleVariableOverride]
-
-
-class NotFoundError(APIStatusError):
-    status_code: Literal[404] = 404  # pyright: ignore[reportIncompatibleVariableOverride]
-
-
-class ConflictError(APIStatusError):
-    status_code: Literal[409] = 409  # pyright: ignore[reportIncompatibleVariableOverride]
-
-
-class UnprocessableEntityError(APIStatusError):
-    status_code: Literal[422] = 422  # pyright: ignore[reportIncompatibleVariableOverride]
-
-
-class RateLimitError(APIStatusError):
-    status_code: Literal[429] = 429  # pyright: ignore[reportIncompatibleVariableOverride]
-
-
-class InternalServerError(APIStatusError):
-    pass
diff --git a/openai/_extras/__init__.py b/openai/_extras/__init__.py
deleted file mode 100644
index 864dac41..00000000
--- a/openai/_extras/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from .numpy_proxy import numpy as numpy, has_numpy as has_numpy
-from .pandas_proxy import pandas as pandas
diff --git a/openai/_extras/_common.py b/openai/_extras/_common.py
deleted file mode 100644
index 6e71720e..00000000
--- a/openai/_extras/_common.py
+++ /dev/null
@@ -1,21 +0,0 @@
-from .._exceptions import OpenAIError
-
-INSTRUCTIONS = """
-
-OpenAI error:
-
-    missing `{library}`
-
-This feature requires additional dependencies:
-
-    $ pip install openai[{extra}]
-
-"""
-
-
-def format_instructions(*, library: str, extra: str) -> str:
-    return INSTRUCTIONS.format(library=library, extra=extra)
-
-
-class MissingDependencyError(OpenAIError):
-    pass
diff --git a/openai/_extras/numpy_proxy.py b/openai/_extras/numpy_proxy.py
deleted file mode 100644
index 27880bf1..00000000
--- a/openai/_extras/numpy_proxy.py
+++ /dev/null
@@ -1,37 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any
-from typing_extensions import override
-
-from .._utils import LazyProxy
-from ._common import MissingDependencyError, format_instructions
-
-if TYPE_CHECKING:
-    import numpy as numpy
-
-
-NUMPY_INSTRUCTIONS = format_instructions(library="numpy", extra="datalib")
-
-
-class NumpyProxy(LazyProxy[Any]):
-    @override
-    def __load__(self) -> Any:
-        try:
-            import numpy
-        except ImportError as err:
-            raise MissingDependencyError(NUMPY_INSTRUCTIONS) from err
-
-        return numpy
-
-
-if not TYPE_CHECKING:
-    numpy = NumpyProxy()
-
-
-def has_numpy() -> bool:
-    try:
-        import numpy  # noqa: F401  # pyright: ignore[reportUnusedImport]
-    except ImportError:
-        return False
-
-    return True
diff --git a/openai/_extras/pandas_proxy.py b/openai/_extras/pandas_proxy.py
deleted file mode 100644
index 686377ba..00000000
--- a/openai/_extras/pandas_proxy.py
+++ /dev/null
@@ -1,28 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any
-from typing_extensions import override
-
-from .._utils import LazyProxy
-from ._common import MissingDependencyError, format_instructions
-
-if TYPE_CHECKING:
-    import pandas as pandas
-
-
-PANDAS_INSTRUCTIONS = format_instructions(library="pandas", extra="datalib")
-
-
-class PandasProxy(LazyProxy[Any]):
-    @override
-    def __load__(self) -> Any:
-        try:
-            import pandas
-        except ImportError as err:
-            raise MissingDependencyError(PANDAS_INSTRUCTIONS) from err
-
-        return pandas
-
-
-if not TYPE_CHECKING:
-    pandas = PandasProxy()
diff --git a/openai/_files.py b/openai/_files.py
deleted file mode 100644
index ad7b668b..00000000
--- a/openai/_files.py
+++ /dev/null
@@ -1,127 +0,0 @@
-from __future__ import annotations
-
-import io
-import os
-import pathlib
-from typing import overload
-from typing_extensions import TypeGuard
-
-import anyio
-
-from ._types import (
-    FileTypes,
-    FileContent,
-    RequestFiles,
-    HttpxFileTypes,
-    Base64FileInput,
-    HttpxFileContent,
-    HttpxRequestFiles,
-)
-from ._utils import is_tuple_t, is_mapping_t, is_sequence_t
-
-
-def is_base64_file_input(obj: object) -> TypeGuard[Base64FileInput]:
-    return isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike)
-
-
-def is_file_content(obj: object) -> TypeGuard[FileContent]:
-    return (
-        isinstance(obj, bytes) or isinstance(obj, tuple) or isinstance(obj, io.IOBase) or isinstance(obj, os.PathLike)
-    )
-
-
-def assert_is_file_content(obj: object, *, key: str | None = None) -> None:
-    if not is_file_content(obj):
-        prefix = f"Expected entry at `{key}`" if key is not None else f"Expected file input `{obj!r}`"
-        raise RuntimeError(
-            f"{prefix} to be bytes, an io.IOBase instance, PathLike or a tuple but received {type(obj)} instead. See https://github.com/openai/openai-python/tree/main#file-uploads"
-        ) from None
-
-
-@overload
-def to_httpx_files(files: None) -> None:
-    ...
-
-
-@overload
-def to_httpx_files(files: RequestFiles) -> HttpxRequestFiles:
-    ...
-
-
-def to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None:
-    if files is None:
-        return None
-
-    if is_mapping_t(files):
-        files = {key: _transform_file(file) for key, file in files.items()}
-    elif is_sequence_t(files):
-        files = [(key, _transform_file(file)) for key, file in files]
-    else:
-        raise TypeError(f"Unexpected file type input {type(files)}, expected mapping or sequence")
-
-    return files
-
-
-def _transform_file(file: FileTypes) -> HttpxFileTypes:
-    if is_file_content(file):
-        if isinstance(file, os.PathLike):
-            path = pathlib.Path(file)
-            return (path.name, path.read_bytes())
-
-        return file
-
-    if is_tuple_t(file):
-        return (file[0], _read_file_content(file[1]), *file[2:])
-
-    raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple")
-
-
-def _read_file_content(file: FileContent) -> HttpxFileContent:
-    if isinstance(file, os.PathLike):
-        return pathlib.Path(file).read_bytes()
-    return file
-
-
-@overload
-async def async_to_httpx_files(files: None) -> None:
-    ...
-
-
-@overload
-async def async_to_httpx_files(files: RequestFiles) -> HttpxRequestFiles:
-    ...
-
-
-async def async_to_httpx_files(files: RequestFiles | None) -> HttpxRequestFiles | None:
-    if files is None:
-        return None
-
-    if is_mapping_t(files):
-        files = {key: await _async_transform_file(file) for key, file in files.items()}
-    elif is_sequence_t(files):
-        files = [(key, await _async_transform_file(file)) for key, file in files]
-    else:
-        raise TypeError("Unexpected file type input {type(files)}, expected mapping or sequence")
-
-    return files
-
-
-async def _async_transform_file(file: FileTypes) -> HttpxFileTypes:
-    if is_file_content(file):
-        if isinstance(file, os.PathLike):
-            path = anyio.Path(file)
-            return (path.name, await path.read_bytes())
-
-        return file
-
-    if is_tuple_t(file):
-        return (file[0], await _async_read_file_content(file[1]), *file[2:])
-
-    raise TypeError(f"Expected file types input to be a FileContent type or to be a tuple")
-
-
-async def _async_read_file_content(file: FileContent) -> HttpxFileContent:
-    if isinstance(file, os.PathLike):
-        return await anyio.Path(file).read_bytes()
-
-    return file
diff --git a/openai/_legacy_response.py b/openai/_legacy_response.py
deleted file mode 100644
index 4585cd74..00000000
--- a/openai/_legacy_response.py
+++ /dev/null
@@ -1,456 +0,0 @@
-from __future__ import annotations
-
-import os
-import inspect
-import logging
-import datetime
-import functools
-from typing import TYPE_CHECKING, Any, Union, Generic, TypeVar, Callable, Iterator, AsyncIterator, cast, overload
-from typing_extensions import Awaitable, ParamSpec, override, deprecated, get_origin
-
-import anyio
-import httpx
-import pydantic
-
-from ._types import NoneType
-from ._utils import is_given, extract_type_arg, is_annotated_type
-from ._models import BaseModel, is_basemodel
-from ._constants import RAW_RESPONSE_HEADER
-from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type
-from ._exceptions import APIResponseValidationError
-
-if TYPE_CHECKING:
-    from ._models import FinalRequestOptions
-    from ._base_client import BaseClient
-
-
-P = ParamSpec("P")
-R = TypeVar("R")
-_T = TypeVar("_T")
-
-log: logging.Logger = logging.getLogger(__name__)
-
-
-class LegacyAPIResponse(Generic[R]):
-    """This is a legacy class as it will be replaced by `APIResponse`
-    and `AsyncAPIResponse` in the `_response.py` file in the next major
-    release.
-
-    For the sync client this will mostly be the same with the exception
-    of `content` & `text` will be methods instead of properties. In the
-    async client, all methods will be async.
-
-    A migration script will be provided & the migration in general should
-    be smooth.
-    """
-
-    _cast_to: type[R]
-    _client: BaseClient[Any, Any]
-    _parsed_by_type: dict[type[Any], Any]
-    _stream: bool
-    _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None
-    _options: FinalRequestOptions
-
-    http_response: httpx.Response
-
-    def __init__(
-        self,
-        *,
-        raw: httpx.Response,
-        cast_to: type[R],
-        client: BaseClient[Any, Any],
-        stream: bool,
-        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
-        options: FinalRequestOptions,
-    ) -> None:
-        self._cast_to = cast_to
-        self._client = client
-        self._parsed_by_type = {}
-        self._stream = stream
-        self._stream_cls = stream_cls
-        self._options = options
-        self.http_response = raw
-
-    @overload
-    def parse(self, *, to: type[_T]) -> _T:
-        ...
-
-    @overload
-    def parse(self) -> R:
-        ...
-
-    def parse(self, *, to: type[_T] | None = None) -> R | _T:
-        """Returns the rich python representation of this response's data.
-
-        NOTE: For the async client: this will become a coroutine in the next major version.
-
-        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
-
-        You can customise the type that the response is parsed into through
-        the `to` argument, e.g.
-
-        ```py
-        from openai import BaseModel
-
-
-        class MyModel(BaseModel):
-            foo: str
-
-
-        obj = response.parse(to=MyModel)
-        print(obj.foo)
-        ```
-
-        We support parsing:
-          - `BaseModel`
-          - `dict`
-          - `list`
-          - `Union`
-          - `str`
-          - `int`
-          - `float`
-          - `httpx.Response`
-        """
-        cache_key = to if to is not None else self._cast_to
-        cached = self._parsed_by_type.get(cache_key)
-        if cached is not None:
-            return cached  # type: ignore[no-any-return]
-
-        parsed = self._parse(to=to)
-        if is_given(self._options.post_parser):
-            parsed = self._options.post_parser(parsed)
-
-        self._parsed_by_type[cache_key] = parsed
-        return parsed
-
-    @property
-    def headers(self) -> httpx.Headers:
-        return self.http_response.headers
-
-    @property
-    def http_request(self) -> httpx.Request:
-        return self.http_response.request
-
-    @property
-    def status_code(self) -> int:
-        return self.http_response.status_code
-
-    @property
-    def url(self) -> httpx.URL:
-        return self.http_response.url
-
-    @property
-    def method(self) -> str:
-        return self.http_request.method
-
-    @property
-    def content(self) -> bytes:
-        """Return the binary response content.
-
-        NOTE: this will be removed in favour of `.read()` in the
-        next major version.
-        """
-        return self.http_response.content
-
-    @property
-    def text(self) -> str:
-        """Return the decoded response content.
-
-        NOTE: this will be turned into a method in the next major version.
-        """
-        return self.http_response.text
-
-    @property
-    def http_version(self) -> str:
-        return self.http_response.http_version
-
-    @property
-    def is_closed(self) -> bool:
-        return self.http_response.is_closed
-
-    @property
-    def elapsed(self) -> datetime.timedelta:
-        """The time taken for the complete request/response cycle to complete."""
-        return self.http_response.elapsed
-
-    def _parse(self, *, to: type[_T] | None = None) -> R | _T:
-        # unwrap `Annotated[T, ...]` -> `T`
-        if to and is_annotated_type(to):
-            to = extract_type_arg(to, 0)
-
-        if self._stream:
-            if to:
-                if not is_stream_class_type(to):
-                    raise TypeError(f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}")
-
-                return cast(
-                    _T,
-                    to(
-                        cast_to=extract_stream_chunk_type(
-                            to,
-                            failure_message="Expected custom stream type to be passed with a type argument, e.g. Stream[ChunkType]",
-                        ),
-                        response=self.http_response,
-                        client=cast(Any, self._client),
-                    ),
-                )
-
-            if self._stream_cls:
-                return cast(
-                    R,
-                    self._stream_cls(
-                        cast_to=extract_stream_chunk_type(self._stream_cls),
-                        response=self.http_response,
-                        client=cast(Any, self._client),
-                    ),
-                )
-
-            stream_cls = cast("type[Stream[Any]] | type[AsyncStream[Any]] | None", self._client._default_stream_cls)
-            if stream_cls is None:
-                raise MissingStreamClassError()
-
-            return cast(
-                R,
-                stream_cls(
-                    cast_to=self._cast_to,
-                    response=self.http_response,
-                    client=cast(Any, self._client),
-                ),
-            )
-
-        cast_to = to if to is not None else self._cast_to
-
-        # unwrap `Annotated[T, ...]` -> `T`
-        if is_annotated_type(cast_to):
-            cast_to = extract_type_arg(cast_to, 0)
-
-        if cast_to is NoneType:
-            return cast(R, None)
-
-        response = self.http_response
-        if cast_to == str:
-            return cast(R, response.text)
-
-        if cast_to == int:
-            return cast(R, int(response.text))
-
-        if cast_to == float:
-            return cast(R, float(response.text))
-
-        origin = get_origin(cast_to) or cast_to
-
-        if inspect.isclass(origin) and issubclass(origin, HttpxBinaryResponseContent):
-            return cast(R, cast_to(response))  # type: ignore
-
-        if origin == LegacyAPIResponse:
-            raise RuntimeError("Unexpected state - cast_to is `APIResponse`")
-
-        if inspect.isclass(origin) and issubclass(origin, httpx.Response):
-            # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response
-            # and pass that class to our request functions. We cannot change the variance to be either
-            # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct
-            # the response class ourselves but that is something that should be supported directly in httpx
-            # as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
-            if cast_to != httpx.Response:
-                raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`")
-            return cast(R, response)
-
-        if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel):
-            raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`")
-
-        if (
-            cast_to is not object
-            and not origin is list
-            and not origin is dict
-            and not origin is Union
-            and not issubclass(origin, BaseModel)
-        ):
-            raise RuntimeError(
-                f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}."
-            )
-
-        # split is required to handle cases where additional information is included
-        # in the response, e.g. application/json; charset=utf-8
-        content_type, *_ = response.headers.get("content-type", "*").split(";")
-        if content_type != "application/json":
-            if is_basemodel(cast_to):
-                try:
-                    data = response.json()
-                except Exception as exc:
-                    log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc)
-                else:
-                    return self._client._process_response_data(
-                        data=data,
-                        cast_to=cast_to,  # type: ignore
-                        response=response,
-                    )
-
-            if self._client._strict_response_validation:
-                raise APIResponseValidationError(
-                    response=response,
-                    message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.",
-                    body=response.text,
-                )
-
-            # If the API responds with content that isn't JSON then we just return
-            # the (decoded) text without performing any parsing so that you can still
-            # handle the response however you need to.
-            return response.text  # type: ignore
-
-        data = response.json()
-
-        return self._client._process_response_data(
-            data=data,
-            cast_to=cast_to,  # type: ignore
-            response=response,
-        )
-
-    @override
-    def __repr__(self) -> str:
-        return f"<APIResponse [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>"
-
-
-class MissingStreamClassError(TypeError):
-    def __init__(self) -> None:
-        super().__init__(
-            "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference",
-        )
-
-
-def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, LegacyAPIResponse[R]]:
-    """Higher order function that takes one of our bound API methods and wraps it
-    to support returning the raw `APIResponse` object directly.
-    """
-
-    @functools.wraps(func)
-    def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]:
-        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "true"
-
-        kwargs["extra_headers"] = extra_headers
-
-        return cast(LegacyAPIResponse[R], func(*args, **kwargs))
-
-    return wrapped
-
-
-def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[LegacyAPIResponse[R]]]:
-    """Higher order function that takes one of our bound API methods and wraps it
-    to support returning the raw `APIResponse` object directly.
-    """
-
-    @functools.wraps(func)
-    async def wrapped(*args: P.args, **kwargs: P.kwargs) -> LegacyAPIResponse[R]:
-        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "true"
-
-        kwargs["extra_headers"] = extra_headers
-
-        return cast(LegacyAPIResponse[R], await func(*args, **kwargs))
-
-    return wrapped
-
-
-class HttpxBinaryResponseContent:
-    response: httpx.Response
-
-    def __init__(self, response: httpx.Response) -> None:
-        self.response = response
-
-    @property
-    def content(self) -> bytes:
-        return self.response.content
-
-    @property
-    def text(self) -> str:
-        return self.response.text
-
-    @property
-    def encoding(self) -> str | None:
-        return self.response.encoding
-
-    @property
-    def charset_encoding(self) -> str | None:
-        return self.response.charset_encoding
-
-    def json(self, **kwargs: Any) -> Any:
-        return self.response.json(**kwargs)
-
-    def read(self) -> bytes:
-        return self.response.read()
-
-    def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
-        return self.response.iter_bytes(chunk_size)
-
-    def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
-        return self.response.iter_text(chunk_size)
-
-    def iter_lines(self) -> Iterator[str]:
-        return self.response.iter_lines()
-
-    def iter_raw(self, chunk_size: int | None = None) -> Iterator[bytes]:
-        return self.response.iter_raw(chunk_size)
-
-    def write_to_file(
-        self,
-        file: str | os.PathLike[str],
-    ) -> None:
-        """Write the output to the given file.
-
-        Accepts a filename or any path-like object, e.g. pathlib.Path
-
-        Note: if you want to stream the data to the file instead of writing
-        all at once then you should use `.with_streaming_response` when making
-        the API request, e.g. `client.with_streaming_response.foo().stream_to_file('my_filename.txt')`
-        """
-        with open(file, mode="wb") as f:
-            for data in self.response.iter_bytes():
-                f.write(data)
-
-    @deprecated(
-        "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead"
-    )
-    def stream_to_file(
-        self,
-        file: str | os.PathLike[str],
-        *,
-        chunk_size: int | None = None,
-    ) -> None:
-        with open(file, mode="wb") as f:
-            for data in self.response.iter_bytes(chunk_size):
-                f.write(data)
-
-    def close(self) -> None:
-        return self.response.close()
-
-    async def aread(self) -> bytes:
-        return await self.response.aread()
-
-    async def aiter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
-        return self.response.aiter_bytes(chunk_size)
-
-    async def aiter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
-        return self.response.aiter_text(chunk_size)
-
-    async def aiter_lines(self) -> AsyncIterator[str]:
-        return self.response.aiter_lines()
-
-    async def aiter_raw(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
-        return self.response.aiter_raw(chunk_size)
-
-    @deprecated(
-        "Due to a bug, this method doesn't actually stream the response content, `.with_streaming_response.method()` should be used instead"
-    )
-    async def astream_to_file(
-        self,
-        file: str | os.PathLike[str],
-        *,
-        chunk_size: int | None = None,
-    ) -> None:
-        path = anyio.Path(file)
-        async with await path.open(mode="wb") as f:
-            async for data in self.response.aiter_bytes(chunk_size):
-                await f.write(data)
-
-    async def aclose(self) -> None:
-        return await self.response.aclose()
diff --git a/openai/_models.py b/openai/_models.py
deleted file mode 100644
index 0f001150..00000000
--- a/openai/_models.py
+++ /dev/null
@@ -1,654 +0,0 @@
-from __future__ import annotations
-
-import os
-import inspect
-from typing import TYPE_CHECKING, Any, Type, Union, Generic, TypeVar, Callable, cast
-from datetime import date, datetime
-from functools import lru_cache
-from typing_extensions import (
-    Unpack,
-    Literal,
-    ClassVar,
-    Protocol,
-    Required,
-    TypedDict,
-    TypeGuard,
-    final,
-    override,
-    runtime_checkable,
-)
-
-import pydantic
-import pydantic.generics
-from pydantic.fields import FieldInfo
-
-from ._types import (
-    Body,
-    IncEx,
-    Query,
-    ModelT,
-    Headers,
-    Timeout,
-    NotGiven,
-    AnyMapping,
-    HttpxRequestFiles,
-)
-from ._utils import (
-    PropertyInfo,
-    is_list,
-    is_given,
-    is_mapping,
-    parse_date,
-    coerce_boolean,
-    parse_datetime,
-    strip_not_given,
-    extract_type_arg,
-    is_annotated_type,
-    strip_annotated_type,
-)
-from ._compat import (
-    PYDANTIC_V2,
-    ConfigDict,
-    GenericModel as BaseGenericModel,
-    get_args,
-    is_union,
-    parse_obj,
-    get_origin,
-    is_literal_type,
-    get_model_config,
-    get_model_fields,
-    field_get_default,
-)
-from ._constants import RAW_RESPONSE_HEADER
-
-if TYPE_CHECKING:
-    from pydantic_core.core_schema import ModelField, ModelFieldsSchema
-
-__all__ = ["BaseModel", "GenericModel"]
-
-_T = TypeVar("_T")
-
-
-@runtime_checkable
-class _ConfigProtocol(Protocol):
-    allow_population_by_field_name: bool
-
-
-class BaseModel(pydantic.BaseModel):
-    if PYDANTIC_V2:
-        model_config: ClassVar[ConfigDict] = ConfigDict(
-            extra="allow", defer_build=coerce_boolean(os.environ.get("DEFER_PYDANTIC_BUILD", "true"))
-        )
-    else:
-
-        @property
-        @override
-        def model_fields_set(self) -> set[str]:
-            # a forwards-compat shim for pydantic v2
-            return self.__fields_set__  # type: ignore
-
-        class Config(pydantic.BaseConfig):  # pyright: ignore[reportDeprecated]
-            extra: Any = pydantic.Extra.allow  # type: ignore
-
-    @override
-    def __str__(self) -> str:
-        # mypy complains about an invalid self arg
-        return f'{self.__repr_name__()}({self.__repr_str__(", ")})'  # type: ignore[misc]
-
-    # Override the 'construct' method in a way that supports recursive parsing without validation.
-    # Based on https://github.com/samuelcolvin/pydantic/issues/1168#issuecomment-817742836.
-    @classmethod
-    @override
-    def construct(
-        cls: Type[ModelT],
-        _fields_set: set[str] | None = None,
-        **values: object,
-    ) -> ModelT:
-        m = cls.__new__(cls)
-        fields_values: dict[str, object] = {}
-
-        config = get_model_config(cls)
-        populate_by_name = (
-            config.allow_population_by_field_name
-            if isinstance(config, _ConfigProtocol)
-            else config.get("populate_by_name")
-        )
-
-        if _fields_set is None:
-            _fields_set = set()
-
-        model_fields = get_model_fields(cls)
-        for name, field in model_fields.items():
-            key = field.alias
-            if key is None or (key not in values and populate_by_name):
-                key = name
-
-            if key in values:
-                fields_values[name] = _construct_field(value=values[key], field=field, key=key)
-                _fields_set.add(name)
-            else:
-                fields_values[name] = field_get_default(field)
-
-        _extra = {}
-        for key, value in values.items():
-            if key not in model_fields:
-                if PYDANTIC_V2:
-                    _extra[key] = value
-                else:
-                    _fields_set.add(key)
-                    fields_values[key] = value
-
-        object.__setattr__(m, "__dict__", fields_values)
-
-        if PYDANTIC_V2:
-            # these properties are copied from Pydantic's `model_construct()` method
-            object.__setattr__(m, "__pydantic_private__", None)
-            object.__setattr__(m, "__pydantic_extra__", _extra)
-            object.__setattr__(m, "__pydantic_fields_set__", _fields_set)
-        else:
-            # init_private_attributes() does not exist in v2
-            m._init_private_attributes()  # type: ignore
-
-            # copied from Pydantic v1's `construct()` method
-            object.__setattr__(m, "__fields_set__", _fields_set)
-
-        return m
-
-    if not TYPE_CHECKING:
-        # type checkers incorrectly complain about this assignment
-        # because the type signatures are technically different
-        # although not in practice
-        model_construct = construct
-
-    if not PYDANTIC_V2:
-        # we define aliases for some of the new pydantic v2 methods so
-        # that we can just document these methods without having to specify
-        # a specific pydantic version as some users may not know which
-        # pydantic version they are currently using
-
-        @override
-        def model_dump(
-            self,
-            *,
-            mode: Literal["json", "python"] | str = "python",
-            include: IncEx = None,
-            exclude: IncEx = None,
-            by_alias: bool = False,
-            exclude_unset: bool = False,
-            exclude_defaults: bool = False,
-            exclude_none: bool = False,
-            round_trip: bool = False,
-            warnings: bool = True,
-        ) -> dict[str, Any]:
-            """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump
-
-            Generate a dictionary representation of the model, optionally specifying which fields to include or exclude.
-
-            Args:
-                mode: The mode in which `to_python` should run.
-                    If mode is 'json', the dictionary will only contain JSON serializable types.
-                    If mode is 'python', the dictionary may contain any Python objects.
-                include: A list of fields to include in the output.
-                exclude: A list of fields to exclude from the output.
-                by_alias: Whether to use the field's alias in the dictionary key if defined.
-                exclude_unset: Whether to exclude fields that are unset or None from the output.
-                exclude_defaults: Whether to exclude fields that are set to their default value from the output.
-                exclude_none: Whether to exclude fields that have a value of `None` from the output.
-                round_trip: Whether to enable serialization and deserialization round-trip support.
-                warnings: Whether to log warnings when invalid fields are encountered.
-
-            Returns:
-                A dictionary representation of the model.
-            """
-            if mode != "python":
-                raise ValueError("mode is only supported in Pydantic v2")
-            if round_trip != False:
-                raise ValueError("round_trip is only supported in Pydantic v2")
-            if warnings != True:
-                raise ValueError("warnings is only supported in Pydantic v2")
-            return super().dict(  # pyright: ignore[reportDeprecated]
-                include=include,
-                exclude=exclude,
-                by_alias=by_alias,
-                exclude_unset=exclude_unset,
-                exclude_defaults=exclude_defaults,
-                exclude_none=exclude_none,
-            )
-
-        @override
-        def model_dump_json(
-            self,
-            *,
-            indent: int | None = None,
-            include: IncEx = None,
-            exclude: IncEx = None,
-            by_alias: bool = False,
-            exclude_unset: bool = False,
-            exclude_defaults: bool = False,
-            exclude_none: bool = False,
-            round_trip: bool = False,
-            warnings: bool = True,
-        ) -> str:
-            """Usage docs: https://docs.pydantic.dev/2.4/concepts/serialization/#modelmodel_dump_json
-
-            Generates a JSON representation of the model using Pydantic's `to_json` method.
-
-            Args:
-                indent: Indentation to use in the JSON output. If None is passed, the output will be compact.
-                include: Field(s) to include in the JSON output. Can take either a string or set of strings.
-                exclude: Field(s) to exclude from the JSON output. Can take either a string or set of strings.
-                by_alias: Whether to serialize using field aliases.
-                exclude_unset: Whether to exclude fields that have not been explicitly set.
-                exclude_defaults: Whether to exclude fields that have the default value.
-                exclude_none: Whether to exclude fields that have a value of `None`.
-                round_trip: Whether to use serialization/deserialization between JSON and class instance.
-                warnings: Whether to show any warnings that occurred during serialization.
-
-            Returns:
-                A JSON string representation of the model.
-            """
-            if round_trip != False:
-                raise ValueError("round_trip is only supported in Pydantic v2")
-            if warnings != True:
-                raise ValueError("warnings is only supported in Pydantic v2")
-            return super().json(  # type: ignore[reportDeprecated]
-                indent=indent,
-                include=include,
-                exclude=exclude,
-                by_alias=by_alias,
-                exclude_unset=exclude_unset,
-                exclude_defaults=exclude_defaults,
-                exclude_none=exclude_none,
-            )
-
-
-def _construct_field(value: object, field: FieldInfo, key: str) -> object:
-    if value is None:
-        return field_get_default(field)
-
-    if PYDANTIC_V2:
-        type_ = field.annotation
-    else:
-        type_ = cast(type, field.outer_type_)  # type: ignore
-
-    if type_ is None:
-        raise RuntimeError(f"Unexpected field type is None for {key}")
-
-    return construct_type(value=value, type_=type_)
-
-
-def is_basemodel(type_: type) -> bool:
-    """Returns whether or not the given type is either a `BaseModel` or a union of `BaseModel`"""
-    if is_union(type_):
-        for variant in get_args(type_):
-            if is_basemodel(variant):
-                return True
-
-        return False
-
-    return is_basemodel_type(type_)
-
-
-def is_basemodel_type(type_: type) -> TypeGuard[type[BaseModel] | type[GenericModel]]:
-    origin = get_origin(type_) or type_
-    return issubclass(origin, BaseModel) or issubclass(origin, GenericModel)
-
-
-def construct_type(*, value: object, type_: object) -> object:
-    """Loose coercion to the expected type with construction of nested values.
-
-    If the given value does not match the expected type then it is returned as-is.
-    """
-    # we allow `object` as the input type because otherwise, passing things like
-    # `Literal['value']` will be reported as a type error by type checkers
-    type_ = cast("type[object]", type_)
-
-    # unwrap `Annotated[T, ...]` -> `T`
-    if is_annotated_type(type_):
-        meta = get_args(type_)[1:]
-        type_ = extract_type_arg(type_, 0)
-    else:
-        meta = tuple()
-
-    # we need to use the origin class for any types that are subscripted generics
-    # e.g. Dict[str, object]
-    origin = get_origin(type_) or type_
-    args = get_args(type_)
-
-    if is_union(origin):
-        try:
-            return validate_type(type_=cast("type[object]", type_), value=value)
-        except Exception:
-            pass
-
-        # if the type is a discriminated union then we want to construct the right variant
-        # in the union, even if the data doesn't match exactly, otherwise we'd break code
-        # that relies on the constructed class types, e.g.
-        #
-        # class FooType:
-        #   kind: Literal['foo']
-        #   value: str
-        #
-        # class BarType:
-        #   kind: Literal['bar']
-        #   value: int
-        #
-        # without this block, if the data we get is something like `{'kind': 'bar', 'value': 'foo'}` then
-        # we'd end up constructing `FooType` when it should be `BarType`.
-        discriminator = _build_discriminated_union_meta(union=type_, meta_annotations=meta)
-        if discriminator and is_mapping(value):
-            variant_value = value.get(discriminator.field_alias_from or discriminator.field_name)
-            if variant_value and isinstance(variant_value, str):
-                variant_type = discriminator.mapping.get(variant_value)
-                if variant_type:
-                    return construct_type(type_=variant_type, value=value)
-
-        # if the data is not valid, use the first variant that doesn't fail while deserializing
-        for variant in args:
-            try:
-                return construct_type(value=value, type_=variant)
-            except Exception:
-                continue
-
-        raise RuntimeError(f"Could not convert data into a valid instance of {type_}")
-
-    if origin == dict:
-        if not is_mapping(value):
-            return value
-
-        _, items_type = get_args(type_)  # Dict[_, items_type]
-        return {key: construct_type(value=item, type_=items_type) for key, item in value.items()}
-
-    if not is_literal_type(type_) and (issubclass(origin, BaseModel) or issubclass(origin, GenericModel)):
-        if is_list(value):
-            return [cast(Any, type_).construct(**entry) if is_mapping(entry) else entry for entry in value]
-
-        if is_mapping(value):
-            if issubclass(type_, BaseModel):
-                return type_.construct(**value)  # type: ignore[arg-type]
-
-            return cast(Any, type_).construct(**value)
-
-    if origin == list:
-        if not is_list(value):
-            return value
-
-        inner_type = args[0]  # List[inner_type]
-        return [construct_type(value=entry, type_=inner_type) for entry in value]
-
-    if origin == float:
-        if isinstance(value, int):
-            coerced = float(value)
-            if coerced != value:
-                return value
-            return coerced
-
-        return value
-
-    if type_ == datetime:
-        try:
-            return parse_datetime(value)  # type: ignore
-        except Exception:
-            return value
-
-    if type_ == date:
-        try:
-            return parse_date(value)  # type: ignore
-        except Exception:
-            return value
-
-    return value
-
-
-@runtime_checkable
-class CachedDiscriminatorType(Protocol):
-    __discriminator__: DiscriminatorDetails
-
-
-class DiscriminatorDetails:
-    field_name: str
-    """The name of the discriminator field in the variant class, e.g.
-
-    ```py
-    class Foo(BaseModel):
-        type: Literal['foo']
-    ```
-
-    Will result in field_name='type'
-    """
-
-    field_alias_from: str | None
-    """The name of the discriminator field in the API response, e.g.
-
-    ```py
-    class Foo(BaseModel):
-        type: Literal['foo'] = Field(alias='type_from_api')
-    ```
-
-    Will result in field_alias_from='type_from_api'
-    """
-
-    mapping: dict[str, type]
-    """Mapping of discriminator value to variant type, e.g.
-
-    {'foo': FooVariant, 'bar': BarVariant}
-    """
-
-    def __init__(
-        self,
-        *,
-        mapping: dict[str, type],
-        discriminator_field: str,
-        discriminator_alias: str | None,
-    ) -> None:
-        self.mapping = mapping
-        self.field_name = discriminator_field
-        self.field_alias_from = discriminator_alias
-
-
-def _build_discriminated_union_meta(*, union: type, meta_annotations: tuple[Any, ...]) -> DiscriminatorDetails | None:
-    if isinstance(union, CachedDiscriminatorType):
-        return union.__discriminator__
-
-    discriminator_field_name: str | None = None
-
-    for annotation in meta_annotations:
-        if isinstance(annotation, PropertyInfo) and annotation.discriminator is not None:
-            discriminator_field_name = annotation.discriminator
-            break
-
-    if not discriminator_field_name:
-        return None
-
-    mapping: dict[str, type] = {}
-    discriminator_alias: str | None = None
-
-    for variant in get_args(union):
-        variant = strip_annotated_type(variant)
-        if is_basemodel_type(variant):
-            if PYDANTIC_V2:
-                field = _extract_field_schema_pv2(variant, discriminator_field_name)
-                if not field:
-                    continue
-
-                # Note: if one variant defines an alias then they all should
-                discriminator_alias = field.get("serialization_alias")
-
-                field_schema = field["schema"]
-
-                if field_schema["type"] == "literal":
-                    for entry in field_schema["expected"]:
-                        if isinstance(entry, str):
-                            mapping[entry] = variant
-            else:
-                field_info = cast("dict[str, FieldInfo]", variant.__fields__).get(discriminator_field_name)  # pyright: ignore[reportDeprecated, reportUnnecessaryCast]
-                if not field_info:
-                    continue
-
-                # Note: if one variant defines an alias then they all should
-                discriminator_alias = field_info.alias
-
-                if field_info.annotation and is_literal_type(field_info.annotation):
-                    for entry in get_args(field_info.annotation):
-                        if isinstance(entry, str):
-                            mapping[entry] = variant
-
-    if not mapping:
-        return None
-
-    details = DiscriminatorDetails(
-        mapping=mapping,
-        discriminator_field=discriminator_field_name,
-        discriminator_alias=discriminator_alias,
-    )
-    cast(CachedDiscriminatorType, union).__discriminator__ = details
-    return details
-
-
-def _extract_field_schema_pv2(model: type[BaseModel], field_name: str) -> ModelField | None:
-    schema = model.__pydantic_core_schema__
-    if schema["type"] != "model":
-        return None
-
-    fields_schema = schema["schema"]
-    if fields_schema["type"] != "model-fields":
-        return None
-
-    fields_schema = cast("ModelFieldsSchema", fields_schema)
-
-    field = fields_schema["fields"].get(field_name)
-    if not field:
-        return None
-
-    return cast("ModelField", field)  # pyright: ignore[reportUnnecessaryCast]
-
-
-def validate_type(*, type_: type[_T], value: object) -> _T:
-    """Strict validation that the given value matches the expected type"""
-    if inspect.isclass(type_) and issubclass(type_, pydantic.BaseModel):
-        return cast(_T, parse_obj(type_, value))
-
-    return cast(_T, _validate_non_model_type(type_=type_, value=value))
-
-
-# our use of subclasssing here causes weirdness for type checkers,
-# so we just pretend that we don't subclass
-if TYPE_CHECKING:
-    GenericModel = BaseModel
-else:
-
-    class GenericModel(BaseGenericModel, BaseModel):
-        pass
-
-
-if PYDANTIC_V2:
-    from pydantic import TypeAdapter as _TypeAdapter
-
-    _CachedTypeAdapter = cast("TypeAdapter[object]", lru_cache(maxsize=None)(_TypeAdapter))
-
-    if TYPE_CHECKING:
-        from pydantic import TypeAdapter
-    else:
-        TypeAdapter = _CachedTypeAdapter
-
-    def _validate_non_model_type(*, type_: type[_T], value: object) -> _T:
-        return TypeAdapter(type_).validate_python(value)
-
-elif not TYPE_CHECKING:  # TODO: condition is weird
-
-    class RootModel(GenericModel, Generic[_T]):
-        """Used as a placeholder to easily convert runtime types to a Pydantic format
-        to provide validation.
-
-        For example:
-        ```py
-        validated = RootModel[int](__root__="5").__root__
-        # validated: 5
-        ```
-        """
-
-        __root__: _T
-
-    def _validate_non_model_type(*, type_: type[_T], value: object) -> _T:
-        model = _create_pydantic_model(type_).validate(value)
-        return cast(_T, model.__root__)
-
-    def _create_pydantic_model(type_: _T) -> Type[RootModel[_T]]:
-        return RootModel[type_]  # type: ignore
-
-
-class FinalRequestOptionsInput(TypedDict, total=False):
-    method: Required[str]
-    url: Required[str]
-    params: Query
-    headers: Headers
-    max_retries: int
-    timeout: float | Timeout | None
-    files: HttpxRequestFiles | None
-    idempotency_key: str
-    json_data: Body
-    extra_json: AnyMapping
-
-
-@final
-class FinalRequestOptions(pydantic.BaseModel):
-    method: str
-    url: str
-    params: Query = {}
-    headers: Union[Headers, NotGiven] = NotGiven()
-    max_retries: Union[int, NotGiven] = NotGiven()
-    timeout: Union[float, Timeout, None, NotGiven] = NotGiven()
-    files: Union[HttpxRequestFiles, None] = None
-    idempotency_key: Union[str, None] = None
-    post_parser: Union[Callable[[Any], Any], NotGiven] = NotGiven()
-
-    # It should be noted that we cannot use `json` here as that would override
-    # a BaseModel method in an incompatible fashion.
-    json_data: Union[Body, None] = None
-    extra_json: Union[AnyMapping, None] = None
-
-    if PYDANTIC_V2:
-        model_config: ClassVar[ConfigDict] = ConfigDict(arbitrary_types_allowed=True)
-    else:
-
-        class Config(pydantic.BaseConfig):  # pyright: ignore[reportDeprecated]
-            arbitrary_types_allowed: bool = True
-
-    def get_max_retries(self, max_retries: int) -> int:
-        if isinstance(self.max_retries, NotGiven):
-            return max_retries
-        return self.max_retries
-
-    def _strip_raw_response_header(self) -> None:
-        if not is_given(self.headers):
-            return
-
-        if self.headers.get(RAW_RESPONSE_HEADER):
-            self.headers = {**self.headers}
-            self.headers.pop(RAW_RESPONSE_HEADER)
-
-    # override the `construct` method so that we can run custom transformations.
-    # this is necessary as we don't want to do any actual runtime type checking
-    # (which means we can't use validators) but we do want to ensure that `NotGiven`
-    # values are not present
-    #
-    # type ignore required because we're adding explicit types to `**values`
-    @classmethod
-    def construct(  # type: ignore
-        cls,
-        _fields_set: set[str] | None = None,
-        **values: Unpack[FinalRequestOptionsInput],
-    ) -> FinalRequestOptions:
-        kwargs: dict[str, Any] = {
-            # we unconditionally call `strip_not_given` on any value
-            # as it will just ignore any non-mapping types
-            key: strip_not_given(value)
-            for key, value in values.items()
-        }
-        if PYDANTIC_V2:
-            return super().model_construct(_fields_set, **kwargs)
-        return cast(FinalRequestOptions, super().construct(_fields_set, **kwargs))  # pyright: ignore[reportDeprecated]
-
-    if not TYPE_CHECKING:
-        # type checkers incorrectly complain about this assignment
-        model_construct = construct
diff --git a/openai/_module_client.py b/openai/_module_client.py
deleted file mode 100644
index 9227f5e2..00000000
--- a/openai/_module_client.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import override
-
-from . import resources, _load_client
-from ._utils import LazyProxy
-
-
-class ChatProxy(LazyProxy[resources.Chat]):
-    @override
-    def __load__(self) -> resources.Chat:
-        return _load_client().chat
-
-
-class BetaProxy(LazyProxy[resources.Beta]):
-    @override
-    def __load__(self) -> resources.Beta:
-        return _load_client().beta
-
-
-class FilesProxy(LazyProxy[resources.Files]):
-    @override
-    def __load__(self) -> resources.Files:
-        return _load_client().files
-
-
-class AudioProxy(LazyProxy[resources.Audio]):
-    @override
-    def __load__(self) -> resources.Audio:
-        return _load_client().audio
-
-
-class ImagesProxy(LazyProxy[resources.Images]):
-    @override
-    def __load__(self) -> resources.Images:
-        return _load_client().images
-
-
-class ModelsProxy(LazyProxy[resources.Models]):
-    @override
-    def __load__(self) -> resources.Models:
-        return _load_client().models
-
-
-class EmbeddingsProxy(LazyProxy[resources.Embeddings]):
-    @override
-    def __load__(self) -> resources.Embeddings:
-        return _load_client().embeddings
-
-
-class CompletionsProxy(LazyProxy[resources.Completions]):
-    @override
-    def __load__(self) -> resources.Completions:
-        return _load_client().completions
-
-
-class ModerationsProxy(LazyProxy[resources.Moderations]):
-    @override
-    def __load__(self) -> resources.Moderations:
-        return _load_client().moderations
-
-
-class FineTuningProxy(LazyProxy[resources.FineTuning]):
-    @override
-    def __load__(self) -> resources.FineTuning:
-        return _load_client().fine_tuning
-
-
-chat: resources.Chat = ChatProxy().__as_proxied__()
-beta: resources.Beta = BetaProxy().__as_proxied__()
-files: resources.Files = FilesProxy().__as_proxied__()
-audio: resources.Audio = AudioProxy().__as_proxied__()
-images: resources.Images = ImagesProxy().__as_proxied__()
-models: resources.Models = ModelsProxy().__as_proxied__()
-embeddings: resources.Embeddings = EmbeddingsProxy().__as_proxied__()
-completions: resources.Completions = CompletionsProxy().__as_proxied__()
-moderations: resources.Moderations = ModerationsProxy().__as_proxied__()
-fine_tuning: resources.FineTuning = FineTuningProxy().__as_proxied__()
diff --git a/openai/_qs.py b/openai/_qs.py
deleted file mode 100644
index 274320ca..00000000
--- a/openai/_qs.py
+++ /dev/null
@@ -1,150 +0,0 @@
-from __future__ import annotations
-
-from typing import Any, List, Tuple, Union, Mapping, TypeVar
-from urllib.parse import parse_qs, urlencode
-from typing_extensions import Literal, get_args
-
-from ._types import NOT_GIVEN, NotGiven, NotGivenOr
-from ._utils import flatten
-
-_T = TypeVar("_T")
-
-
-ArrayFormat = Literal["comma", "repeat", "indices", "brackets"]
-NestedFormat = Literal["dots", "brackets"]
-
-PrimitiveData = Union[str, int, float, bool, None]
-# this should be Data = Union[PrimitiveData, "List[Data]", "Tuple[Data]", "Mapping[str, Data]"]
-# https://github.com/microsoft/pyright/issues/3555
-Data = Union[PrimitiveData, List[Any], Tuple[Any], "Mapping[str, Any]"]
-Params = Mapping[str, Data]
-
-
-class Querystring:
-    array_format: ArrayFormat
-    nested_format: NestedFormat
-
-    def __init__(
-        self,
-        *,
-        array_format: ArrayFormat = "repeat",
-        nested_format: NestedFormat = "brackets",
-    ) -> None:
-        self.array_format = array_format
-        self.nested_format = nested_format
-
-    def parse(self, query: str) -> Mapping[str, object]:
-        # Note: custom format syntax is not supported yet
-        return parse_qs(query)
-
-    def stringify(
-        self,
-        params: Params,
-        *,
-        array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN,
-        nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN,
-    ) -> str:
-        return urlencode(
-            self.stringify_items(
-                params,
-                array_format=array_format,
-                nested_format=nested_format,
-            )
-        )
-
-    def stringify_items(
-        self,
-        params: Params,
-        *,
-        array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN,
-        nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN,
-    ) -> list[tuple[str, str]]:
-        opts = Options(
-            qs=self,
-            array_format=array_format,
-            nested_format=nested_format,
-        )
-        return flatten([self._stringify_item(key, value, opts) for key, value in params.items()])
-
-    def _stringify_item(
-        self,
-        key: str,
-        value: Data,
-        opts: Options,
-    ) -> list[tuple[str, str]]:
-        if isinstance(value, Mapping):
-            items: list[tuple[str, str]] = []
-            nested_format = opts.nested_format
-            for subkey, subvalue in value.items():
-                items.extend(
-                    self._stringify_item(
-                        # TODO: error if unknown format
-                        f"{key}.{subkey}" if nested_format == "dots" else f"{key}[{subkey}]",
-                        subvalue,
-                        opts,
-                    )
-                )
-            return items
-
-        if isinstance(value, (list, tuple)):
-            array_format = opts.array_format
-            if array_format == "comma":
-                return [
-                    (
-                        key,
-                        ",".join(self._primitive_value_to_str(item) for item in value if item is not None),
-                    ),
-                ]
-            elif array_format == "repeat":
-                items = []
-                for item in value:
-                    items.extend(self._stringify_item(key, item, opts))
-                return items
-            elif array_format == "indices":
-                raise NotImplementedError("The array indices format is not supported yet")
-            elif array_format == "brackets":
-                items = []
-                key = key + "[]"
-                for item in value:
-                    items.extend(self._stringify_item(key, item, opts))
-                return items
-            else:
-                raise NotImplementedError(
-                    f"Unknown array_format value: {array_format}, choose from {', '.join(get_args(ArrayFormat))}"
-                )
-
-        serialised = self._primitive_value_to_str(value)
-        if not serialised:
-            return []
-        return [(key, serialised)]
-
-    def _primitive_value_to_str(self, value: PrimitiveData) -> str:
-        # copied from httpx
-        if value is True:
-            return "true"
-        elif value is False:
-            return "false"
-        elif value is None:
-            return ""
-        return str(value)
-
-
-_qs = Querystring()
-parse = _qs.parse
-stringify = _qs.stringify
-stringify_items = _qs.stringify_items
-
-
-class Options:
-    array_format: ArrayFormat
-    nested_format: NestedFormat
-
-    def __init__(
-        self,
-        qs: Querystring = _qs,
-        *,
-        array_format: NotGivenOr[ArrayFormat] = NOT_GIVEN,
-        nested_format: NotGivenOr[NestedFormat] = NOT_GIVEN,
-    ) -> None:
-        self.array_format = qs.array_format if isinstance(array_format, NotGiven) else array_format
-        self.nested_format = qs.nested_format if isinstance(nested_format, NotGiven) else nested_format
diff --git a/openai/_resource.py b/openai/_resource.py
deleted file mode 100644
index fff9ba19..00000000
--- a/openai/_resource.py
+++ /dev/null
@@ -1,43 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import time
-from typing import TYPE_CHECKING
-
-import anyio
-
-if TYPE_CHECKING:
-    from ._client import OpenAI, AsyncOpenAI
-
-
-class SyncAPIResource:
-    _client: OpenAI
-
-    def __init__(self, client: OpenAI) -> None:
-        self._client = client
-        self._get = client.get
-        self._post = client.post
-        self._patch = client.patch
-        self._put = client.put
-        self._delete = client.delete
-        self._get_api_list = client.get_api_list
-
-    def _sleep(self, seconds: float) -> None:
-        time.sleep(seconds)
-
-
-class AsyncAPIResource:
-    _client: AsyncOpenAI
-
-    def __init__(self, client: AsyncOpenAI) -> None:
-        self._client = client
-        self._get = client.get
-        self._post = client.post
-        self._patch = client.patch
-        self._put = client.put
-        self._delete = client.delete
-        self._get_api_list = client.get_api_list
-
-    async def _sleep(self, seconds: float) -> None:
-        await anyio.sleep(seconds)
diff --git a/openai/_response.py b/openai/_response.py
deleted file mode 100644
index 47f484ef..00000000
--- a/openai/_response.py
+++ /dev/null
@@ -1,824 +0,0 @@
-from __future__ import annotations
-
-import os
-import inspect
-import logging
-import datetime
-import functools
-from types import TracebackType
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Union,
-    Generic,
-    TypeVar,
-    Callable,
-    Iterator,
-    AsyncIterator,
-    cast,
-    overload,
-)
-from typing_extensions import Awaitable, ParamSpec, override, get_origin
-
-import anyio
-import httpx
-import pydantic
-
-from ._types import NoneType
-from ._utils import is_given, extract_type_arg, is_annotated_type, extract_type_var_from_base
-from ._models import BaseModel, is_basemodel
-from ._constants import RAW_RESPONSE_HEADER, OVERRIDE_CAST_TO_HEADER
-from ._streaming import Stream, AsyncStream, is_stream_class_type, extract_stream_chunk_type
-from ._exceptions import OpenAIError, APIResponseValidationError
-
-if TYPE_CHECKING:
-    from ._models import FinalRequestOptions
-    from ._base_client import BaseClient
-
-
-P = ParamSpec("P")
-R = TypeVar("R")
-_T = TypeVar("_T")
-_APIResponseT = TypeVar("_APIResponseT", bound="APIResponse[Any]")
-_AsyncAPIResponseT = TypeVar("_AsyncAPIResponseT", bound="AsyncAPIResponse[Any]")
-
-log: logging.Logger = logging.getLogger(__name__)
-
-
-class BaseAPIResponse(Generic[R]):
-    _cast_to: type[R]
-    _client: BaseClient[Any, Any]
-    _parsed_by_type: dict[type[Any], Any]
-    _is_sse_stream: bool
-    _stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None
-    _options: FinalRequestOptions
-
-    http_response: httpx.Response
-
-    def __init__(
-        self,
-        *,
-        raw: httpx.Response,
-        cast_to: type[R],
-        client: BaseClient[Any, Any],
-        stream: bool,
-        stream_cls: type[Stream[Any]] | type[AsyncStream[Any]] | None,
-        options: FinalRequestOptions,
-    ) -> None:
-        self._cast_to = cast_to
-        self._client = client
-        self._parsed_by_type = {}
-        self._is_sse_stream = stream
-        self._stream_cls = stream_cls
-        self._options = options
-        self.http_response = raw
-
-    @property
-    def headers(self) -> httpx.Headers:
-        return self.http_response.headers
-
-    @property
-    def http_request(self) -> httpx.Request:
-        """Returns the httpx Request instance associated with the current response."""
-        return self.http_response.request
-
-    @property
-    def status_code(self) -> int:
-        return self.http_response.status_code
-
-    @property
-    def url(self) -> httpx.URL:
-        """Returns the URL for which the request was made."""
-        return self.http_response.url
-
-    @property
-    def method(self) -> str:
-        return self.http_request.method
-
-    @property
-    def http_version(self) -> str:
-        return self.http_response.http_version
-
-    @property
-    def elapsed(self) -> datetime.timedelta:
-        """The time taken for the complete request/response cycle to complete."""
-        return self.http_response.elapsed
-
-    @property
-    def is_closed(self) -> bool:
-        """Whether or not the response body has been closed.
-
-        If this is False then there is response data that has not been read yet.
-        You must either fully consume the response body or call `.close()`
-        before discarding the response to prevent resource leaks.
-        """
-        return self.http_response.is_closed
-
-    @override
-    def __repr__(self) -> str:
-        return (
-            f"<{self.__class__.__name__} [{self.status_code} {self.http_response.reason_phrase}] type={self._cast_to}>"
-        )
-
-    def _parse(self, *, to: type[_T] | None = None) -> R | _T:
-        # unwrap `Annotated[T, ...]` -> `T`
-        if to and is_annotated_type(to):
-            to = extract_type_arg(to, 0)
-
-        if self._is_sse_stream:
-            if to:
-                if not is_stream_class_type(to):
-                    raise TypeError(f"Expected custom parse type to be a subclass of {Stream} or {AsyncStream}")
-
-                return cast(
-                    _T,
-                    to(
-                        cast_to=extract_stream_chunk_type(
-                            to,
-                            failure_message="Expected custom stream type to be passed with a type argument, e.g. Stream[ChunkType]",
-                        ),
-                        response=self.http_response,
-                        client=cast(Any, self._client),
-                    ),
-                )
-
-            if self._stream_cls:
-                return cast(
-                    R,
-                    self._stream_cls(
-                        cast_to=extract_stream_chunk_type(self._stream_cls),
-                        response=self.http_response,
-                        client=cast(Any, self._client),
-                    ),
-                )
-
-            stream_cls = cast("type[Stream[Any]] | type[AsyncStream[Any]] | None", self._client._default_stream_cls)
-            if stream_cls is None:
-                raise MissingStreamClassError()
-
-            return cast(
-                R,
-                stream_cls(
-                    cast_to=self._cast_to,
-                    response=self.http_response,
-                    client=cast(Any, self._client),
-                ),
-            )
-
-        cast_to = to if to is not None else self._cast_to
-
-        # unwrap `Annotated[T, ...]` -> `T`
-        if is_annotated_type(cast_to):
-            cast_to = extract_type_arg(cast_to, 0)
-
-        if cast_to is NoneType:
-            return cast(R, None)
-
-        response = self.http_response
-        if cast_to == str:
-            return cast(R, response.text)
-
-        if cast_to == bytes:
-            return cast(R, response.content)
-
-        if cast_to == int:
-            return cast(R, int(response.text))
-
-        if cast_to == float:
-            return cast(R, float(response.text))
-
-        origin = get_origin(cast_to) or cast_to
-
-        # handle the legacy binary response case
-        if inspect.isclass(cast_to) and cast_to.__name__ == "HttpxBinaryResponseContent":
-            return cast(R, cast_to(response))  # type: ignore
-
-        if origin == APIResponse:
-            raise RuntimeError("Unexpected state - cast_to is `APIResponse`")
-
-        if inspect.isclass(origin) and issubclass(origin, httpx.Response):
-            # Because of the invariance of our ResponseT TypeVar, users can subclass httpx.Response
-            # and pass that class to our request functions. We cannot change the variance to be either
-            # covariant or contravariant as that makes our usage of ResponseT illegal. We could construct
-            # the response class ourselves but that is something that should be supported directly in httpx
-            # as it would be easy to incorrectly construct the Response object due to the multitude of arguments.
-            if cast_to != httpx.Response:
-                raise ValueError(f"Subclasses of httpx.Response cannot be passed to `cast_to`")
-            return cast(R, response)
-
-        if inspect.isclass(origin) and not issubclass(origin, BaseModel) and issubclass(origin, pydantic.BaseModel):
-            raise TypeError("Pydantic models must subclass our base model type, e.g. `from openai import BaseModel`")
-
-        if (
-            cast_to is not object
-            and not origin is list
-            and not origin is dict
-            and not origin is Union
-            and not issubclass(origin, BaseModel)
-        ):
-            raise RuntimeError(
-                f"Unsupported type, expected {cast_to} to be a subclass of {BaseModel}, {dict}, {list}, {Union}, {NoneType}, {str} or {httpx.Response}."
-            )
-
-        # split is required to handle cases where additional information is included
-        # in the response, e.g. application/json; charset=utf-8
-        content_type, *_ = response.headers.get("content-type", "*").split(";")
-        if content_type != "application/json":
-            if is_basemodel(cast_to):
-                try:
-                    data = response.json()
-                except Exception as exc:
-                    log.debug("Could not read JSON from response data due to %s - %s", type(exc), exc)
-                else:
-                    return self._client._process_response_data(
-                        data=data,
-                        cast_to=cast_to,  # type: ignore
-                        response=response,
-                    )
-
-            if self._client._strict_response_validation:
-                raise APIResponseValidationError(
-                    response=response,
-                    message=f"Expected Content-Type response header to be `application/json` but received `{content_type}` instead.",
-                    body=response.text,
-                )
-
-            # If the API responds with content that isn't JSON then we just return
-            # the (decoded) text without performing any parsing so that you can still
-            # handle the response however you need to.
-            return response.text  # type: ignore
-
-        data = response.json()
-
-        return self._client._process_response_data(
-            data=data,
-            cast_to=cast_to,  # type: ignore
-            response=response,
-        )
-
-
-class APIResponse(BaseAPIResponse[R]):
-    @overload
-    def parse(self, *, to: type[_T]) -> _T:
-        ...
-
-    @overload
-    def parse(self) -> R:
-        ...
-
-    def parse(self, *, to: type[_T] | None = None) -> R | _T:
-        """Returns the rich python representation of this response's data.
-
-        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
-
-        You can customise the type that the response is parsed into through
-        the `to` argument, e.g.
-
-        ```py
-        from openai import BaseModel
-
-
-        class MyModel(BaseModel):
-            foo: str
-
-
-        obj = response.parse(to=MyModel)
-        print(obj.foo)
-        ```
-
-        We support parsing:
-          - `BaseModel`
-          - `dict`
-          - `list`
-          - `Union`
-          - `str`
-          - `int`
-          - `float`
-          - `httpx.Response`
-        """
-        cache_key = to if to is not None else self._cast_to
-        cached = self._parsed_by_type.get(cache_key)
-        if cached is not None:
-            return cached  # type: ignore[no-any-return]
-
-        if not self._is_sse_stream:
-            self.read()
-
-        parsed = self._parse(to=to)
-        if is_given(self._options.post_parser):
-            parsed = self._options.post_parser(parsed)
-
-        self._parsed_by_type[cache_key] = parsed
-        return parsed
-
-    def read(self) -> bytes:
-        """Read and return the binary response content."""
-        try:
-            return self.http_response.read()
-        except httpx.StreamConsumed as exc:
-            # The default error raised by httpx isn't very
-            # helpful in our case so we re-raise it with
-            # a different error message.
-            raise StreamAlreadyConsumed() from exc
-
-    def text(self) -> str:
-        """Read and decode the response content into a string."""
-        self.read()
-        return self.http_response.text
-
-    def json(self) -> object:
-        """Read and decode the JSON response content."""
-        self.read()
-        return self.http_response.json()
-
-    def close(self) -> None:
-        """Close the response and release the connection.
-
-        Automatically called if the response body is read to completion.
-        """
-        self.http_response.close()
-
-    def iter_bytes(self, chunk_size: int | None = None) -> Iterator[bytes]:
-        """
-        A byte-iterator over the decoded response content.
-
-        This automatically handles gzip, deflate and brotli encoded responses.
-        """
-        for chunk in self.http_response.iter_bytes(chunk_size):
-            yield chunk
-
-    def iter_text(self, chunk_size: int | None = None) -> Iterator[str]:
-        """A str-iterator over the decoded response content
-        that handles both gzip, deflate, etc but also detects the content's
-        string encoding.
-        """
-        for chunk in self.http_response.iter_text(chunk_size):
-            yield chunk
-
-    def iter_lines(self) -> Iterator[str]:
-        """Like `iter_text()` but will only yield chunks for each line"""
-        for chunk in self.http_response.iter_lines():
-            yield chunk
-
-
-class AsyncAPIResponse(BaseAPIResponse[R]):
-    @overload
-    async def parse(self, *, to: type[_T]) -> _T:
-        ...
-
-    @overload
-    async def parse(self) -> R:
-        ...
-
-    async def parse(self, *, to: type[_T] | None = None) -> R | _T:
-        """Returns the rich python representation of this response's data.
-
-        For lower-level control, see `.read()`, `.json()`, `.iter_bytes()`.
-
-        You can customise the type that the response is parsed into through
-        the `to` argument, e.g.
-
-        ```py
-        from openai import BaseModel
-
-
-        class MyModel(BaseModel):
-            foo: str
-
-
-        obj = response.parse(to=MyModel)
-        print(obj.foo)
-        ```
-
-        We support parsing:
-          - `BaseModel`
-          - `dict`
-          - `list`
-          - `Union`
-          - `str`
-          - `httpx.Response`
-        """
-        cache_key = to if to is not None else self._cast_to
-        cached = self._parsed_by_type.get(cache_key)
-        if cached is not None:
-            return cached  # type: ignore[no-any-return]
-
-        if not self._is_sse_stream:
-            await self.read()
-
-        parsed = self._parse(to=to)
-        if is_given(self._options.post_parser):
-            parsed = self._options.post_parser(parsed)
-
-        self._parsed_by_type[cache_key] = parsed
-        return parsed
-
-    async def read(self) -> bytes:
-        """Read and return the binary response content."""
-        try:
-            return await self.http_response.aread()
-        except httpx.StreamConsumed as exc:
-            # the default error raised by httpx isn't very
-            # helpful in our case so we re-raise it with
-            # a different error message
-            raise StreamAlreadyConsumed() from exc
-
-    async def text(self) -> str:
-        """Read and decode the response content into a string."""
-        await self.read()
-        return self.http_response.text
-
-    async def json(self) -> object:
-        """Read and decode the JSON response content."""
-        await self.read()
-        return self.http_response.json()
-
-    async def close(self) -> None:
-        """Close the response and release the connection.
-
-        Automatically called if the response body is read to completion.
-        """
-        await self.http_response.aclose()
-
-    async def iter_bytes(self, chunk_size: int | None = None) -> AsyncIterator[bytes]:
-        """
-        A byte-iterator over the decoded response content.
-
-        This automatically handles gzip, deflate and brotli encoded responses.
-        """
-        async for chunk in self.http_response.aiter_bytes(chunk_size):
-            yield chunk
-
-    async def iter_text(self, chunk_size: int | None = None) -> AsyncIterator[str]:
-        """A str-iterator over the decoded response content
-        that handles both gzip, deflate, etc but also detects the content's
-        string encoding.
-        """
-        async for chunk in self.http_response.aiter_text(chunk_size):
-            yield chunk
-
-    async def iter_lines(self) -> AsyncIterator[str]:
-        """Like `iter_text()` but will only yield chunks for each line"""
-        async for chunk in self.http_response.aiter_lines():
-            yield chunk
-
-
-class BinaryAPIResponse(APIResponse[bytes]):
-    """Subclass of APIResponse providing helpers for dealing with binary data.
-
-    Note: If you want to stream the response data instead of eagerly reading it
-    all at once then you should use `.with_streaming_response` when making
-    the API request, e.g. `.with_streaming_response.get_binary_response()`
-    """
-
-    def write_to_file(
-        self,
-        file: str | os.PathLike[str],
-    ) -> None:
-        """Write the output to the given file.
-
-        Accepts a filename or any path-like object, e.g. pathlib.Path
-
-        Note: if you want to stream the data to the file instead of writing
-        all at once then you should use `.with_streaming_response` when making
-        the API request, e.g. `.with_streaming_response.get_binary_response()`
-        """
-        with open(file, mode="wb") as f:
-            for data in self.iter_bytes():
-                f.write(data)
-
-
-class AsyncBinaryAPIResponse(AsyncAPIResponse[bytes]):
-    """Subclass of APIResponse providing helpers for dealing with binary data.
-
-    Note: If you want to stream the response data instead of eagerly reading it
-    all at once then you should use `.with_streaming_response` when making
-    the API request, e.g. `.with_streaming_response.get_binary_response()`
-    """
-
-    async def write_to_file(
-        self,
-        file: str | os.PathLike[str],
-    ) -> None:
-        """Write the output to the given file.
-
-        Accepts a filename or any path-like object, e.g. pathlib.Path
-
-        Note: if you want to stream the data to the file instead of writing
-        all at once then you should use `.with_streaming_response` when making
-        the API request, e.g. `.with_streaming_response.get_binary_response()`
-        """
-        path = anyio.Path(file)
-        async with await path.open(mode="wb") as f:
-            async for data in self.iter_bytes():
-                await f.write(data)
-
-
-class StreamedBinaryAPIResponse(APIResponse[bytes]):
-    def stream_to_file(
-        self,
-        file: str | os.PathLike[str],
-        *,
-        chunk_size: int | None = None,
-    ) -> None:
-        """Streams the output to the given file.
-
-        Accepts a filename or any path-like object, e.g. pathlib.Path
-        """
-        with open(file, mode="wb") as f:
-            for data in self.iter_bytes(chunk_size):
-                f.write(data)
-
-
-class AsyncStreamedBinaryAPIResponse(AsyncAPIResponse[bytes]):
-    async def stream_to_file(
-        self,
-        file: str | os.PathLike[str],
-        *,
-        chunk_size: int | None = None,
-    ) -> None:
-        """Streams the output to the given file.
-
-        Accepts a filename or any path-like object, e.g. pathlib.Path
-        """
-        path = anyio.Path(file)
-        async with await path.open(mode="wb") as f:
-            async for data in self.iter_bytes(chunk_size):
-                await f.write(data)
-
-
-class MissingStreamClassError(TypeError):
-    def __init__(self) -> None:
-        super().__init__(
-            "The `stream` argument was set to `True` but the `stream_cls` argument was not given. See `openai._streaming` for reference",
-        )
-
-
-class StreamAlreadyConsumed(OpenAIError):
-    """
-    Attempted to read or stream content, but the content has already
-    been streamed.
-
-    This can happen if you use a method like `.iter_lines()` and then attempt
-    to read th entire response body afterwards, e.g.
-
-    ```py
-    response = await client.post(...)
-    async for line in response.iter_lines():
-        ...  # do something with `line`
-
-    content = await response.read()
-    # ^ error
-    ```
-
-    If you want this behaviour you'll need to either manually accumulate the response
-    content or call `await response.read()` before iterating over the stream.
-    """
-
-    def __init__(self) -> None:
-        message = (
-            "Attempted to read or stream some content, but the content has "
-            "already been streamed. "
-            "This could be due to attempting to stream the response "
-            "content more than once."
-            "\n\n"
-            "You can fix this by manually accumulating the response content while streaming "
-            "or by calling `.read()` before starting to stream."
-        )
-        super().__init__(message)
-
-
-class ResponseContextManager(Generic[_APIResponseT]):
-    """Context manager for ensuring that a request is not made
-    until it is entered and that the response will always be closed
-    when the context manager exits
-    """
-
-    def __init__(self, request_func: Callable[[], _APIResponseT]) -> None:
-        self._request_func = request_func
-        self.__response: _APIResponseT | None = None
-
-    def __enter__(self) -> _APIResponseT:
-        self.__response = self._request_func()
-        return self.__response
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        if self.__response is not None:
-            self.__response.close()
-
-
-class AsyncResponseContextManager(Generic[_AsyncAPIResponseT]):
-    """Context manager for ensuring that a request is not made
-    until it is entered and that the response will always be closed
-    when the context manager exits
-    """
-
-    def __init__(self, api_request: Awaitable[_AsyncAPIResponseT]) -> None:
-        self._api_request = api_request
-        self.__response: _AsyncAPIResponseT | None = None
-
-    async def __aenter__(self) -> _AsyncAPIResponseT:
-        self.__response = await self._api_request
-        return self.__response
-
-    async def __aexit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        if self.__response is not None:
-            await self.__response.close()
-
-
-def to_streamed_response_wrapper(func: Callable[P, R]) -> Callable[P, ResponseContextManager[APIResponse[R]]]:
-    """Higher order function that takes one of our bound API methods and wraps it
-    to support streaming and returning the raw `APIResponse` object directly.
-    """
-
-    @functools.wraps(func)
-    def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[APIResponse[R]]:
-        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "stream"
-
-        kwargs["extra_headers"] = extra_headers
-
-        make_request = functools.partial(func, *args, **kwargs)
-
-        return ResponseContextManager(cast(Callable[[], APIResponse[R]], make_request))
-
-    return wrapped
-
-
-def async_to_streamed_response_wrapper(
-    func: Callable[P, Awaitable[R]],
-) -> Callable[P, AsyncResponseContextManager[AsyncAPIResponse[R]]]:
-    """Higher order function that takes one of our bound API methods and wraps it
-    to support streaming and returning the raw `APIResponse` object directly.
-    """
-
-    @functools.wraps(func)
-    def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[AsyncAPIResponse[R]]:
-        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "stream"
-
-        kwargs["extra_headers"] = extra_headers
-
-        make_request = func(*args, **kwargs)
-
-        return AsyncResponseContextManager(cast(Awaitable[AsyncAPIResponse[R]], make_request))
-
-    return wrapped
-
-
-def to_custom_streamed_response_wrapper(
-    func: Callable[P, object],
-    response_cls: type[_APIResponseT],
-) -> Callable[P, ResponseContextManager[_APIResponseT]]:
-    """Higher order function that takes one of our bound API methods and an `APIResponse` class
-    and wraps the method to support streaming and returning the given response class directly.
-
-    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
-    """
-
-    @functools.wraps(func)
-    def wrapped(*args: P.args, **kwargs: P.kwargs) -> ResponseContextManager[_APIResponseT]:
-        extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "stream"
-        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
-
-        kwargs["extra_headers"] = extra_headers
-
-        make_request = functools.partial(func, *args, **kwargs)
-
-        return ResponseContextManager(cast(Callable[[], _APIResponseT], make_request))
-
-    return wrapped
-
-
-def async_to_custom_streamed_response_wrapper(
-    func: Callable[P, Awaitable[object]],
-    response_cls: type[_AsyncAPIResponseT],
-) -> Callable[P, AsyncResponseContextManager[_AsyncAPIResponseT]]:
-    """Higher order function that takes one of our bound API methods and an `APIResponse` class
-    and wraps the method to support streaming and returning the given response class directly.
-
-    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
-    """
-
-    @functools.wraps(func)
-    def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncResponseContextManager[_AsyncAPIResponseT]:
-        extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "stream"
-        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
-
-        kwargs["extra_headers"] = extra_headers
-
-        make_request = func(*args, **kwargs)
-
-        return AsyncResponseContextManager(cast(Awaitable[_AsyncAPIResponseT], make_request))
-
-    return wrapped
-
-
-def to_raw_response_wrapper(func: Callable[P, R]) -> Callable[P, APIResponse[R]]:
-    """Higher order function that takes one of our bound API methods and wraps it
-    to support returning the raw `APIResponse` object directly.
-    """
-
-    @functools.wraps(func)
-    def wrapped(*args: P.args, **kwargs: P.kwargs) -> APIResponse[R]:
-        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "raw"
-
-        kwargs["extra_headers"] = extra_headers
-
-        return cast(APIResponse[R], func(*args, **kwargs))
-
-    return wrapped
-
-
-def async_to_raw_response_wrapper(func: Callable[P, Awaitable[R]]) -> Callable[P, Awaitable[AsyncAPIResponse[R]]]:
-    """Higher order function that takes one of our bound API methods and wraps it
-    to support returning the raw `APIResponse` object directly.
-    """
-
-    @functools.wraps(func)
-    async def wrapped(*args: P.args, **kwargs: P.kwargs) -> AsyncAPIResponse[R]:
-        extra_headers: dict[str, str] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "raw"
-
-        kwargs["extra_headers"] = extra_headers
-
-        return cast(AsyncAPIResponse[R], await func(*args, **kwargs))
-
-    return wrapped
-
-
-def to_custom_raw_response_wrapper(
-    func: Callable[P, object],
-    response_cls: type[_APIResponseT],
-) -> Callable[P, _APIResponseT]:
-    """Higher order function that takes one of our bound API methods and an `APIResponse` class
-    and wraps the method to support returning the given response class directly.
-
-    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
-    """
-
-    @functools.wraps(func)
-    def wrapped(*args: P.args, **kwargs: P.kwargs) -> _APIResponseT:
-        extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "raw"
-        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
-
-        kwargs["extra_headers"] = extra_headers
-
-        return cast(_APIResponseT, func(*args, **kwargs))
-
-    return wrapped
-
-
-def async_to_custom_raw_response_wrapper(
-    func: Callable[P, Awaitable[object]],
-    response_cls: type[_AsyncAPIResponseT],
-) -> Callable[P, Awaitable[_AsyncAPIResponseT]]:
-    """Higher order function that takes one of our bound API methods and an `APIResponse` class
-    and wraps the method to support returning the given response class directly.
-
-    Note: the given `response_cls` *must* be concrete, e.g. `class BinaryAPIResponse(APIResponse[bytes])`
-    """
-
-    @functools.wraps(func)
-    def wrapped(*args: P.args, **kwargs: P.kwargs) -> Awaitable[_AsyncAPIResponseT]:
-        extra_headers: dict[str, Any] = {**(cast(Any, kwargs.get("extra_headers")) or {})}
-        extra_headers[RAW_RESPONSE_HEADER] = "raw"
-        extra_headers[OVERRIDE_CAST_TO_HEADER] = response_cls
-
-        kwargs["extra_headers"] = extra_headers
-
-        return cast(Awaitable[_AsyncAPIResponseT], func(*args, **kwargs))
-
-    return wrapped
-
-
-def extract_response_type(typ: type[BaseAPIResponse[Any]]) -> type:
-    """Given a type like `APIResponse[T]`, returns the generic type variable `T`.
-
-    This also handles the case where a concrete subclass is given, e.g.
-    ```py
-    class MyResponse(APIResponse[bytes]):
-        ...
-
-    extract_response_type(MyResponse) -> bytes
-    ```
-    """
-    return extract_type_var_from_base(
-        typ,
-        generic_bases=cast("tuple[type, ...]", (BaseAPIResponse, APIResponse, AsyncAPIResponse)),
-        index=0,
-    )
diff --git a/openai/_streaming.py b/openai/_streaming.py
deleted file mode 100644
index 0fda992c..00000000
--- a/openai/_streaming.py
+++ /dev/null
@@ -1,410 +0,0 @@
-# Note: initially copied from https://github.com/florimondmanca/httpx-sse/blob/master/src/httpx_sse/_decoders.py
-from __future__ import annotations
-
-import json
-import inspect
-from types import TracebackType
-from typing import TYPE_CHECKING, Any, Generic, TypeVar, Iterator, AsyncIterator, cast
-from typing_extensions import Self, Protocol, TypeGuard, override, get_origin, runtime_checkable
-
-import httpx
-
-from ._utils import is_mapping, extract_type_var_from_base
-from ._exceptions import APIError
-
-if TYPE_CHECKING:
-    from ._client import OpenAI, AsyncOpenAI
-
-
-_T = TypeVar("_T")
-
-
-class Stream(Generic[_T]):
-    """Provides the core interface to iterate over a synchronous stream response."""
-
-    response: httpx.Response
-
-    _decoder: SSEBytesDecoder
-
-    def __init__(
-        self,
-        *,
-        cast_to: type[_T],
-        response: httpx.Response,
-        client: OpenAI,
-    ) -> None:
-        self.response = response
-        self._cast_to = cast_to
-        self._client = client
-        self._decoder = client._make_sse_decoder()
-        self._iterator = self.__stream__()
-
-    def __next__(self) -> _T:
-        return self._iterator.__next__()
-
-    def __iter__(self) -> Iterator[_T]:
-        for item in self._iterator:
-            yield item
-
-    def _iter_events(self) -> Iterator[ServerSentEvent]:
-        yield from self._decoder.iter_bytes(self.response.iter_bytes())
-
-    def __stream__(self) -> Iterator[_T]:
-        cast_to = cast(Any, self._cast_to)
-        response = self.response
-        process_data = self._client._process_response_data
-        iterator = self._iter_events()
-
-        for sse in iterator:
-            if sse.data.startswith("[DONE]"):
-                break
-
-            if sse.event is None:
-                data = sse.json()
-                if is_mapping(data) and data.get("error"):
-                    message = None
-                    error = data.get("error")
-                    if is_mapping(error):
-                        message = error.get("message")
-                    if not message or not isinstance(message, str):
-                        message = "An error occurred during streaming"
-
-                    raise APIError(
-                        message=message,
-                        request=self.response.request,
-                        body=data["error"],
-                    )
-
-                yield process_data(data=data, cast_to=cast_to, response=response)
-
-            else:
-                data = sse.json()
-
-                if sse.event == "error" and is_mapping(data) and data.get("error"):
-                    message = None
-                    error = data.get("error")
-                    if is_mapping(error):
-                        message = error.get("message")
-                    if not message or not isinstance(message, str):
-                        message = "An error occurred during streaming"
-
-                    raise APIError(
-                        message=message,
-                        request=self.response.request,
-                        body=data["error"],
-                    )
-
-                yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
-
-        # Ensure the entire stream is consumed
-        for _sse in iterator:
-            ...
-
-    def __enter__(self) -> Self:
-        return self
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        self.close()
-
-    def close(self) -> None:
-        """
-        Close the response and release the connection.
-
-        Automatically called if the response body is read to completion.
-        """
-        self.response.close()
-
-
-class AsyncStream(Generic[_T]):
-    """Provides the core interface to iterate over an asynchronous stream response."""
-
-    response: httpx.Response
-
-    _decoder: SSEDecoder | SSEBytesDecoder
-
-    def __init__(
-        self,
-        *,
-        cast_to: type[_T],
-        response: httpx.Response,
-        client: AsyncOpenAI,
-    ) -> None:
-        self.response = response
-        self._cast_to = cast_to
-        self._client = client
-        self._decoder = client._make_sse_decoder()
-        self._iterator = self.__stream__()
-
-    async def __anext__(self) -> _T:
-        return await self._iterator.__anext__()
-
-    async def __aiter__(self) -> AsyncIterator[_T]:
-        async for item in self._iterator:
-            yield item
-
-    async def _iter_events(self) -> AsyncIterator[ServerSentEvent]:
-        async for sse in self._decoder.aiter_bytes(self.response.aiter_bytes()):
-            yield sse
-
-    async def __stream__(self) -> AsyncIterator[_T]:
-        cast_to = cast(Any, self._cast_to)
-        response = self.response
-        process_data = self._client._process_response_data
-        iterator = self._iter_events()
-
-        async for sse in iterator:
-            if sse.data.startswith("[DONE]"):
-                break
-
-            if sse.event is None:
-                data = sse.json()
-                if is_mapping(data) and data.get("error"):
-                    message = None
-                    error = data.get("error")
-                    if is_mapping(error):
-                        message = error.get("message")
-                    if not message or not isinstance(message, str):
-                        message = "An error occurred during streaming"
-
-                    raise APIError(
-                        message=message,
-                        request=self.response.request,
-                        body=data["error"],
-                    )
-
-                yield process_data(data=data, cast_to=cast_to, response=response)
-
-            else:
-                data = sse.json()
-
-                if sse.event == "error" and is_mapping(data) and data.get("error"):
-                    message = None
-                    error = data.get("error")
-                    if is_mapping(error):
-                        message = error.get("message")
-                    if not message or not isinstance(message, str):
-                        message = "An error occurred during streaming"
-
-                    raise APIError(
-                        message=message,
-                        request=self.response.request,
-                        body=data["error"],
-                    )
-
-                yield process_data(data={"data": data, "event": sse.event}, cast_to=cast_to, response=response)
-
-        # Ensure the entire stream is consumed
-        async for _sse in iterator:
-            ...
-
-    async def __aenter__(self) -> Self:
-        return self
-
-    async def __aexit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        await self.close()
-
-    async def close(self) -> None:
-        """
-        Close the response and release the connection.
-
-        Automatically called if the response body is read to completion.
-        """
-        await self.response.aclose()
-
-
-class ServerSentEvent:
-    def __init__(
-        self,
-        *,
-        event: str | None = None,
-        data: str | None = None,
-        id: str | None = None,
-        retry: int | None = None,
-    ) -> None:
-        if data is None:
-            data = ""
-
-        self._id = id
-        self._data = data
-        self._event = event or None
-        self._retry = retry
-
-    @property
-    def event(self) -> str | None:
-        return self._event
-
-    @property
-    def id(self) -> str | None:
-        return self._id
-
-    @property
-    def retry(self) -> int | None:
-        return self._retry
-
-    @property
-    def data(self) -> str:
-        return self._data
-
-    def json(self) -> Any:
-        return json.loads(self.data)
-
-    @override
-    def __repr__(self) -> str:
-        return f"ServerSentEvent(event={self.event}, data={self.data}, id={self.id}, retry={self.retry})"
-
-
-class SSEDecoder:
-    _data: list[str]
-    _event: str | None
-    _retry: int | None
-    _last_event_id: str | None
-
-    def __init__(self) -> None:
-        self._event = None
-        self._data = []
-        self._last_event_id = None
-        self._retry = None
-
-    def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]:
-        """Given an iterator that yields raw binary data, iterate over it & yield every event encountered"""
-        for chunk in self._iter_chunks(iterator):
-            # Split before decoding so splitlines() only uses \r and \n
-            for raw_line in chunk.splitlines():
-                line = raw_line.decode("utf-8")
-                sse = self.decode(line)
-                if sse:
-                    yield sse
-
-    def _iter_chunks(self, iterator: Iterator[bytes]) -> Iterator[bytes]:
-        """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks"""
-        data = b""
-        for chunk in iterator:
-            for line in chunk.splitlines(keepends=True):
-                data += line
-                if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")):
-                    yield data
-                    data = b""
-        if data:
-            yield data
-
-    async def aiter_bytes(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[ServerSentEvent]:
-        """Given an iterator that yields raw binary data, iterate over it & yield every event encountered"""
-        async for chunk in self._aiter_chunks(iterator):
-            # Split before decoding so splitlines() only uses \r and \n
-            for raw_line in chunk.splitlines():
-                line = raw_line.decode("utf-8")
-                sse = self.decode(line)
-                if sse:
-                    yield sse
-
-    async def _aiter_chunks(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[bytes]:
-        """Given an iterator that yields raw binary data, iterate over it and yield individual SSE chunks"""
-        data = b""
-        async for chunk in iterator:
-            for line in chunk.splitlines(keepends=True):
-                data += line
-                if data.endswith((b"\r\r", b"\n\n", b"\r\n\r\n")):
-                    yield data
-                    data = b""
-        if data:
-            yield data
-
-    def decode(self, line: str) -> ServerSentEvent | None:
-        # See: https://html.spec.whatwg.org/multipage/server-sent-events.html#event-stream-interpretation  # noqa: E501
-
-        if not line:
-            if not self._event and not self._data and not self._last_event_id and self._retry is None:
-                return None
-
-            sse = ServerSentEvent(
-                event=self._event,
-                data="\n".join(self._data),
-                id=self._last_event_id,
-                retry=self._retry,
-            )
-
-            # NOTE: as per the SSE spec, do not reset last_event_id.
-            self._event = None
-            self._data = []
-            self._retry = None
-
-            return sse
-
-        if line.startswith(":"):
-            return None
-
-        fieldname, _, value = line.partition(":")
-
-        if value.startswith(" "):
-            value = value[1:]
-
-        if fieldname == "event":
-            self._event = value
-        elif fieldname == "data":
-            self._data.append(value)
-        elif fieldname == "id":
-            if "\0" in value:
-                pass
-            else:
-                self._last_event_id = value
-        elif fieldname == "retry":
-            try:
-                self._retry = int(value)
-            except (TypeError, ValueError):
-                pass
-        else:
-            pass  # Field is ignored.
-
-        return None
-
-
-@runtime_checkable
-class SSEBytesDecoder(Protocol):
-    def iter_bytes(self, iterator: Iterator[bytes]) -> Iterator[ServerSentEvent]:
-        """Given an iterator that yields raw binary data, iterate over it & yield every event encountered"""
-        ...
-
-    def aiter_bytes(self, iterator: AsyncIterator[bytes]) -> AsyncIterator[ServerSentEvent]:
-        """Given an async iterator that yields raw binary data, iterate over it & yield every event encountered"""
-        ...
-
-
-def is_stream_class_type(typ: type) -> TypeGuard[type[Stream[object]] | type[AsyncStream[object]]]:
-    """TypeGuard for determining whether or not the given type is a subclass of `Stream` / `AsyncStream`"""
-    origin = get_origin(typ) or typ
-    return inspect.isclass(origin) and issubclass(origin, (Stream, AsyncStream))
-
-
-def extract_stream_chunk_type(
-    stream_cls: type,
-    *,
-    failure_message: str | None = None,
-) -> type:
-    """Given a type like `Stream[T]`, returns the generic type variable `T`.
-
-    This also handles the case where a concrete subclass is given, e.g.
-    ```py
-    class MyStream(Stream[bytes]):
-        ...
-
-    extract_stream_chunk_type(MyStream) -> bytes
-    ```
-    """
-    from ._base_client import Stream, AsyncStream
-
-    return extract_type_var_from_base(
-        stream_cls,
-        index=0,
-        generic_bases=cast("tuple[type, ...]", (Stream, AsyncStream)),
-        failure_message=failure_message,
-    )
diff --git a/openai/_types.py b/openai/_types.py
deleted file mode 100644
index de9b1dd4..00000000
--- a/openai/_types.py
+++ /dev/null
@@ -1,222 +0,0 @@
-from __future__ import annotations
-
-from os import PathLike
-from typing import (
-    IO,
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    List,
-    Type,
-    Tuple,
-    Union,
-    Mapping,
-    TypeVar,
-    Callable,
-    Optional,
-    Sequence,
-)
-from typing_extensions import Literal, Protocol, TypeAlias, TypedDict, override, runtime_checkable
-
-import httpx
-import pydantic
-from httpx import URL, Proxy, Timeout, Response, BaseTransport, AsyncBaseTransport
-
-if TYPE_CHECKING:
-    from ._models import BaseModel
-    from ._response import APIResponse, AsyncAPIResponse
-    from ._legacy_response import HttpxBinaryResponseContent
-
-Transport = BaseTransport
-AsyncTransport = AsyncBaseTransport
-Query = Mapping[str, object]
-Body = object
-AnyMapping = Mapping[str, object]
-ModelT = TypeVar("ModelT", bound=pydantic.BaseModel)
-_T = TypeVar("_T")
-
-
-# Approximates httpx internal ProxiesTypes and RequestFiles types
-# while adding support for `PathLike` instances
-ProxiesDict = Dict["str | URL", Union[None, str, URL, Proxy]]
-ProxiesTypes = Union[str, Proxy, ProxiesDict]
-if TYPE_CHECKING:
-    Base64FileInput = Union[IO[bytes], PathLike[str]]
-    FileContent = Union[IO[bytes], bytes, PathLike[str]]
-else:
-    Base64FileInput = Union[IO[bytes], PathLike]
-    FileContent = Union[IO[bytes], bytes, PathLike]  # PathLike is not subscriptable in Python 3.8.
-FileTypes = Union[
-    # file (or bytes)
-    FileContent,
-    # (filename, file (or bytes))
-    Tuple[Optional[str], FileContent],
-    # (filename, file (or bytes), content_type)
-    Tuple[Optional[str], FileContent, Optional[str]],
-    # (filename, file (or bytes), content_type, headers)
-    Tuple[Optional[str], FileContent, Optional[str], Mapping[str, str]],
-]
-RequestFiles = Union[Mapping[str, FileTypes], Sequence[Tuple[str, FileTypes]]]
-
-# duplicate of the above but without our custom file support
-HttpxFileContent = Union[IO[bytes], bytes]
-HttpxFileTypes = Union[
-    # file (or bytes)
-    HttpxFileContent,
-    # (filename, file (or bytes))
-    Tuple[Optional[str], HttpxFileContent],
-    # (filename, file (or bytes), content_type)
-    Tuple[Optional[str], HttpxFileContent, Optional[str]],
-    # (filename, file (or bytes), content_type, headers)
-    Tuple[Optional[str], HttpxFileContent, Optional[str], Mapping[str, str]],
-]
-HttpxRequestFiles = Union[Mapping[str, HttpxFileTypes], Sequence[Tuple[str, HttpxFileTypes]]]
-
-# Workaround to support (cast_to: Type[ResponseT]) -> ResponseT
-# where ResponseT includes `None`. In order to support directly
-# passing `None`, overloads would have to be defined for every
-# method that uses `ResponseT` which would lead to an unacceptable
-# amount of code duplication and make it unreadable. See _base_client.py
-# for example usage.
-#
-# This unfortunately means that you will either have
-# to import this type and pass it explicitly:
-#
-# from openai import NoneType
-# client.get('/foo', cast_to=NoneType)
-#
-# or build it yourself:
-#
-# client.get('/foo', cast_to=type(None))
-if TYPE_CHECKING:
-    NoneType: Type[None]
-else:
-    NoneType = type(None)
-
-
-class RequestOptions(TypedDict, total=False):
-    headers: Headers
-    max_retries: int
-    timeout: float | Timeout | None
-    params: Query
-    extra_json: AnyMapping
-    idempotency_key: str
-
-
-# Sentinel class used until PEP 0661 is accepted
-class NotGiven:
-    """
-    A sentinel singleton class used to distinguish omitted keyword arguments
-    from those passed in with the value None (which may have different behavior).
-
-    For example:
-
-    ```py
-    def get(timeout: Union[int, NotGiven, None] = NotGiven()) -> Response:
-        ...
-
-
-    get(timeout=1)  # 1s timeout
-    get(timeout=None)  # No timeout
-    get()  # Default timeout behavior, which may not be statically known at the method definition.
-    ```
-    """
-
-    def __bool__(self) -> Literal[False]:
-        return False
-
-    @override
-    def __repr__(self) -> str:
-        return "NOT_GIVEN"
-
-
-NotGivenOr = Union[_T, NotGiven]
-NOT_GIVEN = NotGiven()
-
-
-class Omit:
-    """In certain situations you need to be able to represent a case where a default value has
-    to be explicitly removed and `None` is not an appropriate substitute, for example:
-
-    ```py
-    # as the default `Content-Type` header is `application/json` that will be sent
-    client.post("/upload/files", files={"file": b"my raw file content"})
-
-    # you can't explicitly override the header as it has to be dynamically generated
-    # to look something like: 'multipart/form-data; boundary=0d8382fcf5f8c3be01ca2e11002d2983'
-    client.post(..., headers={"Content-Type": "multipart/form-data"})
-
-    # instead you can remove the default `application/json` header by passing Omit
-    client.post(..., headers={"Content-Type": Omit()})
-    ```
-    """
-
-    def __bool__(self) -> Literal[False]:
-        return False
-
-
-@runtime_checkable
-class ModelBuilderProtocol(Protocol):
-    @classmethod
-    def build(
-        cls: type[_T],
-        *,
-        response: Response,
-        data: object,
-    ) -> _T:
-        ...
-
-
-Headers = Mapping[str, Union[str, Omit]]
-
-
-class HeadersLikeProtocol(Protocol):
-    def get(self, __key: str) -> str | None:
-        ...
-
-
-HeadersLike = Union[Headers, HeadersLikeProtocol]
-
-ResponseT = TypeVar(
-    "ResponseT",
-    bound=Union[
-        object,
-        str,
-        None,
-        "BaseModel",
-        List[Any],
-        Dict[str, Any],
-        Response,
-        ModelBuilderProtocol,
-        "APIResponse[Any]",
-        "AsyncAPIResponse[Any]",
-        "HttpxBinaryResponseContent",
-    ],
-)
-
-StrBytesIntFloat = Union[str, bytes, int, float]
-
-# Note: copied from Pydantic
-# https://github.com/pydantic/pydantic/blob/32ea570bf96e84234d2992e1ddf40ab8a565925a/pydantic/main.py#L49
-IncEx: TypeAlias = "set[int] | set[str] | dict[int, Any] | dict[str, Any] | None"
-
-PostParser = Callable[[Any], Any]
-
-
-@runtime_checkable
-class InheritsGeneric(Protocol):
-    """Represents a type that has inherited from `Generic`
-
-    The `__orig_bases__` property can be used to determine the resolved
-    type variable for a given base class.
-    """
-
-    __orig_bases__: tuple[_GenericAlias]
-
-
-class _GenericAlias(Protocol):
-    __origin__: type[object]
-
-
-class HttpxSendArgs(TypedDict, total=False):
-    auth: httpx.Auth
diff --git a/openai/_utils/__init__.py b/openai/_utils/__init__.py
deleted file mode 100644
index 56978941..00000000
--- a/openai/_utils/__init__.py
+++ /dev/null
@@ -1,50 +0,0 @@
-from ._sync import asyncify as asyncify
-from ._proxy import LazyProxy as LazyProxy
-from ._utils import (
-    flatten as flatten,
-    is_dict as is_dict,
-    is_list as is_list,
-    is_given as is_given,
-    is_tuple as is_tuple,
-    is_mapping as is_mapping,
-    is_tuple_t as is_tuple_t,
-    parse_date as parse_date,
-    is_iterable as is_iterable,
-    is_sequence as is_sequence,
-    coerce_float as coerce_float,
-    is_mapping_t as is_mapping_t,
-    removeprefix as removeprefix,
-    removesuffix as removesuffix,
-    extract_files as extract_files,
-    is_sequence_t as is_sequence_t,
-    required_args as required_args,
-    coerce_boolean as coerce_boolean,
-    coerce_integer as coerce_integer,
-    file_from_path as file_from_path,
-    parse_datetime as parse_datetime,
-    strip_not_given as strip_not_given,
-    deepcopy_minimal as deepcopy_minimal,
-    get_async_library as get_async_library,
-    maybe_coerce_float as maybe_coerce_float,
-    get_required_header as get_required_header,
-    maybe_coerce_boolean as maybe_coerce_boolean,
-    maybe_coerce_integer as maybe_coerce_integer,
-)
-from ._typing import (
-    is_list_type as is_list_type,
-    is_union_type as is_union_type,
-    extract_type_arg as extract_type_arg,
-    is_iterable_type as is_iterable_type,
-    is_required_type as is_required_type,
-    is_annotated_type as is_annotated_type,
-    strip_annotated_type as strip_annotated_type,
-    extract_type_var_from_base as extract_type_var_from_base,
-)
-from ._streams import consume_sync_iterator as consume_sync_iterator, consume_async_iterator as consume_async_iterator
-from ._transform import (
-    PropertyInfo as PropertyInfo,
-    transform as transform,
-    async_transform as async_transform,
-    maybe_transform as maybe_transform,
-    async_maybe_transform as async_maybe_transform,
-)
diff --git a/openai/_utils/_logs.py b/openai/_utils/_logs.py
deleted file mode 100644
index e5113fd8..00000000
--- a/openai/_utils/_logs.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import os
-import logging
-
-logger: logging.Logger = logging.getLogger("openai")
-httpx_logger: logging.Logger = logging.getLogger("httpx")
-
-
-def _basic_config() -> None:
-    # e.g. [2023-10-05 14:12:26 - openai._base_client:818 - DEBUG] HTTP Request: POST http://127.0.0.1:4010/foo/bar "200 OK"
-    logging.basicConfig(
-        format="[%(asctime)s - %(name)s:%(lineno)d - %(levelname)s] %(message)s",
-        datefmt="%Y-%m-%d %H:%M:%S",
-    )
-
-
-def setup_logging() -> None:
-    env = os.environ.get("OPENAI_LOG")
-    if env == "debug":
-        _basic_config()
-        logger.setLevel(logging.DEBUG)
-        httpx_logger.setLevel(logging.DEBUG)
-    elif env == "info":
-        _basic_config()
-        logger.setLevel(logging.INFO)
-        httpx_logger.setLevel(logging.INFO)
diff --git a/openai/_utils/_proxy.py b/openai/_utils/_proxy.py
deleted file mode 100644
index b9c12dc3..00000000
--- a/openai/_utils/_proxy.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import annotations
-
-from abc import ABC, abstractmethod
-from typing import Generic, TypeVar, Iterable, cast
-from typing_extensions import override
-
-T = TypeVar("T")
-
-
-class LazyProxy(Generic[T], ABC):
-    """Implements data methods to pretend that an instance is another instance.
-
-    This includes forwarding attribute access and othe methods.
-    """
-
-    # Note: we have to special case proxies that themselves return proxies
-    # to support using a proxy as a catch-all for any random access, e.g. `proxy.foo.bar.baz`
-
-    def __getattr__(self, attr: str) -> object:
-        proxied = self.__get_proxied__()
-        if isinstance(proxied, LazyProxy):
-            return proxied  # pyright: ignore
-        return getattr(proxied, attr)
-
-    @override
-    def __repr__(self) -> str:
-        proxied = self.__get_proxied__()
-        if isinstance(proxied, LazyProxy):
-            return proxied.__class__.__name__
-        return repr(self.__get_proxied__())
-
-    @override
-    def __str__(self) -> str:
-        proxied = self.__get_proxied__()
-        if isinstance(proxied, LazyProxy):
-            return proxied.__class__.__name__
-        return str(proxied)
-
-    @override
-    def __dir__(self) -> Iterable[str]:
-        proxied = self.__get_proxied__()
-        if isinstance(proxied, LazyProxy):
-            return []
-        return proxied.__dir__()
-
-    @property  # type: ignore
-    @override
-    def __class__(self) -> type:  # pyright: ignore
-        proxied = self.__get_proxied__()
-        if issubclass(type(proxied), LazyProxy):
-            return type(proxied)
-        return proxied.__class__
-
-    def __get_proxied__(self) -> T:
-        return self.__load__()
-
-    def __as_proxied__(self) -> T:
-        """Helper method that returns the current proxy, typed as the loaded object"""
-        return cast(T, self)
-
-    @abstractmethod
-    def __load__(self) -> T:
-        ...
diff --git a/openai/_utils/_streams.py b/openai/_utils/_streams.py
deleted file mode 100644
index f4a0208f..00000000
--- a/openai/_utils/_streams.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from typing import Any
-from typing_extensions import Iterator, AsyncIterator
-
-
-def consume_sync_iterator(iterator: Iterator[Any]) -> None:
-    for _ in iterator:
-        ...
-
-
-async def consume_async_iterator(iterator: AsyncIterator[Any]) -> None:
-    async for _ in iterator:
-        ...
diff --git a/openai/_utils/_sync.py b/openai/_utils/_sync.py
deleted file mode 100644
index 595924e5..00000000
--- a/openai/_utils/_sync.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from __future__ import annotations
-
-import functools
-from typing import TypeVar, Callable, Awaitable
-from typing_extensions import ParamSpec
-
-import anyio
-import anyio.to_thread
-
-T_Retval = TypeVar("T_Retval")
-T_ParamSpec = ParamSpec("T_ParamSpec")
-
-
-# copied from `asyncer`, https://github.com/tiangolo/asyncer
-def asyncify(
-    function: Callable[T_ParamSpec, T_Retval],
-    *,
-    cancellable: bool = False,
-    limiter: anyio.CapacityLimiter | None = None,
-) -> Callable[T_ParamSpec, Awaitable[T_Retval]]:
-    """
-    Take a blocking function and create an async one that receives the same
-    positional and keyword arguments, and that when called, calls the original function
-    in a worker thread using `anyio.to_thread.run_sync()`. Internally,
-    `asyncer.asyncify()` uses the same `anyio.to_thread.run_sync()`, but it supports
-    keyword arguments additional to positional arguments and it adds better support for
-    autocompletion and inline errors for the arguments of the function called and the
-    return value.
-
-    If the `cancellable` option is enabled and the task waiting for its completion is
-    cancelled, the thread will still run its course but its return value (or any raised
-    exception) will be ignored.
-
-    Use it like this:
-
-    ```Python
-    def do_work(arg1, arg2, kwarg1="", kwarg2="") -> str:
-        # Do work
-        return "Some result"
-
-
-    result = await to_thread.asyncify(do_work)("spam", "ham", kwarg1="a", kwarg2="b")
-    print(result)
-    ```
-
-    ## Arguments
-
-    `function`: a blocking regular callable (e.g. a function)
-    `cancellable`: `True` to allow cancellation of the operation
-    `limiter`: capacity limiter to use to limit the total amount of threads running
-        (if omitted, the default limiter is used)
-
-    ## Return
-
-    An async function that takes the same positional and keyword arguments as the
-    original one, that when called runs the same original function in a thread worker
-    and returns the result.
-    """
-
-    async def wrapper(*args: T_ParamSpec.args, **kwargs: T_ParamSpec.kwargs) -> T_Retval:
-        partial_f = functools.partial(function, *args, **kwargs)
-        return await anyio.to_thread.run_sync(partial_f, cancellable=cancellable, limiter=limiter)
-
-    return wrapper
diff --git a/openai/_utils/_transform.py b/openai/_utils/_transform.py
deleted file mode 100644
index 47e262a5..00000000
--- a/openai/_utils/_transform.py
+++ /dev/null
@@ -1,382 +0,0 @@
-from __future__ import annotations
-
-import io
-import base64
-import pathlib
-from typing import Any, Mapping, TypeVar, cast
-from datetime import date, datetime
-from typing_extensions import Literal, get_args, override, get_type_hints
-
-import anyio
-import pydantic
-
-from ._utils import (
-    is_list,
-    is_mapping,
-    is_iterable,
-)
-from .._files import is_base64_file_input
-from ._typing import (
-    is_list_type,
-    is_union_type,
-    extract_type_arg,
-    is_iterable_type,
-    is_required_type,
-    is_annotated_type,
-    strip_annotated_type,
-)
-from .._compat import model_dump, is_typeddict
-
-_T = TypeVar("_T")
-
-
-# TODO: support for drilling globals() and locals()
-# TODO: ensure works correctly with forward references in all cases
-
-
-PropertyFormat = Literal["iso8601", "base64", "custom"]
-
-
-class PropertyInfo:
-    """Metadata class to be used in Annotated types to provide information about a given type.
-
-    For example:
-
-    class MyParams(TypedDict):
-        account_holder_name: Annotated[str, PropertyInfo(alias='accountHolderName')]
-
-    This means that {'account_holder_name': 'Robert'} will be transformed to {'accountHolderName': 'Robert'} before being sent to the API.
-    """
-
-    alias: str | None
-    format: PropertyFormat | None
-    format_template: str | None
-    discriminator: str | None
-
-    def __init__(
-        self,
-        *,
-        alias: str | None = None,
-        format: PropertyFormat | None = None,
-        format_template: str | None = None,
-        discriminator: str | None = None,
-    ) -> None:
-        self.alias = alias
-        self.format = format
-        self.format_template = format_template
-        self.discriminator = discriminator
-
-    @override
-    def __repr__(self) -> str:
-        return f"{self.__class__.__name__}(alias='{self.alias}', format={self.format}, format_template='{self.format_template}', discriminator='{self.discriminator}')"
-
-
-def maybe_transform(
-    data: object,
-    expected_type: object,
-) -> Any | None:
-    """Wrapper over `transform()` that allows `None` to be passed.
-
-    See `transform()` for more details.
-    """
-    if data is None:
-        return None
-    return transform(data, expected_type)
-
-
-# Wrapper over _transform_recursive providing fake types
-def transform(
-    data: _T,
-    expected_type: object,
-) -> _T:
-    """Transform dictionaries based off of type information from the given type, for example:
-
-    ```py
-    class Params(TypedDict, total=False):
-        card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]]
-
-
-    transformed = transform({"card_id": "<my card ID>"}, Params)
-    # {'cardID': '<my card ID>'}
-    ```
-
-    Any keys / data that does not have type information given will be included as is.
-
-    It should be noted that the transformations that this function does are not represented in the type system.
-    """
-    transformed = _transform_recursive(data, annotation=cast(type, expected_type))
-    return cast(_T, transformed)
-
-
-def _get_annotated_type(type_: type) -> type | None:
-    """If the given type is an `Annotated` type then it is returned, if not `None` is returned.
-
-    This also unwraps the type when applicable, e.g. `Required[Annotated[T, ...]]`
-    """
-    if is_required_type(type_):
-        # Unwrap `Required[Annotated[T, ...]]` to `Annotated[T, ...]`
-        type_ = get_args(type_)[0]
-
-    if is_annotated_type(type_):
-        return type_
-
-    return None
-
-
-def _maybe_transform_key(key: str, type_: type) -> str:
-    """Transform the given `data` based on the annotations provided in `type_`.
-
-    Note: this function only looks at `Annotated` types that contain `PropertInfo` metadata.
-    """
-    annotated_type = _get_annotated_type(type_)
-    if annotated_type is None:
-        # no `Annotated` definition for this type, no transformation needed
-        return key
-
-    # ignore the first argument as it is the actual type
-    annotations = get_args(annotated_type)[1:]
-    for annotation in annotations:
-        if isinstance(annotation, PropertyInfo) and annotation.alias is not None:
-            return annotation.alias
-
-    return key
-
-
-def _transform_recursive(
-    data: object,
-    *,
-    annotation: type,
-    inner_type: type | None = None,
-) -> object:
-    """Transform the given data against the expected type.
-
-    Args:
-        annotation: The direct type annotation given to the particular piece of data.
-            This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc
-
-        inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type
-            is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in
-            the list can be transformed using the metadata from the container type.
-
-            Defaults to the same value as the `annotation` argument.
-    """
-    if inner_type is None:
-        inner_type = annotation
-
-    stripped_type = strip_annotated_type(inner_type)
-    if is_typeddict(stripped_type) and is_mapping(data):
-        return _transform_typeddict(data, stripped_type)
-
-    if (
-        # List[T]
-        (is_list_type(stripped_type) and is_list(data))
-        # Iterable[T]
-        or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
-    ):
-        inner_type = extract_type_arg(stripped_type, 0)
-        return [_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
-
-    if is_union_type(stripped_type):
-        # For union types we run the transformation against all subtypes to ensure that everything is transformed.
-        #
-        # TODO: there may be edge cases where the same normalized field name will transform to two different names
-        # in different subtypes.
-        for subtype in get_args(stripped_type):
-            data = _transform_recursive(data, annotation=annotation, inner_type=subtype)
-        return data
-
-    if isinstance(data, pydantic.BaseModel):
-        return model_dump(data, exclude_unset=True)
-
-    annotated_type = _get_annotated_type(annotation)
-    if annotated_type is None:
-        return data
-
-    # ignore the first argument as it is the actual type
-    annotations = get_args(annotated_type)[1:]
-    for annotation in annotations:
-        if isinstance(annotation, PropertyInfo) and annotation.format is not None:
-            return _format_data(data, annotation.format, annotation.format_template)
-
-    return data
-
-
-def _format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object:
-    if isinstance(data, (date, datetime)):
-        if format_ == "iso8601":
-            return data.isoformat()
-
-        if format_ == "custom" and format_template is not None:
-            return data.strftime(format_template)
-
-    if format_ == "base64" and is_base64_file_input(data):
-        binary: str | bytes | None = None
-
-        if isinstance(data, pathlib.Path):
-            binary = data.read_bytes()
-        elif isinstance(data, io.IOBase):
-            binary = data.read()
-
-            if isinstance(binary, str):  # type: ignore[unreachable]
-                binary = binary.encode()
-
-        if not isinstance(binary, bytes):
-            raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}")
-
-        return base64.b64encode(binary).decode("ascii")
-
-    return data
-
-
-def _transform_typeddict(
-    data: Mapping[str, object],
-    expected_type: type,
-) -> Mapping[str, object]:
-    result: dict[str, object] = {}
-    annotations = get_type_hints(expected_type, include_extras=True)
-    for key, value in data.items():
-        type_ = annotations.get(key)
-        if type_ is None:
-            # we do not have a type annotation for this field, leave it as is
-            result[key] = value
-        else:
-            result[_maybe_transform_key(key, type_)] = _transform_recursive(value, annotation=type_)
-    return result
-
-
-async def async_maybe_transform(
-    data: object,
-    expected_type: object,
-) -> Any | None:
-    """Wrapper over `async_transform()` that allows `None` to be passed.
-
-    See `async_transform()` for more details.
-    """
-    if data is None:
-        return None
-    return await async_transform(data, expected_type)
-
-
-async def async_transform(
-    data: _T,
-    expected_type: object,
-) -> _T:
-    """Transform dictionaries based off of type information from the given type, for example:
-
-    ```py
-    class Params(TypedDict, total=False):
-        card_id: Required[Annotated[str, PropertyInfo(alias="cardID")]]
-
-
-    transformed = transform({"card_id": "<my card ID>"}, Params)
-    # {'cardID': '<my card ID>'}
-    ```
-
-    Any keys / data that does not have type information given will be included as is.
-
-    It should be noted that the transformations that this function does are not represented in the type system.
-    """
-    transformed = await _async_transform_recursive(data, annotation=cast(type, expected_type))
-    return cast(_T, transformed)
-
-
-async def _async_transform_recursive(
-    data: object,
-    *,
-    annotation: type,
-    inner_type: type | None = None,
-) -> object:
-    """Transform the given data against the expected type.
-
-    Args:
-        annotation: The direct type annotation given to the particular piece of data.
-            This may or may not be wrapped in metadata types, e.g. `Required[T]`, `Annotated[T, ...]` etc
-
-        inner_type: If applicable, this is the "inside" type. This is useful in certain cases where the outside type
-            is a container type such as `List[T]`. In that case `inner_type` should be set to `T` so that each entry in
-            the list can be transformed using the metadata from the container type.
-
-            Defaults to the same value as the `annotation` argument.
-    """
-    if inner_type is None:
-        inner_type = annotation
-
-    stripped_type = strip_annotated_type(inner_type)
-    if is_typeddict(stripped_type) and is_mapping(data):
-        return await _async_transform_typeddict(data, stripped_type)
-
-    if (
-        # List[T]
-        (is_list_type(stripped_type) and is_list(data))
-        # Iterable[T]
-        or (is_iterable_type(stripped_type) and is_iterable(data) and not isinstance(data, str))
-    ):
-        inner_type = extract_type_arg(stripped_type, 0)
-        return [await _async_transform_recursive(d, annotation=annotation, inner_type=inner_type) for d in data]
-
-    if is_union_type(stripped_type):
-        # For union types we run the transformation against all subtypes to ensure that everything is transformed.
-        #
-        # TODO: there may be edge cases where the same normalized field name will transform to two different names
-        # in different subtypes.
-        for subtype in get_args(stripped_type):
-            data = await _async_transform_recursive(data, annotation=annotation, inner_type=subtype)
-        return data
-
-    if isinstance(data, pydantic.BaseModel):
-        return model_dump(data, exclude_unset=True)
-
-    annotated_type = _get_annotated_type(annotation)
-    if annotated_type is None:
-        return data
-
-    # ignore the first argument as it is the actual type
-    annotations = get_args(annotated_type)[1:]
-    for annotation in annotations:
-        if isinstance(annotation, PropertyInfo) and annotation.format is not None:
-            return await _async_format_data(data, annotation.format, annotation.format_template)
-
-    return data
-
-
-async def _async_format_data(data: object, format_: PropertyFormat, format_template: str | None) -> object:
-    if isinstance(data, (date, datetime)):
-        if format_ == "iso8601":
-            return data.isoformat()
-
-        if format_ == "custom" and format_template is not None:
-            return data.strftime(format_template)
-
-    if format_ == "base64" and is_base64_file_input(data):
-        binary: str | bytes | None = None
-
-        if isinstance(data, pathlib.Path):
-            binary = await anyio.Path(data).read_bytes()
-        elif isinstance(data, io.IOBase):
-            binary = data.read()
-
-            if isinstance(binary, str):  # type: ignore[unreachable]
-                binary = binary.encode()
-
-        if not isinstance(binary, bytes):
-            raise RuntimeError(f"Could not read bytes from {data}; Received {type(binary)}")
-
-        return base64.b64encode(binary).decode("ascii")
-
-    return data
-
-
-async def _async_transform_typeddict(
-    data: Mapping[str, object],
-    expected_type: type,
-) -> Mapping[str, object]:
-    result: dict[str, object] = {}
-    annotations = get_type_hints(expected_type, include_extras=True)
-    for key, value in data.items():
-        type_ = annotations.get(key)
-        if type_ is None:
-            # we do not have a type annotation for this field, leave it as is
-            result[key] = value
-        else:
-            result[_maybe_transform_key(key, type_)] = await _async_transform_recursive(value, annotation=type_)
-    return result
diff --git a/openai/_utils/_typing.py b/openai/_utils/_typing.py
deleted file mode 100644
index c036991f..00000000
--- a/openai/_utils/_typing.py
+++ /dev/null
@@ -1,120 +0,0 @@
-from __future__ import annotations
-
-from typing import Any, TypeVar, Iterable, cast
-from collections import abc as _c_abc
-from typing_extensions import Required, Annotated, get_args, get_origin
-
-from .._types import InheritsGeneric
-from .._compat import is_union as _is_union
-
-
-def is_annotated_type(typ: type) -> bool:
-    return get_origin(typ) == Annotated
-
-
-def is_list_type(typ: type) -> bool:
-    return (get_origin(typ) or typ) == list
-
-
-def is_iterable_type(typ: type) -> bool:
-    """If the given type is `typing.Iterable[T]`"""
-    origin = get_origin(typ) or typ
-    return origin == Iterable or origin == _c_abc.Iterable
-
-
-def is_union_type(typ: type) -> bool:
-    return _is_union(get_origin(typ))
-
-
-def is_required_type(typ: type) -> bool:
-    return get_origin(typ) == Required
-
-
-def is_typevar(typ: type) -> bool:
-    # type ignore is required because type checkers
-    # think this expression will always return False
-    return type(typ) == TypeVar  # type: ignore
-
-
-# Extracts T from Annotated[T, ...] or from Required[Annotated[T, ...]]
-def strip_annotated_type(typ: type) -> type:
-    if is_required_type(typ) or is_annotated_type(typ):
-        return strip_annotated_type(cast(type, get_args(typ)[0]))
-
-    return typ
-
-
-def extract_type_arg(typ: type, index: int) -> type:
-    args = get_args(typ)
-    try:
-        return cast(type, args[index])
-    except IndexError as err:
-        raise RuntimeError(f"Expected type {typ} to have a type argument at index {index} but it did not") from err
-
-
-def extract_type_var_from_base(
-    typ: type,
-    *,
-    generic_bases: tuple[type, ...],
-    index: int,
-    failure_message: str | None = None,
-) -> type:
-    """Given a type like `Foo[T]`, returns the generic type variable `T`.
-
-    This also handles the case where a concrete subclass is given, e.g.
-    ```py
-    class MyResponse(Foo[bytes]):
-        ...
-
-    extract_type_var(MyResponse, bases=(Foo,), index=0) -> bytes
-    ```
-
-    And where a generic subclass is given:
-    ```py
-    _T = TypeVar('_T')
-    class MyResponse(Foo[_T]):
-        ...
-
-    extract_type_var(MyResponse[bytes], bases=(Foo,), index=0) -> bytes
-    ```
-    """
-    cls = cast(object, get_origin(typ) or typ)
-    if cls in generic_bases:
-        # we're given the class directly
-        return extract_type_arg(typ, index)
-
-    # if a subclass is given
-    # ---
-    # this is needed as __orig_bases__ is not present in the typeshed stubs
-    # because it is intended to be for internal use only, however there does
-    # not seem to be a way to resolve generic TypeVars for inherited subclasses
-    # without using it.
-    if isinstance(cls, InheritsGeneric):
-        target_base_class: Any | None = None
-        for base in cls.__orig_bases__:
-            if base.__origin__ in generic_bases:
-                target_base_class = base
-                break
-
-        if target_base_class is None:
-            raise RuntimeError(
-                "Could not find the generic base class;\n"
-                "This should never happen;\n"
-                f"Does {cls} inherit from one of {generic_bases} ?"
-            )
-
-        extracted = extract_type_arg(target_base_class, index)
-        if is_typevar(extracted):
-            # If the extracted type argument is itself a type variable
-            # then that means the subclass itself is generic, so we have
-            # to resolve the type argument from the class itself, not
-            # the base class.
-            #
-            # Note: if there is more than 1 type argument, the subclass could
-            # change the ordering of the type arguments, this is not currently
-            # supported.
-            return extract_type_arg(typ, index)
-
-        return extracted
-
-    raise RuntimeError(failure_message or f"Could not resolve inner type variable at index {index} for {typ}")
diff --git a/openai/_utils/_utils.py b/openai/_utils/_utils.py
deleted file mode 100644
index 93c95517..00000000
--- a/openai/_utils/_utils.py
+++ /dev/null
@@ -1,391 +0,0 @@
-from __future__ import annotations
-
-import os
-import re
-import inspect
-import functools
-from typing import (
-    Any,
-    Tuple,
-    Mapping,
-    TypeVar,
-    Callable,
-    Iterable,
-    Sequence,
-    cast,
-    overload,
-)
-from pathlib import Path
-from typing_extensions import TypeGuard
-
-import sniffio
-
-from .._types import Headers, NotGiven, FileTypes, NotGivenOr, HeadersLike
-from .._compat import parse_date as parse_date, parse_datetime as parse_datetime
-
-_T = TypeVar("_T")
-_TupleT = TypeVar("_TupleT", bound=Tuple[object, ...])
-_MappingT = TypeVar("_MappingT", bound=Mapping[str, object])
-_SequenceT = TypeVar("_SequenceT", bound=Sequence[object])
-CallableT = TypeVar("CallableT", bound=Callable[..., Any])
-
-
-def flatten(t: Iterable[Iterable[_T]]) -> list[_T]:
-    return [item for sublist in t for item in sublist]
-
-
-def extract_files(
-    # TODO: this needs to take Dict but variance issues.....
-    # create protocol type ?
-    query: Mapping[str, object],
-    *,
-    paths: Sequence[Sequence[str]],
-) -> list[tuple[str, FileTypes]]:
-    """Recursively extract files from the given dictionary based on specified paths.
-
-    A path may look like this ['foo', 'files', '<array>', 'data'].
-
-    Note: this mutates the given dictionary.
-    """
-    files: list[tuple[str, FileTypes]] = []
-    for path in paths:
-        files.extend(_extract_items(query, path, index=0, flattened_key=None))
-    return files
-
-
-def _extract_items(
-    obj: object,
-    path: Sequence[str],
-    *,
-    index: int,
-    flattened_key: str | None,
-) -> list[tuple[str, FileTypes]]:
-    try:
-        key = path[index]
-    except IndexError:
-        if isinstance(obj, NotGiven):
-            # no value was provided - we can safely ignore
-            return []
-
-        # cyclical import
-        from .._files import assert_is_file_content
-
-        # We have exhausted the path, return the entry we found.
-        assert_is_file_content(obj, key=flattened_key)
-        assert flattened_key is not None
-        return [(flattened_key, cast(FileTypes, obj))]
-
-    index += 1
-    if is_dict(obj):
-        try:
-            # We are at the last entry in the path so we must remove the field
-            if (len(path)) == index:
-                item = obj.pop(key)
-            else:
-                item = obj[key]
-        except KeyError:
-            # Key was not present in the dictionary, this is not indicative of an error
-            # as the given path may not point to a required field. We also do not want
-            # to enforce required fields as the API may differ from the spec in some cases.
-            return []
-        if flattened_key is None:
-            flattened_key = key
-        else:
-            flattened_key += f"[{key}]"
-        return _extract_items(
-            item,
-            path,
-            index=index,
-            flattened_key=flattened_key,
-        )
-    elif is_list(obj):
-        if key != "<array>":
-            return []
-
-        return flatten(
-            [
-                _extract_items(
-                    item,
-                    path,
-                    index=index,
-                    flattened_key=flattened_key + "[]" if flattened_key is not None else "[]",
-                )
-                for item in obj
-            ]
-        )
-
-    # Something unexpected was passed, just ignore it.
-    return []
-
-
-def is_given(obj: NotGivenOr[_T]) -> TypeGuard[_T]:
-    return not isinstance(obj, NotGiven)
-
-
-# Type safe methods for narrowing types with TypeVars.
-# The default narrowing for isinstance(obj, dict) is dict[unknown, unknown],
-# however this cause Pyright to rightfully report errors. As we know we don't
-# care about the contained types we can safely use `object` in it's place.
-#
-# There are two separate functions defined, `is_*` and `is_*_t` for different use cases.
-# `is_*` is for when you're dealing with an unknown input
-# `is_*_t` is for when you're narrowing a known union type to a specific subset
-
-
-def is_tuple(obj: object) -> TypeGuard[tuple[object, ...]]:
-    return isinstance(obj, tuple)
-
-
-def is_tuple_t(obj: _TupleT | object) -> TypeGuard[_TupleT]:
-    return isinstance(obj, tuple)
-
-
-def is_sequence(obj: object) -> TypeGuard[Sequence[object]]:
-    return isinstance(obj, Sequence)
-
-
-def is_sequence_t(obj: _SequenceT | object) -> TypeGuard[_SequenceT]:
-    return isinstance(obj, Sequence)
-
-
-def is_mapping(obj: object) -> TypeGuard[Mapping[str, object]]:
-    return isinstance(obj, Mapping)
-
-
-def is_mapping_t(obj: _MappingT | object) -> TypeGuard[_MappingT]:
-    return isinstance(obj, Mapping)
-
-
-def is_dict(obj: object) -> TypeGuard[dict[object, object]]:
-    return isinstance(obj, dict)
-
-
-def is_list(obj: object) -> TypeGuard[list[object]]:
-    return isinstance(obj, list)
-
-
-def is_iterable(obj: object) -> TypeGuard[Iterable[object]]:
-    return isinstance(obj, Iterable)
-
-
-def deepcopy_minimal(item: _T) -> _T:
-    """Minimal reimplementation of copy.deepcopy() that will only copy certain object types:
-
-    - mappings, e.g. `dict`
-    - list
-
-    This is done for performance reasons.
-    """
-    if is_mapping(item):
-        return cast(_T, {k: deepcopy_minimal(v) for k, v in item.items()})
-    if is_list(item):
-        return cast(_T, [deepcopy_minimal(entry) for entry in item])
-    return item
-
-
-# copied from https://github.com/Rapptz/RoboDanny
-def human_join(seq: Sequence[str], *, delim: str = ", ", final: str = "or") -> str:
-    size = len(seq)
-    if size == 0:
-        return ""
-
-    if size == 1:
-        return seq[0]
-
-    if size == 2:
-        return f"{seq[0]} {final} {seq[1]}"
-
-    return delim.join(seq[:-1]) + f" {final} {seq[-1]}"
-
-
-def quote(string: str) -> str:
-    """Add single quotation marks around the given string. Does *not* do any escaping."""
-    return f"'{string}'"
-
-
-def required_args(*variants: Sequence[str]) -> Callable[[CallableT], CallableT]:
-    """Decorator to enforce a given set of arguments or variants of arguments are passed to the decorated function.
-
-    Useful for enforcing runtime validation of overloaded functions.
-
-    Example usage:
-    ```py
-    @overload
-    def foo(*, a: str) -> str:
-        ...
-
-
-    @overload
-    def foo(*, b: bool) -> str:
-        ...
-
-
-    # This enforces the same constraints that a static type checker would
-    # i.e. that either a or b must be passed to the function
-    @required_args(["a"], ["b"])
-    def foo(*, a: str | None = None, b: bool | None = None) -> str:
-        ...
-    ```
-    """
-
-    def inner(func: CallableT) -> CallableT:
-        params = inspect.signature(func).parameters
-        positional = [
-            name
-            for name, param in params.items()
-            if param.kind
-            in {
-                param.POSITIONAL_ONLY,
-                param.POSITIONAL_OR_KEYWORD,
-            }
-        ]
-
-        @functools.wraps(func)
-        def wrapper(*args: object, **kwargs: object) -> object:
-            given_params: set[str] = set()
-            for i, _ in enumerate(args):
-                try:
-                    given_params.add(positional[i])
-                except IndexError:
-                    raise TypeError(
-                        f"{func.__name__}() takes {len(positional)} argument(s) but {len(args)} were given"
-                    ) from None
-
-            for key in kwargs.keys():
-                given_params.add(key)
-
-            for variant in variants:
-                matches = all((param in given_params for param in variant))
-                if matches:
-                    break
-            else:  # no break
-                if len(variants) > 1:
-                    variations = human_join(
-                        ["(" + human_join([quote(arg) for arg in variant], final="and") + ")" for variant in variants]
-                    )
-                    msg = f"Missing required arguments; Expected either {variations} arguments to be given"
-                else:
-                    # TODO: this error message is not deterministic
-                    missing = list(set(variants[0]) - given_params)
-                    if len(missing) > 1:
-                        msg = f"Missing required arguments: {human_join([quote(arg) for arg in missing])}"
-                    else:
-                        msg = f"Missing required argument: {quote(missing[0])}"
-                raise TypeError(msg)
-            return func(*args, **kwargs)
-
-        return wrapper  # type: ignore
-
-    return inner
-
-
-_K = TypeVar("_K")
-_V = TypeVar("_V")
-
-
-@overload
-def strip_not_given(obj: None) -> None:
-    ...
-
-
-@overload
-def strip_not_given(obj: Mapping[_K, _V | NotGiven]) -> dict[_K, _V]:
-    ...
-
-
-@overload
-def strip_not_given(obj: object) -> object:
-    ...
-
-
-def strip_not_given(obj: object | None) -> object:
-    """Remove all top-level keys where their values are instances of `NotGiven`"""
-    if obj is None:
-        return None
-
-    if not is_mapping(obj):
-        return obj
-
-    return {key: value for key, value in obj.items() if not isinstance(value, NotGiven)}
-
-
-def coerce_integer(val: str) -> int:
-    return int(val, base=10)
-
-
-def coerce_float(val: str) -> float:
-    return float(val)
-
-
-def coerce_boolean(val: str) -> bool:
-    return val == "true" or val == "1" or val == "on"
-
-
-def maybe_coerce_integer(val: str | None) -> int | None:
-    if val is None:
-        return None
-    return coerce_integer(val)
-
-
-def maybe_coerce_float(val: str | None) -> float | None:
-    if val is None:
-        return None
-    return coerce_float(val)
-
-
-def maybe_coerce_boolean(val: str | None) -> bool | None:
-    if val is None:
-        return None
-    return coerce_boolean(val)
-
-
-def removeprefix(string: str, prefix: str) -> str:
-    """Remove a prefix from a string.
-
-    Backport of `str.removeprefix` for Python < 3.9
-    """
-    if string.startswith(prefix):
-        return string[len(prefix) :]
-    return string
-
-
-def removesuffix(string: str, suffix: str) -> str:
-    """Remove a suffix from a string.
-
-    Backport of `str.removesuffix` for Python < 3.9
-    """
-    if string.endswith(suffix):
-        return string[: -len(suffix)]
-    return string
-
-
-def file_from_path(path: str) -> FileTypes:
-    contents = Path(path).read_bytes()
-    file_name = os.path.basename(path)
-    return (file_name, contents)
-
-
-def get_required_header(headers: HeadersLike, header: str) -> str:
-    lower_header = header.lower()
-    if isinstance(headers, Mapping):
-        headers = cast(Headers, headers)
-        for k, v in headers.items():
-            if k.lower() == lower_header and isinstance(v, str):
-                return v
-
-    """ to deal with the case where the header looks like Stainless-Event-Id """
-    intercaps_header = re.sub(r"([^\w])(\w)", lambda pat: pat.group(1) + pat.group(2).upper(), header.capitalize())
-
-    for normalized_header in [header, lower_header, header.upper(), intercaps_header]:
-        value = headers.get(normalized_header)
-        if value:
-            return value
-
-    raise ValueError(f"Could not find {header} header")
-
-
-def get_async_library() -> str:
-    try:
-        return sniffio.current_async_library()
-    except Exception:
-        return "false"
diff --git a/openai/_version.py b/openai/_version.py
deleted file mode 100644
index 85803a60..00000000
--- a/openai/_version.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-__title__ = "openai"
-__version__ = "1.16.2"  # x-release-please-version
diff --git a/openai/cli/__init__.py b/openai/cli/__init__.py
deleted file mode 100644
index d453d5e1..00000000
--- a/openai/cli/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from ._cli import main as main
diff --git a/openai/cli/_api/__init__.py b/openai/cli/_api/__init__.py
deleted file mode 100644
index 56a0260a..00000000
--- a/openai/cli/_api/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from ._main import register_commands as register_commands
diff --git a/openai/cli/_api/_main.py b/openai/cli/_api/_main.py
deleted file mode 100644
index fe5a5e6f..00000000
--- a/openai/cli/_api/_main.py
+++ /dev/null
@@ -1,16 +0,0 @@
-from __future__ import annotations
-
-from argparse import ArgumentParser
-
-from . import chat, audio, files, image, models, completions
-
-
-def register_commands(parser: ArgumentParser) -> None:
-    subparsers = parser.add_subparsers(help="All API subcommands")
-
-    chat.register(subparsers)
-    image.register(subparsers)
-    audio.register(subparsers)
-    files.register(subparsers)
-    models.register(subparsers)
-    completions.register(subparsers)
diff --git a/openai/cli/_api/audio.py b/openai/cli/_api/audio.py
deleted file mode 100644
index 90d21b99..00000000
--- a/openai/cli/_api/audio.py
+++ /dev/null
@@ -1,94 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any, Optional, cast
-from argparse import ArgumentParser
-
-from .._utils import get_client, print_model
-from ..._types import NOT_GIVEN
-from .._models import BaseModel
-from .._progress import BufferReader
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
-    # transcriptions
-    sub = subparser.add_parser("audio.transcriptions.create")
-
-    # Required
-    sub.add_argument("-m", "--model", type=str, default="whisper-1")
-    sub.add_argument("-f", "--file", type=str, required=True)
-    # Optional
-    sub.add_argument("--response-format", type=str)
-    sub.add_argument("--language", type=str)
-    sub.add_argument("-t", "--temperature", type=float)
-    sub.add_argument("--prompt", type=str)
-    sub.set_defaults(func=CLIAudio.transcribe, args_model=CLITranscribeArgs)
-
-    # translations
-    sub = subparser.add_parser("audio.translations.create")
-
-    # Required
-    sub.add_argument("-f", "--file", type=str, required=True)
-    # Optional
-    sub.add_argument("-m", "--model", type=str, default="whisper-1")
-    sub.add_argument("--response-format", type=str)
-    # TODO: doesn't seem to be supported by the API
-    # sub.add_argument("--language", type=str)
-    sub.add_argument("-t", "--temperature", type=float)
-    sub.add_argument("--prompt", type=str)
-    sub.set_defaults(func=CLIAudio.translate, args_model=CLITranslationArgs)
-
-
-class CLITranscribeArgs(BaseModel):
-    model: str
-    file: str
-    response_format: Optional[str] = None
-    language: Optional[str] = None
-    temperature: Optional[float] = None
-    prompt: Optional[str] = None
-
-
-class CLITranslationArgs(BaseModel):
-    model: str
-    file: str
-    response_format: Optional[str] = None
-    language: Optional[str] = None
-    temperature: Optional[float] = None
-    prompt: Optional[str] = None
-
-
-class CLIAudio:
-    @staticmethod
-    def transcribe(args: CLITranscribeArgs) -> None:
-        with open(args.file, "rb") as file_reader:
-            buffer_reader = BufferReader(file_reader.read(), desc="Upload progress")
-
-        model = get_client().audio.transcriptions.create(
-            file=(args.file, buffer_reader),
-            model=args.model,
-            language=args.language or NOT_GIVEN,
-            temperature=args.temperature or NOT_GIVEN,
-            prompt=args.prompt or NOT_GIVEN,
-            # casts required because the API is typed for enums
-            # but we don't want to validate that here for forwards-compat
-            response_format=cast(Any, args.response_format),
-        )
-        print_model(model)
-
-    @staticmethod
-    def translate(args: CLITranslationArgs) -> None:
-        with open(args.file, "rb") as file_reader:
-            buffer_reader = BufferReader(file_reader.read(), desc="Upload progress")
-
-        model = get_client().audio.translations.create(
-            file=(args.file, buffer_reader),
-            model=args.model,
-            temperature=args.temperature or NOT_GIVEN,
-            prompt=args.prompt or NOT_GIVEN,
-            # casts required because the API is typed for enums
-            # but we don't want to validate that here for forwards-compat
-            response_format=cast(Any, args.response_format),
-        )
-        print_model(model)
diff --git a/openai/cli/_api/chat/__init__.py b/openai/cli/_api/chat/__init__.py
deleted file mode 100644
index 87d97163..00000000
--- a/openai/cli/_api/chat/__init__.py
+++ /dev/null
@@ -1,13 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-from argparse import ArgumentParser
-
-from . import completions
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
-    completions.register(subparser)
diff --git a/openai/cli/_api/chat/completions.py b/openai/cli/_api/chat/completions.py
deleted file mode 100644
index c299741f..00000000
--- a/openai/cli/_api/chat/completions.py
+++ /dev/null
@@ -1,156 +0,0 @@
-from __future__ import annotations
-
-import sys
-from typing import TYPE_CHECKING, List, Optional, cast
-from argparse import ArgumentParser
-from typing_extensions import Literal, NamedTuple
-
-from ..._utils import get_client
-from ..._models import BaseModel
-from ...._streaming import Stream
-from ....types.chat import (
-    ChatCompletionRole,
-    ChatCompletionChunk,
-    CompletionCreateParams,
-)
-from ....types.chat.completion_create_params import (
-    CompletionCreateParamsStreaming,
-    CompletionCreateParamsNonStreaming,
-)
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
-    sub = subparser.add_parser("chat.completions.create")
-
-    sub._action_groups.pop()
-    req = sub.add_argument_group("required arguments")
-    opt = sub.add_argument_group("optional arguments")
-
-    req.add_argument(
-        "-g",
-        "--message",
-        action="append",
-        nargs=2,
-        metavar=("ROLE", "CONTENT"),
-        help="A message in `{role} {content}` format. Use this argument multiple times to add multiple messages.",
-        required=True,
-    )
-    req.add_argument(
-        "-m",
-        "--model",
-        help="The model to use.",
-        required=True,
-    )
-
-    opt.add_argument(
-        "-n",
-        "--n",
-        help="How many completions to generate for the conversation.",
-        type=int,
-    )
-    opt.add_argument("-M", "--max-tokens", help="The maximum number of tokens to generate.", type=int)
-    opt.add_argument(
-        "-t",
-        "--temperature",
-        help="""What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.
-
-Mutually exclusive with `top_p`.""",
-        type=float,
-    )
-    opt.add_argument(
-        "-P",
-        "--top_p",
-        help="""An alternative to sampling with temperature, called nucleus sampling, where the considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%% probability mass are considered.
-
-            Mutually exclusive with `temperature`.""",
-        type=float,
-    )
-    opt.add_argument(
-        "--stop",
-        help="A stop sequence at which to stop generating tokens for the message.",
-    )
-    opt.add_argument("--stream", help="Stream messages as they're ready.", action="store_true")
-    sub.set_defaults(func=CLIChatCompletion.create, args_model=CLIChatCompletionCreateArgs)
-
-
-class CLIMessage(NamedTuple):
-    role: ChatCompletionRole
-    content: str
-
-
-class CLIChatCompletionCreateArgs(BaseModel):
-    message: List[CLIMessage]
-    model: str
-    n: Optional[int] = None
-    max_tokens: Optional[int] = None
-    temperature: Optional[float] = None
-    top_p: Optional[float] = None
-    stop: Optional[str] = None
-    stream: bool = False
-
-
-class CLIChatCompletion:
-    @staticmethod
-    def create(args: CLIChatCompletionCreateArgs) -> None:
-        params: CompletionCreateParams = {
-            "model": args.model,
-            "messages": [
-                {"role": cast(Literal["user"], message.role), "content": message.content} for message in args.message
-            ],
-            "n": args.n,
-            "temperature": args.temperature,
-            "top_p": args.top_p,
-            "stop": args.stop,
-            # type checkers are not good at inferring union types so we have to set stream afterwards
-            "stream": False,
-        }
-        if args.stream:
-            params["stream"] = args.stream  # type: ignore
-        if args.max_tokens is not None:
-            params["max_tokens"] = args.max_tokens
-
-        if args.stream:
-            return CLIChatCompletion._stream_create(cast(CompletionCreateParamsStreaming, params))
-
-        return CLIChatCompletion._create(cast(CompletionCreateParamsNonStreaming, params))
-
-    @staticmethod
-    def _create(params: CompletionCreateParamsNonStreaming) -> None:
-        completion = get_client().chat.completions.create(**params)
-        should_print_header = len(completion.choices) > 1
-        for choice in completion.choices:
-            if should_print_header:
-                sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index))
-
-            content = choice.message.content if choice.message.content is not None else "None"
-            sys.stdout.write(content)
-
-            if should_print_header or not content.endswith("\n"):
-                sys.stdout.write("\n")
-
-            sys.stdout.flush()
-
-    @staticmethod
-    def _stream_create(params: CompletionCreateParamsStreaming) -> None:
-        # cast is required for mypy
-        stream = cast(  # pyright: ignore[reportUnnecessaryCast]
-            Stream[ChatCompletionChunk], get_client().chat.completions.create(**params)
-        )
-        for chunk in stream:
-            should_print_header = len(chunk.choices) > 1
-            for choice in chunk.choices:
-                if should_print_header:
-                    sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index))
-
-                content = choice.delta.content or ""
-                sys.stdout.write(content)
-
-                if should_print_header:
-                    sys.stdout.write("\n")
-
-                sys.stdout.flush()
-
-        sys.stdout.write("\n")
diff --git a/openai/cli/_api/completions.py b/openai/cli/_api/completions.py
deleted file mode 100644
index cbdb35bf..00000000
--- a/openai/cli/_api/completions.py
+++ /dev/null
@@ -1,173 +0,0 @@
-from __future__ import annotations
-
-import sys
-from typing import TYPE_CHECKING, Optional, cast
-from argparse import ArgumentParser
-from functools import partial
-
-from openai.types.completion import Completion
-
-from .._utils import get_client
-from ..._types import NOT_GIVEN, NotGivenOr
-from ..._utils import is_given
-from .._errors import CLIError
-from .._models import BaseModel
-from ..._streaming import Stream
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
-    sub = subparser.add_parser("completions.create")
-
-    # Required
-    sub.add_argument(
-        "-m",
-        "--model",
-        help="The model to use",
-        required=True,
-    )
-
-    # Optional
-    sub.add_argument("-p", "--prompt", help="An optional prompt to complete from")
-    sub.add_argument("--stream", help="Stream tokens as they're ready.", action="store_true")
-    sub.add_argument("-M", "--max-tokens", help="The maximum number of tokens to generate", type=int)
-    sub.add_argument(
-        "-t",
-        "--temperature",
-        help="""What sampling temperature to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer.
-
-Mutually exclusive with `top_p`.""",
-        type=float,
-    )
-    sub.add_argument(
-        "-P",
-        "--top_p",
-        help="""An alternative to sampling with temperature, called nucleus sampling, where the considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10%% probability mass are considered.
-
-            Mutually exclusive with `temperature`.""",
-        type=float,
-    )
-    sub.add_argument(
-        "-n",
-        "--n",
-        help="How many sub-completions to generate for each prompt.",
-        type=int,
-    )
-    sub.add_argument(
-        "--logprobs",
-        help="Include the log probabilities on the `logprobs` most likely tokens, as well the chosen tokens. So for example, if `logprobs` is 10, the API will return a list of the 10 most likely tokens. If `logprobs` is 0, only the chosen tokens will have logprobs returned.",
-        type=int,
-    )
-    sub.add_argument(
-        "--best_of",
-        help="Generates `best_of` completions server-side and returns the 'best' (the one with the highest log probability per token). Results cannot be streamed.",
-        type=int,
-    )
-    sub.add_argument(
-        "--echo",
-        help="Echo back the prompt in addition to the completion",
-        action="store_true",
-    )
-    sub.add_argument(
-        "--frequency_penalty",
-        help="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim.",
-        type=float,
-    )
-    sub.add_argument(
-        "--presence_penalty",
-        help="Positive values penalize new tokens based on whether they appear in the text so far, increasing the model's likelihood to talk about new topics.",
-        type=float,
-    )
-    sub.add_argument("--suffix", help="The suffix that comes after a completion of inserted text.")
-    sub.add_argument("--stop", help="A stop sequence at which to stop generating tokens.")
-    sub.add_argument(
-        "--user",
-        help="A unique identifier representing your end-user, which can help OpenAI to monitor and detect abuse.",
-    )
-    # TODO: add support for logit_bias
-    sub.set_defaults(func=CLICompletions.create, args_model=CLICompletionCreateArgs)
-
-
-class CLICompletionCreateArgs(BaseModel):
-    model: str
-    stream: bool = False
-
-    prompt: Optional[str] = None
-    n: NotGivenOr[int] = NOT_GIVEN
-    stop: NotGivenOr[str] = NOT_GIVEN
-    user: NotGivenOr[str] = NOT_GIVEN
-    echo: NotGivenOr[bool] = NOT_GIVEN
-    suffix: NotGivenOr[str] = NOT_GIVEN
-    best_of: NotGivenOr[int] = NOT_GIVEN
-    top_p: NotGivenOr[float] = NOT_GIVEN
-    logprobs: NotGivenOr[int] = NOT_GIVEN
-    max_tokens: NotGivenOr[int] = NOT_GIVEN
-    temperature: NotGivenOr[float] = NOT_GIVEN
-    presence_penalty: NotGivenOr[float] = NOT_GIVEN
-    frequency_penalty: NotGivenOr[float] = NOT_GIVEN
-
-
-class CLICompletions:
-    @staticmethod
-    def create(args: CLICompletionCreateArgs) -> None:
-        if is_given(args.n) and args.n > 1 and args.stream:
-            raise CLIError("Can't stream completions with n>1 with the current CLI")
-
-        make_request = partial(
-            get_client().completions.create,
-            n=args.n,
-            echo=args.echo,
-            stop=args.stop,
-            user=args.user,
-            model=args.model,
-            top_p=args.top_p,
-            prompt=args.prompt,
-            suffix=args.suffix,
-            best_of=args.best_of,
-            logprobs=args.logprobs,
-            max_tokens=args.max_tokens,
-            temperature=args.temperature,
-            presence_penalty=args.presence_penalty,
-            frequency_penalty=args.frequency_penalty,
-        )
-
-        if args.stream:
-            return CLICompletions._stream_create(
-                # mypy doesn't understand the `partial` function but pyright does
-                cast(Stream[Completion], make_request(stream=True))  # pyright: ignore[reportUnnecessaryCast]
-            )
-
-        return CLICompletions._create(make_request())
-
-    @staticmethod
-    def _create(completion: Completion) -> None:
-        should_print_header = len(completion.choices) > 1
-        for choice in completion.choices:
-            if should_print_header:
-                sys.stdout.write("===== Completion {} =====\n".format(choice.index))
-
-            sys.stdout.write(choice.text)
-
-            if should_print_header or not choice.text.endswith("\n"):
-                sys.stdout.write("\n")
-
-            sys.stdout.flush()
-
-    @staticmethod
-    def _stream_create(stream: Stream[Completion]) -> None:
-        for completion in stream:
-            should_print_header = len(completion.choices) > 1
-            for choice in sorted(completion.choices, key=lambda c: c.index):
-                if should_print_header:
-                    sys.stdout.write("===== Chat Completion {} =====\n".format(choice.index))
-
-                sys.stdout.write(choice.text)
-
-                if should_print_header:
-                    sys.stdout.write("\n")
-
-                sys.stdout.flush()
-
-        sys.stdout.write("\n")
diff --git a/openai/cli/_api/files.py b/openai/cli/_api/files.py
deleted file mode 100644
index 5f3631b2..00000000
--- a/openai/cli/_api/files.py
+++ /dev/null
@@ -1,80 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any, cast
-from argparse import ArgumentParser
-
-from .._utils import get_client, print_model
-from .._models import BaseModel
-from .._progress import BufferReader
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
-    sub = subparser.add_parser("files.create")
-
-    sub.add_argument(
-        "-f",
-        "--file",
-        required=True,
-        help="File to upload",
-    )
-    sub.add_argument(
-        "-p",
-        "--purpose",
-        help="Why are you uploading this file? (see https://platform.openai.com/docs/api-reference/ for purposes)",
-        required=True,
-    )
-    sub.set_defaults(func=CLIFile.create, args_model=CLIFileCreateArgs)
-
-    sub = subparser.add_parser("files.retrieve")
-    sub.add_argument("-i", "--id", required=True, help="The files ID")
-    sub.set_defaults(func=CLIFile.get, args_model=CLIFileCreateArgs)
-
-    sub = subparser.add_parser("files.delete")
-    sub.add_argument("-i", "--id", required=True, help="The files ID")
-    sub.set_defaults(func=CLIFile.delete, args_model=CLIFileCreateArgs)
-
-    sub = subparser.add_parser("files.list")
-    sub.set_defaults(func=CLIFile.list)
-
-
-class CLIFileIDArgs(BaseModel):
-    id: str
-
-
-class CLIFileCreateArgs(BaseModel):
-    file: str
-    purpose: str
-
-
-class CLIFile:
-    @staticmethod
-    def create(args: CLIFileCreateArgs) -> None:
-        with open(args.file, "rb") as file_reader:
-            buffer_reader = BufferReader(file_reader.read(), desc="Upload progress")
-
-        file = get_client().files.create(
-            file=(args.file, buffer_reader),
-            # casts required because the API is typed for enums
-            # but we don't want to validate that here for forwards-compat
-            purpose=cast(Any, args.purpose),
-        )
-        print_model(file)
-
-    @staticmethod
-    def get(args: CLIFileIDArgs) -> None:
-        file = get_client().files.retrieve(file_id=args.id)
-        print_model(file)
-
-    @staticmethod
-    def delete(args: CLIFileIDArgs) -> None:
-        file = get_client().files.delete(file_id=args.id)
-        print_model(file)
-
-    @staticmethod
-    def list() -> None:
-        files = get_client().files.list()
-        for file in files:
-            print_model(file)
diff --git a/openai/cli/_api/image.py b/openai/cli/_api/image.py
deleted file mode 100644
index 3e2a0a90..00000000
--- a/openai/cli/_api/image.py
+++ /dev/null
@@ -1,139 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any, cast
-from argparse import ArgumentParser
-
-from .._utils import get_client, print_model
-from ..._types import NOT_GIVEN, NotGiven, NotGivenOr
-from .._models import BaseModel
-from .._progress import BufferReader
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
-    sub = subparser.add_parser("images.generate")
-    sub.add_argument("-m", "--model", type=str)
-    sub.add_argument("-p", "--prompt", type=str, required=True)
-    sub.add_argument("-n", "--num-images", type=int, default=1)
-    sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image")
-    sub.add_argument("--response-format", type=str, default="url")
-    sub.set_defaults(func=CLIImage.create, args_model=CLIImageCreateArgs)
-
-    sub = subparser.add_parser("images.edit")
-    sub.add_argument("-m", "--model", type=str)
-    sub.add_argument("-p", "--prompt", type=str, required=True)
-    sub.add_argument("-n", "--num-images", type=int, default=1)
-    sub.add_argument(
-        "-I",
-        "--image",
-        type=str,
-        required=True,
-        help="Image to modify. Should be a local path and a PNG encoded image.",
-    )
-    sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image")
-    sub.add_argument("--response-format", type=str, default="url")
-    sub.add_argument(
-        "-M",
-        "--mask",
-        type=str,
-        required=False,
-        help="Path to a mask image. It should be the same size as the image you're editing and a RGBA PNG image. The Alpha channel acts as the mask.",
-    )
-    sub.set_defaults(func=CLIImage.edit, args_model=CLIImageEditArgs)
-
-    sub = subparser.add_parser("images.create_variation")
-    sub.add_argument("-m", "--model", type=str)
-    sub.add_argument("-n", "--num-images", type=int, default=1)
-    sub.add_argument(
-        "-I",
-        "--image",
-        type=str,
-        required=True,
-        help="Image to modify. Should be a local path and a PNG encoded image.",
-    )
-    sub.add_argument("-s", "--size", type=str, default="1024x1024", help="Size of the output image")
-    sub.add_argument("--response-format", type=str, default="url")
-    sub.set_defaults(func=CLIImage.create_variation, args_model=CLIImageCreateVariationArgs)
-
-
-class CLIImageCreateArgs(BaseModel):
-    prompt: str
-    num_images: int
-    size: str
-    response_format: str
-    model: NotGivenOr[str] = NOT_GIVEN
-
-
-class CLIImageCreateVariationArgs(BaseModel):
-    image: str
-    num_images: int
-    size: str
-    response_format: str
-    model: NotGivenOr[str] = NOT_GIVEN
-
-
-class CLIImageEditArgs(BaseModel):
-    image: str
-    num_images: int
-    size: str
-    response_format: str
-    prompt: str
-    mask: NotGivenOr[str] = NOT_GIVEN
-    model: NotGivenOr[str] = NOT_GIVEN
-
-
-class CLIImage:
-    @staticmethod
-    def create(args: CLIImageCreateArgs) -> None:
-        image = get_client().images.generate(
-            model=args.model,
-            prompt=args.prompt,
-            n=args.num_images,
-            # casts required because the API is typed for enums
-            # but we don't want to validate that here for forwards-compat
-            size=cast(Any, args.size),
-            response_format=cast(Any, args.response_format),
-        )
-        print_model(image)
-
-    @staticmethod
-    def create_variation(args: CLIImageCreateVariationArgs) -> None:
-        with open(args.image, "rb") as file_reader:
-            buffer_reader = BufferReader(file_reader.read(), desc="Upload progress")
-
-        image = get_client().images.create_variation(
-            model=args.model,
-            image=("image", buffer_reader),
-            n=args.num_images,
-            # casts required because the API is typed for enums
-            # but we don't want to validate that here for forwards-compat
-            size=cast(Any, args.size),
-            response_format=cast(Any, args.response_format),
-        )
-        print_model(image)
-
-    @staticmethod
-    def edit(args: CLIImageEditArgs) -> None:
-        with open(args.image, "rb") as file_reader:
-            buffer_reader = BufferReader(file_reader.read(), desc="Image upload progress")
-
-        if isinstance(args.mask, NotGiven):
-            mask: NotGivenOr[BufferReader] = NOT_GIVEN
-        else:
-            with open(args.mask, "rb") as file_reader:
-                mask = BufferReader(file_reader.read(), desc="Mask progress")
-
-        image = get_client().images.edit(
-            model=args.model,
-            prompt=args.prompt,
-            image=("image", buffer_reader),
-            n=args.num_images,
-            mask=("mask", mask) if not isinstance(mask, NotGiven) else mask,
-            # casts required because the API is typed for enums
-            # but we don't want to validate that here for forwards-compat
-            size=cast(Any, args.size),
-            response_format=cast(Any, args.response_format),
-        )
-        print_model(image)
diff --git a/openai/cli/_api/models.py b/openai/cli/_api/models.py
deleted file mode 100644
index 017218fa..00000000
--- a/openai/cli/_api/models.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-from argparse import ArgumentParser
-
-from .._utils import get_client, print_model
-from .._models import BaseModel
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
-    sub = subparser.add_parser("models.list")
-    sub.set_defaults(func=CLIModels.list)
-
-    sub = subparser.add_parser("models.retrieve")
-    sub.add_argument("-i", "--id", required=True, help="The model ID")
-    sub.set_defaults(func=CLIModels.get, args_model=CLIModelIDArgs)
-
-    sub = subparser.add_parser("models.delete")
-    sub.add_argument("-i", "--id", required=True, help="The model ID")
-    sub.set_defaults(func=CLIModels.delete, args_model=CLIModelIDArgs)
-
-
-class CLIModelIDArgs(BaseModel):
-    id: str
-
-
-class CLIModels:
-    @staticmethod
-    def get(args: CLIModelIDArgs) -> None:
-        model = get_client().models.retrieve(model=args.id)
-        print_model(model)
-
-    @staticmethod
-    def delete(args: CLIModelIDArgs) -> None:
-        model = get_client().models.delete(model=args.id)
-        print_model(model)
-
-    @staticmethod
-    def list() -> None:
-        models = get_client().models.list()
-        for model in models:
-            print_model(model)
diff --git a/openai/cli/_cli.py b/openai/cli/_cli.py
deleted file mode 100644
index 72e5c923..00000000
--- a/openai/cli/_cli.py
+++ /dev/null
@@ -1,234 +0,0 @@
-from __future__ import annotations
-
-import sys
-import logging
-import argparse
-from typing import Any, List, Type, Optional
-from typing_extensions import ClassVar
-
-import httpx
-import pydantic
-
-import openai
-
-from . import _tools
-from .. import _ApiType, __version__
-from ._api import register_commands
-from ._utils import can_use_http2
-from .._types import ProxiesDict
-from ._errors import CLIError, display_error
-from .._compat import PYDANTIC_V2, ConfigDict, model_parse
-from .._models import BaseModel
-from .._exceptions import APIError
-
-logger = logging.getLogger()
-formatter = logging.Formatter("[%(asctime)s] %(message)s")
-handler = logging.StreamHandler(sys.stderr)
-handler.setFormatter(formatter)
-logger.addHandler(handler)
-
-
-class Arguments(BaseModel):
-    if PYDANTIC_V2:
-        model_config: ClassVar[ConfigDict] = ConfigDict(
-            extra="ignore",
-        )
-    else:
-
-        class Config(pydantic.BaseConfig):  # type: ignore
-            extra: Any = pydantic.Extra.ignore  # type: ignore
-
-    verbosity: int
-    version: Optional[str] = None
-
-    api_key: Optional[str]
-    api_base: Optional[str]
-    organization: Optional[str]
-    proxy: Optional[List[str]]
-    api_type: Optional[_ApiType] = None
-    api_version: Optional[str] = None
-
-    # azure
-    azure_endpoint: Optional[str] = None
-    azure_ad_token: Optional[str] = None
-
-    # internal, set by subparsers to parse their specific args
-    args_model: Optional[Type[BaseModel]] = None
-
-    # internal, used so that subparsers can forward unknown arguments
-    unknown_args: List[str] = []
-    allow_unknown_args: bool = False
-
-
-def _build_parser() -> argparse.ArgumentParser:
-    parser = argparse.ArgumentParser(description=None, prog="openai")
-    parser.add_argument(
-        "-v",
-        "--verbose",
-        action="count",
-        dest="verbosity",
-        default=0,
-        help="Set verbosity.",
-    )
-    parser.add_argument("-b", "--api-base", help="What API base url to use.")
-    parser.add_argument("-k", "--api-key", help="What API key to use.")
-    parser.add_argument("-p", "--proxy", nargs="+", help="What proxy to use.")
-    parser.add_argument(
-        "-o",
-        "--organization",
-        help="Which organization to run as (will use your default organization if not specified)",
-    )
-    parser.add_argument(
-        "-t",
-        "--api-type",
-        type=str,
-        choices=("openai", "azure"),
-        help="The backend API to call, must be `openai` or `azure`",
-    )
-    parser.add_argument(
-        "--api-version",
-        help="The Azure API version, e.g. 'https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#rest-api-versioning'",
-    )
-
-    # azure
-    parser.add_argument(
-        "--azure-endpoint",
-        help="The Azure endpoint, e.g. 'https://endpoint.openai.azure.com'",
-    )
-    parser.add_argument(
-        "--azure-ad-token",
-        help="A token from Azure Active Directory, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id",
-    )
-
-    # prints the package version
-    parser.add_argument(
-        "-V",
-        "--version",
-        action="version",
-        version="%(prog)s " + __version__,
-    )
-
-    def help() -> None:
-        parser.print_help()
-
-    parser.set_defaults(func=help)
-
-    subparsers = parser.add_subparsers()
-    sub_api = subparsers.add_parser("api", help="Direct API calls")
-
-    register_commands(sub_api)
-
-    sub_tools = subparsers.add_parser("tools", help="Client side tools for convenience")
-    _tools.register_commands(sub_tools, subparsers)
-
-    return parser
-
-
-def main() -> int:
-    try:
-        _main()
-    except (APIError, CLIError, pydantic.ValidationError) as err:
-        display_error(err)
-        return 1
-    except KeyboardInterrupt:
-        sys.stderr.write("\n")
-        return 1
-    return 0
-
-
-def _parse_args(parser: argparse.ArgumentParser) -> tuple[argparse.Namespace, Arguments, list[str]]:
-    # argparse by default will strip out the `--` but we want to keep it for unknown arguments
-    if "--" in sys.argv:
-        idx = sys.argv.index("--")
-        known_args = sys.argv[1:idx]
-        unknown_args = sys.argv[idx:]
-    else:
-        known_args = sys.argv[1:]
-        unknown_args = []
-
-    parsed, remaining_unknown = parser.parse_known_args(known_args)
-
-    # append any remaining unknown arguments from the initial parsing
-    remaining_unknown.extend(unknown_args)
-
-    args = model_parse(Arguments, vars(parsed))
-    if not args.allow_unknown_args:
-        # we have to parse twice to ensure any unknown arguments
-        # result in an error if that behaviour is desired
-        parser.parse_args()
-
-    return parsed, args, remaining_unknown
-
-
-def _main() -> None:
-    parser = _build_parser()
-    parsed, args, unknown = _parse_args(parser)
-
-    if args.verbosity != 0:
-        sys.stderr.write("Warning: --verbosity isn't supported yet\n")
-
-    proxies: ProxiesDict = {}
-    if args.proxy is not None:
-        for proxy in args.proxy:
-            key = "https://" if proxy.startswith("https") else "http://"
-            if key in proxies:
-                raise CLIError(f"Multiple {key} proxies given - only the last one would be used")
-
-            proxies[key] = proxy
-
-    http_client = httpx.Client(
-        proxies=proxies or None,
-        http2=can_use_http2(),
-    )
-    openai.http_client = http_client
-
-    if args.organization:
-        openai.organization = args.organization
-
-    if args.api_key:
-        openai.api_key = args.api_key
-
-    if args.api_base:
-        openai.base_url = args.api_base
-
-    # azure
-    if args.api_type is not None:
-        openai.api_type = args.api_type
-
-    if args.azure_endpoint is not None:
-        openai.azure_endpoint = args.azure_endpoint
-
-    if args.api_version is not None:
-        openai.api_version = args.api_version
-
-    if args.azure_ad_token is not None:
-        openai.azure_ad_token = args.azure_ad_token
-
-    try:
-        if args.args_model:
-            parsed.func(
-                model_parse(
-                    args.args_model,
-                    {
-                        **{
-                            # we omit None values so that they can be defaulted to `NotGiven`
-                            # and we'll strip it from the API request
-                            key: value
-                            for key, value in vars(parsed).items()
-                            if value is not None
-                        },
-                        "unknown_args": unknown,
-                    },
-                )
-            )
-        else:
-            parsed.func()
-    finally:
-        try:
-            http_client.close()
-        except Exception:
-            pass
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/openai/cli/_errors.py b/openai/cli/_errors.py
deleted file mode 100644
index 2bf06070..00000000
--- a/openai/cli/_errors.py
+++ /dev/null
@@ -1,23 +0,0 @@
-from __future__ import annotations
-
-import sys
-
-import pydantic
-
-from ._utils import Colors, organization_info
-from .._exceptions import APIError, OpenAIError
-
-
-class CLIError(OpenAIError):
-    ...
-
-
-class SilentCLIError(CLIError):
-    ...
-
-
-def display_error(err: CLIError | APIError | pydantic.ValidationError) -> None:
-    if isinstance(err, SilentCLIError):
-        return
-
-    sys.stderr.write("{}{}Error:{} {}\n".format(organization_info(), Colors.FAIL, Colors.ENDC, err))
diff --git a/openai/cli/_models.py b/openai/cli/_models.py
deleted file mode 100644
index 5583db26..00000000
--- a/openai/cli/_models.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from typing import Any
-from typing_extensions import ClassVar
-
-import pydantic
-
-from .. import _models
-from .._compat import PYDANTIC_V2, ConfigDict
-
-
-class BaseModel(_models.BaseModel):
-    if PYDANTIC_V2:
-        model_config: ClassVar[ConfigDict] = ConfigDict(extra="ignore", arbitrary_types_allowed=True)
-    else:
-
-        class Config(pydantic.BaseConfig):  # type: ignore
-            extra: Any = pydantic.Extra.ignore  # type: ignore
-            arbitrary_types_allowed: bool = True
diff --git a/openai/cli/_progress.py b/openai/cli/_progress.py
deleted file mode 100644
index 8a7f2525..00000000
--- a/openai/cli/_progress.py
+++ /dev/null
@@ -1,59 +0,0 @@
-from __future__ import annotations
-
-import io
-from typing import Callable
-from typing_extensions import override
-
-
-class CancelledError(Exception):
-    def __init__(self, msg: str) -> None:
-        self.msg = msg
-        super().__init__(msg)
-
-    @override
-    def __str__(self) -> str:
-        return self.msg
-
-    __repr__ = __str__
-
-
-class BufferReader(io.BytesIO):
-    def __init__(self, buf: bytes = b"", desc: str | None = None) -> None:
-        super().__init__(buf)
-        self._len = len(buf)
-        self._progress = 0
-        self._callback = progress(len(buf), desc=desc)
-
-    def __len__(self) -> int:
-        return self._len
-
-    @override
-    def read(self, n: int | None = -1) -> bytes:
-        chunk = io.BytesIO.read(self, n)
-        self._progress += len(chunk)
-
-        try:
-            self._callback(self._progress)
-        except Exception as e:  # catches exception from the callback
-            raise CancelledError("The upload was cancelled: {}".format(e)) from e
-
-        return chunk
-
-
-def progress(total: float, desc: str | None) -> Callable[[float], None]:
-    import tqdm
-
-    meter = tqdm.tqdm(total=total, unit_scale=True, desc=desc)
-
-    def incr(progress: float) -> None:
-        meter.n = progress
-        if progress == total:
-            meter.close()
-        else:
-            meter.refresh()
-
-    return incr
-
-
-def MB(i: int) -> int:
-    return int(i // 1024**2)
diff --git a/openai/cli/_tools/__init__.py b/openai/cli/_tools/__init__.py
deleted file mode 100644
index 56a0260a..00000000
--- a/openai/cli/_tools/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from ._main import register_commands as register_commands
diff --git a/openai/cli/_tools/_main.py b/openai/cli/_tools/_main.py
deleted file mode 100644
index bd6cda40..00000000
--- a/openai/cli/_tools/_main.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING
-from argparse import ArgumentParser
-
-from . import migrate, fine_tunes
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register_commands(parser: ArgumentParser, subparser: _SubParsersAction[ArgumentParser]) -> None:
-    migrate.register(subparser)
-
-    namespaced = parser.add_subparsers(title="Tools", help="Convenience client side tools")
-
-    fine_tunes.register(namespaced)
diff --git a/openai/cli/_tools/fine_tunes.py b/openai/cli/_tools/fine_tunes.py
deleted file mode 100644
index 2128b889..00000000
--- a/openai/cli/_tools/fine_tunes.py
+++ /dev/null
@@ -1,63 +0,0 @@
-from __future__ import annotations
-
-import sys
-from typing import TYPE_CHECKING
-from argparse import ArgumentParser
-
-from .._models import BaseModel
-from ...lib._validators import (
-    get_validators,
-    write_out_file,
-    read_any_format,
-    apply_validators,
-    apply_necessary_remediation,
-)
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
-    sub = subparser.add_parser("fine_tunes.prepare_data")
-    sub.add_argument(
-        "-f",
-        "--file",
-        required=True,
-        help="JSONL, JSON, CSV, TSV, TXT or XLSX file containing prompt-completion examples to be analyzed."
-        "This should be the local file path.",
-    )
-    sub.add_argument(
-        "-q",
-        "--quiet",
-        required=False,
-        action="store_true",
-        help="Auto accepts all suggestions, without asking for user input. To be used within scripts.",
-    )
-    sub.set_defaults(func=prepare_data, args_model=PrepareDataArgs)
-
-
-class PrepareDataArgs(BaseModel):
-    file: str
-
-    quiet: bool
-
-
-def prepare_data(args: PrepareDataArgs) -> None:
-    sys.stdout.write("Analyzing...\n")
-    fname = args.file
-    auto_accept = args.quiet
-    df, remediation = read_any_format(fname)
-    apply_necessary_remediation(None, remediation)
-
-    validators = get_validators()
-
-    assert df is not None
-
-    apply_validators(
-        df,
-        fname,
-        remediation,
-        validators,
-        auto_accept,
-        write_out_file_func=write_out_file,
-    )
diff --git a/openai/cli/_tools/migrate.py b/openai/cli/_tools/migrate.py
deleted file mode 100644
index 53073b86..00000000
--- a/openai/cli/_tools/migrate.py
+++ /dev/null
@@ -1,181 +0,0 @@
-from __future__ import annotations
-
-import os
-import sys
-import json
-import shutil
-import tarfile
-import platform
-import subprocess
-from typing import TYPE_CHECKING, List
-from pathlib import Path
-from argparse import ArgumentParser
-
-import httpx
-
-from .._errors import CLIError, SilentCLIError
-from .._models import BaseModel
-
-if TYPE_CHECKING:
-    from argparse import _SubParsersAction
-
-
-def register(subparser: _SubParsersAction[ArgumentParser]) -> None:
-    sub = subparser.add_parser("migrate")
-    sub.set_defaults(func=migrate, args_model=MigrateArgs, allow_unknown_args=True)
-
-    sub = subparser.add_parser("grit")
-    sub.set_defaults(func=grit, args_model=GritArgs, allow_unknown_args=True)
-
-
-class GritArgs(BaseModel):
-    # internal
-    unknown_args: List[str] = []
-
-
-def grit(args: GritArgs) -> None:
-    grit_path = install()
-
-    try:
-        subprocess.check_call([grit_path, *args.unknown_args])
-    except subprocess.CalledProcessError:
-        # stdout and stderr are forwarded by subprocess so an error will already
-        # have been displayed
-        raise SilentCLIError() from None
-
-
-class MigrateArgs(BaseModel):
-    # internal
-    unknown_args: List[str] = []
-
-
-def migrate(args: MigrateArgs) -> None:
-    grit_path = install()
-
-    try:
-        subprocess.check_call([grit_path, "apply", "openai", *args.unknown_args])
-    except subprocess.CalledProcessError:
-        # stdout and stderr are forwarded by subprocess so an error will already
-        # have been displayed
-        raise SilentCLIError() from None
-
-
-# handles downloading the Grit CLI until they provide their own PyPi package
-
-KEYGEN_ACCOUNT = "custodian-dev"
-
-
-def _cache_dir() -> Path:
-    xdg = os.environ.get("XDG_CACHE_HOME")
-    if xdg is not None:
-        return Path(xdg)
-
-    return Path.home() / ".cache"
-
-
-def _debug(message: str) -> None:
-    if not os.environ.get("DEBUG"):
-        return
-
-    sys.stdout.write(f"[DEBUG]: {message}\n")
-
-
-def install() -> Path:
-    """Installs the Grit CLI and returns the location of the binary"""
-    if sys.platform == "win32":
-        raise CLIError("Windows is not supported yet in the migration CLI")
-
-    platform = "macos" if sys.platform == "darwin" else "linux"
-
-    dir_name = _cache_dir() / "openai-python"
-    install_dir = dir_name / ".install"
-    target_dir = install_dir / "bin"
-
-    target_path = target_dir / "marzano"
-    temp_file = target_dir / "marzano.tmp"
-
-    if target_path.exists():
-        _debug(f"{target_path} already exists")
-        sys.stdout.flush()
-        return target_path
-
-    _debug(f"Using Grit CLI path: {target_path}")
-
-    target_dir.mkdir(parents=True, exist_ok=True)
-
-    if temp_file.exists():
-        temp_file.unlink()
-
-    arch = _get_arch()
-    _debug(f"Using architecture {arch}")
-
-    file_name = f"marzano-{platform}-{arch}"
-    meta_url = f"https://api.keygen.sh/v1/accounts/{KEYGEN_ACCOUNT}/artifacts/{file_name}"
-
-    sys.stdout.write(f"Retrieving Grit CLI metadata from {meta_url}\n")
-    with httpx.Client() as client:
-        response = client.get(meta_url)  # pyright: ignore[reportUnknownMemberType]
-
-        data = response.json()
-        errors = data.get("errors")
-        if errors:
-            for error in errors:
-                sys.stdout.write(f"{error}\n")
-
-            raise CLIError("Could not locate Grit CLI binary - see above errors")
-
-        write_manifest(install_dir, data["data"]["relationships"]["release"]["data"]["id"])
-
-        link = data["data"]["links"]["redirect"]
-        _debug(f"Redirect URL {link}")
-
-        download_response = client.get(link)  # pyright: ignore[reportUnknownMemberType]
-        with open(temp_file, "wb") as file:
-            for chunk in download_response.iter_bytes():
-                file.write(chunk)
-
-    unpacked_dir = target_dir / "cli-bin"
-    unpacked_dir.mkdir(parents=True, exist_ok=True)
-
-    with tarfile.open(temp_file, "r:gz") as archive:
-        archive.extractall(unpacked_dir, filter="data")
-
-    for item in unpacked_dir.iterdir():
-        item.rename(target_dir / item.name)
-
-    shutil.rmtree(unpacked_dir)
-    os.remove(temp_file)
-    os.chmod(target_path, 0o755)
-
-    sys.stdout.flush()
-
-    return target_path
-
-
-def _get_arch() -> str:
-    architecture = platform.machine().lower()
-
-    # Map the architecture names to Node.js equivalents
-    arch_map = {
-        "x86_64": "x64",
-        "amd64": "x64",
-        "armv7l": "arm",
-        "aarch64": "arm64",
-    }
-
-    return arch_map.get(architecture, architecture)
-
-
-def write_manifest(install_path: Path, release: str) -> None:
-    manifest = {
-        "installPath": str(install_path),
-        "binaries": {
-            "marzano": {
-                "name": "marzano",
-                "release": release,
-            },
-        },
-    }
-    manifest_path = Path(install_path) / "manifests.json"
-    with open(manifest_path, "w") as f:
-        json.dump(manifest, f, indent=2)
diff --git a/openai/cli/_utils.py b/openai/cli/_utils.py
deleted file mode 100644
index 673eed61..00000000
--- a/openai/cli/_utils.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from __future__ import annotations
-
-import sys
-
-import openai
-
-from .. import OpenAI, _load_client
-from .._compat import model_json
-from .._models import BaseModel
-
-
-class Colors:
-    HEADER = "\033[95m"
-    OKBLUE = "\033[94m"
-    OKGREEN = "\033[92m"
-    WARNING = "\033[93m"
-    FAIL = "\033[91m"
-    ENDC = "\033[0m"
-    BOLD = "\033[1m"
-    UNDERLINE = "\033[4m"
-
-
-def get_client() -> OpenAI:
-    return _load_client()
-
-
-def organization_info() -> str:
-    organization = openai.organization
-    if organization is not None:
-        return "[organization={}] ".format(organization)
-
-    return ""
-
-
-def print_model(model: BaseModel) -> None:
-    sys.stdout.write(model_json(model, indent=2) + "\n")
-
-
-def can_use_http2() -> bool:
-    try:
-        import h2  # type: ignore  # noqa
-    except ImportError:
-        return False
-
-    return True
diff --git a/openai/lib/.keep b/openai/lib/.keep
deleted file mode 100644
index 5e2c99fd..00000000
--- a/openai/lib/.keep
+++ /dev/null
@@ -1,4 +0,0 @@
-File generated from our OpenAPI spec by Stainless.
-
-This directory can be used to store custom files to expand the SDK.
-It is ignored by Stainless code generation and its content (other than this keep file) won't be touched.
\ No newline at end of file
diff --git a/openai/lib/_old_api.py b/openai/lib/_old_api.py
deleted file mode 100644
index 929c87e8..00000000
--- a/openai/lib/_old_api.py
+++ /dev/null
@@ -1,72 +0,0 @@
-from __future__ import annotations
-
-from typing import TYPE_CHECKING, Any
-from typing_extensions import override
-
-from .._utils import LazyProxy
-from .._exceptions import OpenAIError
-
-INSTRUCTIONS = """
-
-You tried to access openai.{symbol}, but this is no longer supported in openai>=1.0.0 - see the README at https://github.com/openai/openai-python for the API.
-
-You can run `openai migrate` to automatically upgrade your codebase to use the 1.0.0 interface. 
-
-Alternatively, you can pin your installation to the old version, e.g. `pip install openai==0.28`
-
-A detailed migration guide is available here: https://github.com/openai/openai-python/discussions/742
-"""
-
-
-class APIRemovedInV1(OpenAIError):
-    def __init__(self, *, symbol: str) -> None:
-        super().__init__(INSTRUCTIONS.format(symbol=symbol))
-
-
-class APIRemovedInV1Proxy(LazyProxy[Any]):
-    def __init__(self, *, symbol: str) -> None:
-        super().__init__()
-        self._symbol = symbol
-
-    @override
-    def __load__(self) -> Any:
-        # return the proxy until it is eventually called so that
-        # we don't break people that are just checking the attributes
-        # of a module
-        return self
-
-    def __call__(self, *_args: Any, **_kwargs: Any) -> Any:
-        raise APIRemovedInV1(symbol=self._symbol)
-
-
-SYMBOLS = [
-    "Edit",
-    "File",
-    "Audio",
-    "Image",
-    "Model",
-    "Engine",
-    "Customer",
-    "FineTune",
-    "Embedding",
-    "Completion",
-    "Deployment",
-    "Moderation",
-    "ErrorObject",
-    "FineTuningJob",
-    "ChatCompletion",
-]
-
-# we explicitly tell type checkers that nothing is exported
-# from this file so that when we re-export the old symbols
-# in `openai/__init__.py` they aren't added to the auto-complete
-# suggestions given by editors
-if TYPE_CHECKING:
-    __all__: list[str] = []
-else:
-    __all__ = SYMBOLS
-
-
-__locals = locals()
-for symbol in SYMBOLS:
-    __locals[symbol] = APIRemovedInV1Proxy(symbol=symbol)
diff --git a/openai/lib/_validators.py b/openai/lib/_validators.py
deleted file mode 100644
index e36f0e95..00000000
--- a/openai/lib/_validators.py
+++ /dev/null
@@ -1,805 +0,0 @@
-# pyright: basic
-from __future__ import annotations
-
-import os
-import sys
-from typing import Any, TypeVar, Callable, Optional, NamedTuple
-from typing_extensions import TypeAlias
-
-from .._extras import pandas as pd
-
-
-class Remediation(NamedTuple):
-    name: str
-    immediate_msg: Optional[str] = None
-    necessary_msg: Optional[str] = None
-    necessary_fn: Optional[Callable[[Any], Any]] = None
-    optional_msg: Optional[str] = None
-    optional_fn: Optional[Callable[[Any], Any]] = None
-    error_msg: Optional[str] = None
-
-
-OptionalDataFrameT = TypeVar("OptionalDataFrameT", bound="Optional[pd.DataFrame]")
-
-
-def num_examples_validator(df: pd.DataFrame) -> Remediation:
-    """
-    This validator will only print out the number of examples and recommend to the user to increase the number of examples if less than 100.
-    """
-    MIN_EXAMPLES = 100
-    optional_suggestion = (
-        ""
-        if len(df) >= MIN_EXAMPLES
-        else ". In general, we recommend having at least a few hundred examples. We've found that performance tends to linearly increase for every doubling of the number of examples"
-    )
-    immediate_msg = f"\n- Your file contains {len(df)} prompt-completion pairs{optional_suggestion}"
-    return Remediation(name="num_examples", immediate_msg=immediate_msg)
-
-
-def necessary_column_validator(df: pd.DataFrame, necessary_column: str) -> Remediation:
-    """
-    This validator will ensure that the necessary column is present in the dataframe.
-    """
-
-    def lower_case_column(df: pd.DataFrame, column: Any) -> pd.DataFrame:
-        cols = [c for c in df.columns if str(c).lower() == column]
-        df.rename(columns={cols[0]: column.lower()}, inplace=True)
-        return df
-
-    immediate_msg = None
-    necessary_fn = None
-    necessary_msg = None
-    error_msg = None
-
-    if necessary_column not in df.columns:
-        if necessary_column in [str(c).lower() for c in df.columns]:
-
-            def lower_case_column_creator(df: pd.DataFrame) -> pd.DataFrame:
-                return lower_case_column(df, necessary_column)
-
-            necessary_fn = lower_case_column_creator
-            immediate_msg = f"\n- The `{necessary_column}` column/key should be lowercase"
-            necessary_msg = f"Lower case column name to `{necessary_column}`"
-        else:
-            error_msg = f"`{necessary_column}` column/key is missing. Please make sure you name your columns/keys appropriately, then retry"
-
-    return Remediation(
-        name="necessary_column",
-        immediate_msg=immediate_msg,
-        necessary_msg=necessary_msg,
-        necessary_fn=necessary_fn,
-        error_msg=error_msg,
-    )
-
-
-def additional_column_validator(df: pd.DataFrame, fields: list[str] = ["prompt", "completion"]) -> Remediation:
-    """
-    This validator will remove additional columns from the dataframe.
-    """
-    additional_columns = []
-    necessary_msg = None
-    immediate_msg = None
-    necessary_fn = None  # type: ignore
-
-    if len(df.columns) > 2:
-        additional_columns = [c for c in df.columns if c not in fields]
-        warn_message = ""
-        for ac in additional_columns:
-            dups = [c for c in additional_columns if ac in c]
-            if len(dups) > 0:
-                warn_message += f"\n  WARNING: Some of the additional columns/keys contain `{ac}` in their name. These will be ignored, and the column/key `{ac}` will be used instead. This could also result from a duplicate column/key in the provided file."
-        immediate_msg = f"\n- The input file should contain exactly two columns/keys per row. Additional columns/keys present are: {additional_columns}{warn_message}"
-        necessary_msg = f"Remove additional columns/keys: {additional_columns}"
-
-        def necessary_fn(x: Any) -> Any:
-            return x[fields]
-
-    return Remediation(
-        name="additional_column",
-        immediate_msg=immediate_msg,
-        necessary_msg=necessary_msg,
-        necessary_fn=necessary_fn,
-    )
-
-
-def non_empty_field_validator(df: pd.DataFrame, field: str = "completion") -> Remediation:
-    """
-    This validator will ensure that no completion is empty.
-    """
-    necessary_msg = None
-    necessary_fn = None  # type: ignore
-    immediate_msg = None
-
-    if df[field].apply(lambda x: x == "").any() or df[field].isnull().any():
-        empty_rows = (df[field] == "") | (df[field].isnull())
-        empty_indexes = df.reset_index().index[empty_rows].tolist()
-        immediate_msg = f"\n- `{field}` column/key should not contain empty strings. These are rows: {empty_indexes}"
-
-        def necessary_fn(x: Any) -> Any:
-            return x[x[field] != ""].dropna(subset=[field])
-
-        necessary_msg = f"Remove {len(empty_indexes)} rows with empty {field}s"
-
-    return Remediation(
-        name=f"empty_{field}",
-        immediate_msg=immediate_msg,
-        necessary_msg=necessary_msg,
-        necessary_fn=necessary_fn,
-    )
-
-
-def duplicated_rows_validator(df: pd.DataFrame, fields: list[str] = ["prompt", "completion"]) -> Remediation:
-    """
-    This validator will suggest to the user to remove duplicate rows if they exist.
-    """
-    duplicated_rows = df.duplicated(subset=fields)
-    duplicated_indexes = df.reset_index().index[duplicated_rows].tolist()
-    immediate_msg = None
-    optional_msg = None
-    optional_fn = None  # type: ignore
-
-    if len(duplicated_indexes) > 0:
-        immediate_msg = f"\n- There are {len(duplicated_indexes)} duplicated {'-'.join(fields)} sets. These are rows: {duplicated_indexes}"
-        optional_msg = f"Remove {len(duplicated_indexes)} duplicate rows"
-
-        def optional_fn(x: Any) -> Any:
-            return x.drop_duplicates(subset=fields)
-
-    return Remediation(
-        name="duplicated_rows",
-        immediate_msg=immediate_msg,
-        optional_msg=optional_msg,
-        optional_fn=optional_fn,
-    )
-
-
-def long_examples_validator(df: pd.DataFrame) -> Remediation:
-    """
-    This validator will suggest to the user to remove examples that are too long.
-    """
-    immediate_msg = None
-    optional_msg = None
-    optional_fn = None  # type: ignore
-
-    ft_type = infer_task_type(df)
-    if ft_type != "open-ended generation":
-
-        def get_long_indexes(d: pd.DataFrame) -> Any:
-            long_examples = d.apply(lambda x: len(x.prompt) + len(x.completion) > 10000, axis=1)
-            return d.reset_index().index[long_examples].tolist()
-
-        long_indexes = get_long_indexes(df)
-
-        if len(long_indexes) > 0:
-            immediate_msg = f"\n- There are {len(long_indexes)} examples that are very long. These are rows: {long_indexes}\nFor conditional generation, and for classification the examples shouldn't be longer than 2048 tokens."
-            optional_msg = f"Remove {len(long_indexes)} long examples"
-
-            def optional_fn(x: Any) -> Any:
-                long_indexes_to_drop = get_long_indexes(x)
-                if long_indexes != long_indexes_to_drop:
-                    sys.stdout.write(
-                        f"The indices of the long examples has changed as a result of a previously applied recommendation.\nThe {len(long_indexes_to_drop)} long examples to be dropped are now at the following indices: {long_indexes_to_drop}\n"
-                    )
-                return x.drop(long_indexes_to_drop)
-
-    return Remediation(
-        name="long_examples",
-        immediate_msg=immediate_msg,
-        optional_msg=optional_msg,
-        optional_fn=optional_fn,
-    )
-
-
-def common_prompt_suffix_validator(df: pd.DataFrame) -> Remediation:
-    """
-    This validator will suggest to add a common suffix to the prompt if one doesn't already exist in case of classification or conditional generation.
-    """
-    error_msg = None
-    immediate_msg = None
-    optional_msg = None
-    optional_fn = None  # type: ignore
-
-    # Find a suffix which is not contained within the prompt otherwise
-    suggested_suffix = "\n\n### =>\n\n"
-    suffix_options = [
-        " ->",
-        "\n\n###\n\n",
-        "\n\n===\n\n",
-        "\n\n---\n\n",
-        "\n\n===>\n\n",
-        "\n\n--->\n\n",
-    ]
-    for suffix_option in suffix_options:
-        if suffix_option == " ->":
-            if df.prompt.str.contains("\n").any():
-                continue
-        if df.prompt.str.contains(suffix_option, regex=False).any():
-            continue
-        suggested_suffix = suffix_option
-        break
-    display_suggested_suffix = suggested_suffix.replace("\n", "\\n")
-
-    ft_type = infer_task_type(df)
-    if ft_type == "open-ended generation":
-        return Remediation(name="common_suffix")
-
-    def add_suffix(x: Any, suffix: Any) -> Any:
-        x["prompt"] += suffix
-        return x
-
-    common_suffix = get_common_xfix(df.prompt, xfix="suffix")
-    if (df.prompt == common_suffix).all():
-        error_msg = f"All prompts are identical: `{common_suffix}`\nConsider leaving the prompts blank if you want to do open-ended generation, otherwise ensure prompts are different"
-        return Remediation(name="common_suffix", error_msg=error_msg)
-
-    if common_suffix != "":
-        common_suffix_new_line_handled = common_suffix.replace("\n", "\\n")
-        immediate_msg = f"\n- All prompts end with suffix `{common_suffix_new_line_handled}`"
-        if len(common_suffix) > 10:
-            immediate_msg += f". This suffix seems very long. Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`"
-        if df.prompt.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any():
-            immediate_msg += f"\n  WARNING: Some of your prompts contain the suffix `{common_suffix}` more than once. We strongly suggest that you review your prompts and add a unique suffix"
-
-    else:
-        immediate_msg = "\n- Your data does not contain a common separator at the end of your prompts. Having a separator string appended to the end of the prompt makes it clearer to the fine-tuned model where the completion should begin. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples. If you intend to do open-ended generation, then you should leave the prompts empty"
-
-    if common_suffix == "":
-        optional_msg = f"Add a suffix separator `{display_suggested_suffix}` to all prompts"
-
-        def optional_fn(x: Any) -> Any:
-            return add_suffix(x, suggested_suffix)
-
-    return Remediation(
-        name="common_completion_suffix",
-        immediate_msg=immediate_msg,
-        optional_msg=optional_msg,
-        optional_fn=optional_fn,
-        error_msg=error_msg,
-    )
-
-
-def common_prompt_prefix_validator(df: pd.DataFrame) -> Remediation:
-    """
-    This validator will suggest to remove a common prefix from the prompt if a long one exist.
-    """
-    MAX_PREFIX_LEN = 12
-
-    immediate_msg = None
-    optional_msg = None
-    optional_fn = None  # type: ignore
-
-    common_prefix = get_common_xfix(df.prompt, xfix="prefix")
-    if common_prefix == "":
-        return Remediation(name="common_prefix")
-
-    def remove_common_prefix(x: Any, prefix: Any) -> Any:
-        x["prompt"] = x["prompt"].str[len(prefix) :]
-        return x
-
-    if (df.prompt == common_prefix).all():
-        # already handled by common_suffix_validator
-        return Remediation(name="common_prefix")
-
-    if common_prefix != "":
-        immediate_msg = f"\n- All prompts start with prefix `{common_prefix}`"
-        if MAX_PREFIX_LEN < len(common_prefix):
-            immediate_msg += ". Fine-tuning doesn't require the instruction specifying the task, or a few-shot example scenario. Most of the time you should only add the input data into the prompt, and the desired output into the completion"
-            optional_msg = f"Remove prefix `{common_prefix}` from all prompts"
-
-            def optional_fn(x: Any) -> Any:
-                return remove_common_prefix(x, common_prefix)
-
-    return Remediation(
-        name="common_prompt_prefix",
-        immediate_msg=immediate_msg,
-        optional_msg=optional_msg,
-        optional_fn=optional_fn,
-    )
-
-
-def common_completion_prefix_validator(df: pd.DataFrame) -> Remediation:
-    """
-    This validator will suggest to remove a common prefix from the completion if a long one exist.
-    """
-    MAX_PREFIX_LEN = 5
-
-    common_prefix = get_common_xfix(df.completion, xfix="prefix")
-    ws_prefix = len(common_prefix) > 0 and common_prefix[0] == " "
-    if len(common_prefix) < MAX_PREFIX_LEN:
-        return Remediation(name="common_prefix")
-
-    def remove_common_prefix(x: Any, prefix: Any, ws_prefix: Any) -> Any:
-        x["completion"] = x["completion"].str[len(prefix) :]
-        if ws_prefix:
-            # keep the single whitespace as prefix
-            x["completion"] = f" {x['completion']}"
-        return x
-
-    if (df.completion == common_prefix).all():
-        # already handled by common_suffix_validator
-        return Remediation(name="common_prefix")
-
-    immediate_msg = f"\n- All completions start with prefix `{common_prefix}`. Most of the time you should only add the output data into the completion, without any prefix"
-    optional_msg = f"Remove prefix `{common_prefix}` from all completions"
-
-    def optional_fn(x: Any) -> Any:
-        return remove_common_prefix(x, common_prefix, ws_prefix)
-
-    return Remediation(
-        name="common_completion_prefix",
-        immediate_msg=immediate_msg,
-        optional_msg=optional_msg,
-        optional_fn=optional_fn,
-    )
-
-
-def common_completion_suffix_validator(df: pd.DataFrame) -> Remediation:
-    """
-    This validator will suggest to add a common suffix to the completion if one doesn't already exist in case of classification or conditional generation.
-    """
-    error_msg = None
-    immediate_msg = None
-    optional_msg = None
-    optional_fn = None  # type: ignore
-
-    ft_type = infer_task_type(df)
-    if ft_type == "open-ended generation" or ft_type == "classification":
-        return Remediation(name="common_suffix")
-
-    common_suffix = get_common_xfix(df.completion, xfix="suffix")
-    if (df.completion == common_suffix).all():
-        error_msg = f"All completions are identical: `{common_suffix}`\nEnsure completions are different, otherwise the model will just repeat `{common_suffix}`"
-        return Remediation(name="common_suffix", error_msg=error_msg)
-
-    # Find a suffix which is not contained within the completion otherwise
-    suggested_suffix = " [END]"
-    suffix_options = [
-        "\n",
-        ".",
-        " END",
-        "***",
-        "+++",
-        "&&&",
-        "$$$",
-        "@@@",
-        "%%%",
-    ]
-    for suffix_option in suffix_options:
-        if df.completion.str.contains(suffix_option, regex=False).any():
-            continue
-        suggested_suffix = suffix_option
-        break
-    display_suggested_suffix = suggested_suffix.replace("\n", "\\n")
-
-    def add_suffix(x: Any, suffix: Any) -> Any:
-        x["completion"] += suffix
-        return x
-
-    if common_suffix != "":
-        common_suffix_new_line_handled = common_suffix.replace("\n", "\\n")
-        immediate_msg = f"\n- All completions end with suffix `{common_suffix_new_line_handled}`"
-        if len(common_suffix) > 10:
-            immediate_msg += f". This suffix seems very long. Consider replacing with a shorter suffix, such as `{display_suggested_suffix}`"
-        if df.completion.str[: -len(common_suffix)].str.contains(common_suffix, regex=False).any():
-            immediate_msg += f"\n  WARNING: Some of your completions contain the suffix `{common_suffix}` more than once. We suggest that you review your completions and add a unique ending"
-
-    else:
-        immediate_msg = "\n- Your data does not contain a common ending at the end of your completions. Having a common ending string appended to the end of the completion makes it clearer to the fine-tuned model where the completion should end. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more detail and examples."
-
-    if common_suffix == "":
-        optional_msg = f"Add a suffix ending `{display_suggested_suffix}` to all completions"
-
-        def optional_fn(x: Any) -> Any:
-            return add_suffix(x, suggested_suffix)
-
-    return Remediation(
-        name="common_completion_suffix",
-        immediate_msg=immediate_msg,
-        optional_msg=optional_msg,
-        optional_fn=optional_fn,
-        error_msg=error_msg,
-    )
-
-
-def completions_space_start_validator(df: pd.DataFrame) -> Remediation:
-    """
-    This validator will suggest to add a space at the start of the completion if it doesn't already exist. This helps with tokenization.
-    """
-
-    def add_space_start(x: Any) -> Any:
-        x["completion"] = x["completion"].apply(lambda s: ("" if s.startswith(" ") else " ") + s)
-        return x
-
-    optional_msg = None
-    optional_fn = None
-    immediate_msg = None
-
-    if df.completion.str[:1].nunique() != 1 or df.completion.values[0][0] != " ":
-        immediate_msg = "\n- The completion should start with a whitespace character (` `). This tends to produce better results due to the tokenization we use. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details"
-        optional_msg = "Add a whitespace character to the beginning of the completion"
-        optional_fn = add_space_start
-    return Remediation(
-        name="completion_space_start",
-        immediate_msg=immediate_msg,
-        optional_msg=optional_msg,
-        optional_fn=optional_fn,
-    )
-
-
-def lower_case_validator(df: pd.DataFrame, column: Any) -> Remediation | None:
-    """
-    This validator will suggest to lowercase the column values, if more than a third of letters are uppercase.
-    """
-
-    def lower_case(x: Any) -> Any:
-        x[column] = x[column].str.lower()
-        return x
-
-    count_upper = df[column].apply(lambda x: sum(1 for c in x if c.isalpha() and c.isupper())).sum()
-    count_lower = df[column].apply(lambda x: sum(1 for c in x if c.isalpha() and c.islower())).sum()
-
-    if count_upper * 2 > count_lower:
-        return Remediation(
-            name="lower_case",
-            immediate_msg=f"\n- More than a third of your `{column}` column/key is uppercase. Uppercase {column}s tends to perform worse than a mixture of case encountered in normal language. We recommend to lower case the data if that makes sense in your domain. See https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset for more details",
-            optional_msg=f"Lowercase all your data in column/key `{column}`",
-            optional_fn=lower_case,
-        )
-    return None
-
-
-def read_any_format(
-    fname: str, fields: list[str] = ["prompt", "completion"]
-) -> tuple[pd.DataFrame | None, Remediation]:
-    """
-    This function will read a file saved in .csv, .json, .txt, .xlsx or .tsv format using pandas.
-     - for .xlsx it will read the first sheet
-     - for .txt it will assume completions and split on newline
-    """
-    remediation = None
-    necessary_msg = None
-    immediate_msg = None
-    error_msg = None
-    df = None
-
-    if os.path.isfile(fname):
-        try:
-            if fname.lower().endswith(".csv") or fname.lower().endswith(".tsv"):
-                file_extension_str, separator = ("CSV", ",") if fname.lower().endswith(".csv") else ("TSV", "\t")
-                immediate_msg = (
-                    f"\n- Based on your file extension, your file is formatted as a {file_extension_str} file"
-                )
-                necessary_msg = f"Your format `{file_extension_str}` will be converted to `JSONL`"
-                df = pd.read_csv(fname, sep=separator, dtype=str).fillna("")
-            elif fname.lower().endswith(".xlsx"):
-                immediate_msg = "\n- Based on your file extension, your file is formatted as an Excel file"
-                necessary_msg = "Your format `XLSX` will be converted to `JSONL`"
-                xls = pd.ExcelFile(fname)
-                sheets = xls.sheet_names
-                if len(sheets) > 1:
-                    immediate_msg += "\n- Your Excel file contains more than one sheet. Please either save as csv or ensure all data is present in the first sheet. WARNING: Reading only the first sheet..."
-                df = pd.read_excel(fname, dtype=str).fillna("")
-            elif fname.lower().endswith(".txt"):
-                immediate_msg = "\n- Based on your file extension, you provided a text file"
-                necessary_msg = "Your format `TXT` will be converted to `JSONL`"
-                with open(fname, "r") as f:
-                    content = f.read()
-                    df = pd.DataFrame(
-                        [["", line] for line in content.split("\n")],
-                        columns=fields,
-                        dtype=str,
-                    ).fillna("")
-            elif fname.lower().endswith(".jsonl"):
-                df = pd.read_json(fname, lines=True, dtype=str).fillna("")  # type: ignore
-                if len(df) == 1:  # type: ignore
-                    # this is NOT what we expect for a .jsonl file
-                    immediate_msg = "\n- Your JSONL file appears to be in a JSON format. Your file will be converted to JSONL format"
-                    necessary_msg = "Your format `JSON` will be converted to `JSONL`"
-                    df = pd.read_json(fname, dtype=str).fillna("")  # type: ignore
-                else:
-                    pass  # this is what we expect for a .jsonl file
-            elif fname.lower().endswith(".json"):
-                try:
-                    # to handle case where .json file is actually a .jsonl file
-                    df = pd.read_json(fname, lines=True, dtype=str).fillna("")  # type: ignore
-                    if len(df) == 1:  # type: ignore
-                        # this code path corresponds to a .json file that has one line
-                        df = pd.read_json(fname, dtype=str).fillna("")  # type: ignore
-                    else:
-                        # this is NOT what we expect for a .json file
-                        immediate_msg = "\n- Your JSON file appears to be in a JSONL format. Your file will be converted to JSONL format"
-                        necessary_msg = "Your format `JSON` will be converted to `JSONL`"
-                except ValueError:
-                    # this code path corresponds to a .json file that has multiple lines (i.e. it is indented)
-                    df = pd.read_json(fname, dtype=str).fillna("")  # type: ignore
-            else:
-                error_msg = (
-                    "Your file must have one of the following extensions: .CSV, .TSV, .XLSX, .TXT, .JSON or .JSONL"
-                )
-                if "." in fname:
-                    error_msg += f" Your file `{fname}` ends with the extension `.{fname.split('.')[-1]}` which is not supported."
-                else:
-                    error_msg += f" Your file `{fname}` is missing a file extension."
-
-        except (ValueError, TypeError):
-            file_extension_str = fname.split(".")[-1].upper()
-            error_msg = f"Your file `{fname}` does not appear to be in valid {file_extension_str} format. Please ensure your file is formatted as a valid {file_extension_str} file."
-
-    else:
-        error_msg = f"File {fname} does not exist."
-
-    remediation = Remediation(
-        name="read_any_format",
-        necessary_msg=necessary_msg,
-        immediate_msg=immediate_msg,
-        error_msg=error_msg,
-    )
-    return df, remediation
-
-
-def format_inferrer_validator(df: pd.DataFrame) -> Remediation:
-    """
-    This validator will infer the likely fine-tuning format of the data, and display it to the user if it is classification.
-    It will also suggest to use ada and explain train/validation split benefits.
-    """
-    ft_type = infer_task_type(df)
-    immediate_msg = None
-    if ft_type == "classification":
-        immediate_msg = f"\n- Based on your data it seems like you're trying to fine-tune a model for {ft_type}\n- For classification, we recommend you try one of the faster and cheaper models, such as `ada`\n- For classification, you can estimate the expected model performance by keeping a held out dataset, which is not used for training"
-    return Remediation(name="num_examples", immediate_msg=immediate_msg)
-
-
-def apply_necessary_remediation(df: OptionalDataFrameT, remediation: Remediation) -> OptionalDataFrameT:
-    """
-    This function will apply a necessary remediation to a dataframe, or print an error message if one exists.
-    """
-    if remediation.error_msg is not None:
-        sys.stderr.write(f"\n\nERROR in {remediation.name} validator: {remediation.error_msg}\n\nAborting...")
-        sys.exit(1)
-    if remediation.immediate_msg is not None:
-        sys.stdout.write(remediation.immediate_msg)
-    if remediation.necessary_fn is not None:
-        df = remediation.necessary_fn(df)
-    return df
-
-
-def accept_suggestion(input_text: str, auto_accept: bool) -> bool:
-    sys.stdout.write(input_text)
-    if auto_accept:
-        sys.stdout.write("Y\n")
-        return True
-    return input().lower() != "n"
-
-
-def apply_optional_remediation(
-    df: pd.DataFrame, remediation: Remediation, auto_accept: bool
-) -> tuple[pd.DataFrame, bool]:
-    """
-    This function will apply an optional remediation to a dataframe, based on the user input.
-    """
-    optional_applied = False
-    input_text = f"- [Recommended] {remediation.optional_msg} [Y/n]: "
-    if remediation.optional_msg is not None:
-        if accept_suggestion(input_text, auto_accept):
-            assert remediation.optional_fn is not None
-            df = remediation.optional_fn(df)
-            optional_applied = True
-    if remediation.necessary_msg is not None:
-        sys.stdout.write(f"- [Necessary] {remediation.necessary_msg}\n")
-    return df, optional_applied
-
-
-def estimate_fine_tuning_time(df: pd.DataFrame) -> None:
-    """
-    Estimate the time it'll take to fine-tune the dataset
-    """
-    ft_format = infer_task_type(df)
-    expected_time = 1.0
-    if ft_format == "classification":
-        num_examples = len(df)
-        expected_time = num_examples * 1.44
-    else:
-        size = df.memory_usage(index=True).sum()
-        expected_time = size * 0.0515
-
-    def format_time(time: float) -> str:
-        if time < 60:
-            return f"{round(time, 2)} seconds"
-        elif time < 3600:
-            return f"{round(time / 60, 2)} minutes"
-        elif time < 86400:
-            return f"{round(time / 3600, 2)} hours"
-        else:
-            return f"{round(time / 86400, 2)} days"
-
-    time_string = format_time(expected_time + 140)
-    sys.stdout.write(
-        f"Once your model starts training, it'll approximately take {time_string} to train a `curie` model, and less for `ada` and `babbage`. Queue will approximately take half an hour per job ahead of you.\n"
-    )
-
-
-def get_outfnames(fname: str, split: bool) -> list[str]:
-    suffixes = ["_train", "_valid"] if split else [""]
-    i = 0
-    while True:
-        index_suffix = f" ({i})" if i > 0 else ""
-        candidate_fnames = [f"{os.path.splitext(fname)[0]}_prepared{suffix}{index_suffix}.jsonl" for suffix in suffixes]
-        if not any(os.path.isfile(f) for f in candidate_fnames):
-            return candidate_fnames
-        i += 1
-
-
-def get_classification_hyperparams(df: pd.DataFrame) -> tuple[int, object]:
-    n_classes = df.completion.nunique()
-    pos_class = None
-    if n_classes == 2:
-        pos_class = df.completion.value_counts().index[0]
-    return n_classes, pos_class
-
-
-def write_out_file(df: pd.DataFrame, fname: str, any_remediations: bool, auto_accept: bool) -> None:
-    """
-    This function will write out a dataframe to a file, if the user would like to proceed, and also offer a fine-tuning command with the newly created file.
-    For classification it will optionally ask the user if they would like to split the data into train/valid files, and modify the suggested command to include the valid set.
-    """
-    ft_format = infer_task_type(df)
-    common_prompt_suffix = get_common_xfix(df.prompt, xfix="suffix")
-    common_completion_suffix = get_common_xfix(df.completion, xfix="suffix")
-
-    split = False
-    input_text = "- [Recommended] Would you like to split into training and validation set? [Y/n]: "
-    if ft_format == "classification":
-        if accept_suggestion(input_text, auto_accept):
-            split = True
-
-    additional_params = ""
-    common_prompt_suffix_new_line_handled = common_prompt_suffix.replace("\n", "\\n")
-    common_completion_suffix_new_line_handled = common_completion_suffix.replace("\n", "\\n")
-    optional_ending_string = (
-        f' Make sure to include `stop=["{common_completion_suffix_new_line_handled}"]` so that the generated texts ends at the expected place.'
-        if len(common_completion_suffix_new_line_handled) > 0
-        else ""
-    )
-
-    input_text = "\n\nYour data will be written to a new JSONL file. Proceed [Y/n]: "
-
-    if not any_remediations and not split:
-        sys.stdout.write(
-            f'\nYou can use your file for fine-tuning:\n> openai api fine_tunes.create -t "{fname}"{additional_params}\n\nAfter you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt.{optional_ending_string}\n'
-        )
-        estimate_fine_tuning_time(df)
-
-    elif accept_suggestion(input_text, auto_accept):
-        fnames = get_outfnames(fname, split)
-        if split:
-            assert len(fnames) == 2 and "train" in fnames[0] and "valid" in fnames[1]
-            MAX_VALID_EXAMPLES = 1000
-            n_train = max(len(df) - MAX_VALID_EXAMPLES, int(len(df) * 0.8))
-            df_train = df.sample(n=n_train, random_state=42)
-            df_valid = df.drop(df_train.index)
-            df_train[["prompt", "completion"]].to_json(  # type: ignore
-                fnames[0], lines=True, orient="records", force_ascii=False
-            )
-            df_valid[["prompt", "completion"]].to_json(fnames[1], lines=True, orient="records", force_ascii=False)
-
-            n_classes, pos_class = get_classification_hyperparams(df)
-            additional_params += " --compute_classification_metrics"
-            if n_classes == 2:
-                additional_params += f' --classification_positive_class "{pos_class}"'
-            else:
-                additional_params += f" --classification_n_classes {n_classes}"
-        else:
-            assert len(fnames) == 1
-            df[["prompt", "completion"]].to_json(fnames[0], lines=True, orient="records", force_ascii=False)
-
-        # Add -v VALID_FILE if we split the file into train / valid
-        files_string = ("s" if split else "") + " to `" + ("` and `".join(fnames))
-        valid_string = f' -v "{fnames[1]}"' if split else ""
-        separator_reminder = (
-            ""
-            if len(common_prompt_suffix_new_line_handled) == 0
-            else f"After you’ve fine-tuned a model, remember that your prompt has to end with the indicator string `{common_prompt_suffix_new_line_handled}` for the model to start generating completions, rather than continuing with the prompt."
-        )
-        sys.stdout.write(
-            f'\nWrote modified file{files_string}`\nFeel free to take a look!\n\nNow use that file when fine-tuning:\n> openai api fine_tunes.create -t "{fnames[0]}"{valid_string}{additional_params}\n\n{separator_reminder}{optional_ending_string}\n'
-        )
-        estimate_fine_tuning_time(df)
-    else:
-        sys.stdout.write("Aborting... did not write the file\n")
-
-
-def infer_task_type(df: pd.DataFrame) -> str:
-    """
-    Infer the likely fine-tuning task type from the data
-    """
-    CLASSIFICATION_THRESHOLD = 3  # min_average instances of each class
-    if sum(df.prompt.str.len()) == 0:
-        return "open-ended generation"
-
-    if len(df.completion.unique()) < len(df) / CLASSIFICATION_THRESHOLD:
-        return "classification"
-
-    return "conditional generation"
-
-
-def get_common_xfix(series: Any, xfix: str = "suffix") -> str:
-    """
-    Finds the longest common suffix or prefix of all the values in a series
-    """
-    common_xfix = ""
-    while True:
-        common_xfixes = (
-            series.str[-(len(common_xfix) + 1) :] if xfix == "suffix" else series.str[: len(common_xfix) + 1]
-        )  # first few or last few characters
-        if common_xfixes.nunique() != 1:  # we found the character at which we don't have a unique xfix anymore
-            break
-        elif common_xfix == common_xfixes.values[0]:  # the entire first row is a prefix of every other row
-            break
-        else:  # the first or last few characters are still common across all rows - let's try to add one more
-            common_xfix = common_xfixes.values[0]
-    return common_xfix
-
-
-Validator: TypeAlias = "Callable[[pd.DataFrame], Remediation | None]"
-
-
-def get_validators() -> list[Validator]:
-    return [
-        num_examples_validator,
-        lambda x: necessary_column_validator(x, "prompt"),
-        lambda x: necessary_column_validator(x, "completion"),
-        additional_column_validator,
-        non_empty_field_validator,
-        format_inferrer_validator,
-        duplicated_rows_validator,
-        long_examples_validator,
-        lambda x: lower_case_validator(x, "prompt"),
-        lambda x: lower_case_validator(x, "completion"),
-        common_prompt_suffix_validator,
-        common_prompt_prefix_validator,
-        common_completion_prefix_validator,
-        common_completion_suffix_validator,
-        completions_space_start_validator,
-    ]
-
-
-def apply_validators(
-    df: pd.DataFrame,
-    fname: str,
-    remediation: Remediation | None,
-    validators: list[Validator],
-    auto_accept: bool,
-    write_out_file_func: Callable[..., Any],
-) -> None:
-    optional_remediations: list[Remediation] = []
-    if remediation is not None:
-        optional_remediations.append(remediation)
-    for validator in validators:
-        remediation = validator(df)
-        if remediation is not None:
-            optional_remediations.append(remediation)
-            df = apply_necessary_remediation(df, remediation)
-
-    any_optional_or_necessary_remediations = any(
-        [
-            remediation
-            for remediation in optional_remediations
-            if remediation.optional_msg is not None or remediation.necessary_msg is not None
-        ]
-    )
-    any_necessary_applied = any(
-        [remediation for remediation in optional_remediations if remediation.necessary_msg is not None]
-    )
-    any_optional_applied = False
-
-    if any_optional_or_necessary_remediations:
-        sys.stdout.write("\n\nBased on the analysis we will perform the following actions:\n")
-        for remediation in optional_remediations:
-            df, optional_applied = apply_optional_remediation(df, remediation, auto_accept)
-            any_optional_applied = any_optional_applied or optional_applied
-    else:
-        sys.stdout.write("\n\nNo remediations found.\n")
-
-    any_optional_or_necessary_applied = any_optional_applied or any_necessary_applied
-
-    write_out_file_func(df, fname, any_optional_or_necessary_applied, auto_accept)
diff --git a/openai/lib/azure.py b/openai/lib/azure.py
deleted file mode 100644
index b3b94de8..00000000
--- a/openai/lib/azure.py
+++ /dev/null
@@ -1,529 +0,0 @@
-from __future__ import annotations
-
-import os
-import inspect
-from typing import Any, Union, Mapping, TypeVar, Callable, Awaitable, overload
-from typing_extensions import Self, override
-
-import httpx
-
-from .._types import NOT_GIVEN, Omit, Timeout, NotGiven
-from .._utils import is_given, is_mapping
-from .._client import OpenAI, AsyncOpenAI
-from .._models import FinalRequestOptions
-from .._streaming import Stream, AsyncStream
-from .._exceptions import OpenAIError
-from .._base_client import DEFAULT_MAX_RETRIES, BaseClient
-
-_deployments_endpoints = set(
-    [
-        "/completions",
-        "/chat/completions",
-        "/embeddings",
-        "/audio/transcriptions",
-        "/audio/translations",
-        "/audio/speech",
-        "/images/generations",
-    ]
-)
-
-
-AzureADTokenProvider = Callable[[], str]
-AsyncAzureADTokenProvider = Callable[[], "str | Awaitable[str]"]
-_HttpxClientT = TypeVar("_HttpxClientT", bound=Union[httpx.Client, httpx.AsyncClient])
-_DefaultStreamT = TypeVar("_DefaultStreamT", bound=Union[Stream[Any], AsyncStream[Any]])
-
-
-# we need to use a sentinel API key value for Azure AD
-# as we don't want to make the `api_key` in the main client Optional
-# and Azure AD tokens may be retrieved on a per-request basis
-API_KEY_SENTINEL = "".join(["<", "missing API key", ">"])
-
-
-class MutuallyExclusiveAuthError(OpenAIError):
-    def __init__(self) -> None:
-        super().__init__(
-            "The `api_key`, `azure_ad_token` and `azure_ad_token_provider` arguments are mutually exclusive; Only one can be passed at a time"
-        )
-
-
-class BaseAzureClient(BaseClient[_HttpxClientT, _DefaultStreamT]):
-    @override
-    def _build_request(
-        self,
-        options: FinalRequestOptions,
-    ) -> httpx.Request:
-        if options.url in _deployments_endpoints and is_mapping(options.json_data):
-            model = options.json_data.get("model")
-            if model is not None and not "/deployments" in str(self.base_url):
-                options.url = f"/deployments/{model}{options.url}"
-
-        return super()._build_request(options)
-
-
-class AzureOpenAI(BaseAzureClient[httpx.Client, Stream[Any]], OpenAI):
-    @overload
-    def __init__(
-        self,
-        *,
-        azure_endpoint: str,
-        azure_deployment: str | None = None,
-        api_version: str | None = None,
-        api_key: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AzureADTokenProvider | None = None,
-        organization: str | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        http_client: httpx.Client | None = None,
-        _strict_response_validation: bool = False,
-    ) -> None:
-        ...
-
-    @overload
-    def __init__(
-        self,
-        *,
-        azure_deployment: str | None = None,
-        api_version: str | None = None,
-        api_key: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AzureADTokenProvider | None = None,
-        organization: str | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        http_client: httpx.Client | None = None,
-        _strict_response_validation: bool = False,
-    ) -> None:
-        ...
-
-    @overload
-    def __init__(
-        self,
-        *,
-        base_url: str,
-        api_version: str | None = None,
-        api_key: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AzureADTokenProvider | None = None,
-        organization: str | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        http_client: httpx.Client | None = None,
-        _strict_response_validation: bool = False,
-    ) -> None:
-        ...
-
-    def __init__(
-        self,
-        *,
-        api_version: str | None = None,
-        azure_endpoint: str | None = None,
-        azure_deployment: str | None = None,
-        api_key: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AzureADTokenProvider | None = None,
-        organization: str | None = None,
-        base_url: str | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        http_client: httpx.Client | None = None,
-        _strict_response_validation: bool = False,
-    ) -> None:
-        """Construct a new synchronous azure openai client instance.
-
-        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
-        - `api_key` from `AZURE_OPENAI_API_KEY`
-        - `organization` from `OPENAI_ORG_ID`
-        - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
-        - `api_version` from `OPENAI_API_VERSION`
-        - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
-
-        Args:
-            azure_endpoint: Your Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/`
-
-            azure_ad_token: Your Azure Active Directory token, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id
-
-            azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request.
-
-            azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`.
-                Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs.
-        """
-        if api_key is None:
-            api_key = os.environ.get("AZURE_OPENAI_API_KEY")
-
-        if azure_ad_token is None:
-            azure_ad_token = os.environ.get("AZURE_OPENAI_AD_TOKEN")
-
-        if api_key is None and azure_ad_token is None and azure_ad_token_provider is None:
-            raise OpenAIError(
-                "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, `azure_ad_token_provider`, or the `AZURE_OPENAI_API_KEY` or `AZURE_OPENAI_AD_TOKEN` environment variables."
-            )
-
-        if api_version is None:
-            api_version = os.environ.get("OPENAI_API_VERSION")
-
-        if api_version is None:
-            raise ValueError(
-                "Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable"
-            )
-
-        if default_query is None:
-            default_query = {"api-version": api_version}
-        else:
-            default_query = {**default_query, "api-version": api_version}
-
-        if base_url is None:
-            if azure_endpoint is None:
-                azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
-
-            if azure_endpoint is None:
-                raise ValueError(
-                    "Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable"
-                )
-
-            if azure_deployment is not None:
-                base_url = f"{azure_endpoint}/openai/deployments/{azure_deployment}"
-            else:
-                base_url = f"{azure_endpoint}/openai"
-        else:
-            if azure_endpoint is not None:
-                raise ValueError("base_url and azure_endpoint are mutually exclusive")
-
-        if api_key is None:
-            # define a sentinel value to avoid any typing issues
-            api_key = API_KEY_SENTINEL
-
-        super().__init__(
-            api_key=api_key,
-            organization=organization,
-            base_url=base_url,
-            timeout=timeout,
-            max_retries=max_retries,
-            default_headers=default_headers,
-            default_query=default_query,
-            http_client=http_client,
-            _strict_response_validation=_strict_response_validation,
-        )
-        self._api_version = api_version
-        self._azure_ad_token = azure_ad_token
-        self._azure_ad_token_provider = azure_ad_token_provider
-
-    @override
-    def copy(
-        self,
-        *,
-        api_key: str | None = None,
-        organization: str | None = None,
-        api_version: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AzureADTokenProvider | None = None,
-        base_url: str | httpx.URL | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        http_client: httpx.Client | None = None,
-        max_retries: int | NotGiven = NOT_GIVEN,
-        default_headers: Mapping[str, str] | None = None,
-        set_default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        set_default_query: Mapping[str, object] | None = None,
-        _extra_kwargs: Mapping[str, Any] = {},
-    ) -> Self:
-        """
-        Create a new client instance re-using the same options given to the current client with optional overriding.
-        """
-        return super().copy(
-            api_key=api_key,
-            organization=organization,
-            base_url=base_url,
-            timeout=timeout,
-            http_client=http_client,
-            max_retries=max_retries,
-            default_headers=default_headers,
-            set_default_headers=set_default_headers,
-            default_query=default_query,
-            set_default_query=set_default_query,
-            _extra_kwargs={
-                "api_version": api_version or self._api_version,
-                "azure_ad_token": azure_ad_token or self._azure_ad_token,
-                "azure_ad_token_provider": azure_ad_token_provider or self._azure_ad_token_provider,
-                **_extra_kwargs,
-            },
-        )
-
-    with_options = copy
-
-    def _get_azure_ad_token(self) -> str | None:
-        if self._azure_ad_token is not None:
-            return self._azure_ad_token
-
-        provider = self._azure_ad_token_provider
-        if provider is not None:
-            token = provider()
-            if not token or not isinstance(token, str):  # pyright: ignore[reportUnnecessaryIsInstance]
-                raise ValueError(
-                    f"Expected `azure_ad_token_provider` argument to return a string but it returned {token}",
-                )
-            return token
-
-        return None
-
-    @override
-    def _prepare_options(self, options: FinalRequestOptions) -> None:
-        headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {}
-        options.headers = headers
-
-        azure_ad_token = self._get_azure_ad_token()
-        if azure_ad_token is not None:
-            if headers.get("Authorization") is None:
-                headers["Authorization"] = f"Bearer {azure_ad_token}"
-        elif self.api_key is not API_KEY_SENTINEL:
-            if headers.get("api-key") is None:
-                headers["api-key"] = self.api_key
-        else:
-            # should never be hit
-            raise ValueError("Unable to handle auth")
-
-        return super()._prepare_options(options)
-
-
-class AsyncAzureOpenAI(BaseAzureClient[httpx.AsyncClient, AsyncStream[Any]], AsyncOpenAI):
-    @overload
-    def __init__(
-        self,
-        *,
-        azure_endpoint: str,
-        azure_deployment: str | None = None,
-        api_version: str | None = None,
-        api_key: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
-        organization: str | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        http_client: httpx.AsyncClient | None = None,
-        _strict_response_validation: bool = False,
-    ) -> None:
-        ...
-
-    @overload
-    def __init__(
-        self,
-        *,
-        azure_deployment: str | None = None,
-        api_version: str | None = None,
-        api_key: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
-        organization: str | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        http_client: httpx.AsyncClient | None = None,
-        _strict_response_validation: bool = False,
-    ) -> None:
-        ...
-
-    @overload
-    def __init__(
-        self,
-        *,
-        base_url: str,
-        api_version: str | None = None,
-        api_key: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
-        organization: str | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        http_client: httpx.AsyncClient | None = None,
-        _strict_response_validation: bool = False,
-    ) -> None:
-        ...
-
-    def __init__(
-        self,
-        *,
-        azure_endpoint: str | None = None,
-        azure_deployment: str | None = None,
-        api_version: str | None = None,
-        api_key: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
-        organization: str | None = None,
-        base_url: str | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        max_retries: int = DEFAULT_MAX_RETRIES,
-        default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        http_client: httpx.AsyncClient | None = None,
-        _strict_response_validation: bool = False,
-    ) -> None:
-        """Construct a new asynchronous azure openai client instance.
-
-        This automatically infers the following arguments from their corresponding environment variables if they are not provided:
-        - `api_key` from `AZURE_OPENAI_API_KEY`
-        - `organization` from `OPENAI_ORG_ID`
-        - `azure_ad_token` from `AZURE_OPENAI_AD_TOKEN`
-        - `api_version` from `OPENAI_API_VERSION`
-        - `azure_endpoint` from `AZURE_OPENAI_ENDPOINT`
-
-        Args:
-            azure_endpoint: Your Azure endpoint, including the resource, e.g. `https://example-resource.azure.openai.com/`
-
-            azure_ad_token: Your Azure Active Directory token, https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id
-
-            azure_ad_token_provider: A function that returns an Azure Active Directory token, will be invoked on every request.
-
-            azure_deployment: A model deployment, if given sets the base client URL to include `/deployments/{azure_deployment}`.
-                Note: this means you won't be able to use non-deployment endpoints. Not supported with Assistants APIs.
-        """
-        if api_key is None:
-            api_key = os.environ.get("AZURE_OPENAI_API_KEY")
-
-        if azure_ad_token is None:
-            azure_ad_token = os.environ.get("AZURE_OPENAI_AD_TOKEN")
-
-        if api_key is None and azure_ad_token is None and azure_ad_token_provider is None:
-            raise OpenAIError(
-                "Missing credentials. Please pass one of `api_key`, `azure_ad_token`, `azure_ad_token_provider`, or the `AZURE_OPENAI_API_KEY` or `AZURE_OPENAI_AD_TOKEN` environment variables."
-            )
-
-        if api_version is None:
-            api_version = os.environ.get("OPENAI_API_VERSION")
-
-        if api_version is None:
-            raise ValueError(
-                "Must provide either the `api_version` argument or the `OPENAI_API_VERSION` environment variable"
-            )
-
-        if default_query is None:
-            default_query = {"api-version": api_version}
-        else:
-            default_query = {**default_query, "api-version": api_version}
-
-        if base_url is None:
-            if azure_endpoint is None:
-                azure_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
-
-            if azure_endpoint is None:
-                raise ValueError(
-                    "Must provide one of the `base_url` or `azure_endpoint` arguments, or the `AZURE_OPENAI_ENDPOINT` environment variable"
-                )
-
-            if azure_deployment is not None:
-                base_url = f"{azure_endpoint}/openai/deployments/{azure_deployment}"
-            else:
-                base_url = f"{azure_endpoint}/openai"
-        else:
-            if azure_endpoint is not None:
-                raise ValueError("base_url and azure_endpoint are mutually exclusive")
-
-        if api_key is None:
-            # define a sentinel value to avoid any typing issues
-            api_key = API_KEY_SENTINEL
-
-        super().__init__(
-            api_key=api_key,
-            organization=organization,
-            base_url=base_url,
-            timeout=timeout,
-            max_retries=max_retries,
-            default_headers=default_headers,
-            default_query=default_query,
-            http_client=http_client,
-            _strict_response_validation=_strict_response_validation,
-        )
-        self._api_version = api_version
-        self._azure_ad_token = azure_ad_token
-        self._azure_ad_token_provider = azure_ad_token_provider
-
-    @override
-    def copy(
-        self,
-        *,
-        api_key: str | None = None,
-        organization: str | None = None,
-        api_version: str | None = None,
-        azure_ad_token: str | None = None,
-        azure_ad_token_provider: AsyncAzureADTokenProvider | None = None,
-        base_url: str | httpx.URL | None = None,
-        timeout: float | Timeout | None | NotGiven = NOT_GIVEN,
-        http_client: httpx.AsyncClient | None = None,
-        max_retries: int | NotGiven = NOT_GIVEN,
-        default_headers: Mapping[str, str] | None = None,
-        set_default_headers: Mapping[str, str] | None = None,
-        default_query: Mapping[str, object] | None = None,
-        set_default_query: Mapping[str, object] | None = None,
-        _extra_kwargs: Mapping[str, Any] = {},
-    ) -> Self:
-        """
-        Create a new client instance re-using the same options given to the current client with optional overriding.
-        """
-        return super().copy(
-            api_key=api_key,
-            organization=organization,
-            base_url=base_url,
-            timeout=timeout,
-            http_client=http_client,
-            max_retries=max_retries,
-            default_headers=default_headers,
-            set_default_headers=set_default_headers,
-            default_query=default_query,
-            set_default_query=set_default_query,
-            _extra_kwargs={
-                "api_version": api_version or self._api_version,
-                "azure_ad_token": azure_ad_token or self._azure_ad_token,
-                "azure_ad_token_provider": azure_ad_token_provider or self._azure_ad_token_provider,
-                **_extra_kwargs,
-            },
-        )
-
-    with_options = copy
-
-    async def _get_azure_ad_token(self) -> str | None:
-        if self._azure_ad_token is not None:
-            return self._azure_ad_token
-
-        provider = self._azure_ad_token_provider
-        if provider is not None:
-            token = provider()
-            if inspect.isawaitable(token):
-                token = await token
-            if not token or not isinstance(token, str):
-                raise ValueError(
-                    f"Expected `azure_ad_token_provider` argument to return a string but it returned {token}",
-                )
-            return token
-
-        return None
-
-    @override
-    async def _prepare_options(self, options: FinalRequestOptions) -> None:
-        headers: dict[str, str | Omit] = {**options.headers} if is_given(options.headers) else {}
-        options.headers = headers
-
-        azure_ad_token = await self._get_azure_ad_token()
-        if azure_ad_token is not None:
-            if headers.get("Authorization") is None:
-                headers["Authorization"] = f"Bearer {azure_ad_token}"
-        elif self.api_key is not API_KEY_SENTINEL:
-            if headers.get("api-key") is None:
-                headers["api-key"] = self.api_key
-        else:
-            # should never be hit
-            raise ValueError("Unable to handle auth")
-
-        return await super()._prepare_options(options)
diff --git a/openai/lib/streaming/__init__.py b/openai/lib/streaming/__init__.py
deleted file mode 100644
index eb378d25..00000000
--- a/openai/lib/streaming/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from ._assistants import (
-    AssistantEventHandler as AssistantEventHandler,
-    AssistantEventHandlerT as AssistantEventHandlerT,
-    AssistantStreamManager as AssistantStreamManager,
-    AsyncAssistantEventHandler as AsyncAssistantEventHandler,
-    AsyncAssistantEventHandlerT as AsyncAssistantEventHandlerT,
-    AsyncAssistantStreamManager as AsyncAssistantStreamManager,
-)
diff --git a/openai/lib/streaming/_assistants.py b/openai/lib/streaming/_assistants.py
deleted file mode 100644
index 03d97ec2..00000000
--- a/openai/lib/streaming/_assistants.py
+++ /dev/null
@@ -1,1035 +0,0 @@
-from __future__ import annotations
-
-import asyncio
-from types import TracebackType
-from typing import TYPE_CHECKING, Any, Generic, TypeVar, Callable, Iterable, Iterator, cast
-from typing_extensions import Awaitable, AsyncIterable, AsyncIterator, assert_never
-
-import httpx
-
-from ..._utils import is_dict, is_list, consume_sync_iterator, consume_async_iterator
-from ..._models import construct_type
-from ..._streaming import Stream, AsyncStream
-from ...types.beta import AssistantStreamEvent
-from ...types.beta.threads import (
-    Run,
-    Text,
-    Message,
-    ImageFile,
-    TextDelta,
-    MessageDelta,
-    MessageContent,
-    MessageContentDelta,
-)
-from ...types.beta.threads.runs import RunStep, ToolCall, RunStepDelta, ToolCallDelta
-
-
-class AssistantEventHandler:
-    text_deltas: Iterable[str]
-    """Iterator over just the text deltas in the stream.
-
-    This corresponds to the `thread.message.delta` event
-    in the API.
-
-    ```py
-    for text in stream.text_deltas:
-        print(text, end="", flush=True)
-    print()
-    ```
-    """
-
-    def __init__(self) -> None:
-        self._current_event: AssistantStreamEvent | None = None
-        self._current_message_content_index: int | None = None
-        self._current_message_content: MessageContent | None = None
-        self._current_tool_call_index: int | None = None
-        self._current_tool_call: ToolCall | None = None
-        self.__current_run_step_id: str | None = None
-        self.__current_run: Run | None = None
-        self.__run_step_snapshots: dict[str, RunStep] = {}
-        self.__message_snapshots: dict[str, Message] = {}
-        self.__current_message_snapshot: Message | None = None
-
-        self.text_deltas = self.__text_deltas__()
-        self._iterator = self.__stream__()
-        self.__stream: Stream[AssistantStreamEvent] | None = None
-
-    def _init(self, stream: Stream[AssistantStreamEvent]) -> None:
-        if self.__stream:
-            raise RuntimeError(
-                "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance"
-            )
-
-        self.__stream = stream
-
-    def __next__(self) -> AssistantStreamEvent:
-        return self._iterator.__next__()
-
-    def __iter__(self) -> Iterator[AssistantStreamEvent]:
-        for item in self._iterator:
-            yield item
-
-    @property
-    def current_event(self) -> AssistantStreamEvent | None:
-        return self._current_event
-
-    @property
-    def current_run(self) -> Run | None:
-        return self.__current_run
-
-    @property
-    def current_run_step_snapshot(self) -> RunStep | None:
-        if not self.__current_run_step_id:
-            return None
-
-        return self.__run_step_snapshots[self.__current_run_step_id]
-
-    @property
-    def current_message_snapshot(self) -> Message | None:
-        return self.__current_message_snapshot
-
-    def close(self) -> None:
-        """
-        Close the response and release the connection.
-
-        Automatically called when the context manager exits.
-        """
-        if self.__stream:
-            self.__stream.close()
-
-    def until_done(self) -> None:
-        """Waits until the stream has been consumed"""
-        consume_sync_iterator(self)
-
-    def get_final_run(self) -> Run:
-        """Wait for the stream to finish and returns the completed Run object"""
-        self.until_done()
-
-        if not self.__current_run:
-            raise RuntimeError("No final run object found")
-
-        return self.__current_run
-
-    def get_final_run_steps(self) -> list[RunStep]:
-        """Wait for the stream to finish and returns the steps taken in this run"""
-        self.until_done()
-
-        if not self.__run_step_snapshots:
-            raise RuntimeError("No run steps found")
-
-        return [step for step in self.__run_step_snapshots.values()]
-
-    def get_final_messages(self) -> list[Message]:
-        """Wait for the stream to finish and returns the messages emitted in this run"""
-        self.until_done()
-
-        if not self.__message_snapshots:
-            raise RuntimeError("No messages found")
-
-        return [message for message in self.__message_snapshots.values()]
-
-    def __text_deltas__(self) -> Iterator[str]:
-        for event in self:
-            if event.event != "thread.message.delta":
-                continue
-
-            for content_delta in event.data.delta.content or []:
-                if content_delta.type == "text" and content_delta.text and content_delta.text.value:
-                    yield content_delta.text.value
-
-    # event handlers
-
-    def on_end(self) -> None:
-        """Fires when the stream has finished.
-
-        This happens if the stream is read to completion
-        or if an exception occurs during iteration.
-        """
-
-    def on_event(self, event: AssistantStreamEvent) -> None:
-        """Callback that is fired for every Server-Sent-Event"""
-
-    def on_run_step_created(self, run_step: RunStep) -> None:
-        """Callback that is fired when a run step is created"""
-
-    def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None:
-        """Callback that is fired whenever a run step delta is returned from the API
-
-        The first argument is just the delta as sent by the API and the second argument
-        is the accumulated snapshot of the run step. For example, a tool calls event may
-        look like this:
-
-        # delta
-        tool_calls=[
-            RunStepDeltaToolCallsCodeInterpreter(
-                index=0,
-                type='code_interpreter',
-                id=None,
-                code_interpreter=CodeInterpreter(input=' sympy', outputs=None)
-            )
-        ]
-        # snapshot
-        tool_calls=[
-            CodeToolCall(
-                id='call_wKayJlcYV12NiadiZuJXxcfx',
-                code_interpreter=CodeInterpreter(input='from sympy', outputs=[]),
-                type='code_interpreter',
-                index=0
-            )
-        ],
-        """
-
-    def on_run_step_done(self, run_step: RunStep) -> None:
-        """Callback that is fired when a run step is completed"""
-
-    def on_tool_call_created(self, tool_call: ToolCall) -> None:
-        """Callback that is fired when a tool call is created"""
-
-    def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None:
-        """Callback that is fired when a tool call delta is encountered"""
-
-    def on_tool_call_done(self, tool_call: ToolCall) -> None:
-        """Callback that is fired when a tool call delta is encountered"""
-
-    def on_exception(self, exception: Exception) -> None:
-        """Fired whenever an exception happens during streaming"""
-
-    def on_timeout(self) -> None:
-        """Fires if the request times out"""
-
-    def on_message_created(self, message: Message) -> None:
-        """Callback that is fired when a message is created"""
-
-    def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None:
-        """Callback that is fired whenever a message delta is returned from the API
-
-        The first argument is just the delta as sent by the API and the second argument
-        is the accumulated snapshot of the message. For example, a text content event may
-        look like this:
-
-        # delta
-        MessageDeltaText(
-            index=0,
-            type='text',
-            text=Text(
-                value=' Jane'
-            ),
-        )
-        # snapshot
-        MessageContentText(
-            index=0,
-            type='text',
-            text=Text(
-                value='Certainly, Jane'
-            ),
-        )
-        """
-
-    def on_message_done(self, message: Message) -> None:
-        """Callback that is fired when a message is completed"""
-
-    def on_text_created(self, text: Text) -> None:
-        """Callback that is fired when a text content block is created"""
-
-    def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
-        """Callback that is fired whenever a text content delta is returned
-        by the API.
-
-        The first argument is just the delta as sent by the API and the second argument
-        is the accumulated snapshot of the text. For example:
-
-        on_text_delta(TextDelta(value="The"), Text(value="The")),
-        on_text_delta(TextDelta(value=" solution"), Text(value="The solution")),
-        on_text_delta(TextDelta(value=" to"), Text(value="The solution to")),
-        on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")),
-        on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")),
-        """
-
-    def on_text_done(self, text: Text) -> None:
-        """Callback that is fired when a text content block is finished"""
-
-    def on_image_file_done(self, image_file: ImageFile) -> None:
-        """Callback that is fired when an image file block is finished"""
-
-    def _emit_sse_event(self, event: AssistantStreamEvent) -> None:
-        self._current_event = event
-        self.on_event(event)
-
-        self.__current_message_snapshot, new_content = accumulate_event(
-            event=event,
-            current_message_snapshot=self.__current_message_snapshot,
-        )
-        if self.__current_message_snapshot is not None:
-            self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot
-
-        accumulate_run_step(
-            event=event,
-            run_step_snapshots=self.__run_step_snapshots,
-        )
-
-        for content_delta in new_content:
-            assert self.__current_message_snapshot is not None
-
-            block = self.__current_message_snapshot.content[content_delta.index]
-            if block.type == "text":
-                self.on_text_created(block.text)
-
-        if (
-            event.event == "thread.run.completed"
-            or event.event == "thread.run.cancelled"
-            or event.event == "thread.run.expired"
-            or event.event == "thread.run.failed"
-            or event.event == "thread.run.requires_action"
-        ):
-            self.__current_run = event.data
-            if self._current_tool_call:
-                self.on_tool_call_done(self._current_tool_call)
-        elif (
-            event.event == "thread.run.created"
-            or event.event == "thread.run.in_progress"
-            or event.event == "thread.run.cancelling"
-            or event.event == "thread.run.queued"
-        ):
-            self.__current_run = event.data
-        elif event.event == "thread.message.created":
-            self.on_message_created(event.data)
-        elif event.event == "thread.message.delta":
-            snapshot = self.__current_message_snapshot
-            assert snapshot is not None
-
-            message_delta = event.data.delta
-            if message_delta.content is not None:
-                for content_delta in message_delta.content:
-                    if content_delta.type == "text" and content_delta.text:
-                        snapshot_content = snapshot.content[content_delta.index]
-                        assert snapshot_content.type == "text"
-                        self.on_text_delta(content_delta.text, snapshot_content.text)
-
-                    # If the delta is for a new message content:
-                    # - emit on_text_done/on_image_file_done for the previous message content
-                    # - emit on_text_created/on_image_created for the new message content
-                    if content_delta.index != self._current_message_content_index:
-                        if self._current_message_content is not None:
-                            if self._current_message_content.type == "text":
-                                self.on_text_done(self._current_message_content.text)
-                            elif self._current_message_content.type == "image_file":
-                                self.on_image_file_done(self._current_message_content.image_file)
-
-                        self._current_message_content_index = content_delta.index
-                        self._current_message_content = snapshot.content[content_delta.index]
-
-                    # Update the current_message_content (delta event is correctly emitted already)
-                    self._current_message_content = snapshot.content[content_delta.index]
-
-            self.on_message_delta(event.data.delta, snapshot)
-        elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete":
-            self.__current_message_snapshot = event.data
-            self.__message_snapshots[event.data.id] = event.data
-
-            if self._current_message_content_index is not None:
-                content = event.data.content[self._current_message_content_index]
-                if content.type == "text":
-                    self.on_text_done(content.text)
-                elif content.type == "image_file":
-                    self.on_image_file_done(content.image_file)
-
-            self.on_message_done(event.data)
-        elif event.event == "thread.run.step.created":
-            self.__current_run_step_id = event.data.id
-            self.on_run_step_created(event.data)
-        elif event.event == "thread.run.step.in_progress":
-            self.__current_run_step_id = event.data.id
-        elif event.event == "thread.run.step.delta":
-            step_snapshot = self.__run_step_snapshots[event.data.id]
-
-            run_step_delta = event.data.delta
-            if (
-                run_step_delta.step_details
-                and run_step_delta.step_details.type == "tool_calls"
-                and run_step_delta.step_details.tool_calls is not None
-            ):
-                assert step_snapshot.step_details.type == "tool_calls"
-                for tool_call_delta in run_step_delta.step_details.tool_calls:
-                    if tool_call_delta.index == self._current_tool_call_index:
-                        self.on_tool_call_delta(
-                            tool_call_delta,
-                            step_snapshot.step_details.tool_calls[tool_call_delta.index],
-                        )
-
-                    # If the delta is for a new tool call:
-                    # - emit on_tool_call_done for the previous tool_call
-                    # - emit on_tool_call_created for the new tool_call
-                    if tool_call_delta.index != self._current_tool_call_index:
-                        if self._current_tool_call is not None:
-                            self.on_tool_call_done(self._current_tool_call)
-
-                        self._current_tool_call_index = tool_call_delta.index
-                        self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
-                        self.on_tool_call_created(self._current_tool_call)
-
-                    # Update the current_tool_call (delta event is correctly emitted already)
-                    self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
-
-            self.on_run_step_delta(
-                event.data.delta,
-                step_snapshot,
-            )
-        elif (
-            event.event == "thread.run.step.completed"
-            or event.event == "thread.run.step.cancelled"
-            or event.event == "thread.run.step.expired"
-            or event.event == "thread.run.step.failed"
-        ):
-            if self._current_tool_call:
-                self.on_tool_call_done(self._current_tool_call)
-
-            self.on_run_step_done(event.data)
-            self.__current_run_step_id = None
-        elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error":
-            # currently no special handling
-            ...
-        else:
-            # we only want to error at build-time
-            if TYPE_CHECKING:  # type: ignore[unreachable]
-                assert_never(event)
-
-        self._current_event = None
-
-    def __stream__(self) -> Iterator[AssistantStreamEvent]:
-        stream = self.__stream
-        if not stream:
-            raise RuntimeError("Stream has not been started yet")
-
-        try:
-            for event in stream:
-                self._emit_sse_event(event)
-
-                yield event
-        except (httpx.TimeoutException, asyncio.TimeoutError) as exc:
-            self.on_timeout()
-            self.on_exception(exc)
-            raise
-        except Exception as exc:
-            self.on_exception(exc)
-            raise
-        finally:
-            self.on_end()
-
-
-AssistantEventHandlerT = TypeVar("AssistantEventHandlerT", bound=AssistantEventHandler)
-
-
-class AssistantStreamManager(Generic[AssistantEventHandlerT]):
-    """Wrapper over AssistantStreamEventHandler that is returned by `.stream()`
-    so that a context manager can be used.
-
-    ```py
-    with client.threads.create_and_run_stream(...) as stream:
-        for event in stream:
-            ...
-    ```
-    """
-
-    def __init__(
-        self,
-        api_request: Callable[[], Stream[AssistantStreamEvent]],
-        *,
-        event_handler: AssistantEventHandlerT,
-    ) -> None:
-        self.__stream: Stream[AssistantStreamEvent] | None = None
-        self.__event_handler = event_handler
-        self.__api_request = api_request
-
-    def __enter__(self) -> AssistantEventHandlerT:
-        self.__stream = self.__api_request()
-        self.__event_handler._init(self.__stream)
-        return self.__event_handler
-
-    def __exit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        if self.__stream is not None:
-            self.__stream.close()
-
-
-class AsyncAssistantEventHandler:
-    text_deltas: AsyncIterable[str]
-    """Iterator over just the text deltas in the stream.
-
-    This corresponds to the `thread.message.delta` event
-    in the API.
-
-    ```py
-    async for text in stream.text_deltas:
-        print(text, end="", flush=True)
-    print()
-    ```
-    """
-
-    def __init__(self) -> None:
-        self._current_event: AssistantStreamEvent | None = None
-        self._current_message_content_index: int | None = None
-        self._current_message_content: MessageContent | None = None
-        self._current_tool_call_index: int | None = None
-        self._current_tool_call: ToolCall | None = None
-        self.__current_run_step_id: str | None = None
-        self.__current_run: Run | None = None
-        self.__run_step_snapshots: dict[str, RunStep] = {}
-        self.__message_snapshots: dict[str, Message] = {}
-        self.__current_message_snapshot: Message | None = None
-
-        self.text_deltas = self.__text_deltas__()
-        self._iterator = self.__stream__()
-        self.__stream: AsyncStream[AssistantStreamEvent] | None = None
-
-    def _init(self, stream: AsyncStream[AssistantStreamEvent]) -> None:
-        if self.__stream:
-            raise RuntimeError(
-                "A single event handler cannot be shared between multiple streams; You will need to construct a new event handler instance"
-            )
-
-        self.__stream = stream
-
-    async def __anext__(self) -> AssistantStreamEvent:
-        return await self._iterator.__anext__()
-
-    async def __aiter__(self) -> AsyncIterator[AssistantStreamEvent]:
-        async for item in self._iterator:
-            yield item
-
-    async def close(self) -> None:
-        """
-        Close the response and release the connection.
-
-        Automatically called when the context manager exits.
-        """
-        if self.__stream:
-            await self.__stream.close()
-
-    @property
-    def current_event(self) -> AssistantStreamEvent | None:
-        return self._current_event
-
-    @property
-    def current_run(self) -> Run | None:
-        return self.__current_run
-
-    @property
-    def current_run_step_snapshot(self) -> RunStep | None:
-        if not self.__current_run_step_id:
-            return None
-
-        return self.__run_step_snapshots[self.__current_run_step_id]
-
-    @property
-    def current_message_snapshot(self) -> Message | None:
-        return self.__current_message_snapshot
-
-    async def until_done(self) -> None:
-        """Waits until the stream has been consumed"""
-        await consume_async_iterator(self)
-
-    async def get_final_run(self) -> Run:
-        """Wait for the stream to finish and returns the completed Run object"""
-        await self.until_done()
-
-        if not self.__current_run:
-            raise RuntimeError("No final run object found")
-
-        return self.__current_run
-
-    async def get_final_run_steps(self) -> list[RunStep]:
-        """Wait for the stream to finish and returns the steps taken in this run"""
-        await self.until_done()
-
-        if not self.__run_step_snapshots:
-            raise RuntimeError("No run steps found")
-
-        return [step for step in self.__run_step_snapshots.values()]
-
-    async def get_final_messages(self) -> list[Message]:
-        """Wait for the stream to finish and returns the messages emitted in this run"""
-        await self.until_done()
-
-        if not self.__message_snapshots:
-            raise RuntimeError("No messages found")
-
-        return [message for message in self.__message_snapshots.values()]
-
-    async def __text_deltas__(self) -> AsyncIterator[str]:
-        async for event in self:
-            if event.event != "thread.message.delta":
-                continue
-
-            for content_delta in event.data.delta.content or []:
-                if content_delta.type == "text" and content_delta.text and content_delta.text.value:
-                    yield content_delta.text.value
-
-    # event handlers
-
-    async def on_end(self) -> None:
-        """Fires when the stream has finished.
-
-        This happens if the stream is read to completion
-        or if an exception occurs during iteration.
-        """
-
-    async def on_event(self, event: AssistantStreamEvent) -> None:
-        """Callback that is fired for every Server-Sent-Event"""
-
-    async def on_run_step_created(self, run_step: RunStep) -> None:
-        """Callback that is fired when a run step is created"""
-
-    async def on_run_step_delta(self, delta: RunStepDelta, snapshot: RunStep) -> None:
-        """Callback that is fired whenever a run step delta is returned from the API
-
-        The first argument is just the delta as sent by the API and the second argument
-        is the accumulated snapshot of the run step. For example, a tool calls event may
-        look like this:
-
-        # delta
-        tool_calls=[
-            RunStepDeltaToolCallsCodeInterpreter(
-                index=0,
-                type='code_interpreter',
-                id=None,
-                code_interpreter=CodeInterpreter(input=' sympy', outputs=None)
-            )
-        ]
-        # snapshot
-        tool_calls=[
-            CodeToolCall(
-                id='call_wKayJlcYV12NiadiZuJXxcfx',
-                code_interpreter=CodeInterpreter(input='from sympy', outputs=[]),
-                type='code_interpreter',
-                index=0
-            )
-        ],
-        """
-
-    async def on_run_step_done(self, run_step: RunStep) -> None:
-        """Callback that is fired when a run step is completed"""
-
-    async def on_tool_call_created(self, tool_call: ToolCall) -> None:
-        """Callback that is fired when a tool call is created"""
-
-    async def on_tool_call_delta(self, delta: ToolCallDelta, snapshot: ToolCall) -> None:
-        """Callback that is fired when a tool call delta is encountered"""
-
-    async def on_tool_call_done(self, tool_call: ToolCall) -> None:
-        """Callback that is fired when a tool call delta is encountered"""
-
-    async def on_exception(self, exception: Exception) -> None:
-        """Fired whenever an exception happens during streaming"""
-
-    async def on_timeout(self) -> None:
-        """Fires if the request times out"""
-
-    async def on_message_created(self, message: Message) -> None:
-        """Callback that is fired when a message is created"""
-
-    async def on_message_delta(self, delta: MessageDelta, snapshot: Message) -> None:
-        """Callback that is fired whenever a message delta is returned from the API
-
-        The first argument is just the delta as sent by the API and the second argument
-        is the accumulated snapshot of the message. For example, a text content event may
-        look like this:
-
-        # delta
-        MessageDeltaText(
-            index=0,
-            type='text',
-            text=Text(
-                value=' Jane'
-            ),
-        )
-        # snapshot
-        MessageContentText(
-            index=0,
-            type='text',
-            text=Text(
-                value='Certainly, Jane'
-            ),
-        )
-        """
-
-    async def on_message_done(self, message: Message) -> None:
-        """Callback that is fired when a message is completed"""
-
-    async def on_text_created(self, text: Text) -> None:
-        """Callback that is fired when a text content block is created"""
-
-    async def on_text_delta(self, delta: TextDelta, snapshot: Text) -> None:
-        """Callback that is fired whenever a text content delta is returned
-        by the API.
-
-        The first argument is just the delta as sent by the API and the second argument
-        is the accumulated snapshot of the text. For example:
-
-        on_text_delta(TextDelta(value="The"), Text(value="The")),
-        on_text_delta(TextDelta(value=" solution"), Text(value="The solution")),
-        on_text_delta(TextDelta(value=" to"), Text(value="The solution to")),
-        on_text_delta(TextDelta(value=" the"), Text(value="The solution to the")),
-        on_text_delta(TextDelta(value=" equation"), Text(value="The solution to the equivalent")),
-        """
-
-    async def on_text_done(self, text: Text) -> None:
-        """Callback that is fired when a text content block is finished"""
-
-    async def on_image_file_done(self, image_file: ImageFile) -> None:
-        """Callback that is fired when an image file block is finished"""
-
-    async def _emit_sse_event(self, event: AssistantStreamEvent) -> None:
-        self._current_event = event
-        await self.on_event(event)
-
-        self.__current_message_snapshot, new_content = accumulate_event(
-            event=event,
-            current_message_snapshot=self.__current_message_snapshot,
-        )
-        if self.__current_message_snapshot is not None:
-            self.__message_snapshots[self.__current_message_snapshot.id] = self.__current_message_snapshot
-
-        accumulate_run_step(
-            event=event,
-            run_step_snapshots=self.__run_step_snapshots,
-        )
-
-        for content_delta in new_content:
-            assert self.__current_message_snapshot is not None
-
-            block = self.__current_message_snapshot.content[content_delta.index]
-            if block.type == "text":
-                await self.on_text_created(block.text)
-
-        if (
-            event.event == "thread.run.completed"
-            or event.event == "thread.run.cancelled"
-            or event.event == "thread.run.expired"
-            or event.event == "thread.run.failed"
-            or event.event == "thread.run.requires_action"
-        ):
-            self.__current_run = event.data
-            if self._current_tool_call:
-                await self.on_tool_call_done(self._current_tool_call)
-        elif (
-            event.event == "thread.run.created"
-            or event.event == "thread.run.in_progress"
-            or event.event == "thread.run.cancelling"
-            or event.event == "thread.run.queued"
-        ):
-            self.__current_run = event.data
-        elif event.event == "thread.message.created":
-            await self.on_message_created(event.data)
-        elif event.event == "thread.message.delta":
-            snapshot = self.__current_message_snapshot
-            assert snapshot is not None
-
-            message_delta = event.data.delta
-            if message_delta.content is not None:
-                for content_delta in message_delta.content:
-                    if content_delta.type == "text" and content_delta.text:
-                        snapshot_content = snapshot.content[content_delta.index]
-                        assert snapshot_content.type == "text"
-                        await self.on_text_delta(content_delta.text, snapshot_content.text)
-
-                    # If the delta is for a new message content:
-                    # - emit on_text_done/on_image_file_done for the previous message content
-                    # - emit on_text_created/on_image_created for the new message content
-                    if content_delta.index != self._current_message_content_index:
-                        if self._current_message_content is not None:
-                            if self._current_message_content.type == "text":
-                                await self.on_text_done(self._current_message_content.text)
-                            elif self._current_message_content.type == "image_file":
-                                await self.on_image_file_done(self._current_message_content.image_file)
-
-                        self._current_message_content_index = content_delta.index
-                        self._current_message_content = snapshot.content[content_delta.index]
-
-                    # Update the current_message_content (delta event is correctly emitted already)
-                    self._current_message_content = snapshot.content[content_delta.index]
-
-            await self.on_message_delta(event.data.delta, snapshot)
-        elif event.event == "thread.message.completed" or event.event == "thread.message.incomplete":
-            self.__current_message_snapshot = event.data
-            self.__message_snapshots[event.data.id] = event.data
-
-            if self._current_message_content_index is not None:
-                content = event.data.content[self._current_message_content_index]
-                if content.type == "text":
-                    await self.on_text_done(content.text)
-                elif content.type == "image_file":
-                    await self.on_image_file_done(content.image_file)
-
-            await self.on_message_done(event.data)
-        elif event.event == "thread.run.step.created":
-            self.__current_run_step_id = event.data.id
-            await self.on_run_step_created(event.data)
-        elif event.event == "thread.run.step.in_progress":
-            self.__current_run_step_id = event.data.id
-        elif event.event == "thread.run.step.delta":
-            step_snapshot = self.__run_step_snapshots[event.data.id]
-
-            run_step_delta = event.data.delta
-            if (
-                run_step_delta.step_details
-                and run_step_delta.step_details.type == "tool_calls"
-                and run_step_delta.step_details.tool_calls is not None
-            ):
-                assert step_snapshot.step_details.type == "tool_calls"
-                for tool_call_delta in run_step_delta.step_details.tool_calls:
-                    if tool_call_delta.index == self._current_tool_call_index:
-                        await self.on_tool_call_delta(
-                            tool_call_delta,
-                            step_snapshot.step_details.tool_calls[tool_call_delta.index],
-                        )
-
-                    # If the delta is for a new tool call:
-                    # - emit on_tool_call_done for the previous tool_call
-                    # - emit on_tool_call_created for the new tool_call
-                    if tool_call_delta.index != self._current_tool_call_index:
-                        if self._current_tool_call is not None:
-                            await self.on_tool_call_done(self._current_tool_call)
-
-                        self._current_tool_call_index = tool_call_delta.index
-                        self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
-                        await self.on_tool_call_created(self._current_tool_call)
-
-                    # Update the current_tool_call (delta event is correctly emitted already)
-                    self._current_tool_call = step_snapshot.step_details.tool_calls[tool_call_delta.index]
-
-            await self.on_run_step_delta(
-                event.data.delta,
-                step_snapshot,
-            )
-        elif (
-            event.event == "thread.run.step.completed"
-            or event.event == "thread.run.step.cancelled"
-            or event.event == "thread.run.step.expired"
-            or event.event == "thread.run.step.failed"
-        ):
-            if self._current_tool_call:
-                await self.on_tool_call_done(self._current_tool_call)
-
-            await self.on_run_step_done(event.data)
-            self.__current_run_step_id = None
-        elif event.event == "thread.created" or event.event == "thread.message.in_progress" or event.event == "error":
-            # currently no special handling
-            ...
-        else:
-            # we only want to error at build-time
-            if TYPE_CHECKING:  # type: ignore[unreachable]
-                assert_never(event)
-
-        self._current_event = None
-
-    async def __stream__(self) -> AsyncIterator[AssistantStreamEvent]:
-        stream = self.__stream
-        if not stream:
-            raise RuntimeError("Stream has not been started yet")
-
-        try:
-            async for event in stream:
-                await self._emit_sse_event(event)
-
-                yield event
-        except (httpx.TimeoutException, asyncio.TimeoutError) as exc:
-            await self.on_timeout()
-            await self.on_exception(exc)
-            raise
-        except Exception as exc:
-            await self.on_exception(exc)
-            raise
-        finally:
-            await self.on_end()
-
-
-AsyncAssistantEventHandlerT = TypeVar("AsyncAssistantEventHandlerT", bound=AsyncAssistantEventHandler)
-
-
-class AsyncAssistantStreamManager(Generic[AsyncAssistantEventHandlerT]):
-    """Wrapper over AsyncAssistantStreamEventHandler that is returned by `.stream()`
-    so that an async context manager can be used without `await`ing the
-    original client call.
-
-    ```py
-    async with client.threads.create_and_run_stream(...) as stream:
-        async for event in stream:
-            ...
-    ```
-    """
-
-    def __init__(
-        self,
-        api_request: Awaitable[AsyncStream[AssistantStreamEvent]],
-        *,
-        event_handler: AsyncAssistantEventHandlerT,
-    ) -> None:
-        self.__stream: AsyncStream[AssistantStreamEvent] | None = None
-        self.__event_handler = event_handler
-        self.__api_request = api_request
-
-    async def __aenter__(self) -> AsyncAssistantEventHandlerT:
-        self.__stream = await self.__api_request
-        self.__event_handler._init(self.__stream)
-        return self.__event_handler
-
-    async def __aexit__(
-        self,
-        exc_type: type[BaseException] | None,
-        exc: BaseException | None,
-        exc_tb: TracebackType | None,
-    ) -> None:
-        if self.__stream is not None:
-            await self.__stream.close()
-
-
-def accumulate_run_step(
-    *,
-    event: AssistantStreamEvent,
-    run_step_snapshots: dict[str, RunStep],
-) -> None:
-    if event.event == "thread.run.step.created":
-        run_step_snapshots[event.data.id] = event.data
-        return
-
-    if event.event == "thread.run.step.delta":
-        data = event.data
-        snapshot = run_step_snapshots[data.id]
-
-        if data.delta:
-            merged = accumulate_delta(
-                cast(
-                    "dict[object, object]",
-                    snapshot.model_dump(exclude_unset=True),
-                ),
-                cast(
-                    "dict[object, object]",
-                    data.delta.model_dump(exclude_unset=True),
-                ),
-            )
-            run_step_snapshots[snapshot.id] = cast(RunStep, construct_type(type_=RunStep, value=merged))
-
-    return None
-
-
-def accumulate_event(
-    *,
-    event: AssistantStreamEvent,
-    current_message_snapshot: Message | None,
-) -> tuple[Message | None, list[MessageContentDelta]]:
-    """Returns a tuple of message snapshot and newly created text message deltas"""
-    if event.event == "thread.message.created":
-        return event.data, []
-
-    new_content: list[MessageContentDelta] = []
-
-    if event.event != "thread.message.delta":
-        return current_message_snapshot, []
-
-    if not current_message_snapshot:
-        raise RuntimeError("Encountered a message delta with no previous snapshot")
-
-    data = event.data
-    if data.delta.content:
-        for content_delta in data.delta.content:
-            try:
-                block = current_message_snapshot.content[content_delta.index]
-            except IndexError:
-                current_message_snapshot.content.insert(
-                    content_delta.index,
-                    cast(
-                        MessageContent,
-                        construct_type(
-                            # mypy doesn't allow Content for some reason
-                            type_=cast(Any, MessageContent),
-                            value=content_delta.model_dump(exclude_unset=True),
-                        ),
-                    ),
-                )
-                new_content.append(content_delta)
-            else:
-                merged = accumulate_delta(
-                    cast(
-                        "dict[object, object]",
-                        block.model_dump(exclude_unset=True),
-                    ),
-                    cast(
-                        "dict[object, object]",
-                        content_delta.model_dump(exclude_unset=True),
-                    ),
-                )
-                current_message_snapshot.content[content_delta.index] = cast(
-                    MessageContent,
-                    construct_type(
-                        # mypy doesn't allow Content for some reason
-                        type_=cast(Any, MessageContent),
-                        value=merged,
-                    ),
-                )
-
-    return current_message_snapshot, new_content
-
-
-def accumulate_delta(acc: dict[object, object], delta: dict[object, object]) -> dict[object, object]:
-    for key, delta_value in delta.items():
-        if key not in acc:
-            acc[key] = delta_value
-            continue
-
-        acc_value = acc[key]
-        if acc_value is None:
-            acc[key] = delta_value
-            continue
-
-        # the `index` property is used in arrays of objects so it should
-        # not be accumulated like other values e.g.
-        # [{'foo': 'bar', 'index': 0}]
-        #
-        # the same applies to `type` properties as they're used for
-        # discriminated unions
-        if key == "index" or key == "type":
-            acc[key] = delta_value
-            continue
-
-        if isinstance(acc_value, str) and isinstance(delta_value, str):
-            acc_value += delta_value
-        elif isinstance(acc_value, (int, float)) and isinstance(delta_value, (int, float)):
-            acc_value += delta_value
-        elif is_dict(acc_value) and is_dict(delta_value):
-            acc_value = accumulate_delta(acc_value, delta_value)
-        elif is_list(acc_value) and is_list(delta_value):
-            # for lists of non-dictionary items we'll only ever get new entries
-            # in the array, existing entries will never be changed
-            if all(isinstance(x, (str, int, float)) for x in acc_value):
-                acc_value.extend(delta_value)
-                continue
-
-            for delta_entry in delta_value:
-                if not is_dict(delta_entry):
-                    raise TypeError(f"Unexpected list delta entry is not a dictionary: {delta_entry}")
-
-                try:
-                    index = delta_entry["index"]
-                except KeyError as exc:
-                    raise RuntimeError(f"Expected list delta entry to have an `index` key; {delta_entry}") from exc
-
-                if not isinstance(index, int):
-                    raise TypeError(f"Unexpected, list delta entry `index` value is not an integer; {index}")
-
-                try:
-                    acc_entry = acc_value[index]
-                except IndexError:
-                    acc_value.insert(index, delta_entry)
-                else:
-                    if not is_dict(acc_entry):
-                        raise TypeError("not handled yet")
-
-                    acc_value[index] = accumulate_delta(acc_entry, delta_entry)
-
-        acc[key] = acc_value
-
-    return acc
diff --git a/openai/pagination.py b/openai/pagination.py
deleted file mode 100644
index 82936382..00000000
--- a/openai/pagination.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Any, List, Generic, TypeVar, Optional, cast
-from typing_extensions import Protocol, override, runtime_checkable
-
-from ._base_client import BasePage, PageInfo, BaseSyncPage, BaseAsyncPage
-
-__all__ = ["SyncPage", "AsyncPage", "SyncCursorPage", "AsyncCursorPage"]
-
-_T = TypeVar("_T")
-
-
-@runtime_checkable
-class CursorPageItem(Protocol):
-    id: Optional[str]
-
-
-class SyncPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]):
-    """Note: no pagination actually occurs yet, this is for forwards-compatibility."""
-
-    data: List[_T]
-    object: str
-
-    @override
-    def _get_page_items(self) -> List[_T]:
-        data = self.data
-        if not data:
-            return []
-        return data
-
-    @override
-    def next_page_info(self) -> None:
-        """
-        This page represents a response that isn't actually paginated at the API level
-        so there will never be a next page.
-        """
-        return None
-
-
-class AsyncPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]):
-    """Note: no pagination actually occurs yet, this is for forwards-compatibility."""
-
-    data: List[_T]
-    object: str
-
-    @override
-    def _get_page_items(self) -> List[_T]:
-        data = self.data
-        if not data:
-            return []
-        return data
-
-    @override
-    def next_page_info(self) -> None:
-        """
-        This page represents a response that isn't actually paginated at the API level
-        so there will never be a next page.
-        """
-        return None
-
-
-class SyncCursorPage(BaseSyncPage[_T], BasePage[_T], Generic[_T]):
-    data: List[_T]
-
-    @override
-    def _get_page_items(self) -> List[_T]:
-        data = self.data
-        if not data:
-            return []
-        return data
-
-    @override
-    def next_page_info(self) -> Optional[PageInfo]:
-        data = self.data
-        if not data:
-            return None
-
-        item = cast(Any, data[-1])
-        if not isinstance(item, CursorPageItem) or item.id is None:
-            # TODO emit warning log
-            return None
-
-        return PageInfo(params={"after": item.id})
-
-
-class AsyncCursorPage(BaseAsyncPage[_T], BasePage[_T], Generic[_T]):
-    data: List[_T]
-
-    @override
-    def _get_page_items(self) -> List[_T]:
-        data = self.data
-        if not data:
-            return []
-        return data
-
-    @override
-    def next_page_info(self) -> Optional[PageInfo]:
-        data = self.data
-        if not data:
-            return None
-
-        item = cast(Any, data[-1])
-        if not isinstance(item, CursorPageItem) or item.id is None:
-            # TODO emit warning log
-            return None
-
-        return PageInfo(params={"after": item.id})
diff --git a/openai/py.typed b/openai/py.typed
deleted file mode 100644
index e69de29b..00000000
diff --git a/openai/resources/__init__.py b/openai/resources/__init__.py
deleted file mode 100644
index 64aa12d2..00000000
--- a/openai/resources/__init__.py
+++ /dev/null
@@ -1,145 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .beta import (
-    Beta,
-    AsyncBeta,
-    BetaWithRawResponse,
-    AsyncBetaWithRawResponse,
-    BetaWithStreamingResponse,
-    AsyncBetaWithStreamingResponse,
-)
-from .chat import (
-    Chat,
-    AsyncChat,
-    ChatWithRawResponse,
-    AsyncChatWithRawResponse,
-    ChatWithStreamingResponse,
-    AsyncChatWithStreamingResponse,
-)
-from .audio import (
-    Audio,
-    AsyncAudio,
-    AudioWithRawResponse,
-    AsyncAudioWithRawResponse,
-    AudioWithStreamingResponse,
-    AsyncAudioWithStreamingResponse,
-)
-from .files import (
-    Files,
-    AsyncFiles,
-    FilesWithRawResponse,
-    AsyncFilesWithRawResponse,
-    FilesWithStreamingResponse,
-    AsyncFilesWithStreamingResponse,
-)
-from .images import (
-    Images,
-    AsyncImages,
-    ImagesWithRawResponse,
-    AsyncImagesWithRawResponse,
-    ImagesWithStreamingResponse,
-    AsyncImagesWithStreamingResponse,
-)
-from .models import (
-    Models,
-    AsyncModels,
-    ModelsWithRawResponse,
-    AsyncModelsWithRawResponse,
-    ModelsWithStreamingResponse,
-    AsyncModelsWithStreamingResponse,
-)
-from .embeddings import (
-    Embeddings,
-    AsyncEmbeddings,
-    EmbeddingsWithRawResponse,
-    AsyncEmbeddingsWithRawResponse,
-    EmbeddingsWithStreamingResponse,
-    AsyncEmbeddingsWithStreamingResponse,
-)
-from .completions import (
-    Completions,
-    AsyncCompletions,
-    CompletionsWithRawResponse,
-    AsyncCompletionsWithRawResponse,
-    CompletionsWithStreamingResponse,
-    AsyncCompletionsWithStreamingResponse,
-)
-from .fine_tuning import (
-    FineTuning,
-    AsyncFineTuning,
-    FineTuningWithRawResponse,
-    AsyncFineTuningWithRawResponse,
-    FineTuningWithStreamingResponse,
-    AsyncFineTuningWithStreamingResponse,
-)
-from .moderations import (
-    Moderations,
-    AsyncModerations,
-    ModerationsWithRawResponse,
-    AsyncModerationsWithRawResponse,
-    ModerationsWithStreamingResponse,
-    AsyncModerationsWithStreamingResponse,
-)
-
-__all__ = [
-    "Completions",
-    "AsyncCompletions",
-    "CompletionsWithRawResponse",
-    "AsyncCompletionsWithRawResponse",
-    "CompletionsWithStreamingResponse",
-    "AsyncCompletionsWithStreamingResponse",
-    "Chat",
-    "AsyncChat",
-    "ChatWithRawResponse",
-    "AsyncChatWithRawResponse",
-    "ChatWithStreamingResponse",
-    "AsyncChatWithStreamingResponse",
-    "Embeddings",
-    "AsyncEmbeddings",
-    "EmbeddingsWithRawResponse",
-    "AsyncEmbeddingsWithRawResponse",
-    "EmbeddingsWithStreamingResponse",
-    "AsyncEmbeddingsWithStreamingResponse",
-    "Files",
-    "AsyncFiles",
-    "FilesWithRawResponse",
-    "AsyncFilesWithRawResponse",
-    "FilesWithStreamingResponse",
-    "AsyncFilesWithStreamingResponse",
-    "Images",
-    "AsyncImages",
-    "ImagesWithRawResponse",
-    "AsyncImagesWithRawResponse",
-    "ImagesWithStreamingResponse",
-    "AsyncImagesWithStreamingResponse",
-    "Audio",
-    "AsyncAudio",
-    "AudioWithRawResponse",
-    "AsyncAudioWithRawResponse",
-    "AudioWithStreamingResponse",
-    "AsyncAudioWithStreamingResponse",
-    "Moderations",
-    "AsyncModerations",
-    "ModerationsWithRawResponse",
-    "AsyncModerationsWithRawResponse",
-    "ModerationsWithStreamingResponse",
-    "AsyncModerationsWithStreamingResponse",
-    "Models",
-    "AsyncModels",
-    "ModelsWithRawResponse",
-    "AsyncModelsWithRawResponse",
-    "ModelsWithStreamingResponse",
-    "AsyncModelsWithStreamingResponse",
-    "FineTuning",
-    "AsyncFineTuning",
-    "FineTuningWithRawResponse",
-    "AsyncFineTuningWithRawResponse",
-    "FineTuningWithStreamingResponse",
-    "AsyncFineTuningWithStreamingResponse",
-    "Beta",
-    "AsyncBeta",
-    "BetaWithRawResponse",
-    "AsyncBetaWithRawResponse",
-    "BetaWithStreamingResponse",
-    "AsyncBetaWithStreamingResponse",
-]
diff --git a/openai/resources/audio/__init__.py b/openai/resources/audio/__init__.py
deleted file mode 100644
index 7da1d2db..00000000
--- a/openai/resources/audio/__init__.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .audio import (
-    Audio,
-    AsyncAudio,
-    AudioWithRawResponse,
-    AsyncAudioWithRawResponse,
-    AudioWithStreamingResponse,
-    AsyncAudioWithStreamingResponse,
-)
-from .speech import (
-    Speech,
-    AsyncSpeech,
-    SpeechWithRawResponse,
-    AsyncSpeechWithRawResponse,
-    SpeechWithStreamingResponse,
-    AsyncSpeechWithStreamingResponse,
-)
-from .translations import (
-    Translations,
-    AsyncTranslations,
-    TranslationsWithRawResponse,
-    AsyncTranslationsWithRawResponse,
-    TranslationsWithStreamingResponse,
-    AsyncTranslationsWithStreamingResponse,
-)
-from .transcriptions import (
-    Transcriptions,
-    AsyncTranscriptions,
-    TranscriptionsWithRawResponse,
-    AsyncTranscriptionsWithRawResponse,
-    TranscriptionsWithStreamingResponse,
-    AsyncTranscriptionsWithStreamingResponse,
-)
-
-__all__ = [
-    "Transcriptions",
-    "AsyncTranscriptions",
-    "TranscriptionsWithRawResponse",
-    "AsyncTranscriptionsWithRawResponse",
-    "TranscriptionsWithStreamingResponse",
-    "AsyncTranscriptionsWithStreamingResponse",
-    "Translations",
-    "AsyncTranslations",
-    "TranslationsWithRawResponse",
-    "AsyncTranslationsWithRawResponse",
-    "TranslationsWithStreamingResponse",
-    "AsyncTranslationsWithStreamingResponse",
-    "Speech",
-    "AsyncSpeech",
-    "SpeechWithRawResponse",
-    "AsyncSpeechWithRawResponse",
-    "SpeechWithStreamingResponse",
-    "AsyncSpeechWithStreamingResponse",
-    "Audio",
-    "AsyncAudio",
-    "AudioWithRawResponse",
-    "AsyncAudioWithRawResponse",
-    "AudioWithStreamingResponse",
-    "AsyncAudioWithStreamingResponse",
-]
diff --git a/openai/resources/audio/audio.py b/openai/resources/audio/audio.py
deleted file mode 100644
index 537ad573..00000000
--- a/openai/resources/audio/audio.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .speech import (
-    Speech,
-    AsyncSpeech,
-    SpeechWithRawResponse,
-    AsyncSpeechWithRawResponse,
-    SpeechWithStreamingResponse,
-    AsyncSpeechWithStreamingResponse,
-)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from .translations import (
-    Translations,
-    AsyncTranslations,
-    TranslationsWithRawResponse,
-    AsyncTranslationsWithRawResponse,
-    TranslationsWithStreamingResponse,
-    AsyncTranslationsWithStreamingResponse,
-)
-from .transcriptions import (
-    Transcriptions,
-    AsyncTranscriptions,
-    TranscriptionsWithRawResponse,
-    AsyncTranscriptionsWithRawResponse,
-    TranscriptionsWithStreamingResponse,
-    AsyncTranscriptionsWithStreamingResponse,
-)
-
-__all__ = ["Audio", "AsyncAudio"]
-
-
-class Audio(SyncAPIResource):
-    @cached_property
-    def transcriptions(self) -> Transcriptions:
-        return Transcriptions(self._client)
-
-    @cached_property
-    def translations(self) -> Translations:
-        return Translations(self._client)
-
-    @cached_property
-    def speech(self) -> Speech:
-        return Speech(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AudioWithRawResponse:
-        return AudioWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AudioWithStreamingResponse:
-        return AudioWithStreamingResponse(self)
-
-
-class AsyncAudio(AsyncAPIResource):
-    @cached_property
-    def transcriptions(self) -> AsyncTranscriptions:
-        return AsyncTranscriptions(self._client)
-
-    @cached_property
-    def translations(self) -> AsyncTranslations:
-        return AsyncTranslations(self._client)
-
-    @cached_property
-    def speech(self) -> AsyncSpeech:
-        return AsyncSpeech(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AsyncAudioWithRawResponse:
-        return AsyncAudioWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncAudioWithStreamingResponse:
-        return AsyncAudioWithStreamingResponse(self)
-
-
-class AudioWithRawResponse:
-    def __init__(self, audio: Audio) -> None:
-        self._audio = audio
-
-    @cached_property
-    def transcriptions(self) -> TranscriptionsWithRawResponse:
-        return TranscriptionsWithRawResponse(self._audio.transcriptions)
-
-    @cached_property
-    def translations(self) -> TranslationsWithRawResponse:
-        return TranslationsWithRawResponse(self._audio.translations)
-
-    @cached_property
-    def speech(self) -> SpeechWithRawResponse:
-        return SpeechWithRawResponse(self._audio.speech)
-
-
-class AsyncAudioWithRawResponse:
-    def __init__(self, audio: AsyncAudio) -> None:
-        self._audio = audio
-
-    @cached_property
-    def transcriptions(self) -> AsyncTranscriptionsWithRawResponse:
-        return AsyncTranscriptionsWithRawResponse(self._audio.transcriptions)
-
-    @cached_property
-    def translations(self) -> AsyncTranslationsWithRawResponse:
-        return AsyncTranslationsWithRawResponse(self._audio.translations)
-
-    @cached_property
-    def speech(self) -> AsyncSpeechWithRawResponse:
-        return AsyncSpeechWithRawResponse(self._audio.speech)
-
-
-class AudioWithStreamingResponse:
-    def __init__(self, audio: Audio) -> None:
-        self._audio = audio
-
-    @cached_property
-    def transcriptions(self) -> TranscriptionsWithStreamingResponse:
-        return TranscriptionsWithStreamingResponse(self._audio.transcriptions)
-
-    @cached_property
-    def translations(self) -> TranslationsWithStreamingResponse:
-        return TranslationsWithStreamingResponse(self._audio.translations)
-
-    @cached_property
-    def speech(self) -> SpeechWithStreamingResponse:
-        return SpeechWithStreamingResponse(self._audio.speech)
-
-
-class AsyncAudioWithStreamingResponse:
-    def __init__(self, audio: AsyncAudio) -> None:
-        self._audio = audio
-
-    @cached_property
-    def transcriptions(self) -> AsyncTranscriptionsWithStreamingResponse:
-        return AsyncTranscriptionsWithStreamingResponse(self._audio.transcriptions)
-
-    @cached_property
-    def translations(self) -> AsyncTranslationsWithStreamingResponse:
-        return AsyncTranslationsWithStreamingResponse(self._audio.translations)
-
-    @cached_property
-    def speech(self) -> AsyncSpeechWithStreamingResponse:
-        return AsyncSpeechWithStreamingResponse(self._audio.speech)
diff --git a/openai/resources/audio/speech.py b/openai/resources/audio/speech.py
deleted file mode 100644
index e26c5805..00000000
--- a/openai/resources/audio/speech.py
+++ /dev/null
@@ -1,213 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-from typing_extensions import Literal
-
-import httpx
-
-from ... import _legacy_response
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
-    maybe_transform,
-    async_maybe_transform,
-)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import (
-    StreamedBinaryAPIResponse,
-    AsyncStreamedBinaryAPIResponse,
-    to_custom_streamed_response_wrapper,
-    async_to_custom_streamed_response_wrapper,
-)
-from ...types.audio import speech_create_params
-from ..._base_client import (
-    make_request_options,
-)
-
-__all__ = ["Speech", "AsyncSpeech"]
-
-
-class Speech(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> SpeechWithRawResponse:
-        return SpeechWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> SpeechWithStreamingResponse:
-        return SpeechWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        input: str,
-        model: Union[str, Literal["tts-1", "tts-1-hd"]],
-        voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
-        response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
-        speed: float | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> _legacy_response.HttpxBinaryResponseContent:
-        """
-        Generates audio from the input text.
-
-        Args:
-          input: The text to generate audio for. The maximum length is 4096 characters.
-
-          model:
-              One of the available [TTS models](https://platform.openai.com/docs/models/tts):
-              `tts-1` or `tts-1-hd`
-
-          voice: The voice to use when generating the audio. Supported voices are `alloy`,
-              `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are
-              available in the
-              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options).
-
-          response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
-              `wav`, and `pcm`.
-
-          speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is
-              the default.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
-        return self._post(
-            "/audio/speech",
-            body=maybe_transform(
-                {
-                    "input": input,
-                    "model": model,
-                    "voice": voice,
-                    "response_format": response_format,
-                    "speed": speed,
-                },
-                speech_create_params.SpeechCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=_legacy_response.HttpxBinaryResponseContent,
-        )
-
-
-class AsyncSpeech(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncSpeechWithRawResponse:
-        return AsyncSpeechWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse:
-        return AsyncSpeechWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        input: str,
-        model: Union[str, Literal["tts-1", "tts-1-hd"]],
-        voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
-        response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
-        speed: float | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> _legacy_response.HttpxBinaryResponseContent:
-        """
-        Generates audio from the input text.
-
-        Args:
-          input: The text to generate audio for. The maximum length is 4096 characters.
-
-          model:
-              One of the available [TTS models](https://platform.openai.com/docs/models/tts):
-              `tts-1` or `tts-1-hd`
-
-          voice: The voice to use when generating the audio. Supported voices are `alloy`,
-              `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are
-              available in the
-              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options).
-
-          response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
-              `wav`, and `pcm`.
-
-          speed: The speed of the generated audio. Select a value from `0.25` to `4.0`. `1.0` is
-              the default.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        extra_headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
-        return await self._post(
-            "/audio/speech",
-            body=await async_maybe_transform(
-                {
-                    "input": input,
-                    "model": model,
-                    "voice": voice,
-                    "response_format": response_format,
-                    "speed": speed,
-                },
-                speech_create_params.SpeechCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=_legacy_response.HttpxBinaryResponseContent,
-        )
-
-
-class SpeechWithRawResponse:
-    def __init__(self, speech: Speech) -> None:
-        self._speech = speech
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            speech.create,
-        )
-
-
-class AsyncSpeechWithRawResponse:
-    def __init__(self, speech: AsyncSpeech) -> None:
-        self._speech = speech
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            speech.create,
-        )
-
-
-class SpeechWithStreamingResponse:
-    def __init__(self, speech: Speech) -> None:
-        self._speech = speech
-
-        self.create = to_custom_streamed_response_wrapper(
-            speech.create,
-            StreamedBinaryAPIResponse,
-        )
-
-
-class AsyncSpeechWithStreamingResponse:
-    def __init__(self, speech: AsyncSpeech) -> None:
-        self._speech = speech
-
-        self.create = async_to_custom_streamed_response_wrapper(
-            speech.create,
-            AsyncStreamedBinaryAPIResponse,
-        )
diff --git a/openai/resources/audio/transcriptions.py b/openai/resources/audio/transcriptions.py
deleted file mode 100644
index 353f28ab..00000000
--- a/openai/resources/audio/transcriptions.py
+++ /dev/null
@@ -1,256 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Union, Mapping, cast
-from typing_extensions import Literal
-
-import httpx
-
-from ... import _legacy_response
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from ..._utils import (
-    extract_files,
-    maybe_transform,
-    deepcopy_minimal,
-    async_maybe_transform,
-)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...types.audio import Transcription, transcription_create_params
-from ..._base_client import (
-    make_request_options,
-)
-
-__all__ = ["Transcriptions", "AsyncTranscriptions"]
-
-
-class Transcriptions(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> TranscriptionsWithRawResponse:
-        return TranscriptionsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> TranscriptionsWithStreamingResponse:
-        return TranscriptionsWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        file: FileTypes,
-        model: Union[str, Literal["whisper-1"]],
-        language: str | NotGiven = NOT_GIVEN,
-        prompt: str | NotGiven = NOT_GIVEN,
-        response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Transcription:
-        """
-        Transcribes audio into the input language.
-
-        Args:
-          file:
-              The audio file object (not file name) to transcribe, in one of these formats:
-              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
-
-          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
-              Whisper V2 model) is currently available.
-
-          language: The language of the input audio. Supplying the input language in
-              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will
-              improve accuracy and latency.
-
-          prompt: An optional text to guide the model's style or continue a previous audio
-              segment. The
-              [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting)
-              should match the audio language.
-
-          response_format: The format of the transcript output, in one of these options: `json`, `text`,
-              `srt`, `verbose_json`, or `vtt`.
-
-          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
-              output more random, while lower values like 0.2 will make it more focused and
-              deterministic. If set to 0, the model will use
-              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
-              automatically increase the temperature until certain thresholds are hit.
-
-          timestamp_granularities: The timestamp granularities to populate for this transcription.
-              `response_format` must be set `verbose_json` to use timestamp granularities.
-              Either or both of these options are supported: `word`, or `segment`. Note: There
-              is no additional latency for segment timestamps, but generating word timestamps
-              incurs additional latency.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "file": file,
-                "model": model,
-                "language": language,
-                "prompt": prompt,
-                "response_format": response_format,
-                "temperature": temperature,
-                "timestamp_granularities": timestamp_granularities,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return self._post(
-            "/audio/transcriptions",
-            body=maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Transcription,
-        )
-
-
-class AsyncTranscriptions(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse:
-        return AsyncTranscriptionsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse:
-        return AsyncTranscriptionsWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        file: FileTypes,
-        model: Union[str, Literal["whisper-1"]],
-        language: str | NotGiven = NOT_GIVEN,
-        prompt: str | NotGiven = NOT_GIVEN,
-        response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Transcription:
-        """
-        Transcribes audio into the input language.
-
-        Args:
-          file:
-              The audio file object (not file name) to transcribe, in one of these formats:
-              flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
-
-          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
-              Whisper V2 model) is currently available.
-
-          language: The language of the input audio. Supplying the input language in
-              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will
-              improve accuracy and latency.
-
-          prompt: An optional text to guide the model's style or continue a previous audio
-              segment. The
-              [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting)
-              should match the audio language.
-
-          response_format: The format of the transcript output, in one of these options: `json`, `text`,
-              `srt`, `verbose_json`, or `vtt`.
-
-          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
-              output more random, while lower values like 0.2 will make it more focused and
-              deterministic. If set to 0, the model will use
-              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
-              automatically increase the temperature until certain thresholds are hit.
-
-          timestamp_granularities: The timestamp granularities to populate for this transcription.
-              `response_format` must be set `verbose_json` to use timestamp granularities.
-              Either or both of these options are supported: `word`, or `segment`. Note: There
-              is no additional latency for segment timestamps, but generating word timestamps
-              incurs additional latency.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "file": file,
-                "model": model,
-                "language": language,
-                "prompt": prompt,
-                "response_format": response_format,
-                "temperature": temperature,
-                "timestamp_granularities": timestamp_granularities,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return await self._post(
-            "/audio/transcriptions",
-            body=await async_maybe_transform(body, transcription_create_params.TranscriptionCreateParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Transcription,
-        )
-
-
-class TranscriptionsWithRawResponse:
-    def __init__(self, transcriptions: Transcriptions) -> None:
-        self._transcriptions = transcriptions
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            transcriptions.create,
-        )
-
-
-class AsyncTranscriptionsWithRawResponse:
-    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
-        self._transcriptions = transcriptions
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            transcriptions.create,
-        )
-
-
-class TranscriptionsWithStreamingResponse:
-    def __init__(self, transcriptions: Transcriptions) -> None:
-        self._transcriptions = transcriptions
-
-        self.create = to_streamed_response_wrapper(
-            transcriptions.create,
-        )
-
-
-class AsyncTranscriptionsWithStreamingResponse:
-    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
-        self._transcriptions = transcriptions
-
-        self.create = async_to_streamed_response_wrapper(
-            transcriptions.create,
-        )
diff --git a/openai/resources/audio/translations.py b/openai/resources/audio/translations.py
deleted file mode 100644
index 79020a5e..00000000
--- a/openai/resources/audio/translations.py
+++ /dev/null
@@ -1,226 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Mapping, cast
-from typing_extensions import Literal
-
-import httpx
-
-from ... import _legacy_response
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from ..._utils import (
-    extract_files,
-    maybe_transform,
-    deepcopy_minimal,
-    async_maybe_transform,
-)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...types.audio import Translation, translation_create_params
-from ..._base_client import (
-    make_request_options,
-)
-
-__all__ = ["Translations", "AsyncTranslations"]
-
-
-class Translations(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> TranslationsWithRawResponse:
-        return TranslationsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> TranslationsWithStreamingResponse:
-        return TranslationsWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        file: FileTypes,
-        model: Union[str, Literal["whisper-1"]],
-        prompt: str | NotGiven = NOT_GIVEN,
-        response_format: str | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Translation:
-        """
-        Translates audio into English.
-
-        Args:
-          file: The audio file object (not file name) translate, in one of these formats: flac,
-              mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
-
-          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
-              Whisper V2 model) is currently available.
-
-          prompt: An optional text to guide the model's style or continue a previous audio
-              segment. The
-              [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting)
-              should be in English.
-
-          response_format: The format of the transcript output, in one of these options: `json`, `text`,
-              `srt`, `verbose_json`, or `vtt`.
-
-          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
-              output more random, while lower values like 0.2 will make it more focused and
-              deterministic. If set to 0, the model will use
-              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
-              automatically increase the temperature until certain thresholds are hit.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "file": file,
-                "model": model,
-                "prompt": prompt,
-                "response_format": response_format,
-                "temperature": temperature,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return self._post(
-            "/audio/translations",
-            body=maybe_transform(body, translation_create_params.TranslationCreateParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Translation,
-        )
-
-
-class AsyncTranslations(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncTranslationsWithRawResponse:
-        return AsyncTranslationsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse:
-        return AsyncTranslationsWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        file: FileTypes,
-        model: Union[str, Literal["whisper-1"]],
-        prompt: str | NotGiven = NOT_GIVEN,
-        response_format: str | NotGiven = NOT_GIVEN,
-        temperature: float | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Translation:
-        """
-        Translates audio into English.
-
-        Args:
-          file: The audio file object (not file name) translate, in one of these formats: flac,
-              mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
-
-          model: ID of the model to use. Only `whisper-1` (which is powered by our open source
-              Whisper V2 model) is currently available.
-
-          prompt: An optional text to guide the model's style or continue a previous audio
-              segment. The
-              [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting)
-              should be in English.
-
-          response_format: The format of the transcript output, in one of these options: `json`, `text`,
-              `srt`, `verbose_json`, or `vtt`.
-
-          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
-              output more random, while lower values like 0.2 will make it more focused and
-              deterministic. If set to 0, the model will use
-              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
-              automatically increase the temperature until certain thresholds are hit.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "file": file,
-                "model": model,
-                "prompt": prompt,
-                "response_format": response_format,
-                "temperature": temperature,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return await self._post(
-            "/audio/translations",
-            body=await async_maybe_transform(body, translation_create_params.TranslationCreateParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Translation,
-        )
-
-
-class TranslationsWithRawResponse:
-    def __init__(self, translations: Translations) -> None:
-        self._translations = translations
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            translations.create,
-        )
-
-
-class AsyncTranslationsWithRawResponse:
-    def __init__(self, translations: AsyncTranslations) -> None:
-        self._translations = translations
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            translations.create,
-        )
-
-
-class TranslationsWithStreamingResponse:
-    def __init__(self, translations: Translations) -> None:
-        self._translations = translations
-
-        self.create = to_streamed_response_wrapper(
-            translations.create,
-        )
-
-
-class AsyncTranslationsWithStreamingResponse:
-    def __init__(self, translations: AsyncTranslations) -> None:
-        self._translations = translations
-
-        self.create = async_to_streamed_response_wrapper(
-            translations.create,
-        )
diff --git a/openai/resources/beta/__init__.py b/openai/resources/beta/__init__.py
deleted file mode 100644
index 87fea252..00000000
--- a/openai/resources/beta/__init__.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .beta import (
-    Beta,
-    AsyncBeta,
-    BetaWithRawResponse,
-    AsyncBetaWithRawResponse,
-    BetaWithStreamingResponse,
-    AsyncBetaWithStreamingResponse,
-)
-from .threads import (
-    Threads,
-    AsyncThreads,
-    ThreadsWithRawResponse,
-    AsyncThreadsWithRawResponse,
-    ThreadsWithStreamingResponse,
-    AsyncThreadsWithStreamingResponse,
-)
-from .assistants import (
-    Assistants,
-    AsyncAssistants,
-    AssistantsWithRawResponse,
-    AsyncAssistantsWithRawResponse,
-    AssistantsWithStreamingResponse,
-    AsyncAssistantsWithStreamingResponse,
-)
-
-__all__ = [
-    "Assistants",
-    "AsyncAssistants",
-    "AssistantsWithRawResponse",
-    "AsyncAssistantsWithRawResponse",
-    "AssistantsWithStreamingResponse",
-    "AsyncAssistantsWithStreamingResponse",
-    "Threads",
-    "AsyncThreads",
-    "ThreadsWithRawResponse",
-    "AsyncThreadsWithRawResponse",
-    "ThreadsWithStreamingResponse",
-    "AsyncThreadsWithStreamingResponse",
-    "Beta",
-    "AsyncBeta",
-    "BetaWithRawResponse",
-    "AsyncBetaWithRawResponse",
-    "BetaWithStreamingResponse",
-    "AsyncBetaWithStreamingResponse",
-]
diff --git a/openai/resources/beta/assistants/__init__.py b/openai/resources/beta/assistants/__init__.py
deleted file mode 100644
index 736def93..00000000
--- a/openai/resources/beta/assistants/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .files import (
-    Files,
-    AsyncFiles,
-    FilesWithRawResponse,
-    AsyncFilesWithRawResponse,
-    FilesWithStreamingResponse,
-    AsyncFilesWithStreamingResponse,
-)
-from .assistants import (
-    Assistants,
-    AsyncAssistants,
-    AssistantsWithRawResponse,
-    AsyncAssistantsWithRawResponse,
-    AssistantsWithStreamingResponse,
-    AsyncAssistantsWithStreamingResponse,
-)
-
-__all__ = [
-    "Files",
-    "AsyncFiles",
-    "FilesWithRawResponse",
-    "AsyncFilesWithRawResponse",
-    "FilesWithStreamingResponse",
-    "AsyncFilesWithStreamingResponse",
-    "Assistants",
-    "AsyncAssistants",
-    "AssistantsWithRawResponse",
-    "AsyncAssistantsWithRawResponse",
-    "AssistantsWithStreamingResponse",
-    "AsyncAssistantsWithStreamingResponse",
-]
diff --git a/openai/resources/beta/assistants/assistants.py b/openai/resources/beta/assistants/assistants.py
deleted file mode 100644
index 232451ab..00000000
--- a/openai/resources/beta/assistants/assistants.py
+++ /dev/null
@@ -1,747 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Iterable, Optional
-from typing_extensions import Literal
-
-import httpx
-
-from .... import _legacy_response
-from .files import (
-    Files,
-    AsyncFiles,
-    FilesWithRawResponse,
-    AsyncFilesWithRawResponse,
-    FilesWithStreamingResponse,
-    AsyncFilesWithStreamingResponse,
-)
-from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
-    maybe_transform,
-    async_maybe_transform,
-)
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ....pagination import SyncCursorPage, AsyncCursorPage
-from ....types.beta import (
-    Assistant,
-    AssistantDeleted,
-    AssistantToolParam,
-    assistant_list_params,
-    assistant_create_params,
-    assistant_update_params,
-)
-from ...._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-
-__all__ = ["Assistants", "AsyncAssistants"]
-
-
-class Assistants(SyncAPIResource):
-    @cached_property
-    def files(self) -> Files:
-        return Files(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AssistantsWithRawResponse:
-        return AssistantsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AssistantsWithStreamingResponse:
-        return AssistantsWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        model: str,
-        description: Optional[str] | NotGiven = NOT_GIVEN,
-        file_ids: List[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        name: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Assistant:
-        """
-        Create an assistant with a model and instructions.
-
-        Args:
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          description: The description of the assistant. The maximum length is 512 characters.
-
-          file_ids: A list of [file](https://platform.openai.com/docs/api-reference/files) IDs
-              attached to this assistant. There can be a maximum of 20 files attached to the
-              assistant. Files are ordered by their creation date in ascending order.
-
-          instructions: The system instructions that the assistant uses. The maximum length is 32768
-              characters.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          name: The name of the assistant. The maximum length is 256 characters.
-
-          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
-              assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            "/assistants",
-            body=maybe_transform(
-                {
-                    "model": model,
-                    "description": description,
-                    "file_ids": file_ids,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "name": name,
-                    "tools": tools,
-                },
-                assistant_create_params.AssistantCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Assistant,
-        )
-
-    def retrieve(
-        self,
-        assistant_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Assistant:
-        """
-        Retrieves an assistant.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get(
-            f"/assistants/{assistant_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Assistant,
-        )
-
-    def update(
-        self,
-        assistant_id: str,
-        *,
-        description: Optional[str] | NotGiven = NOT_GIVEN,
-        file_ids: List[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        name: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Assistant:
-        """Modifies an assistant.
-
-        Args:
-          description: The description of the assistant.
-
-        The maximum length is 512 characters.
-
-          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs
-              attached to this assistant. There can be a maximum of 20 files attached to the
-              assistant. Files are ordered by their creation date in ascending order. If a
-              file was previously attached to the list but does not show up in the list, it
-              will be deleted from the assistant.
-
-          instructions: The system instructions that the assistant uses. The maximum length is 32768
-              characters.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          name: The name of the assistant. The maximum length is 256 characters.
-
-          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
-              assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/assistants/{assistant_id}",
-            body=maybe_transform(
-                {
-                    "description": description,
-                    "file_ids": file_ids,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "name": name,
-                    "tools": tools,
-                },
-                assistant_update_params.AssistantUpdateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Assistant,
-        )
-
-    def list(
-        self,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncCursorPage[Assistant]:
-        """Returns a list of assistants.
-
-        Args:
-          after: A cursor for use in pagination.
-
-        `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            "/assistants",
-            page=SyncCursorPage[Assistant],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    assistant_list_params.AssistantListParams,
-                ),
-            ),
-            model=Assistant,
-        )
-
-    def delete(
-        self,
-        assistant_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantDeleted:
-        """
-        Delete an assistant.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._delete(
-            f"/assistants/{assistant_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=AssistantDeleted,
-        )
-
-
-class AsyncAssistants(AsyncAPIResource):
-    @cached_property
-    def files(self) -> AsyncFiles:
-        return AsyncFiles(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
-        return AsyncAssistantsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
-        return AsyncAssistantsWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        model: str,
-        description: Optional[str] | NotGiven = NOT_GIVEN,
-        file_ids: List[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        name: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Assistant:
-        """
-        Create an assistant with a model and instructions.
-
-        Args:
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          description: The description of the assistant. The maximum length is 512 characters.
-
-          file_ids: A list of [file](https://platform.openai.com/docs/api-reference/files) IDs
-              attached to this assistant. There can be a maximum of 20 files attached to the
-              assistant. Files are ordered by their creation date in ascending order.
-
-          instructions: The system instructions that the assistant uses. The maximum length is 32768
-              characters.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          name: The name of the assistant. The maximum length is 256 characters.
-
-          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
-              assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            "/assistants",
-            body=await async_maybe_transform(
-                {
-                    "model": model,
-                    "description": description,
-                    "file_ids": file_ids,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "name": name,
-                    "tools": tools,
-                },
-                assistant_create_params.AssistantCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Assistant,
-        )
-
-    async def retrieve(
-        self,
-        assistant_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Assistant:
-        """
-        Retrieves an assistant.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._get(
-            f"/assistants/{assistant_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Assistant,
-        )
-
-    async def update(
-        self,
-        assistant_id: str,
-        *,
-        description: Optional[str] | NotGiven = NOT_GIVEN,
-        file_ids: List[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: str | NotGiven = NOT_GIVEN,
-        name: Optional[str] | NotGiven = NOT_GIVEN,
-        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Assistant:
-        """Modifies an assistant.
-
-        Args:
-          description: The description of the assistant.
-
-        The maximum length is 512 characters.
-
-          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs
-              attached to this assistant. There can be a maximum of 20 files attached to the
-              assistant. Files are ordered by their creation date in ascending order. If a
-              file was previously attached to the list but does not show up in the list, it
-              will be deleted from the assistant.
-
-          instructions: The system instructions that the assistant uses. The maximum length is 32768
-              characters.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          name: The name of the assistant. The maximum length is 256 characters.
-
-          tools: A list of tool enabled on the assistant. There can be a maximum of 128 tools per
-              assistant. Tools can be of types `code_interpreter`, `retrieval`, or `function`.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            f"/assistants/{assistant_id}",
-            body=await async_maybe_transform(
-                {
-                    "description": description,
-                    "file_ids": file_ids,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "name": name,
-                    "tools": tools,
-                },
-                assistant_update_params.AssistantUpdateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Assistant,
-        )
-
-    def list(
-        self,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]:
-        """Returns a list of assistants.
-
-        Args:
-          after: A cursor for use in pagination.
-
-        `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            "/assistants",
-            page=AsyncCursorPage[Assistant],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    assistant_list_params.AssistantListParams,
-                ),
-            ),
-            model=Assistant,
-        )
-
-    async def delete(
-        self,
-        assistant_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantDeleted:
-        """
-        Delete an assistant.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._delete(
-            f"/assistants/{assistant_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=AssistantDeleted,
-        )
-
-
-class AssistantsWithRawResponse:
-    def __init__(self, assistants: Assistants) -> None:
-        self._assistants = assistants
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            assistants.create,
-        )
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            assistants.retrieve,
-        )
-        self.update = _legacy_response.to_raw_response_wrapper(
-            assistants.update,
-        )
-        self.list = _legacy_response.to_raw_response_wrapper(
-            assistants.list,
-        )
-        self.delete = _legacy_response.to_raw_response_wrapper(
-            assistants.delete,
-        )
-
-    @cached_property
-    def files(self) -> FilesWithRawResponse:
-        return FilesWithRawResponse(self._assistants.files)
-
-
-class AsyncAssistantsWithRawResponse:
-    def __init__(self, assistants: AsyncAssistants) -> None:
-        self._assistants = assistants
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            assistants.create,
-        )
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            assistants.retrieve,
-        )
-        self.update = _legacy_response.async_to_raw_response_wrapper(
-            assistants.update,
-        )
-        self.list = _legacy_response.async_to_raw_response_wrapper(
-            assistants.list,
-        )
-        self.delete = _legacy_response.async_to_raw_response_wrapper(
-            assistants.delete,
-        )
-
-    @cached_property
-    def files(self) -> AsyncFilesWithRawResponse:
-        return AsyncFilesWithRawResponse(self._assistants.files)
-
-
-class AssistantsWithStreamingResponse:
-    def __init__(self, assistants: Assistants) -> None:
-        self._assistants = assistants
-
-        self.create = to_streamed_response_wrapper(
-            assistants.create,
-        )
-        self.retrieve = to_streamed_response_wrapper(
-            assistants.retrieve,
-        )
-        self.update = to_streamed_response_wrapper(
-            assistants.update,
-        )
-        self.list = to_streamed_response_wrapper(
-            assistants.list,
-        )
-        self.delete = to_streamed_response_wrapper(
-            assistants.delete,
-        )
-
-    @cached_property
-    def files(self) -> FilesWithStreamingResponse:
-        return FilesWithStreamingResponse(self._assistants.files)
-
-
-class AsyncAssistantsWithStreamingResponse:
-    def __init__(self, assistants: AsyncAssistants) -> None:
-        self._assistants = assistants
-
-        self.create = async_to_streamed_response_wrapper(
-            assistants.create,
-        )
-        self.retrieve = async_to_streamed_response_wrapper(
-            assistants.retrieve,
-        )
-        self.update = async_to_streamed_response_wrapper(
-            assistants.update,
-        )
-        self.list = async_to_streamed_response_wrapper(
-            assistants.list,
-        )
-        self.delete = async_to_streamed_response_wrapper(
-            assistants.delete,
-        )
-
-    @cached_property
-    def files(self) -> AsyncFilesWithStreamingResponse:
-        return AsyncFilesWithStreamingResponse(self._assistants.files)
diff --git a/openai/resources/beta/assistants/files.py b/openai/resources/beta/assistants/files.py
deleted file mode 100644
index dc57dfb9..00000000
--- a/openai/resources/beta/assistants/files.py
+++ /dev/null
@@ -1,483 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal
-
-import httpx
-
-from .... import _legacy_response
-from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
-    maybe_transform,
-    async_maybe_transform,
-)
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ....pagination import SyncCursorPage, AsyncCursorPage
-from ...._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-from ....types.beta.assistants import AssistantFile, FileDeleteResponse, file_list_params, file_create_params
-
-__all__ = ["Files", "AsyncFiles"]
-
-
-class Files(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> FilesWithRawResponse:
-        return FilesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> FilesWithStreamingResponse:
-        return FilesWithStreamingResponse(self)
-
-    def create(
-        self,
-        assistant_id: str,
-        *,
-        file_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantFile:
-        """
-        Create an assistant file by attaching a
-        [File](https://platform.openai.com/docs/api-reference/files) to an
-        [assistant](https://platform.openai.com/docs/api-reference/assistants).
-
-        Args:
-          file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID (with
-              `purpose="assistants"`) that the assistant should use. Useful for tools like
-              `retrieval` and `code_interpreter` that can access files.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/assistants/{assistant_id}/files",
-            body=maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=AssistantFile,
-        )
-
-    def retrieve(
-        self,
-        file_id: str,
-        *,
-        assistant_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantFile:
-        """
-        Retrieves an AssistantFile.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get(
-            f"/assistants/{assistant_id}/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=AssistantFile,
-        )
-
-    def list(
-        self,
-        assistant_id: str,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncCursorPage[AssistantFile]:
-        """
-        Returns a list of assistant files.
-
-        Args:
-          after: A cursor for use in pagination. `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/assistants/{assistant_id}/files",
-            page=SyncCursorPage[AssistantFile],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    file_list_params.FileListParams,
-                ),
-            ),
-            model=AssistantFile,
-        )
-
-    def delete(
-        self,
-        file_id: str,
-        *,
-        assistant_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FileDeleteResponse:
-        """
-        Delete an assistant file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._delete(
-            f"/assistants/{assistant_id}/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FileDeleteResponse,
-        )
-
-
-class AsyncFiles(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncFilesWithRawResponse:
-        return AsyncFilesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
-        return AsyncFilesWithStreamingResponse(self)
-
-    async def create(
-        self,
-        assistant_id: str,
-        *,
-        file_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantFile:
-        """
-        Create an assistant file by attaching a
-        [File](https://platform.openai.com/docs/api-reference/files) to an
-        [assistant](https://platform.openai.com/docs/api-reference/assistants).
-
-        Args:
-          file_id: A [File](https://platform.openai.com/docs/api-reference/files) ID (with
-              `purpose="assistants"`) that the assistant should use. Useful for tools like
-              `retrieval` and `code_interpreter` that can access files.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            f"/assistants/{assistant_id}/files",
-            body=await async_maybe_transform({"file_id": file_id}, file_create_params.FileCreateParams),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=AssistantFile,
-        )
-
-    async def retrieve(
-        self,
-        file_id: str,
-        *,
-        assistant_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantFile:
-        """
-        Retrieves an AssistantFile.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._get(
-            f"/assistants/{assistant_id}/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=AssistantFile,
-        )
-
-    def list(
-        self,
-        assistant_id: str,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[AssistantFile, AsyncCursorPage[AssistantFile]]:
-        """
-        Returns a list of assistant files.
-
-        Args:
-          after: A cursor for use in pagination. `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/assistants/{assistant_id}/files",
-            page=AsyncCursorPage[AssistantFile],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    file_list_params.FileListParams,
-                ),
-            ),
-            model=AssistantFile,
-        )
-
-    async def delete(
-        self,
-        file_id: str,
-        *,
-        assistant_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FileDeleteResponse:
-        """
-        Delete an assistant file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not assistant_id:
-            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._delete(
-            f"/assistants/{assistant_id}/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FileDeleteResponse,
-        )
-
-
-class FilesWithRawResponse:
-    def __init__(self, files: Files) -> None:
-        self._files = files
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            files.create,
-        )
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            files.retrieve,
-        )
-        self.list = _legacy_response.to_raw_response_wrapper(
-            files.list,
-        )
-        self.delete = _legacy_response.to_raw_response_wrapper(
-            files.delete,
-        )
-
-
-class AsyncFilesWithRawResponse:
-    def __init__(self, files: AsyncFiles) -> None:
-        self._files = files
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            files.create,
-        )
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            files.retrieve,
-        )
-        self.list = _legacy_response.async_to_raw_response_wrapper(
-            files.list,
-        )
-        self.delete = _legacy_response.async_to_raw_response_wrapper(
-            files.delete,
-        )
-
-
-class FilesWithStreamingResponse:
-    def __init__(self, files: Files) -> None:
-        self._files = files
-
-        self.create = to_streamed_response_wrapper(
-            files.create,
-        )
-        self.retrieve = to_streamed_response_wrapper(
-            files.retrieve,
-        )
-        self.list = to_streamed_response_wrapper(
-            files.list,
-        )
-        self.delete = to_streamed_response_wrapper(
-            files.delete,
-        )
-
-
-class AsyncFilesWithStreamingResponse:
-    def __init__(self, files: AsyncFiles) -> None:
-        self._files = files
-
-        self.create = async_to_streamed_response_wrapper(
-            files.create,
-        )
-        self.retrieve = async_to_streamed_response_wrapper(
-            files.retrieve,
-        )
-        self.list = async_to_streamed_response_wrapper(
-            files.list,
-        )
-        self.delete = async_to_streamed_response_wrapper(
-            files.delete,
-        )
diff --git a/openai/resources/beta/beta.py b/openai/resources/beta/beta.py
deleted file mode 100644
index 67baad27..00000000
--- a/openai/resources/beta/beta.py
+++ /dev/null
@@ -1,114 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .threads import (
-    Threads,
-    AsyncThreads,
-    ThreadsWithRawResponse,
-    AsyncThreadsWithRawResponse,
-    ThreadsWithStreamingResponse,
-    AsyncThreadsWithStreamingResponse,
-)
-from ..._compat import cached_property
-from .assistants import (
-    Assistants,
-    AsyncAssistants,
-    AssistantsWithRawResponse,
-    AsyncAssistantsWithRawResponse,
-    AssistantsWithStreamingResponse,
-    AsyncAssistantsWithStreamingResponse,
-)
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from .threads.threads import Threads, AsyncThreads
-from .assistants.assistants import Assistants, AsyncAssistants
-
-__all__ = ["Beta", "AsyncBeta"]
-
-
-class Beta(SyncAPIResource):
-    @cached_property
-    def assistants(self) -> Assistants:
-        return Assistants(self._client)
-
-    @cached_property
-    def threads(self) -> Threads:
-        return Threads(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> BetaWithRawResponse:
-        return BetaWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> BetaWithStreamingResponse:
-        return BetaWithStreamingResponse(self)
-
-
-class AsyncBeta(AsyncAPIResource):
-    @cached_property
-    def assistants(self) -> AsyncAssistants:
-        return AsyncAssistants(self._client)
-
-    @cached_property
-    def threads(self) -> AsyncThreads:
-        return AsyncThreads(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AsyncBetaWithRawResponse:
-        return AsyncBetaWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncBetaWithStreamingResponse:
-        return AsyncBetaWithStreamingResponse(self)
-
-
-class BetaWithRawResponse:
-    def __init__(self, beta: Beta) -> None:
-        self._beta = beta
-
-    @cached_property
-    def assistants(self) -> AssistantsWithRawResponse:
-        return AssistantsWithRawResponse(self._beta.assistants)
-
-    @cached_property
-    def threads(self) -> ThreadsWithRawResponse:
-        return ThreadsWithRawResponse(self._beta.threads)
-
-
-class AsyncBetaWithRawResponse:
-    def __init__(self, beta: AsyncBeta) -> None:
-        self._beta = beta
-
-    @cached_property
-    def assistants(self) -> AsyncAssistantsWithRawResponse:
-        return AsyncAssistantsWithRawResponse(self._beta.assistants)
-
-    @cached_property
-    def threads(self) -> AsyncThreadsWithRawResponse:
-        return AsyncThreadsWithRawResponse(self._beta.threads)
-
-
-class BetaWithStreamingResponse:
-    def __init__(self, beta: Beta) -> None:
-        self._beta = beta
-
-    @cached_property
-    def assistants(self) -> AssistantsWithStreamingResponse:
-        return AssistantsWithStreamingResponse(self._beta.assistants)
-
-    @cached_property
-    def threads(self) -> ThreadsWithStreamingResponse:
-        return ThreadsWithStreamingResponse(self._beta.threads)
-
-
-class AsyncBetaWithStreamingResponse:
-    def __init__(self, beta: AsyncBeta) -> None:
-        self._beta = beta
-
-    @cached_property
-    def assistants(self) -> AsyncAssistantsWithStreamingResponse:
-        return AsyncAssistantsWithStreamingResponse(self._beta.assistants)
-
-    @cached_property
-    def threads(self) -> AsyncThreadsWithStreamingResponse:
-        return AsyncThreadsWithStreamingResponse(self._beta.threads)
diff --git a/openai/resources/beta/threads/__init__.py b/openai/resources/beta/threads/__init__.py
deleted file mode 100644
index a66e445b..00000000
--- a/openai/resources/beta/threads/__init__.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .runs import (
-    Runs,
-    AsyncRuns,
-    RunsWithRawResponse,
-    AsyncRunsWithRawResponse,
-    RunsWithStreamingResponse,
-    AsyncRunsWithStreamingResponse,
-)
-from .threads import (
-    Threads,
-    AsyncThreads,
-    ThreadsWithRawResponse,
-    AsyncThreadsWithRawResponse,
-    ThreadsWithStreamingResponse,
-    AsyncThreadsWithStreamingResponse,
-)
-from .messages import (
-    Messages,
-    AsyncMessages,
-    MessagesWithRawResponse,
-    AsyncMessagesWithRawResponse,
-    MessagesWithStreamingResponse,
-    AsyncMessagesWithStreamingResponse,
-)
-
-__all__ = [
-    "Runs",
-    "AsyncRuns",
-    "RunsWithRawResponse",
-    "AsyncRunsWithRawResponse",
-    "RunsWithStreamingResponse",
-    "AsyncRunsWithStreamingResponse",
-    "Messages",
-    "AsyncMessages",
-    "MessagesWithRawResponse",
-    "AsyncMessagesWithRawResponse",
-    "MessagesWithStreamingResponse",
-    "AsyncMessagesWithStreamingResponse",
-    "Threads",
-    "AsyncThreads",
-    "ThreadsWithRawResponse",
-    "AsyncThreadsWithRawResponse",
-    "ThreadsWithStreamingResponse",
-    "AsyncThreadsWithStreamingResponse",
-]
diff --git a/openai/resources/beta/threads/messages/__init__.py b/openai/resources/beta/threads/messages/__init__.py
deleted file mode 100644
index a3286e6a..00000000
--- a/openai/resources/beta/threads/messages/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .files import (
-    Files,
-    AsyncFiles,
-    FilesWithRawResponse,
-    AsyncFilesWithRawResponse,
-    FilesWithStreamingResponse,
-    AsyncFilesWithStreamingResponse,
-)
-from .messages import (
-    Messages,
-    AsyncMessages,
-    MessagesWithRawResponse,
-    AsyncMessagesWithRawResponse,
-    MessagesWithStreamingResponse,
-    AsyncMessagesWithStreamingResponse,
-)
-
-__all__ = [
-    "Files",
-    "AsyncFiles",
-    "FilesWithRawResponse",
-    "AsyncFilesWithRawResponse",
-    "FilesWithStreamingResponse",
-    "AsyncFilesWithStreamingResponse",
-    "Messages",
-    "AsyncMessages",
-    "MessagesWithRawResponse",
-    "AsyncMessagesWithRawResponse",
-    "MessagesWithStreamingResponse",
-    "AsyncMessagesWithStreamingResponse",
-]
diff --git a/openai/resources/beta/threads/messages/files.py b/openai/resources/beta/threads/messages/files.py
deleted file mode 100644
index 349f9972..00000000
--- a/openai/resources/beta/threads/messages/files.py
+++ /dev/null
@@ -1,312 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal
-
-import httpx
-
-from ..... import _legacy_response
-from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ....._utils import maybe_transform
-from ....._compat import cached_property
-from ....._resource import SyncAPIResource, AsyncAPIResource
-from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from .....pagination import SyncCursorPage, AsyncCursorPage
-from ....._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-from .....types.beta.threads.messages import MessageFile, file_list_params
-
-__all__ = ["Files", "AsyncFiles"]
-
-
-class Files(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> FilesWithRawResponse:
-        return FilesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> FilesWithStreamingResponse:
-        return FilesWithStreamingResponse(self)
-
-    def retrieve(
-        self,
-        file_id: str,
-        *,
-        thread_id: str,
-        message_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> MessageFile:
-        """
-        Retrieves a message file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not message_id:
-            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get(
-            f"/threads/{thread_id}/messages/{message_id}/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=MessageFile,
-        )
-
-    def list(
-        self,
-        message_id: str,
-        *,
-        thread_id: str,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncCursorPage[MessageFile]:
-        """Returns a list of message files.
-
-        Args:
-          after: A cursor for use in pagination.
-
-        `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not message_id:
-            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/threads/{thread_id}/messages/{message_id}/files",
-            page=SyncCursorPage[MessageFile],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    file_list_params.FileListParams,
-                ),
-            ),
-            model=MessageFile,
-        )
-
-
-class AsyncFiles(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncFilesWithRawResponse:
-        return AsyncFilesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
-        return AsyncFilesWithStreamingResponse(self)
-
-    async def retrieve(
-        self,
-        file_id: str,
-        *,
-        thread_id: str,
-        message_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> MessageFile:
-        """
-        Retrieves a message file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not message_id:
-            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._get(
-            f"/threads/{thread_id}/messages/{message_id}/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=MessageFile,
-        )
-
-    def list(
-        self,
-        message_id: str,
-        *,
-        thread_id: str,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[MessageFile, AsyncCursorPage[MessageFile]]:
-        """Returns a list of message files.
-
-        Args:
-          after: A cursor for use in pagination.
-
-        `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not message_id:
-            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/threads/{thread_id}/messages/{message_id}/files",
-            page=AsyncCursorPage[MessageFile],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    file_list_params.FileListParams,
-                ),
-            ),
-            model=MessageFile,
-        )
-
-
-class FilesWithRawResponse:
-    def __init__(self, files: Files) -> None:
-        self._files = files
-
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            files.retrieve,
-        )
-        self.list = _legacy_response.to_raw_response_wrapper(
-            files.list,
-        )
-
-
-class AsyncFilesWithRawResponse:
-    def __init__(self, files: AsyncFiles) -> None:
-        self._files = files
-
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            files.retrieve,
-        )
-        self.list = _legacy_response.async_to_raw_response_wrapper(
-            files.list,
-        )
-
-
-class FilesWithStreamingResponse:
-    def __init__(self, files: Files) -> None:
-        self._files = files
-
-        self.retrieve = to_streamed_response_wrapper(
-            files.retrieve,
-        )
-        self.list = to_streamed_response_wrapper(
-            files.list,
-        )
-
-
-class AsyncFilesWithStreamingResponse:
-    def __init__(self, files: AsyncFiles) -> None:
-        self._files = files
-
-        self.retrieve = async_to_streamed_response_wrapper(
-            files.retrieve,
-        )
-        self.list = async_to_streamed_response_wrapper(
-            files.list,
-        )
diff --git a/openai/resources/beta/threads/messages/messages.py b/openai/resources/beta/threads/messages/messages.py
deleted file mode 100644
index bbce3e99..00000000
--- a/openai/resources/beta/threads/messages/messages.py
+++ /dev/null
@@ -1,588 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-import httpx
-
-from ..... import _legacy_response
-from .files import (
-    Files,
-    AsyncFiles,
-    FilesWithRawResponse,
-    AsyncFilesWithRawResponse,
-    FilesWithStreamingResponse,
-    AsyncFilesWithStreamingResponse,
-)
-from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ....._utils import (
-    maybe_transform,
-    async_maybe_transform,
-)
-from ....._compat import cached_property
-from ....._resource import SyncAPIResource, AsyncAPIResource
-from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from .....pagination import SyncCursorPage, AsyncCursorPage
-from ....._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-from .....types.beta.threads import Message, message_list_params, message_create_params, message_update_params
-
-__all__ = ["Messages", "AsyncMessages"]
-
-
-class Messages(SyncAPIResource):
-    @cached_property
-    def files(self) -> Files:
-        return Files(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> MessagesWithRawResponse:
-        return MessagesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> MessagesWithStreamingResponse:
-        return MessagesWithStreamingResponse(self)
-
-    def create(
-        self,
-        thread_id: str,
-        *,
-        content: str,
-        role: Literal["user", "assistant"],
-        file_ids: List[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Message:
-        """
-        Create a message.
-
-        Args:
-          content: The content of the message.
-
-          role:
-              The role of the entity that is creating the message. Allowed values include:
-
-              - `user`: Indicates the message is sent by an actual user and should be used in
-                most cases to represent user-generated messages.
-              - `assistant`: Indicates the message is generated by the assistant. Use this
-                value to insert messages from the assistant into the conversation.
-
-          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-              the message should use. There can be a maximum of 10 files attached to a
-              message. Useful for tools like `retrieval` and `code_interpreter` that can
-              access and use files.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/threads/{thread_id}/messages",
-            body=maybe_transform(
-                {
-                    "content": content,
-                    "role": role,
-                    "file_ids": file_ids,
-                    "metadata": metadata,
-                },
-                message_create_params.MessageCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Message,
-        )
-
-    def retrieve(
-        self,
-        message_id: str,
-        *,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Message:
-        """
-        Retrieve a message.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not message_id:
-            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get(
-            f"/threads/{thread_id}/messages/{message_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Message,
-        )
-
-    def update(
-        self,
-        message_id: str,
-        *,
-        thread_id: str,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Message:
-        """
-        Modifies a message.
-
-        Args:
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not message_id:
-            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/threads/{thread_id}/messages/{message_id}",
-            body=maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Message,
-        )
-
-    def list(
-        self,
-        thread_id: str,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        run_id: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncCursorPage[Message]:
-        """
-        Returns a list of messages for a given thread.
-
-        Args:
-          after: A cursor for use in pagination. `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          run_id: Filter messages by the run ID that generated them.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/threads/{thread_id}/messages",
-            page=SyncCursorPage[Message],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                        "run_id": run_id,
-                    },
-                    message_list_params.MessageListParams,
-                ),
-            ),
-            model=Message,
-        )
-
-
-class AsyncMessages(AsyncAPIResource):
-    @cached_property
-    def files(self) -> AsyncFiles:
-        return AsyncFiles(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AsyncMessagesWithRawResponse:
-        return AsyncMessagesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncMessagesWithStreamingResponse:
-        return AsyncMessagesWithStreamingResponse(self)
-
-    async def create(
-        self,
-        thread_id: str,
-        *,
-        content: str,
-        role: Literal["user", "assistant"],
-        file_ids: List[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Message:
-        """
-        Create a message.
-
-        Args:
-          content: The content of the message.
-
-          role:
-              The role of the entity that is creating the message. Allowed values include:
-
-              - `user`: Indicates the message is sent by an actual user and should be used in
-                most cases to represent user-generated messages.
-              - `assistant`: Indicates the message is generated by the assistant. Use this
-                value to insert messages from the assistant into the conversation.
-
-          file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-              the message should use. There can be a maximum of 10 files attached to a
-              message. Useful for tools like `retrieval` and `code_interpreter` that can
-              access and use files.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            f"/threads/{thread_id}/messages",
-            body=await async_maybe_transform(
-                {
-                    "content": content,
-                    "role": role,
-                    "file_ids": file_ids,
-                    "metadata": metadata,
-                },
-                message_create_params.MessageCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Message,
-        )
-
-    async def retrieve(
-        self,
-        message_id: str,
-        *,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Message:
-        """
-        Retrieve a message.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not message_id:
-            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._get(
-            f"/threads/{thread_id}/messages/{message_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Message,
-        )
-
-    async def update(
-        self,
-        message_id: str,
-        *,
-        thread_id: str,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Message:
-        """
-        Modifies a message.
-
-        Args:
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not message_id:
-            raise ValueError(f"Expected a non-empty value for `message_id` but received {message_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            f"/threads/{thread_id}/messages/{message_id}",
-            body=await async_maybe_transform({"metadata": metadata}, message_update_params.MessageUpdateParams),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Message,
-        )
-
-    def list(
-        self,
-        thread_id: str,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        run_id: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[Message, AsyncCursorPage[Message]]:
-        """
-        Returns a list of messages for a given thread.
-
-        Args:
-          after: A cursor for use in pagination. `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          run_id: Filter messages by the run ID that generated them.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/threads/{thread_id}/messages",
-            page=AsyncCursorPage[Message],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                        "run_id": run_id,
-                    },
-                    message_list_params.MessageListParams,
-                ),
-            ),
-            model=Message,
-        )
-
-
-class MessagesWithRawResponse:
-    def __init__(self, messages: Messages) -> None:
-        self._messages = messages
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            messages.create,
-        )
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            messages.retrieve,
-        )
-        self.update = _legacy_response.to_raw_response_wrapper(
-            messages.update,
-        )
-        self.list = _legacy_response.to_raw_response_wrapper(
-            messages.list,
-        )
-
-    @cached_property
-    def files(self) -> FilesWithRawResponse:
-        return FilesWithRawResponse(self._messages.files)
-
-
-class AsyncMessagesWithRawResponse:
-    def __init__(self, messages: AsyncMessages) -> None:
-        self._messages = messages
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            messages.create,
-        )
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            messages.retrieve,
-        )
-        self.update = _legacy_response.async_to_raw_response_wrapper(
-            messages.update,
-        )
-        self.list = _legacy_response.async_to_raw_response_wrapper(
-            messages.list,
-        )
-
-    @cached_property
-    def files(self) -> AsyncFilesWithRawResponse:
-        return AsyncFilesWithRawResponse(self._messages.files)
-
-
-class MessagesWithStreamingResponse:
-    def __init__(self, messages: Messages) -> None:
-        self._messages = messages
-
-        self.create = to_streamed_response_wrapper(
-            messages.create,
-        )
-        self.retrieve = to_streamed_response_wrapper(
-            messages.retrieve,
-        )
-        self.update = to_streamed_response_wrapper(
-            messages.update,
-        )
-        self.list = to_streamed_response_wrapper(
-            messages.list,
-        )
-
-    @cached_property
-    def files(self) -> FilesWithStreamingResponse:
-        return FilesWithStreamingResponse(self._messages.files)
-
-
-class AsyncMessagesWithStreamingResponse:
-    def __init__(self, messages: AsyncMessages) -> None:
-        self._messages = messages
-
-        self.create = async_to_streamed_response_wrapper(
-            messages.create,
-        )
-        self.retrieve = async_to_streamed_response_wrapper(
-            messages.retrieve,
-        )
-        self.update = async_to_streamed_response_wrapper(
-            messages.update,
-        )
-        self.list = async_to_streamed_response_wrapper(
-            messages.list,
-        )
-
-    @cached_property
-    def files(self) -> AsyncFilesWithStreamingResponse:
-        return AsyncFilesWithStreamingResponse(self._messages.files)
diff --git a/openai/resources/beta/threads/runs/__init__.py b/openai/resources/beta/threads/runs/__init__.py
deleted file mode 100644
index 50aa9fae..00000000
--- a/openai/resources/beta/threads/runs/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .runs import (
-    Runs,
-    AsyncRuns,
-    RunsWithRawResponse,
-    AsyncRunsWithRawResponse,
-    RunsWithStreamingResponse,
-    AsyncRunsWithStreamingResponse,
-)
-from .steps import (
-    Steps,
-    AsyncSteps,
-    StepsWithRawResponse,
-    AsyncStepsWithRawResponse,
-    StepsWithStreamingResponse,
-    AsyncStepsWithStreamingResponse,
-)
-
-__all__ = [
-    "Steps",
-    "AsyncSteps",
-    "StepsWithRawResponse",
-    "AsyncStepsWithRawResponse",
-    "StepsWithStreamingResponse",
-    "AsyncStepsWithStreamingResponse",
-    "Runs",
-    "AsyncRuns",
-    "RunsWithRawResponse",
-    "AsyncRunsWithRawResponse",
-    "RunsWithStreamingResponse",
-    "AsyncRunsWithStreamingResponse",
-]
diff --git a/openai/resources/beta/threads/runs/runs.py b/openai/resources/beta/threads/runs/runs.py
deleted file mode 100644
index 4529c650..00000000
--- a/openai/resources/beta/threads/runs/runs.py
+++ /dev/null
@@ -1,2223 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import time
-import typing_extensions
-from typing import Iterable, Optional, overload
-from functools import partial
-from typing_extensions import Literal
-
-import httpx
-
-from ..... import _legacy_response
-from .steps import (
-    Steps,
-    AsyncSteps,
-    StepsWithRawResponse,
-    AsyncStepsWithRawResponse,
-    StepsWithStreamingResponse,
-    AsyncStepsWithStreamingResponse,
-)
-from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ....._utils import (
-    is_given,
-    required_args,
-    maybe_transform,
-    async_maybe_transform,
-)
-from ....._compat import cached_property
-from ....._resource import SyncAPIResource, AsyncAPIResource
-from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ....._streaming import Stream, AsyncStream
-from .....pagination import SyncCursorPage, AsyncCursorPage
-from .....types.beta import AssistantToolParam, AssistantStreamEvent
-from ....._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-from .....lib.streaming import (
-    AssistantEventHandler,
-    AssistantEventHandlerT,
-    AssistantStreamManager,
-    AsyncAssistantEventHandler,
-    AsyncAssistantEventHandlerT,
-    AsyncAssistantStreamManager,
-)
-from .....types.beta.threads import (
-    Run,
-    run_list_params,
-    run_create_params,
-    run_update_params,
-    run_submit_tool_outputs_params,
-)
-
-__all__ = ["Runs", "AsyncRuns"]
-
-
-class Runs(SyncAPIResource):
-    @cached_property
-    def steps(self) -> Steps:
-        return Steps(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> RunsWithRawResponse:
-        return RunsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> RunsWithStreamingResponse:
-        return RunsWithStreamingResponse(self)
-
-    @overload
-    def create(
-        self,
-        thread_id: str,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Create a run.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
-              is useful for modifying the behavior on a per-run basis without overriding other
-              instructions.
-
-          instructions: Overrides the
-              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
-              of the assistant. This is useful for modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def create(
-        self,
-        thread_id: str,
-        *,
-        assistant_id: str,
-        stream: Literal[True],
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[AssistantStreamEvent]:
-        """
-        Create a run.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
-              is useful for modifying the behavior on a per-run basis without overriding other
-              instructions.
-
-          instructions: Overrides the
-              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
-              of the assistant. This is useful for modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def create(
-        self,
-        thread_id: str,
-        *,
-        assistant_id: str,
-        stream: bool,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | Stream[AssistantStreamEvent]:
-        """
-        Create a run.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
-              is useful for modifying the behavior on a per-run basis without overriding other
-              instructions.
-
-          instructions: Overrides the
-              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
-              of the assistant. This is useful for modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["assistant_id"], ["assistant_id", "stream"])
-    def create(
-        self,
-        thread_id: str,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | Stream[AssistantStreamEvent]:
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/threads/{thread_id}/runs",
-            body=maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "additional_instructions": additional_instructions,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "stream": stream,
-                    "temperature": temperature,
-                    "tools": tools,
-                },
-                run_create_params.RunCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=stream or False,
-            stream_cls=Stream[AssistantStreamEvent],
-        )
-
-    def retrieve(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Retrieves a run.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get(
-            f"/threads/{thread_id}/runs/{run_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-        )
-
-    def update(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Modifies a run.
-
-        Args:
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/threads/{thread_id}/runs/{run_id}",
-            body=maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-        )
-
-    def list(
-        self,
-        thread_id: str,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncCursorPage[Run]:
-        """
-        Returns a list of runs belonging to a thread.
-
-        Args:
-          after: A cursor for use in pagination. `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/threads/{thread_id}/runs",
-            page=SyncCursorPage[Run],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    run_list_params.RunListParams,
-                ),
-            ),
-            model=Run,
-        )
-
-    def cancel(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Cancels a run that is `in_progress`.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/threads/{thread_id}/runs/{run_id}/cancel",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-        )
-
-    def create_and_poll(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        A helper to create a run an poll for a terminal state. More information on Run
-        lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        run = self.create(
-            thread_id=thread_id,
-            assistant_id=assistant_id,
-            additional_instructions=additional_instructions,
-            instructions=instructions,
-            metadata=metadata,
-            model=model,
-            temperature=temperature,
-            # We assume we are not streaming when polling
-            stream=False,
-            tools=tools,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-        )
-        return self.poll(
-            run.id,
-            thread_id=thread_id,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            poll_interval_ms=poll_interval_ms,
-            timeout=timeout,
-        )
-
-    @overload
-    @typing_extensions.deprecated("use `stream` instead")
-    def create_and_stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandler]:
-        """Create a Run stream"""
-        ...
-
-    @overload
-    @typing_extensions.deprecated("use `stream` instead")
-    def create_and_stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        event_handler: AssistantEventHandlerT,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandlerT]:
-        """Create a Run stream"""
-        ...
-
-    @typing_extensions.deprecated("use `stream` instead")
-    def create_and_stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        event_handler: AssistantEventHandlerT | None = None,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
-        """Create a Run stream"""
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-
-        extra_headers = {
-            "OpenAI-Beta": "assistants=v1",
-            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
-            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
-            **(extra_headers or {}),
-        }
-        make_request = partial(
-            self._post,
-            f"/threads/{thread_id}/runs",
-            body=maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "additional_instructions": additional_instructions,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "temperature": temperature,
-                    "stream": True,
-                    "tools": tools,
-                },
-                run_create_params.RunCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=True,
-            stream_cls=Stream[AssistantStreamEvent],
-        )
-        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
-
-    def poll(
-        self,
-        run_id: str,
-        thread_id: str,
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-        poll_interval_ms: int | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        A helper to poll a run status until it reaches a terminal state. More
-        information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
-
-        if is_given(poll_interval_ms):
-            extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
-
-        terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired"}
-        while True:
-            response = self.with_raw_response.retrieve(
-                thread_id=thread_id,
-                run_id=run_id,
-                extra_headers=extra_headers,
-                extra_body=extra_body,
-                extra_query=extra_query,
-                timeout=timeout,
-            )
-
-            run = response.parse()
-            # Return if we reached a terminal state
-            if run.status in terminal_states:
-                return run
-
-            if not is_given(poll_interval_ms):
-                from_header = response.headers.get("openai-poll-after-ms")
-                if from_header is not None:
-                    poll_interval_ms = int(from_header)
-                else:
-                    poll_interval_ms = 1000
-
-            time.sleep(poll_interval_ms / 1000)
-
-    @overload
-    def stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandler]:
-        """Create a Run stream"""
-        ...
-
-    @overload
-    def stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        event_handler: AssistantEventHandlerT,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandlerT]:
-        """Create a Run stream"""
-        ...
-
-    def stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        event_handler: AssistantEventHandlerT | None = None,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
-        """Create a Run stream"""
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-
-        extra_headers = {
-            "OpenAI-Beta": "assistants=v1",
-            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
-            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
-            **(extra_headers or {}),
-        }
-        make_request = partial(
-            self._post,
-            f"/threads/{thread_id}/runs",
-            body=maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "additional_instructions": additional_instructions,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "temperature": temperature,
-                    "stream": True,
-                    "tools": tools,
-                },
-                run_create_params.RunCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=True,
-            stream_cls=Stream[AssistantStreamEvent],
-        )
-        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
-
-    @overload
-    def submit_tool_outputs(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        When a run has the `status: "requires_action"` and `required_action.type` is
-        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
-        tool calls once they're all completed. All outputs must be submitted in a single
-        request.
-
-        Args:
-          tool_outputs: A list of tools for which the outputs are being submitted.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def submit_tool_outputs(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        stream: Literal[True],
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[AssistantStreamEvent]:
-        """
-        When a run has the `status: "requires_action"` and `required_action.type` is
-        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
-        tool calls once they're all completed. All outputs must be submitted in a single
-        request.
-
-        Args:
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          tool_outputs: A list of tools for which the outputs are being submitted.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def submit_tool_outputs(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        stream: bool,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | Stream[AssistantStreamEvent]:
-        """
-        When a run has the `status: "requires_action"` and `required_action.type` is
-        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
-        tool calls once they're all completed. All outputs must be submitted in a single
-        request.
-
-        Args:
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          tool_outputs: A list of tools for which the outputs are being submitted.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
-    def submit_tool_outputs(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | Stream[AssistantStreamEvent]:
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
-            body=maybe_transform(
-                {
-                    "tool_outputs": tool_outputs,
-                    "stream": stream,
-                },
-                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=stream or False,
-            stream_cls=Stream[AssistantStreamEvent],
-        )
-
-    def submit_tool_outputs_and_poll(
-        self,
-        *,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        run_id: str,
-        thread_id: str,
-        poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        A helper to submit a tool output to a run and poll for a terminal run state.
-        More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        run = self.submit_tool_outputs(
-            run_id=run_id,
-            thread_id=thread_id,
-            tool_outputs=tool_outputs,
-            stream=False,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-        )
-        return self.poll(
-            run_id=run.id,
-            thread_id=thread_id,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-            poll_interval_ms=poll_interval_ms,
-        )
-
-    @overload
-    def submit_tool_outputs_stream(
-        self,
-        *,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        run_id: str,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandler]:
-        """
-        Submit the tool outputs from a previous run and stream the run to a terminal
-        state. More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        ...
-
-    @overload
-    def submit_tool_outputs_stream(
-        self,
-        *,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        run_id: str,
-        thread_id: str,
-        event_handler: AssistantEventHandlerT,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandlerT]:
-        """
-        Submit the tool outputs from a previous run and stream the run to a terminal
-        state. More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        ...
-
-    def submit_tool_outputs_stream(
-        self,
-        *,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        run_id: str,
-        thread_id: str,
-        event_handler: AssistantEventHandlerT | None = None,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
-        """
-        Submit the tool outputs from a previous run and stream the run to a terminal
-        state. More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-
-        extra_headers = {
-            "OpenAI-Beta": "assistants=v1",
-            "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
-            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
-            **(extra_headers or {}),
-        }
-        request = partial(
-            self._post,
-            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
-            body=maybe_transform(
-                {
-                    "tool_outputs": tool_outputs,
-                    "stream": True,
-                },
-                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=True,
-            stream_cls=Stream[AssistantStreamEvent],
-        )
-        return AssistantStreamManager(request, event_handler=event_handler or AssistantEventHandler())
-
-
-class AsyncRuns(AsyncAPIResource):
-    @cached_property
-    def steps(self) -> AsyncSteps:
-        return AsyncSteps(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AsyncRunsWithRawResponse:
-        return AsyncRunsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncRunsWithStreamingResponse:
-        return AsyncRunsWithStreamingResponse(self)
-
-    @overload
-    async def create(
-        self,
-        thread_id: str,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Create a run.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
-              is useful for modifying the behavior on a per-run basis without overriding other
-              instructions.
-
-          instructions: Overrides the
-              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
-              of the assistant. This is useful for modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def create(
-        self,
-        thread_id: str,
-        *,
-        assistant_id: str,
-        stream: Literal[True],
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[AssistantStreamEvent]:
-        """
-        Create a run.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
-              is useful for modifying the behavior on a per-run basis without overriding other
-              instructions.
-
-          instructions: Overrides the
-              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
-              of the assistant. This is useful for modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def create(
-        self,
-        thread_id: str,
-        *,
-        assistant_id: str,
-        stream: bool,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | AsyncStream[AssistantStreamEvent]:
-        """
-        Create a run.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          additional_instructions: Appends additional instructions at the end of the instructions for the run. This
-              is useful for modifying the behavior on a per-run basis without overriding other
-              instructions.
-
-          instructions: Overrides the
-              [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
-              of the assistant. This is useful for modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["assistant_id"], ["assistant_id", "stream"])
-    async def create(
-        self,
-        thread_id: str,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | AsyncStream[AssistantStreamEvent]:
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            f"/threads/{thread_id}/runs",
-            body=await async_maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "additional_instructions": additional_instructions,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "stream": stream,
-                    "temperature": temperature,
-                    "tools": tools,
-                },
-                run_create_params.RunCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=stream or False,
-            stream_cls=AsyncStream[AssistantStreamEvent],
-        )
-
-    async def retrieve(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Retrieves a run.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._get(
-            f"/threads/{thread_id}/runs/{run_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-        )
-
-    async def update(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Modifies a run.
-
-        Args:
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            f"/threads/{thread_id}/runs/{run_id}",
-            body=await async_maybe_transform({"metadata": metadata}, run_update_params.RunUpdateParams),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-        )
-
-    def list(
-        self,
-        thread_id: str,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[Run, AsyncCursorPage[Run]]:
-        """
-        Returns a list of runs belonging to a thread.
-
-        Args:
-          after: A cursor for use in pagination. `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/threads/{thread_id}/runs",
-            page=AsyncCursorPage[Run],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    run_list_params.RunListParams,
-                ),
-            ),
-            model=Run,
-        )
-
-    async def cancel(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Cancels a run that is `in_progress`.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            f"/threads/{thread_id}/runs/{run_id}/cancel",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-        )
-
-    async def create_and_poll(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        A helper to create a run an poll for a terminal state. More information on Run
-        lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        run = await self.create(
-            thread_id=thread_id,
-            assistant_id=assistant_id,
-            additional_instructions=additional_instructions,
-            instructions=instructions,
-            metadata=metadata,
-            model=model,
-            temperature=temperature,
-            # We assume we are not streaming when polling
-            stream=False,
-            tools=tools,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-        )
-        return await self.poll(
-            run.id,
-            thread_id=thread_id,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            poll_interval_ms=poll_interval_ms,
-            timeout=timeout,
-        )
-
-    @overload
-    @typing_extensions.deprecated("use `stream` instead")
-    def create_and_stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
-        """Create a Run stream"""
-        ...
-
-    @overload
-    @typing_extensions.deprecated("use `stream` instead")
-    def create_and_stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        event_handler: AsyncAssistantEventHandlerT,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
-        """Create a Run stream"""
-        ...
-
-    @typing_extensions.deprecated("use `stream` instead")
-    def create_and_stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        event_handler: AsyncAssistantEventHandlerT | None = None,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> (
-        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
-        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
-    ):
-        """Create a Run stream"""
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-
-        extra_headers = {
-            "OpenAI-Beta": "assistants=v1",
-            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
-            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
-            **(extra_headers or {}),
-        }
-        request = self._post(
-            f"/threads/{thread_id}/runs",
-            body=maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "additional_instructions": additional_instructions,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "temperature": temperature,
-                    "stream": True,
-                    "tools": tools,
-                },
-                run_create_params.RunCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=True,
-            stream_cls=AsyncStream[AssistantStreamEvent],
-        )
-        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
-
-    async def poll(
-        self,
-        run_id: str,
-        thread_id: str,
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-        poll_interval_ms: int | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        A helper to poll a run status until it reaches a terminal state. More
-        information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        extra_headers = {"X-Stainless-Poll-Helper": "true", **(extra_headers or {})}
-
-        if is_given(poll_interval_ms):
-            extra_headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
-
-        terminal_states = {"requires_action", "cancelled", "completed", "failed", "expired"}
-        while True:
-            response = await self.with_raw_response.retrieve(
-                thread_id=thread_id,
-                run_id=run_id,
-                extra_headers=extra_headers,
-                extra_body=extra_body,
-                extra_query=extra_query,
-                timeout=timeout,
-            )
-
-            run = response.parse()
-            # Return if we reached a terminal state
-            if run.status in terminal_states:
-                return run
-
-            if not is_given(poll_interval_ms):
-                from_header = response.headers.get("openai-poll-after-ms")
-                if from_header is not None:
-                    poll_interval_ms = int(from_header)
-                else:
-                    poll_interval_ms = 1000
-
-            time.sleep(poll_interval_ms / 1000)
-
-    @overload
-    def stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
-        """Create a Run stream"""
-        ...
-
-    @overload
-    def stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        event_handler: AsyncAssistantEventHandlerT,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
-        """Create a Run stream"""
-        ...
-
-    def stream(
-        self,
-        *,
-        assistant_id: str,
-        additional_instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[AssistantToolParam]] | NotGiven = NOT_GIVEN,
-        thread_id: str,
-        event_handler: AsyncAssistantEventHandlerT | None = None,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> (
-        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
-        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
-    ):
-        """Create a Run stream"""
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-
-        extra_headers = {
-            "OpenAI-Beta": "assistants=v1",
-            "X-Stainless-Stream-Helper": "threads.runs.create_and_stream",
-            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
-            **(extra_headers or {}),
-        }
-        request = self._post(
-            f"/threads/{thread_id}/runs",
-            body=maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "additional_instructions": additional_instructions,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "temperature": temperature,
-                    "stream": True,
-                    "tools": tools,
-                },
-                run_create_params.RunCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=True,
-            stream_cls=AsyncStream[AssistantStreamEvent],
-        )
-        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
-
-    @overload
-    async def submit_tool_outputs(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        When a run has the `status: "requires_action"` and `required_action.type` is
-        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
-        tool calls once they're all completed. All outputs must be submitted in a single
-        request.
-
-        Args:
-          tool_outputs: A list of tools for which the outputs are being submitted.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def submit_tool_outputs(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        stream: Literal[True],
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[AssistantStreamEvent]:
-        """
-        When a run has the `status: "requires_action"` and `required_action.type` is
-        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
-        tool calls once they're all completed. All outputs must be submitted in a single
-        request.
-
-        Args:
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          tool_outputs: A list of tools for which the outputs are being submitted.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def submit_tool_outputs(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        stream: bool,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | AsyncStream[AssistantStreamEvent]:
-        """
-        When a run has the `status: "requires_action"` and `required_action.type` is
-        `submit_tool_outputs`, this endpoint can be used to submit the outputs from the
-        tool calls once they're all completed. All outputs must be submitted in a single
-        request.
-
-        Args:
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          tool_outputs: A list of tools for which the outputs are being submitted.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["thread_id", "tool_outputs"], ["thread_id", "stream", "tool_outputs"])
-    async def submit_tool_outputs(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | AsyncStream[AssistantStreamEvent]:
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
-            body=await async_maybe_transform(
-                {
-                    "tool_outputs": tool_outputs,
-                    "stream": stream,
-                },
-                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=stream or False,
-            stream_cls=AsyncStream[AssistantStreamEvent],
-        )
-
-    async def submit_tool_outputs_and_poll(
-        self,
-        *,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        run_id: str,
-        thread_id: str,
-        poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        A helper to submit a tool output to a run and poll for a terminal run state.
-        More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        run = await self.submit_tool_outputs(
-            run_id=run_id,
-            thread_id=thread_id,
-            tool_outputs=tool_outputs,
-            stream=False,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-        )
-        return await self.poll(
-            run_id=run.id,
-            thread_id=thread_id,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-            poll_interval_ms=poll_interval_ms,
-        )
-
-    @overload
-    def submit_tool_outputs_stream(
-        self,
-        *,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        run_id: str,
-        thread_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
-        """
-        Submit the tool outputs from a previous run and stream the run to a terminal
-        state. More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        ...
-
-    @overload
-    def submit_tool_outputs_stream(
-        self,
-        *,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        run_id: str,
-        thread_id: str,
-        event_handler: AsyncAssistantEventHandlerT,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
-        """
-        Submit the tool outputs from a previous run and stream the run to a terminal
-        state. More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        ...
-
-    def submit_tool_outputs_stream(
-        self,
-        *,
-        tool_outputs: Iterable[run_submit_tool_outputs_params.ToolOutput],
-        run_id: str,
-        thread_id: str,
-        event_handler: AsyncAssistantEventHandlerT | None = None,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> (
-        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
-        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
-    ):
-        """
-        Submit the tool outputs from a previous run and stream the run to a terminal
-        state. More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-
-        extra_headers = {
-            "OpenAI-Beta": "assistants=v1",
-            "X-Stainless-Stream-Helper": "threads.runs.submit_tool_outputs_stream",
-            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
-            **(extra_headers or {}),
-        }
-        request = self._post(
-            f"/threads/{thread_id}/runs/{run_id}/submit_tool_outputs",
-            body=maybe_transform(
-                {
-                    "tool_outputs": tool_outputs,
-                    "stream": True,
-                },
-                run_submit_tool_outputs_params.RunSubmitToolOutputsParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=True,
-            stream_cls=AsyncStream[AssistantStreamEvent],
-        )
-        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
-
-
-class RunsWithRawResponse:
-    def __init__(self, runs: Runs) -> None:
-        self._runs = runs
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            runs.create,
-        )
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            runs.retrieve,
-        )
-        self.update = _legacy_response.to_raw_response_wrapper(
-            runs.update,
-        )
-        self.list = _legacy_response.to_raw_response_wrapper(
-            runs.list,
-        )
-        self.cancel = _legacy_response.to_raw_response_wrapper(
-            runs.cancel,
-        )
-        self.submit_tool_outputs = _legacy_response.to_raw_response_wrapper(
-            runs.submit_tool_outputs,
-        )
-
-    @cached_property
-    def steps(self) -> StepsWithRawResponse:
-        return StepsWithRawResponse(self._runs.steps)
-
-
-class AsyncRunsWithRawResponse:
-    def __init__(self, runs: AsyncRuns) -> None:
-        self._runs = runs
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            runs.create,
-        )
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            runs.retrieve,
-        )
-        self.update = _legacy_response.async_to_raw_response_wrapper(
-            runs.update,
-        )
-        self.list = _legacy_response.async_to_raw_response_wrapper(
-            runs.list,
-        )
-        self.cancel = _legacy_response.async_to_raw_response_wrapper(
-            runs.cancel,
-        )
-        self.submit_tool_outputs = _legacy_response.async_to_raw_response_wrapper(
-            runs.submit_tool_outputs,
-        )
-
-    @cached_property
-    def steps(self) -> AsyncStepsWithRawResponse:
-        return AsyncStepsWithRawResponse(self._runs.steps)
-
-
-class RunsWithStreamingResponse:
-    def __init__(self, runs: Runs) -> None:
-        self._runs = runs
-
-        self.create = to_streamed_response_wrapper(
-            runs.create,
-        )
-        self.retrieve = to_streamed_response_wrapper(
-            runs.retrieve,
-        )
-        self.update = to_streamed_response_wrapper(
-            runs.update,
-        )
-        self.list = to_streamed_response_wrapper(
-            runs.list,
-        )
-        self.cancel = to_streamed_response_wrapper(
-            runs.cancel,
-        )
-        self.submit_tool_outputs = to_streamed_response_wrapper(
-            runs.submit_tool_outputs,
-        )
-
-    @cached_property
-    def steps(self) -> StepsWithStreamingResponse:
-        return StepsWithStreamingResponse(self._runs.steps)
-
-
-class AsyncRunsWithStreamingResponse:
-    def __init__(self, runs: AsyncRuns) -> None:
-        self._runs = runs
-
-        self.create = async_to_streamed_response_wrapper(
-            runs.create,
-        )
-        self.retrieve = async_to_streamed_response_wrapper(
-            runs.retrieve,
-        )
-        self.update = async_to_streamed_response_wrapper(
-            runs.update,
-        )
-        self.list = async_to_streamed_response_wrapper(
-            runs.list,
-        )
-        self.cancel = async_to_streamed_response_wrapper(
-            runs.cancel,
-        )
-        self.submit_tool_outputs = async_to_streamed_response_wrapper(
-            runs.submit_tool_outputs,
-        )
-
-    @cached_property
-    def steps(self) -> AsyncStepsWithStreamingResponse:
-        return AsyncStepsWithStreamingResponse(self._runs.steps)
diff --git a/openai/resources/beta/threads/runs/steps.py b/openai/resources/beta/threads/runs/steps.py
deleted file mode 100644
index 118bd882..00000000
--- a/openai/resources/beta/threads/runs/steps.py
+++ /dev/null
@@ -1,310 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal
-
-import httpx
-
-from ..... import _legacy_response
-from ....._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ....._utils import maybe_transform
-from ....._compat import cached_property
-from ....._resource import SyncAPIResource, AsyncAPIResource
-from ....._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from .....pagination import SyncCursorPage, AsyncCursorPage
-from ....._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-from .....types.beta.threads.runs import RunStep, step_list_params
-
-__all__ = ["Steps", "AsyncSteps"]
-
-
-class Steps(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> StepsWithRawResponse:
-        return StepsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> StepsWithStreamingResponse:
-        return StepsWithStreamingResponse(self)
-
-    def retrieve(
-        self,
-        step_id: str,
-        *,
-        thread_id: str,
-        run_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> RunStep:
-        """
-        Retrieves a run step.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        if not step_id:
-            raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get(
-            f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=RunStep,
-        )
-
-    def list(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncCursorPage[RunStep]:
-        """
-        Returns a list of run steps belonging to a run.
-
-        Args:
-          after: A cursor for use in pagination. `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/threads/{thread_id}/runs/{run_id}/steps",
-            page=SyncCursorPage[RunStep],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    step_list_params.StepListParams,
-                ),
-            ),
-            model=RunStep,
-        )
-
-
-class AsyncSteps(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncStepsWithRawResponse:
-        return AsyncStepsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncStepsWithStreamingResponse:
-        return AsyncStepsWithStreamingResponse(self)
-
-    async def retrieve(
-        self,
-        step_id: str,
-        *,
-        thread_id: str,
-        run_id: str,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> RunStep:
-        """
-        Retrieves a run step.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        if not step_id:
-            raise ValueError(f"Expected a non-empty value for `step_id` but received {step_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._get(
-            f"/threads/{thread_id}/runs/{run_id}/steps/{step_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=RunStep,
-        )
-
-    def list(
-        self,
-        run_id: str,
-        *,
-        thread_id: str,
-        after: str | NotGiven = NOT_GIVEN,
-        before: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[RunStep, AsyncCursorPage[RunStep]]:
-        """
-        Returns a list of run steps belonging to a run.
-
-        Args:
-          after: A cursor for use in pagination. `after` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include after=obj_foo in order to
-              fetch the next page of the list.
-
-          before: A cursor for use in pagination. `before` is an object ID that defines your place
-              in the list. For instance, if you make a list request and receive 100 objects,
-              ending with obj_foo, your subsequent call can include before=obj_foo in order to
-              fetch the previous page of the list.
-
-          limit: A limit on the number of objects to be returned. Limit can range between 1 and
-              100, and the default is 20.
-
-          order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
-              order and `desc` for descending order.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        if not run_id:
-            raise ValueError(f"Expected a non-empty value for `run_id` but received {run_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get_api_list(
-            f"/threads/{thread_id}/runs/{run_id}/steps",
-            page=AsyncCursorPage[RunStep],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "before": before,
-                        "limit": limit,
-                        "order": order,
-                    },
-                    step_list_params.StepListParams,
-                ),
-            ),
-            model=RunStep,
-        )
-
-
-class StepsWithRawResponse:
-    def __init__(self, steps: Steps) -> None:
-        self._steps = steps
-
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            steps.retrieve,
-        )
-        self.list = _legacy_response.to_raw_response_wrapper(
-            steps.list,
-        )
-
-
-class AsyncStepsWithRawResponse:
-    def __init__(self, steps: AsyncSteps) -> None:
-        self._steps = steps
-
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            steps.retrieve,
-        )
-        self.list = _legacy_response.async_to_raw_response_wrapper(
-            steps.list,
-        )
-
-
-class StepsWithStreamingResponse:
-    def __init__(self, steps: Steps) -> None:
-        self._steps = steps
-
-        self.retrieve = to_streamed_response_wrapper(
-            steps.retrieve,
-        )
-        self.list = to_streamed_response_wrapper(
-            steps.list,
-        )
-
-
-class AsyncStepsWithStreamingResponse:
-    def __init__(self, steps: AsyncSteps) -> None:
-        self._steps = steps
-
-        self.retrieve = async_to_streamed_response_wrapper(
-            steps.retrieve,
-        )
-        self.list = async_to_streamed_response_wrapper(
-            steps.list,
-        )
diff --git a/openai/resources/beta/threads/threads.py b/openai/resources/beta/threads/threads.py
deleted file mode 100644
index 3509267d..00000000
--- a/openai/resources/beta/threads/threads.py
+++ /dev/null
@@ -1,1259 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Iterable, Optional, overload
-from functools import partial
-from typing_extensions import Literal
-
-import httpx
-
-from .... import _legacy_response
-from .runs import (
-    Runs,
-    AsyncRuns,
-    RunsWithRawResponse,
-    AsyncRunsWithRawResponse,
-    RunsWithStreamingResponse,
-    AsyncRunsWithStreamingResponse,
-)
-from .messages import (
-    Messages,
-    AsyncMessages,
-    MessagesWithRawResponse,
-    AsyncMessagesWithRawResponse,
-    MessagesWithStreamingResponse,
-    AsyncMessagesWithStreamingResponse,
-)
-from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ...._utils import (
-    required_args,
-    maybe_transform,
-    async_maybe_transform,
-)
-from .runs.runs import Runs, AsyncRuns
-from ...._compat import cached_property
-from ...._resource import SyncAPIResource, AsyncAPIResource
-from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...._streaming import Stream, AsyncStream
-from ....types.beta import (
-    Thread,
-    ThreadDeleted,
-    AssistantStreamEvent,
-    thread_create_params,
-    thread_update_params,
-    thread_create_and_run_params,
-)
-from ...._base_client import (
-    make_request_options,
-)
-from ....lib.streaming import (
-    AssistantEventHandler,
-    AssistantEventHandlerT,
-    AssistantStreamManager,
-    AsyncAssistantEventHandler,
-    AsyncAssistantEventHandlerT,
-    AsyncAssistantStreamManager,
-)
-from .messages.messages import Messages, AsyncMessages
-from ....types.beta.threads import Run
-
-__all__ = ["Threads", "AsyncThreads"]
-
-
-class Threads(SyncAPIResource):
-    @cached_property
-    def runs(self) -> Runs:
-        return Runs(self._client)
-
-    @cached_property
-    def messages(self) -> Messages:
-        return Messages(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> ThreadsWithRawResponse:
-        return ThreadsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> ThreadsWithStreamingResponse:
-        return ThreadsWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Thread:
-        """
-        Create a thread.
-
-        Args:
-          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
-              start the thread with.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            "/threads",
-            body=maybe_transform(
-                {
-                    "messages": messages,
-                    "metadata": metadata,
-                },
-                thread_create_params.ThreadCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Thread,
-        )
-
-    def retrieve(
-        self,
-        thread_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Thread:
-        """
-        Retrieves a thread.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._get(
-            f"/threads/{thread_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Thread,
-        )
-
-    def update(
-        self,
-        thread_id: str,
-        *,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Thread:
-        """
-        Modifies a thread.
-
-        Args:
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            f"/threads/{thread_id}",
-            body=maybe_transform({"metadata": metadata}, thread_update_params.ThreadUpdateParams),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Thread,
-        )
-
-    def delete(
-        self,
-        thread_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ThreadDeleted:
-        """
-        Delete a thread.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._delete(
-            f"/threads/{thread_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ThreadDeleted,
-        )
-
-    @overload
-    def create_and_run(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Create a thread and run it in one request.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          instructions: Override the default system message of the assistant. This is useful for
-              modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          thread: If no thread is provided, an empty thread will be created.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def create_and_run(
-        self,
-        *,
-        assistant_id: str,
-        stream: Literal[True],
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[AssistantStreamEvent]:
-        """
-        Create a thread and run it in one request.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          instructions: Override the default system message of the assistant. This is useful for
-              modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          thread: If no thread is provided, an empty thread will be created.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def create_and_run(
-        self,
-        *,
-        assistant_id: str,
-        stream: bool,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | Stream[AssistantStreamEvent]:
-        """
-        Create a thread and run it in one request.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          instructions: Override the default system message of the assistant. This is useful for
-              modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          thread: If no thread is provided, an empty thread will be created.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["assistant_id"], ["assistant_id", "stream"])
-    def create_and_run(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | Stream[AssistantStreamEvent]:
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return self._post(
-            "/threads/runs",
-            body=maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "stream": stream,
-                    "temperature": temperature,
-                    "thread": thread,
-                    "tools": tools,
-                },
-                thread_create_and_run_params.ThreadCreateAndRunParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=stream or False,
-            stream_cls=Stream[AssistantStreamEvent],
-        )
-
-    def create_and_run_poll(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        A helper to create a thread, start a run and then poll for a terminal state.
-        More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        run = self.create_and_run(
-            assistant_id=assistant_id,
-            instructions=instructions,
-            metadata=metadata,
-            model=model,
-            temperature=temperature,
-            stream=False,
-            thread=thread,
-            tools=tools,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-        )
-        return self.runs.poll(run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms)
-
-    @overload
-    def create_and_run_stream(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandler]:
-        """Create a thread and stream the run back"""
-        ...
-
-    @overload
-    def create_and_run_stream(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        event_handler: AssistantEventHandlerT,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandlerT]:
-        """Create a thread and stream the run back"""
-        ...
-
-    def create_and_run_stream(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        event_handler: AssistantEventHandlerT | None = None,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AssistantStreamManager[AssistantEventHandler] | AssistantStreamManager[AssistantEventHandlerT]:
-        """Create a thread and stream the run back"""
-        extra_headers = {
-            "OpenAI-Beta": "assistants=v1",
-            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
-            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
-            **(extra_headers or {}),
-        }
-        make_request = partial(
-            self._post,
-            "/threads/runs",
-            body=maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "temperature": temperature,
-                    "stream": True,
-                    "thread": thread,
-                    "tools": tools,
-                },
-                thread_create_and_run_params.ThreadCreateAndRunParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=True,
-            stream_cls=Stream[AssistantStreamEvent],
-        )
-        return AssistantStreamManager(make_request, event_handler=event_handler or AssistantEventHandler())
-
-
-class AsyncThreads(AsyncAPIResource):
-    @cached_property
-    def runs(self) -> AsyncRuns:
-        return AsyncRuns(self._client)
-
-    @cached_property
-    def messages(self) -> AsyncMessages:
-        return AsyncMessages(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AsyncThreadsWithRawResponse:
-        return AsyncThreadsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncThreadsWithStreamingResponse:
-        return AsyncThreadsWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        messages: Iterable[thread_create_params.Message] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Thread:
-        """
-        Create a thread.
-
-        Args:
-          messages: A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
-              start the thread with.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            "/threads",
-            body=await async_maybe_transform(
-                {
-                    "messages": messages,
-                    "metadata": metadata,
-                },
-                thread_create_params.ThreadCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Thread,
-        )
-
-    async def retrieve(
-        self,
-        thread_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Thread:
-        """
-        Retrieves a thread.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._get(
-            f"/threads/{thread_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Thread,
-        )
-
-    async def update(
-        self,
-        thread_id: str,
-        *,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Thread:
-        """
-        Modifies a thread.
-
-        Args:
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            f"/threads/{thread_id}",
-            body=await async_maybe_transform({"metadata": metadata}, thread_update_params.ThreadUpdateParams),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Thread,
-        )
-
-    async def delete(
-        self,
-        thread_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ThreadDeleted:
-        """
-        Delete a thread.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not thread_id:
-            raise ValueError(f"Expected a non-empty value for `thread_id` but received {thread_id!r}")
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._delete(
-            f"/threads/{thread_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ThreadDeleted,
-        )
-
-    @overload
-    async def create_and_run(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        Create a thread and run it in one request.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          instructions: Override the default system message of the assistant. This is useful for
-              modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          thread: If no thread is provided, an empty thread will be created.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def create_and_run(
-        self,
-        *,
-        assistant_id: str,
-        stream: Literal[True],
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[AssistantStreamEvent]:
-        """
-        Create a thread and run it in one request.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          instructions: Override the default system message of the assistant. This is useful for
-              modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          thread: If no thread is provided, an empty thread will be created.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def create_and_run(
-        self,
-        *,
-        assistant_id: str,
-        stream: bool,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | AsyncStream[AssistantStreamEvent]:
-        """
-        Create a thread and run it in one request.
-
-        Args:
-          assistant_id: The ID of the
-              [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-              execute this run.
-
-          stream: If `true`, returns a stream of events that happen during the Run as server-sent
-              events, terminating when the Run enters a terminal state with a `data: [DONE]`
-              message.
-
-          instructions: Override the default system message of the assistant. This is useful for
-              modifying the behavior on a per-run basis.
-
-          metadata: Set of 16 key-value pairs that can be attached to an object. This can be useful
-              for storing additional information about the object in a structured format. Keys
-              can be a maximum of 64 characters long and values can be a maxium of 512
-              characters long.
-
-          model: The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-              be used to execute this run. If a value is provided here, it will override the
-              model associated with the assistant. If not, the model associated with the
-              assistant will be used.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-          thread: If no thread is provided, an empty thread will be created.
-
-          tools: Override the tools the assistant can use for this run. This is useful for
-              modifying the behavior on a per-run basis.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["assistant_id"], ["assistant_id", "stream"])
-    async def create_and_run(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run | AsyncStream[AssistantStreamEvent]:
-        extra_headers = {"OpenAI-Beta": "assistants=v1", **(extra_headers or {})}
-        return await self._post(
-            "/threads/runs",
-            body=await async_maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "stream": stream,
-                    "temperature": temperature,
-                    "thread": thread,
-                    "tools": tools,
-                },
-                thread_create_and_run_params.ThreadCreateAndRunParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=stream or False,
-            stream_cls=AsyncStream[AssistantStreamEvent],
-        )
-
-    async def create_and_run_poll(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        poll_interval_ms: int | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Run:
-        """
-        A helper to create a thread, start a run and then poll for a terminal state.
-        More information on Run lifecycles can be found here:
-        https://platform.openai.com/docs/assistants/how-it-works/runs-and-run-steps
-        """
-        run = await self.create_and_run(
-            assistant_id=assistant_id,
-            instructions=instructions,
-            metadata=metadata,
-            model=model,
-            temperature=temperature,
-            stream=False,
-            thread=thread,
-            tools=tools,
-            extra_headers=extra_headers,
-            extra_query=extra_query,
-            extra_body=extra_body,
-            timeout=timeout,
-        )
-        return await self.runs.poll(
-            run.id, run.thread_id, extra_headers, extra_query, extra_body, timeout, poll_interval_ms
-        )
-
-    @overload
-    def create_and_run_stream(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandler]:
-        """Create a thread and stream the run back"""
-        ...
-
-    @overload
-    def create_and_run_stream(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        event_handler: AsyncAssistantEventHandlerT,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]:
-        """Create a thread and stream the run back"""
-        ...
-
-    def create_and_run_stream(
-        self,
-        *,
-        assistant_id: str,
-        instructions: Optional[str] | NotGiven = NOT_GIVEN,
-        metadata: Optional[object] | NotGiven = NOT_GIVEN,
-        model: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        thread: thread_create_and_run_params.Thread | NotGiven = NOT_GIVEN,
-        tools: Optional[Iterable[thread_create_and_run_params.Tool]] | NotGiven = NOT_GIVEN,
-        event_handler: AsyncAssistantEventHandlerT | None = None,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> (
-        AsyncAssistantStreamManager[AsyncAssistantEventHandler]
-        | AsyncAssistantStreamManager[AsyncAssistantEventHandlerT]
-    ):
-        """Create a thread and stream the run back"""
-        extra_headers = {
-            "OpenAI-Beta": "assistants=v1",
-            "X-Stainless-Stream-Helper": "threads.create_and_run_stream",
-            "X-Stainless-Custom-Event-Handler": "true" if event_handler else "false",
-            **(extra_headers or {}),
-        }
-        request = self._post(
-            "/threads/runs",
-            body=maybe_transform(
-                {
-                    "assistant_id": assistant_id,
-                    "instructions": instructions,
-                    "metadata": metadata,
-                    "model": model,
-                    "temperature": temperature,
-                    "stream": True,
-                    "thread": thread,
-                    "tools": tools,
-                },
-                thread_create_and_run_params.ThreadCreateAndRunParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Run,
-            stream=True,
-            stream_cls=AsyncStream[AssistantStreamEvent],
-        )
-        return AsyncAssistantStreamManager(request, event_handler=event_handler or AsyncAssistantEventHandler())
-
-
-class ThreadsWithRawResponse:
-    def __init__(self, threads: Threads) -> None:
-        self._threads = threads
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            threads.create,
-        )
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            threads.retrieve,
-        )
-        self.update = _legacy_response.to_raw_response_wrapper(
-            threads.update,
-        )
-        self.delete = _legacy_response.to_raw_response_wrapper(
-            threads.delete,
-        )
-        self.create_and_run = _legacy_response.to_raw_response_wrapper(
-            threads.create_and_run,
-        )
-
-    @cached_property
-    def runs(self) -> RunsWithRawResponse:
-        return RunsWithRawResponse(self._threads.runs)
-
-    @cached_property
-    def messages(self) -> MessagesWithRawResponse:
-        return MessagesWithRawResponse(self._threads.messages)
-
-
-class AsyncThreadsWithRawResponse:
-    def __init__(self, threads: AsyncThreads) -> None:
-        self._threads = threads
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            threads.create,
-        )
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            threads.retrieve,
-        )
-        self.update = _legacy_response.async_to_raw_response_wrapper(
-            threads.update,
-        )
-        self.delete = _legacy_response.async_to_raw_response_wrapper(
-            threads.delete,
-        )
-        self.create_and_run = _legacy_response.async_to_raw_response_wrapper(
-            threads.create_and_run,
-        )
-
-    @cached_property
-    def runs(self) -> AsyncRunsWithRawResponse:
-        return AsyncRunsWithRawResponse(self._threads.runs)
-
-    @cached_property
-    def messages(self) -> AsyncMessagesWithRawResponse:
-        return AsyncMessagesWithRawResponse(self._threads.messages)
-
-
-class ThreadsWithStreamingResponse:
-    def __init__(self, threads: Threads) -> None:
-        self._threads = threads
-
-        self.create = to_streamed_response_wrapper(
-            threads.create,
-        )
-        self.retrieve = to_streamed_response_wrapper(
-            threads.retrieve,
-        )
-        self.update = to_streamed_response_wrapper(
-            threads.update,
-        )
-        self.delete = to_streamed_response_wrapper(
-            threads.delete,
-        )
-        self.create_and_run = to_streamed_response_wrapper(
-            threads.create_and_run,
-        )
-
-    @cached_property
-    def runs(self) -> RunsWithStreamingResponse:
-        return RunsWithStreamingResponse(self._threads.runs)
-
-    @cached_property
-    def messages(self) -> MessagesWithStreamingResponse:
-        return MessagesWithStreamingResponse(self._threads.messages)
-
-
-class AsyncThreadsWithStreamingResponse:
-    def __init__(self, threads: AsyncThreads) -> None:
-        self._threads = threads
-
-        self.create = async_to_streamed_response_wrapper(
-            threads.create,
-        )
-        self.retrieve = async_to_streamed_response_wrapper(
-            threads.retrieve,
-        )
-        self.update = async_to_streamed_response_wrapper(
-            threads.update,
-        )
-        self.delete = async_to_streamed_response_wrapper(
-            threads.delete,
-        )
-        self.create_and_run = async_to_streamed_response_wrapper(
-            threads.create_and_run,
-        )
-
-    @cached_property
-    def runs(self) -> AsyncRunsWithStreamingResponse:
-        return AsyncRunsWithStreamingResponse(self._threads.runs)
-
-    @cached_property
-    def messages(self) -> AsyncMessagesWithStreamingResponse:
-        return AsyncMessagesWithStreamingResponse(self._threads.messages)
diff --git a/openai/resources/chat/__init__.py b/openai/resources/chat/__init__.py
deleted file mode 100644
index 52dfdcea..00000000
--- a/openai/resources/chat/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .chat import (
-    Chat,
-    AsyncChat,
-    ChatWithRawResponse,
-    AsyncChatWithRawResponse,
-    ChatWithStreamingResponse,
-    AsyncChatWithStreamingResponse,
-)
-from .completions import (
-    Completions,
-    AsyncCompletions,
-    CompletionsWithRawResponse,
-    AsyncCompletionsWithRawResponse,
-    CompletionsWithStreamingResponse,
-    AsyncCompletionsWithStreamingResponse,
-)
-
-__all__ = [
-    "Completions",
-    "AsyncCompletions",
-    "CompletionsWithRawResponse",
-    "AsyncCompletionsWithRawResponse",
-    "CompletionsWithStreamingResponse",
-    "AsyncCompletionsWithStreamingResponse",
-    "Chat",
-    "AsyncChat",
-    "ChatWithRawResponse",
-    "AsyncChatWithRawResponse",
-    "ChatWithStreamingResponse",
-    "AsyncChatWithStreamingResponse",
-]
diff --git a/openai/resources/chat/chat.py b/openai/resources/chat/chat.py
deleted file mode 100644
index d14d0555..00000000
--- a/openai/resources/chat/chat.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from .completions import (
-    Completions,
-    AsyncCompletions,
-    CompletionsWithRawResponse,
-    AsyncCompletionsWithRawResponse,
-    CompletionsWithStreamingResponse,
-    AsyncCompletionsWithStreamingResponse,
-)
-
-__all__ = ["Chat", "AsyncChat"]
-
-
-class Chat(SyncAPIResource):
-    @cached_property
-    def completions(self) -> Completions:
-        return Completions(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> ChatWithRawResponse:
-        return ChatWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> ChatWithStreamingResponse:
-        return ChatWithStreamingResponse(self)
-
-
-class AsyncChat(AsyncAPIResource):
-    @cached_property
-    def completions(self) -> AsyncCompletions:
-        return AsyncCompletions(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AsyncChatWithRawResponse:
-        return AsyncChatWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncChatWithStreamingResponse:
-        return AsyncChatWithStreamingResponse(self)
-
-
-class ChatWithRawResponse:
-    def __init__(self, chat: Chat) -> None:
-        self._chat = chat
-
-    @cached_property
-    def completions(self) -> CompletionsWithRawResponse:
-        return CompletionsWithRawResponse(self._chat.completions)
-
-
-class AsyncChatWithRawResponse:
-    def __init__(self, chat: AsyncChat) -> None:
-        self._chat = chat
-
-    @cached_property
-    def completions(self) -> AsyncCompletionsWithRawResponse:
-        return AsyncCompletionsWithRawResponse(self._chat.completions)
-
-
-class ChatWithStreamingResponse:
-    def __init__(self, chat: Chat) -> None:
-        self._chat = chat
-
-    @cached_property
-    def completions(self) -> CompletionsWithStreamingResponse:
-        return CompletionsWithStreamingResponse(self._chat.completions)
-
-
-class AsyncChatWithStreamingResponse:
-    def __init__(self, chat: AsyncChat) -> None:
-        self._chat = chat
-
-    @cached_property
-    def completions(self) -> AsyncCompletionsWithStreamingResponse:
-        return AsyncCompletionsWithStreamingResponse(self._chat.completions)
diff --git a/openai/resources/chat/completions.py b/openai/resources/chat/completions.py
deleted file mode 100644
index 30006036..00000000
--- a/openai/resources/chat/completions.py
+++ /dev/null
@@ -1,1403 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, List, Union, Iterable, Optional, overload
-from typing_extensions import Literal
-
-import httpx
-
-from ... import _legacy_response
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
-    required_args,
-    maybe_transform,
-    async_maybe_transform,
-)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ..._streaming import Stream, AsyncStream
-from ...types.chat import (
-    ChatCompletion,
-    ChatCompletionChunk,
-    ChatCompletionToolParam,
-    ChatCompletionMessageParam,
-    ChatCompletionToolChoiceOptionParam,
-    completion_create_params,
-)
-from ..._base_client import (
-    make_request_options,
-)
-
-__all__ = ["Completions", "AsyncCompletions"]
-
-
-class Completions(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> CompletionsWithRawResponse:
-        return CompletionsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
-        return CompletionsWithStreamingResponse(self)
-
-    @overload
-    def create(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[
-            str,
-            Literal[
-                "gpt-4-0125-preview",
-                "gpt-4-turbo-preview",
-                "gpt-4-1106-preview",
-                "gpt-4-vision-preview",
-                "gpt-4",
-                "gpt-4-0314",
-                "gpt-4-0613",
-                "gpt-4-32k",
-                "gpt-4-32k-0314",
-                "gpt-4-32k-0613",
-                "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-0301",
-                "gpt-3.5-turbo-0613",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-16k-0613",
-            ],
-        ],
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion:
-        """
-        Creates a model response for the given chat conversation.
-
-        Args:
-          messages: A list of messages comprising the conversation so far.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
-
-          model: ID of the model to use. See the
-              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
-              table for details on which models work with the Chat API.
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          function_call: Deprecated in favor of `tool_choice`.
-
-              Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via `{"name": "my_function"}` forces the model to call that
-              function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          functions: Deprecated in favor of `tools`.
-
-              A list of functions the model may generate JSON inputs for.
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the
-              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
-              bias is added to the logits generated by the model prior to sampling. The exact
-              effect will vary per model, but values between -1 and 1 should decrease or
-              increase likelihood of selection; values like -100 or 100 should result in a ban
-              or exclusive selection of the relevant token.
-
-          logprobs: Whether to return log probabilities of the output tokens or not. If true,
-              returns the log probabilities of each output token returned in the `content` of
-              `message`. This option is currently not available on the `gpt-4-vision-preview`
-              model.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
-
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many chat completion choices to generate for each input message. Note that
-              you will be charged based on the number of generated tokens across all of the
-              choices. Keep `n` as `1` to minimize costs.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          response_format: An object specifying the format that the model must output. Compatible with
-              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
-              message the model generates is valid JSON.
-
-              **Important:** when using JSON mode, you **must** also instruct the model to
-              produce JSON yourself via a system or user message. Without this, the model may
-              generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in a long-running and seemingly "stuck" request. Also note that
-              the message content may be partially cut off if `finish_reason="length"`, which
-              indicates the generation exceeded `max_tokens` or the conversation exceeded the
-              max context length.
-
-          seed: This feature is in Beta. If specified, our system will make a best effort to
-              sample deterministically, such that repeated requests with the same `seed` and
-              parameters should return the same result. Determinism is not guaranteed, and you
-              should refer to the `system_fingerprint` response parameter to monitor changes
-              in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens.
-
-          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
-              sent as data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          tool_choice: Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via
-              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
-              call that function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          tools: A list of tools the model may call. Currently, only functions are supported as a
-              tool. Use this to provide a list of functions the model may generate JSON inputs
-              for. A max of 128 functions are supported.
-
-          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
-              return at each token position, each with an associated log probability.
-              `logprobs` must be set to `true` if this parameter is used.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[
-            str,
-            Literal[
-                "gpt-4-0125-preview",
-                "gpt-4-turbo-preview",
-                "gpt-4-1106-preview",
-                "gpt-4-vision-preview",
-                "gpt-4",
-                "gpt-4-0314",
-                "gpt-4-0613",
-                "gpt-4-32k",
-                "gpt-4-32k-0314",
-                "gpt-4-32k-0613",
-                "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-0301",
-                "gpt-3.5-turbo-0613",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-16k-0613",
-            ],
-        ],
-        stream: Literal[True],
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
-
-        Args:
-          messages: A list of messages comprising the conversation so far.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
-
-          model: ID of the model to use. See the
-              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
-              table for details on which models work with the Chat API.
-
-          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
-              sent as data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          function_call: Deprecated in favor of `tool_choice`.
-
-              Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via `{"name": "my_function"}` forces the model to call that
-              function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          functions: Deprecated in favor of `tools`.
-
-              A list of functions the model may generate JSON inputs for.
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the
-              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
-              bias is added to the logits generated by the model prior to sampling. The exact
-              effect will vary per model, but values between -1 and 1 should decrease or
-              increase likelihood of selection; values like -100 or 100 should result in a ban
-              or exclusive selection of the relevant token.
-
-          logprobs: Whether to return log probabilities of the output tokens or not. If true,
-              returns the log probabilities of each output token returned in the `content` of
-              `message`. This option is currently not available on the `gpt-4-vision-preview`
-              model.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
-
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many chat completion choices to generate for each input message. Note that
-              you will be charged based on the number of generated tokens across all of the
-              choices. Keep `n` as `1` to minimize costs.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          response_format: An object specifying the format that the model must output. Compatible with
-              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
-              message the model generates is valid JSON.
-
-              **Important:** when using JSON mode, you **must** also instruct the model to
-              produce JSON yourself via a system or user message. Without this, the model may
-              generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in a long-running and seemingly "stuck" request. Also note that
-              the message content may be partially cut off if `finish_reason="length"`, which
-              indicates the generation exceeded `max_tokens` or the conversation exceeded the
-              max context length.
-
-          seed: This feature is in Beta. If specified, our system will make a best effort to
-              sample deterministically, such that repeated requests with the same `seed` and
-              parameters should return the same result. Determinism is not guaranteed, and you
-              should refer to the `system_fingerprint` response parameter to monitor changes
-              in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          tool_choice: Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via
-              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
-              call that function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          tools: A list of tools the model may call. Currently, only functions are supported as a
-              tool. Use this to provide a list of functions the model may generate JSON inputs
-              for. A max of 128 functions are supported.
-
-          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
-              return at each token position, each with an associated log probability.
-              `logprobs` must be set to `true` if this parameter is used.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[
-            str,
-            Literal[
-                "gpt-4-0125-preview",
-                "gpt-4-turbo-preview",
-                "gpt-4-1106-preview",
-                "gpt-4-vision-preview",
-                "gpt-4",
-                "gpt-4-0314",
-                "gpt-4-0613",
-                "gpt-4-32k",
-                "gpt-4-32k-0314",
-                "gpt-4-32k-0613",
-                "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-0301",
-                "gpt-3.5-turbo-0613",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-16k-0613",
-            ],
-        ],
-        stream: bool,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
-
-        Args:
-          messages: A list of messages comprising the conversation so far.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
-
-          model: ID of the model to use. See the
-              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
-              table for details on which models work with the Chat API.
-
-          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
-              sent as data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          function_call: Deprecated in favor of `tool_choice`.
-
-              Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via `{"name": "my_function"}` forces the model to call that
-              function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          functions: Deprecated in favor of `tools`.
-
-              A list of functions the model may generate JSON inputs for.
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the
-              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
-              bias is added to the logits generated by the model prior to sampling. The exact
-              effect will vary per model, but values between -1 and 1 should decrease or
-              increase likelihood of selection; values like -100 or 100 should result in a ban
-              or exclusive selection of the relevant token.
-
-          logprobs: Whether to return log probabilities of the output tokens or not. If true,
-              returns the log probabilities of each output token returned in the `content` of
-              `message`. This option is currently not available on the `gpt-4-vision-preview`
-              model.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
-
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many chat completion choices to generate for each input message. Note that
-              you will be charged based on the number of generated tokens across all of the
-              choices. Keep `n` as `1` to minimize costs.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          response_format: An object specifying the format that the model must output. Compatible with
-              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
-              message the model generates is valid JSON.
-
-              **Important:** when using JSON mode, you **must** also instruct the model to
-              produce JSON yourself via a system or user message. Without this, the model may
-              generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in a long-running and seemingly "stuck" request. Also note that
-              the message content may be partially cut off if `finish_reason="length"`, which
-              indicates the generation exceeded `max_tokens` or the conversation exceeded the
-              max context length.
-
-          seed: This feature is in Beta. If specified, our system will make a best effort to
-              sample deterministically, such that repeated requests with the same `seed` and
-              parameters should return the same result. Determinism is not guaranteed, and you
-              should refer to the `system_fingerprint` response parameter to monitor changes
-              in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          tool_choice: Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via
-              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
-              call that function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          tools: A list of tools the model may call. Currently, only functions are supported as a
-              tool. Use this to provide a list of functions the model may generate JSON inputs
-              for. A max of 128 functions are supported.
-
-          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
-              return at each token position, each with an associated log probability.
-              `logprobs` must be set to `true` if this parameter is used.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["messages", "model"], ["messages", "model", "stream"])
-    def create(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[
-            str,
-            Literal[
-                "gpt-4-0125-preview",
-                "gpt-4-turbo-preview",
-                "gpt-4-1106-preview",
-                "gpt-4-vision-preview",
-                "gpt-4",
-                "gpt-4-0314",
-                "gpt-4-0613",
-                "gpt-4-32k",
-                "gpt-4-32k-0314",
-                "gpt-4-32k-0613",
-                "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-0301",
-                "gpt-3.5-turbo-0613",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-16k-0613",
-            ],
-        ],
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | Stream[ChatCompletionChunk]:
-        return self._post(
-            "/chat/completions",
-            body=maybe_transform(
-                {
-                    "messages": messages,
-                    "model": model,
-                    "frequency_penalty": frequency_penalty,
-                    "function_call": function_call,
-                    "functions": functions,
-                    "logit_bias": logit_bias,
-                    "logprobs": logprobs,
-                    "max_tokens": max_tokens,
-                    "n": n,
-                    "presence_penalty": presence_penalty,
-                    "response_format": response_format,
-                    "seed": seed,
-                    "stop": stop,
-                    "stream": stream,
-                    "temperature": temperature,
-                    "tool_choice": tool_choice,
-                    "tools": tools,
-                    "top_logprobs": top_logprobs,
-                    "top_p": top_p,
-                    "user": user,
-                },
-                completion_create_params.CompletionCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=Stream[ChatCompletionChunk],
-        )
-
-
-class AsyncCompletions(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
-        return AsyncCompletionsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
-        return AsyncCompletionsWithStreamingResponse(self)
-
-    @overload
-    async def create(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[
-            str,
-            Literal[
-                "gpt-4-0125-preview",
-                "gpt-4-turbo-preview",
-                "gpt-4-1106-preview",
-                "gpt-4-vision-preview",
-                "gpt-4",
-                "gpt-4-0314",
-                "gpt-4-0613",
-                "gpt-4-32k",
-                "gpt-4-32k-0314",
-                "gpt-4-32k-0613",
-                "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-0301",
-                "gpt-3.5-turbo-0613",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-16k-0613",
-            ],
-        ],
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion:
-        """
-        Creates a model response for the given chat conversation.
-
-        Args:
-          messages: A list of messages comprising the conversation so far.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
-
-          model: ID of the model to use. See the
-              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
-              table for details on which models work with the Chat API.
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          function_call: Deprecated in favor of `tool_choice`.
-
-              Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via `{"name": "my_function"}` forces the model to call that
-              function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          functions: Deprecated in favor of `tools`.
-
-              A list of functions the model may generate JSON inputs for.
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the
-              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
-              bias is added to the logits generated by the model prior to sampling. The exact
-              effect will vary per model, but values between -1 and 1 should decrease or
-              increase likelihood of selection; values like -100 or 100 should result in a ban
-              or exclusive selection of the relevant token.
-
-          logprobs: Whether to return log probabilities of the output tokens or not. If true,
-              returns the log probabilities of each output token returned in the `content` of
-              `message`. This option is currently not available on the `gpt-4-vision-preview`
-              model.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
-
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many chat completion choices to generate for each input message. Note that
-              you will be charged based on the number of generated tokens across all of the
-              choices. Keep `n` as `1` to minimize costs.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          response_format: An object specifying the format that the model must output. Compatible with
-              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
-              message the model generates is valid JSON.
-
-              **Important:** when using JSON mode, you **must** also instruct the model to
-              produce JSON yourself via a system or user message. Without this, the model may
-              generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in a long-running and seemingly "stuck" request. Also note that
-              the message content may be partially cut off if `finish_reason="length"`, which
-              indicates the generation exceeded `max_tokens` or the conversation exceeded the
-              max context length.
-
-          seed: This feature is in Beta. If specified, our system will make a best effort to
-              sample deterministically, such that repeated requests with the same `seed` and
-              parameters should return the same result. Determinism is not guaranteed, and you
-              should refer to the `system_fingerprint` response parameter to monitor changes
-              in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens.
-
-          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
-              sent as data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          tool_choice: Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via
-              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
-              call that function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          tools: A list of tools the model may call. Currently, only functions are supported as a
-              tool. Use this to provide a list of functions the model may generate JSON inputs
-              for. A max of 128 functions are supported.
-
-          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
-              return at each token position, each with an associated log probability.
-              `logprobs` must be set to `true` if this parameter is used.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[
-            str,
-            Literal[
-                "gpt-4-0125-preview",
-                "gpt-4-turbo-preview",
-                "gpt-4-1106-preview",
-                "gpt-4-vision-preview",
-                "gpt-4",
-                "gpt-4-0314",
-                "gpt-4-0613",
-                "gpt-4-32k",
-                "gpt-4-32k-0314",
-                "gpt-4-32k-0613",
-                "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-0301",
-                "gpt-3.5-turbo-0613",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-16k-0613",
-            ],
-        ],
-        stream: Literal[True],
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
-
-        Args:
-          messages: A list of messages comprising the conversation so far.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
-
-          model: ID of the model to use. See the
-              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
-              table for details on which models work with the Chat API.
-
-          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
-              sent as data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          function_call: Deprecated in favor of `tool_choice`.
-
-              Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via `{"name": "my_function"}` forces the model to call that
-              function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          functions: Deprecated in favor of `tools`.
-
-              A list of functions the model may generate JSON inputs for.
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the
-              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
-              bias is added to the logits generated by the model prior to sampling. The exact
-              effect will vary per model, but values between -1 and 1 should decrease or
-              increase likelihood of selection; values like -100 or 100 should result in a ban
-              or exclusive selection of the relevant token.
-
-          logprobs: Whether to return log probabilities of the output tokens or not. If true,
-              returns the log probabilities of each output token returned in the `content` of
-              `message`. This option is currently not available on the `gpt-4-vision-preview`
-              model.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
-
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many chat completion choices to generate for each input message. Note that
-              you will be charged based on the number of generated tokens across all of the
-              choices. Keep `n` as `1` to minimize costs.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          response_format: An object specifying the format that the model must output. Compatible with
-              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
-              message the model generates is valid JSON.
-
-              **Important:** when using JSON mode, you **must** also instruct the model to
-              produce JSON yourself via a system or user message. Without this, the model may
-              generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in a long-running and seemingly "stuck" request. Also note that
-              the message content may be partially cut off if `finish_reason="length"`, which
-              indicates the generation exceeded `max_tokens` or the conversation exceeded the
-              max context length.
-
-          seed: This feature is in Beta. If specified, our system will make a best effort to
-              sample deterministically, such that repeated requests with the same `seed` and
-              parameters should return the same result. Determinism is not guaranteed, and you
-              should refer to the `system_fingerprint` response parameter to monitor changes
-              in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          tool_choice: Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via
-              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
-              call that function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          tools: A list of tools the model may call. Currently, only functions are supported as a
-              tool. Use this to provide a list of functions the model may generate JSON inputs
-              for. A max of 128 functions are supported.
-
-          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
-              return at each token position, each with an associated log probability.
-              `logprobs` must be set to `true` if this parameter is used.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[
-            str,
-            Literal[
-                "gpt-4-0125-preview",
-                "gpt-4-turbo-preview",
-                "gpt-4-1106-preview",
-                "gpt-4-vision-preview",
-                "gpt-4",
-                "gpt-4-0314",
-                "gpt-4-0613",
-                "gpt-4-32k",
-                "gpt-4-32k-0314",
-                "gpt-4-32k-0613",
-                "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-0301",
-                "gpt-3.5-turbo-0613",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-16k-0613",
-            ],
-        ],
-        stream: bool,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-        """
-        Creates a model response for the given chat conversation.
-
-        Args:
-          messages: A list of messages comprising the conversation so far.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
-
-          model: ID of the model to use. See the
-              [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
-              table for details on which models work with the Chat API.
-
-          stream: If set, partial message deltas will be sent, like in ChatGPT. Tokens will be
-              sent as data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          function_call: Deprecated in favor of `tool_choice`.
-
-              Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via `{"name": "my_function"}` forces the model to call that
-              function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          functions: Deprecated in favor of `tools`.
-
-              A list of functions the model may generate JSON inputs for.
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the
-              tokenizer) to an associated bias value from -100 to 100. Mathematically, the
-              bias is added to the logits generated by the model prior to sampling. The exact
-              effect will vary per model, but values between -1 and 1 should decrease or
-              increase likelihood of selection; values like -100 or 100 should result in a ban
-              or exclusive selection of the relevant token.
-
-          logprobs: Whether to return log probabilities of the output tokens or not. If true,
-              returns the log probabilities of each output token returned in the `content` of
-              `message`. This option is currently not available on the `gpt-4-vision-preview`
-              model.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the chat
-              completion.
-
-              The total length of input tokens and generated tokens is limited by the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many chat completion choices to generate for each input message. Note that
-              you will be charged based on the number of generated tokens across all of the
-              choices. Keep `n` as `1` to minimize costs.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          response_format: An object specifying the format that the model must output. Compatible with
-              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-              all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-
-              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
-              message the model generates is valid JSON.
-
-              **Important:** when using JSON mode, you **must** also instruct the model to
-              produce JSON yourself via a system or user message. Without this, the model may
-              generate an unending stream of whitespace until the generation reaches the token
-              limit, resulting in a long-running and seemingly "stuck" request. Also note that
-              the message content may be partially cut off if `finish_reason="length"`, which
-              indicates the generation exceeded `max_tokens` or the conversation exceeded the
-              max context length.
-
-          seed: This feature is in Beta. If specified, our system will make a best effort to
-              sample deterministically, such that repeated requests with the same `seed` and
-              parameters should return the same result. Determinism is not guaranteed, and you
-              should refer to the `system_fingerprint` response parameter to monitor changes
-              in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          tool_choice: Controls which (if any) function is called by the model. `none` means the model
-              will not call a function and instead generates a message. `auto` means the model
-              can pick between generating a message or calling a function. Specifying a
-              particular function via
-              `{"type": "function", "function": {"name": "my_function"}}` forces the model to
-              call that function.
-
-              `none` is the default when no functions are present. `auto` is the default if
-              functions are present.
-
-          tools: A list of tools the model may call. Currently, only functions are supported as a
-              tool. Use this to provide a list of functions the model may generate JSON inputs
-              for. A max of 128 functions are supported.
-
-          top_logprobs: An integer between 0 and 20 specifying the number of most likely tokens to
-              return at each token position, each with an associated log probability.
-              `logprobs` must be set to `true` if this parameter is used.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["messages", "model"], ["messages", "model", "stream"])
-    async def create(
-        self,
-        *,
-        messages: Iterable[ChatCompletionMessageParam],
-        model: Union[
-            str,
-            Literal[
-                "gpt-4-0125-preview",
-                "gpt-4-turbo-preview",
-                "gpt-4-1106-preview",
-                "gpt-4-vision-preview",
-                "gpt-4",
-                "gpt-4-0314",
-                "gpt-4-0613",
-                "gpt-4-32k",
-                "gpt-4-32k-0314",
-                "gpt-4-32k-0613",
-                "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-0301",
-                "gpt-3.5-turbo-0613",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-16k-0613",
-            ],
-        ],
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
-        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        response_format: completion_create_params.ResponseFormat | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str]] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
-        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
-        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-        return await self._post(
-            "/chat/completions",
-            body=await async_maybe_transform(
-                {
-                    "messages": messages,
-                    "model": model,
-                    "frequency_penalty": frequency_penalty,
-                    "function_call": function_call,
-                    "functions": functions,
-                    "logit_bias": logit_bias,
-                    "logprobs": logprobs,
-                    "max_tokens": max_tokens,
-                    "n": n,
-                    "presence_penalty": presence_penalty,
-                    "response_format": response_format,
-                    "seed": seed,
-                    "stop": stop,
-                    "stream": stream,
-                    "temperature": temperature,
-                    "tool_choice": tool_choice,
-                    "tools": tools,
-                    "top_logprobs": top_logprobs,
-                    "top_p": top_p,
-                    "user": user,
-                },
-                completion_create_params.CompletionCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ChatCompletion,
-            stream=stream or False,
-            stream_cls=AsyncStream[ChatCompletionChunk],
-        )
-
-
-class CompletionsWithRawResponse:
-    def __init__(self, completions: Completions) -> None:
-        self._completions = completions
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            completions.create,
-        )
-
-
-class AsyncCompletionsWithRawResponse:
-    def __init__(self, completions: AsyncCompletions) -> None:
-        self._completions = completions
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            completions.create,
-        )
-
-
-class CompletionsWithStreamingResponse:
-    def __init__(self, completions: Completions) -> None:
-        self._completions = completions
-
-        self.create = to_streamed_response_wrapper(
-            completions.create,
-        )
-
-
-class AsyncCompletionsWithStreamingResponse:
-    def __init__(self, completions: AsyncCompletions) -> None:
-        self._completions = completions
-
-        self.create = async_to_streamed_response_wrapper(
-            completions.create,
-        )
diff --git a/openai/resources/completions.py b/openai/resources/completions.py
deleted file mode 100644
index db87c83c..00000000
--- a/openai/resources/completions.py
+++ /dev/null
@@ -1,1102 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, List, Union, Iterable, Optional, overload
-from typing_extensions import Literal
-
-import httpx
-
-from .. import _legacy_response
-from ..types import Completion, completion_create_params
-from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
-    required_args,
-    maybe_transform,
-    async_maybe_transform,
-)
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from .._streaming import Stream, AsyncStream
-from .._base_client import (
-    make_request_options,
-)
-
-__all__ = ["Completions", "AsyncCompletions"]
-
-
-class Completions(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> CompletionsWithRawResponse:
-        return CompletionsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
-        return CompletionsWithStreamingResponse(self)
-
-    @overload
-    def create(
-        self,
-        *,
-        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
-        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
-        best_of: Optional[int] | NotGiven = NOT_GIVEN,
-        echo: Optional[bool] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Completion:
-        """
-        Creates a completion for the provided prompt and parameters.
-
-        Args:
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          prompt: The prompt(s) to generate completions for, encoded as a string, array of
-              strings, array of tokens, or array of token arrays.
-
-              Note that <|endoftext|> is the document separator that the model sees during
-              training, so if a prompt is not specified the model will generate as if from the
-              beginning of a new document.
-
-          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
-              the highest log probability per token). Results cannot be streamed.
-
-              When used with `n`, `best_of` controls the number of candidate completions and
-              `n` specifies how many to return – `best_of` must be greater than `n`.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          echo: Echo back the prompt in addition to the completion
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
-              tokenizer) to an associated bias value from -100 to 100. You can use this
-              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
-              Mathematically, the bias is added to the logits generated by the model prior to
-              sampling. The exact effect will vary per model, but values between -1 and 1
-              should decrease or increase likelihood of selection; values like -100 or 100
-              should result in a ban or exclusive selection of the relevant token.
-
-              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
-              from being generated.
-
-          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
-              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
-              list of the 5 most likely tokens. The API will always return the `logprob` of
-              the sampled token, so there may be up to `logprobs+1` elements in the response.
-
-              The maximum value for `logprobs` is 5.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
-              completion.
-
-              The token count of your prompt plus `max_tokens` cannot exceed the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many completions to generate for each prompt.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          seed: If specified, our system will make a best effort to sample deterministically,
-              such that repeated requests with the same `seed` and parameters should return
-              the same result.
-
-              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
-              response parameter to monitor changes in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
-              returned text will not contain the stop sequence.
-
-          stream: Whether to stream back partial progress. If set, tokens will be sent as
-              data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          suffix: The suffix that comes after a completion of inserted text.
-
-              This parameter is only supported for `gpt-3.5-turbo-instruct`.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
-        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
-        stream: Literal[True],
-        best_of: Optional[int] | NotGiven = NOT_GIVEN,
-        echo: Optional[bool] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Stream[Completion]:
-        """
-        Creates a completion for the provided prompt and parameters.
-
-        Args:
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          prompt: The prompt(s) to generate completions for, encoded as a string, array of
-              strings, array of tokens, or array of token arrays.
-
-              Note that <|endoftext|> is the document separator that the model sees during
-              training, so if a prompt is not specified the model will generate as if from the
-              beginning of a new document.
-
-          stream: Whether to stream back partial progress. If set, tokens will be sent as
-              data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
-              the highest log probability per token). Results cannot be streamed.
-
-              When used with `n`, `best_of` controls the number of candidate completions and
-              `n` specifies how many to return – `best_of` must be greater than `n`.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          echo: Echo back the prompt in addition to the completion
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
-              tokenizer) to an associated bias value from -100 to 100. You can use this
-              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
-              Mathematically, the bias is added to the logits generated by the model prior to
-              sampling. The exact effect will vary per model, but values between -1 and 1
-              should decrease or increase likelihood of selection; values like -100 or 100
-              should result in a ban or exclusive selection of the relevant token.
-
-              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
-              from being generated.
-
-          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
-              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
-              list of the 5 most likely tokens. The API will always return the `logprob` of
-              the sampled token, so there may be up to `logprobs+1` elements in the response.
-
-              The maximum value for `logprobs` is 5.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
-              completion.
-
-              The token count of your prompt plus `max_tokens` cannot exceed the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many completions to generate for each prompt.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          seed: If specified, our system will make a best effort to sample deterministically,
-              such that repeated requests with the same `seed` and parameters should return
-              the same result.
-
-              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
-              response parameter to monitor changes in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
-              returned text will not contain the stop sequence.
-
-          suffix: The suffix that comes after a completion of inserted text.
-
-              This parameter is only supported for `gpt-3.5-turbo-instruct`.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    def create(
-        self,
-        *,
-        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
-        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
-        stream: bool,
-        best_of: Optional[int] | NotGiven = NOT_GIVEN,
-        echo: Optional[bool] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Completion | Stream[Completion]:
-        """
-        Creates a completion for the provided prompt and parameters.
-
-        Args:
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          prompt: The prompt(s) to generate completions for, encoded as a string, array of
-              strings, array of tokens, or array of token arrays.
-
-              Note that <|endoftext|> is the document separator that the model sees during
-              training, so if a prompt is not specified the model will generate as if from the
-              beginning of a new document.
-
-          stream: Whether to stream back partial progress. If set, tokens will be sent as
-              data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
-              the highest log probability per token). Results cannot be streamed.
-
-              When used with `n`, `best_of` controls the number of candidate completions and
-              `n` specifies how many to return – `best_of` must be greater than `n`.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          echo: Echo back the prompt in addition to the completion
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
-              tokenizer) to an associated bias value from -100 to 100. You can use this
-              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
-              Mathematically, the bias is added to the logits generated by the model prior to
-              sampling. The exact effect will vary per model, but values between -1 and 1
-              should decrease or increase likelihood of selection; values like -100 or 100
-              should result in a ban or exclusive selection of the relevant token.
-
-              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
-              from being generated.
-
-          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
-              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
-              list of the 5 most likely tokens. The API will always return the `logprob` of
-              the sampled token, so there may be up to `logprobs+1` elements in the response.
-
-              The maximum value for `logprobs` is 5.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
-              completion.
-
-              The token count of your prompt plus `max_tokens` cannot exceed the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many completions to generate for each prompt.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          seed: If specified, our system will make a best effort to sample deterministically,
-              such that repeated requests with the same `seed` and parameters should return
-              the same result.
-
-              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
-              response parameter to monitor changes in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
-              returned text will not contain the stop sequence.
-
-          suffix: The suffix that comes after a completion of inserted text.
-
-              This parameter is only supported for `gpt-3.5-turbo-instruct`.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["model", "prompt"], ["model", "prompt", "stream"])
-    def create(
-        self,
-        *,
-        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
-        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
-        best_of: Optional[int] | NotGiven = NOT_GIVEN,
-        echo: Optional[bool] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Completion | Stream[Completion]:
-        return self._post(
-            "/completions",
-            body=maybe_transform(
-                {
-                    "model": model,
-                    "prompt": prompt,
-                    "best_of": best_of,
-                    "echo": echo,
-                    "frequency_penalty": frequency_penalty,
-                    "logit_bias": logit_bias,
-                    "logprobs": logprobs,
-                    "max_tokens": max_tokens,
-                    "n": n,
-                    "presence_penalty": presence_penalty,
-                    "seed": seed,
-                    "stop": stop,
-                    "stream": stream,
-                    "suffix": suffix,
-                    "temperature": temperature,
-                    "top_p": top_p,
-                    "user": user,
-                },
-                completion_create_params.CompletionCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Completion,
-            stream=stream or False,
-            stream_cls=Stream[Completion],
-        )
-
-
-class AsyncCompletions(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
-        return AsyncCompletionsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
-        return AsyncCompletionsWithStreamingResponse(self)
-
-    @overload
-    async def create(
-        self,
-        *,
-        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
-        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
-        best_of: Optional[int] | NotGiven = NOT_GIVEN,
-        echo: Optional[bool] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Completion:
-        """
-        Creates a completion for the provided prompt and parameters.
-
-        Args:
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          prompt: The prompt(s) to generate completions for, encoded as a string, array of
-              strings, array of tokens, or array of token arrays.
-
-              Note that <|endoftext|> is the document separator that the model sees during
-              training, so if a prompt is not specified the model will generate as if from the
-              beginning of a new document.
-
-          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
-              the highest log probability per token). Results cannot be streamed.
-
-              When used with `n`, `best_of` controls the number of candidate completions and
-              `n` specifies how many to return – `best_of` must be greater than `n`.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          echo: Echo back the prompt in addition to the completion
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
-              tokenizer) to an associated bias value from -100 to 100. You can use this
-              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
-              Mathematically, the bias is added to the logits generated by the model prior to
-              sampling. The exact effect will vary per model, but values between -1 and 1
-              should decrease or increase likelihood of selection; values like -100 or 100
-              should result in a ban or exclusive selection of the relevant token.
-
-              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
-              from being generated.
-
-          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
-              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
-              list of the 5 most likely tokens. The API will always return the `logprob` of
-              the sampled token, so there may be up to `logprobs+1` elements in the response.
-
-              The maximum value for `logprobs` is 5.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
-              completion.
-
-              The token count of your prompt plus `max_tokens` cannot exceed the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many completions to generate for each prompt.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          seed: If specified, our system will make a best effort to sample deterministically,
-              such that repeated requests with the same `seed` and parameters should return
-              the same result.
-
-              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
-              response parameter to monitor changes in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
-              returned text will not contain the stop sequence.
-
-          stream: Whether to stream back partial progress. If set, tokens will be sent as
-              data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          suffix: The suffix that comes after a completion of inserted text.
-
-              This parameter is only supported for `gpt-3.5-turbo-instruct`.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
-        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
-        stream: Literal[True],
-        best_of: Optional[int] | NotGiven = NOT_GIVEN,
-        echo: Optional[bool] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncStream[Completion]:
-        """
-        Creates a completion for the provided prompt and parameters.
-
-        Args:
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          prompt: The prompt(s) to generate completions for, encoded as a string, array of
-              strings, array of tokens, or array of token arrays.
-
-              Note that <|endoftext|> is the document separator that the model sees during
-              training, so if a prompt is not specified the model will generate as if from the
-              beginning of a new document.
-
-          stream: Whether to stream back partial progress. If set, tokens will be sent as
-              data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
-              the highest log probability per token). Results cannot be streamed.
-
-              When used with `n`, `best_of` controls the number of candidate completions and
-              `n` specifies how many to return – `best_of` must be greater than `n`.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          echo: Echo back the prompt in addition to the completion
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
-              tokenizer) to an associated bias value from -100 to 100. You can use this
-              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
-              Mathematically, the bias is added to the logits generated by the model prior to
-              sampling. The exact effect will vary per model, but values between -1 and 1
-              should decrease or increase likelihood of selection; values like -100 or 100
-              should result in a ban or exclusive selection of the relevant token.
-
-              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
-              from being generated.
-
-          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
-              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
-              list of the 5 most likely tokens. The API will always return the `logprob` of
-              the sampled token, so there may be up to `logprobs+1` elements in the response.
-
-              The maximum value for `logprobs` is 5.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
-              completion.
-
-              The token count of your prompt plus `max_tokens` cannot exceed the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many completions to generate for each prompt.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          seed: If specified, our system will make a best effort to sample deterministically,
-              such that repeated requests with the same `seed` and parameters should return
-              the same result.
-
-              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
-              response parameter to monitor changes in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
-              returned text will not contain the stop sequence.
-
-          suffix: The suffix that comes after a completion of inserted text.
-
-              This parameter is only supported for `gpt-3.5-turbo-instruct`.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @overload
-    async def create(
-        self,
-        *,
-        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
-        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
-        stream: bool,
-        best_of: Optional[int] | NotGiven = NOT_GIVEN,
-        echo: Optional[bool] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Completion | AsyncStream[Completion]:
-        """
-        Creates a completion for the provided prompt and parameters.
-
-        Args:
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          prompt: The prompt(s) to generate completions for, encoded as a string, array of
-              strings, array of tokens, or array of token arrays.
-
-              Note that <|endoftext|> is the document separator that the model sees during
-              training, so if a prompt is not specified the model will generate as if from the
-              beginning of a new document.
-
-          stream: Whether to stream back partial progress. If set, tokens will be sent as
-              data-only
-              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-              as they become available, with the stream terminated by a `data: [DONE]`
-              message.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-
-          best_of: Generates `best_of` completions server-side and returns the "best" (the one with
-              the highest log probability per token). Results cannot be streamed.
-
-              When used with `n`, `best_of` controls the number of candidate completions and
-              `n` specifies how many to return – `best_of` must be greater than `n`.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          echo: Echo back the prompt in addition to the completion
-
-          frequency_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on their
-              existing frequency in the text so far, decreasing the model's likelihood to
-              repeat the same line verbatim.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          logit_bias: Modify the likelihood of specified tokens appearing in the completion.
-
-              Accepts a JSON object that maps tokens (specified by their token ID in the GPT
-              tokenizer) to an associated bias value from -100 to 100. You can use this
-              [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
-              Mathematically, the bias is added to the logits generated by the model prior to
-              sampling. The exact effect will vary per model, but values between -1 and 1
-              should decrease or increase likelihood of selection; values like -100 or 100
-              should result in a ban or exclusive selection of the relevant token.
-
-              As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
-              from being generated.
-
-          logprobs: Include the log probabilities on the `logprobs` most likely output tokens, as
-              well the chosen tokens. For example, if `logprobs` is 5, the API will return a
-              list of the 5 most likely tokens. The API will always return the `logprob` of
-              the sampled token, so there may be up to `logprobs+1` elements in the response.
-
-              The maximum value for `logprobs` is 5.
-
-          max_tokens: The maximum number of [tokens](/tokenizer) that can be generated in the
-              completion.
-
-              The token count of your prompt plus `max_tokens` cannot exceed the model's
-              context length.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          n: How many completions to generate for each prompt.
-
-              **Note:** Because this parameter generates many completions, it can quickly
-              consume your token quota. Use carefully and ensure that you have reasonable
-              settings for `max_tokens` and `stop`.
-
-          presence_penalty: Number between -2.0 and 2.0. Positive values penalize new tokens based on
-              whether they appear in the text so far, increasing the model's likelihood to
-              talk about new topics.
-
-              [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-
-          seed: If specified, our system will make a best effort to sample deterministically,
-              such that repeated requests with the same `seed` and parameters should return
-              the same result.
-
-              Determinism is not guaranteed, and you should refer to the `system_fingerprint`
-              response parameter to monitor changes in the backend.
-
-          stop: Up to 4 sequences where the API will stop generating further tokens. The
-              returned text will not contain the stop sequence.
-
-          suffix: The suffix that comes after a completion of inserted text.
-
-              This parameter is only supported for `gpt-3.5-turbo-instruct`.
-
-          temperature: What sampling temperature to use, between 0 and 2. Higher values like 0.8 will
-              make the output more random, while lower values like 0.2 will make it more
-              focused and deterministic.
-
-              We generally recommend altering this or `top_p` but not both.
-
-          top_p: An alternative to sampling with temperature, called nucleus sampling, where the
-              model considers the results of the tokens with top_p probability mass. So 0.1
-              means only the tokens comprising the top 10% probability mass are considered.
-
-              We generally recommend altering this or `temperature` but not both.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        ...
-
-    @required_args(["model", "prompt"], ["model", "prompt", "stream"])
-    async def create(
-        self,
-        *,
-        model: Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]],
-        prompt: Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None],
-        best_of: Optional[int] | NotGiven = NOT_GIVEN,
-        echo: Optional[bool] | NotGiven = NOT_GIVEN,
-        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
-        logprobs: Optional[int] | NotGiven = NOT_GIVEN,
-        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
-        seed: Optional[int] | NotGiven = NOT_GIVEN,
-        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
-        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        temperature: Optional[float] | NotGiven = NOT_GIVEN,
-        top_p: Optional[float] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Completion | AsyncStream[Completion]:
-        return await self._post(
-            "/completions",
-            body=await async_maybe_transform(
-                {
-                    "model": model,
-                    "prompt": prompt,
-                    "best_of": best_of,
-                    "echo": echo,
-                    "frequency_penalty": frequency_penalty,
-                    "logit_bias": logit_bias,
-                    "logprobs": logprobs,
-                    "max_tokens": max_tokens,
-                    "n": n,
-                    "presence_penalty": presence_penalty,
-                    "seed": seed,
-                    "stop": stop,
-                    "stream": stream,
-                    "suffix": suffix,
-                    "temperature": temperature,
-                    "top_p": top_p,
-                    "user": user,
-                },
-                completion_create_params.CompletionCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Completion,
-            stream=stream or False,
-            stream_cls=AsyncStream[Completion],
-        )
-
-
-class CompletionsWithRawResponse:
-    def __init__(self, completions: Completions) -> None:
-        self._completions = completions
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            completions.create,
-        )
-
-
-class AsyncCompletionsWithRawResponse:
-    def __init__(self, completions: AsyncCompletions) -> None:
-        self._completions = completions
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            completions.create,
-        )
-
-
-class CompletionsWithStreamingResponse:
-    def __init__(self, completions: Completions) -> None:
-        self._completions = completions
-
-        self.create = to_streamed_response_wrapper(
-            completions.create,
-        )
-
-
-class AsyncCompletionsWithStreamingResponse:
-    def __init__(self, completions: AsyncCompletions) -> None:
-        self._completions = completions
-
-        self.create = async_to_streamed_response_wrapper(
-            completions.create,
-        )
diff --git a/openai/resources/embeddings.py b/openai/resources/embeddings.py
deleted file mode 100644
index a083b626..00000000
--- a/openai/resources/embeddings.py
+++ /dev/null
@@ -1,261 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import base64
-from typing import List, Union, Iterable, cast
-from typing_extensions import Literal
-
-import httpx
-
-from .. import _legacy_response
-from ..types import CreateEmbeddingResponse, embedding_create_params
-from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import is_given, maybe_transform
-from .._compat import cached_property
-from .._extras import numpy as np, has_numpy
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from .._base_client import (
-    make_request_options,
-)
-
-__all__ = ["Embeddings", "AsyncEmbeddings"]
-
-
-class Embeddings(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> EmbeddingsWithRawResponse:
-        return EmbeddingsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> EmbeddingsWithStreamingResponse:
-        return EmbeddingsWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]],
-        model: Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]],
-        dimensions: int | NotGiven = NOT_GIVEN,
-        encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> CreateEmbeddingResponse:
-        """
-        Creates an embedding vector representing the input text.
-
-        Args:
-          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
-              inputs in a single request, pass an array of strings or array of token arrays.
-              The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
-              dimensions or less.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          dimensions: The number of dimensions the resulting output embeddings should have. Only
-              supported in `text-embedding-3` and later models.
-
-          encoding_format: The format to return the embeddings in. Can be either `float` or
-              [`base64`](https://pypi.org/project/pybase64/).
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        params = {
-            "input": input,
-            "model": model,
-            "user": user,
-            "dimensions": dimensions,
-            "encoding_format": encoding_format,
-        }
-        if not is_given(encoding_format) and has_numpy():
-            params["encoding_format"] = "base64"
-
-        def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse:
-            if is_given(encoding_format):
-                # don't modify the response object if a user explicitly asked for a format
-                return obj
-
-            for embedding in obj.data:
-                data = cast(object, embedding.embedding)
-                if not isinstance(data, str):
-                    # numpy is not installed / base64 optimisation isn't enabled for this model yet
-                    continue
-
-                embedding.embedding = np.frombuffer(  # type: ignore[no-untyped-call]
-                    base64.b64decode(data), dtype="float32"
-                ).tolist()
-
-            return obj
-
-        return self._post(
-            "/embeddings",
-            body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams),
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                post_parser=parser,
-            ),
-            cast_to=CreateEmbeddingResponse,
-        )
-
-
-class AsyncEmbeddings(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncEmbeddingsWithRawResponse:
-        return AsyncEmbeddingsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncEmbeddingsWithStreamingResponse:
-        return AsyncEmbeddingsWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        input: Union[str, List[str], Iterable[int], Iterable[Iterable[int]]],
-        model: Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]],
-        dimensions: int | NotGiven = NOT_GIVEN,
-        encoding_format: Literal["float", "base64"] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> CreateEmbeddingResponse:
-        """
-        Creates an embedding vector representing the input text.
-
-        Args:
-          input: Input text to embed, encoded as a string or array of tokens. To embed multiple
-              inputs in a single request, pass an array of strings or array of token arrays.
-              The input must not exceed the max input tokens for the model (8192 tokens for
-              `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
-              dimensions or less.
-              [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
-
-          model: ID of the model to use. You can use the
-              [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-              see all of your available models, or see our
-              [Model overview](https://platform.openai.com/docs/models/overview) for
-              descriptions of them.
-
-          dimensions: The number of dimensions the resulting output embeddings should have. Only
-              supported in `text-embedding-3` and later models.
-
-          encoding_format: The format to return the embeddings in. Can be either `float` or
-              [`base64`](https://pypi.org/project/pybase64/).
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        params = {
-            "input": input,
-            "model": model,
-            "user": user,
-            "dimensions": dimensions,
-            "encoding_format": encoding_format,
-        }
-        if not is_given(encoding_format) and has_numpy():
-            params["encoding_format"] = "base64"
-
-        def parser(obj: CreateEmbeddingResponse) -> CreateEmbeddingResponse:
-            if is_given(encoding_format):
-                # don't modify the response object if a user explicitly asked for a format
-                return obj
-
-            for embedding in obj.data:
-                data = cast(object, embedding.embedding)
-                if not isinstance(data, str):
-                    # numpy is not installed / base64 optimisation isn't enabled for this model yet
-                    continue
-
-                embedding.embedding = np.frombuffer(  # type: ignore[no-untyped-call]
-                    base64.b64decode(data), dtype="float32"
-                ).tolist()
-
-            return obj
-
-        return await self._post(
-            "/embeddings",
-            body=maybe_transform(params, embedding_create_params.EmbeddingCreateParams),
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                post_parser=parser,
-            ),
-            cast_to=CreateEmbeddingResponse,
-        )
-
-
-class EmbeddingsWithRawResponse:
-    def __init__(self, embeddings: Embeddings) -> None:
-        self._embeddings = embeddings
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            embeddings.create,
-        )
-
-
-class AsyncEmbeddingsWithRawResponse:
-    def __init__(self, embeddings: AsyncEmbeddings) -> None:
-        self._embeddings = embeddings
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            embeddings.create,
-        )
-
-
-class EmbeddingsWithStreamingResponse:
-    def __init__(self, embeddings: Embeddings) -> None:
-        self._embeddings = embeddings
-
-        self.create = to_streamed_response_wrapper(
-            embeddings.create,
-        )
-
-
-class AsyncEmbeddingsWithStreamingResponse:
-    def __init__(self, embeddings: AsyncEmbeddings) -> None:
-        self._embeddings = embeddings
-
-        self.create = async_to_streamed_response_wrapper(
-            embeddings.create,
-        )
diff --git a/openai/resources/files.py b/openai/resources/files.py
deleted file mode 100644
index 33860ada..00000000
--- a/openai/resources/files.py
+++ /dev/null
@@ -1,689 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import time
-import typing_extensions
-from typing import Mapping, cast
-from typing_extensions import Literal
-
-import httpx
-
-from .. import _legacy_response
-from ..types import FileObject, FileDeleted, file_list_params, file_create_params
-from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from .._utils import (
-    extract_files,
-    maybe_transform,
-    deepcopy_minimal,
-    async_maybe_transform,
-)
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import (
-    StreamedBinaryAPIResponse,
-    AsyncStreamedBinaryAPIResponse,
-    to_streamed_response_wrapper,
-    async_to_streamed_response_wrapper,
-    to_custom_streamed_response_wrapper,
-    async_to_custom_streamed_response_wrapper,
-)
-from ..pagination import SyncPage, AsyncPage
-from .._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-
-__all__ = ["Files", "AsyncFiles"]
-
-
-class Files(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> FilesWithRawResponse:
-        return FilesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> FilesWithStreamingResponse:
-        return FilesWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        file: FileTypes,
-        purpose: Literal["fine-tune", "assistants"],
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FileObject:
-        """Upload a file that can be used across various endpoints.
-
-        The size of all the
-        files uploaded by one organization can be up to 100 GB.
-
-        The size of individual files can be a maximum of 512 MB or 2 million tokens for
-        Assistants. See the
-        [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to
-        learn more about the types of files supported. The Fine-tuning API only supports
-        `.jsonl` files.
-
-        Please [contact us](https://help.openai.com/) if you need to increase these
-        storage limits.
-
-        Args:
-          file: The File object (not file name) to be uploaded.
-
-          purpose: The intended purpose of the uploaded file.
-
-              Use "fine-tune" for
-              [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and
-              "assistants" for
-              [Assistants](https://platform.openai.com/docs/api-reference/assistants) and
-              [Messages](https://platform.openai.com/docs/api-reference/messages). This allows
-              us to validate the format of the uploaded file is correct for fine-tuning.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "file": file,
-                "purpose": purpose,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return self._post(
-            "/files",
-            body=maybe_transform(body, file_create_params.FileCreateParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FileObject,
-        )
-
-    def retrieve(
-        self,
-        file_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FileObject:
-        """
-        Returns information about a specific file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        return self._get(
-            f"/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FileObject,
-        )
-
-    def list(
-        self,
-        *,
-        purpose: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncPage[FileObject]:
-        """
-        Returns a list of files that belong to the user's organization.
-
-        Args:
-          purpose: Only return files with the given purpose.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return self._get_api_list(
-            "/files",
-            page=SyncPage[FileObject],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform({"purpose": purpose}, file_list_params.FileListParams),
-            ),
-            model=FileObject,
-        )
-
-    def delete(
-        self,
-        file_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FileDeleted:
-        """
-        Delete a file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        return self._delete(
-            f"/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FileDeleted,
-        )
-
-    def content(
-        self,
-        file_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> _legacy_response.HttpxBinaryResponseContent:
-        """
-        Returns the contents of the specified file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
-        return self._get(
-            f"/files/{file_id}/content",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=_legacy_response.HttpxBinaryResponseContent,
-        )
-
-    @typing_extensions.deprecated("The `.content()` method should be used instead")
-    def retrieve_content(
-        self,
-        file_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> str:
-        """
-        Returns the contents of the specified file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        return self._get(
-            f"/files/{file_id}/content",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=str,
-        )
-
-    def wait_for_processing(
-        self,
-        id: str,
-        *,
-        poll_interval: float = 5.0,
-        max_wait_seconds: float = 30 * 60,
-    ) -> FileObject:
-        """Waits for the given file to be processed, default timeout is 30 mins."""
-        TERMINAL_STATES = {"processed", "error", "deleted"}
-
-        start = time.time()
-        file = self.retrieve(id)
-        while file.status not in TERMINAL_STATES:
-            self._sleep(poll_interval)
-
-            file = self.retrieve(id)
-            if time.time() - start > max_wait_seconds:
-                raise RuntimeError(
-                    f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds."
-                )
-
-        return file
-
-
-class AsyncFiles(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncFilesWithRawResponse:
-        return AsyncFilesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncFilesWithStreamingResponse:
-        return AsyncFilesWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        file: FileTypes,
-        purpose: Literal["fine-tune", "assistants"],
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FileObject:
-        """Upload a file that can be used across various endpoints.
-
-        The size of all the
-        files uploaded by one organization can be up to 100 GB.
-
-        The size of individual files can be a maximum of 512 MB or 2 million tokens for
-        Assistants. See the
-        [Assistants Tools guide](https://platform.openai.com/docs/assistants/tools) to
-        learn more about the types of files supported. The Fine-tuning API only supports
-        `.jsonl` files.
-
-        Please [contact us](https://help.openai.com/) if you need to increase these
-        storage limits.
-
-        Args:
-          file: The File object (not file name) to be uploaded.
-
-          purpose: The intended purpose of the uploaded file.
-
-              Use "fine-tune" for
-              [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and
-              "assistants" for
-              [Assistants](https://platform.openai.com/docs/api-reference/assistants) and
-              [Messages](https://platform.openai.com/docs/api-reference/messages). This allows
-              us to validate the format of the uploaded file is correct for fine-tuning.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "file": file,
-                "purpose": purpose,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return await self._post(
-            "/files",
-            body=await async_maybe_transform(body, file_create_params.FileCreateParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FileObject,
-        )
-
-    async def retrieve(
-        self,
-        file_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FileObject:
-        """
-        Returns information about a specific file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        return await self._get(
-            f"/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FileObject,
-        )
-
-    def list(
-        self,
-        *,
-        purpose: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[FileObject, AsyncPage[FileObject]]:
-        """
-        Returns a list of files that belong to the user's organization.
-
-        Args:
-          purpose: Only return files with the given purpose.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return self._get_api_list(
-            "/files",
-            page=AsyncPage[FileObject],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform({"purpose": purpose}, file_list_params.FileListParams),
-            ),
-            model=FileObject,
-        )
-
-    async def delete(
-        self,
-        file_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FileDeleted:
-        """
-        Delete a file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        return await self._delete(
-            f"/files/{file_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FileDeleted,
-        )
-
-    async def content(
-        self,
-        file_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> _legacy_response.HttpxBinaryResponseContent:
-        """
-        Returns the contents of the specified file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        extra_headers = {"Accept": "application/binary", **(extra_headers or {})}
-        return await self._get(
-            f"/files/{file_id}/content",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=_legacy_response.HttpxBinaryResponseContent,
-        )
-
-    @typing_extensions.deprecated("The `.content()` method should be used instead")
-    async def retrieve_content(
-        self,
-        file_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> str:
-        """
-        Returns the contents of the specified file.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not file_id:
-            raise ValueError(f"Expected a non-empty value for `file_id` but received {file_id!r}")
-        return await self._get(
-            f"/files/{file_id}/content",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=str,
-        )
-
-    async def wait_for_processing(
-        self,
-        id: str,
-        *,
-        poll_interval: float = 5.0,
-        max_wait_seconds: float = 30 * 60,
-    ) -> FileObject:
-        """Waits for the given file to be processed, default timeout is 30 mins."""
-        TERMINAL_STATES = {"processed", "error", "deleted"}
-
-        start = time.time()
-        file = await self.retrieve(id)
-        while file.status not in TERMINAL_STATES:
-            await self._sleep(poll_interval)
-
-            file = await self.retrieve(id)
-            if time.time() - start > max_wait_seconds:
-                raise RuntimeError(
-                    f"Giving up on waiting for file {id} to finish processing after {max_wait_seconds} seconds."
-                )
-
-        return file
-
-
-class FilesWithRawResponse:
-    def __init__(self, files: Files) -> None:
-        self._files = files
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            files.create,
-        )
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            files.retrieve,
-        )
-        self.list = _legacy_response.to_raw_response_wrapper(
-            files.list,
-        )
-        self.delete = _legacy_response.to_raw_response_wrapper(
-            files.delete,
-        )
-        self.content = _legacy_response.to_raw_response_wrapper(
-            files.content,
-        )
-        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
-            _legacy_response.to_raw_response_wrapper(
-                files.retrieve_content  # pyright: ignore[reportDeprecated],
-            )
-        )
-
-
-class AsyncFilesWithRawResponse:
-    def __init__(self, files: AsyncFiles) -> None:
-        self._files = files
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            files.create,
-        )
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            files.retrieve,
-        )
-        self.list = _legacy_response.async_to_raw_response_wrapper(
-            files.list,
-        )
-        self.delete = _legacy_response.async_to_raw_response_wrapper(
-            files.delete,
-        )
-        self.content = _legacy_response.async_to_raw_response_wrapper(
-            files.content,
-        )
-        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
-            _legacy_response.async_to_raw_response_wrapper(
-                files.retrieve_content  # pyright: ignore[reportDeprecated],
-            )
-        )
-
-
-class FilesWithStreamingResponse:
-    def __init__(self, files: Files) -> None:
-        self._files = files
-
-        self.create = to_streamed_response_wrapper(
-            files.create,
-        )
-        self.retrieve = to_streamed_response_wrapper(
-            files.retrieve,
-        )
-        self.list = to_streamed_response_wrapper(
-            files.list,
-        )
-        self.delete = to_streamed_response_wrapper(
-            files.delete,
-        )
-        self.content = to_custom_streamed_response_wrapper(
-            files.content,
-            StreamedBinaryAPIResponse,
-        )
-        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
-            to_streamed_response_wrapper(
-                files.retrieve_content  # pyright: ignore[reportDeprecated],
-            )
-        )
-
-
-class AsyncFilesWithStreamingResponse:
-    def __init__(self, files: AsyncFiles) -> None:
-        self._files = files
-
-        self.create = async_to_streamed_response_wrapper(
-            files.create,
-        )
-        self.retrieve = async_to_streamed_response_wrapper(
-            files.retrieve,
-        )
-        self.list = async_to_streamed_response_wrapper(
-            files.list,
-        )
-        self.delete = async_to_streamed_response_wrapper(
-            files.delete,
-        )
-        self.content = async_to_custom_streamed_response_wrapper(
-            files.content,
-            AsyncStreamedBinaryAPIResponse,
-        )
-        self.retrieve_content = (  # pyright: ignore[reportDeprecated]
-            async_to_streamed_response_wrapper(
-                files.retrieve_content  # pyright: ignore[reportDeprecated],
-            )
-        )
diff --git a/openai/resources/fine_tuning/__init__.py b/openai/resources/fine_tuning/__init__.py
deleted file mode 100644
index 7765231f..00000000
--- a/openai/resources/fine_tuning/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .jobs import (
-    Jobs,
-    AsyncJobs,
-    JobsWithRawResponse,
-    AsyncJobsWithRawResponse,
-    JobsWithStreamingResponse,
-    AsyncJobsWithStreamingResponse,
-)
-from .fine_tuning import (
-    FineTuning,
-    AsyncFineTuning,
-    FineTuningWithRawResponse,
-    AsyncFineTuningWithRawResponse,
-    FineTuningWithStreamingResponse,
-    AsyncFineTuningWithStreamingResponse,
-)
-
-__all__ = [
-    "Jobs",
-    "AsyncJobs",
-    "JobsWithRawResponse",
-    "AsyncJobsWithRawResponse",
-    "JobsWithStreamingResponse",
-    "AsyncJobsWithStreamingResponse",
-    "FineTuning",
-    "AsyncFineTuning",
-    "FineTuningWithRawResponse",
-    "AsyncFineTuningWithRawResponse",
-    "FineTuningWithStreamingResponse",
-    "AsyncFineTuningWithStreamingResponse",
-]
diff --git a/openai/resources/fine_tuning/fine_tuning.py b/openai/resources/fine_tuning/fine_tuning.py
deleted file mode 100644
index 659b3e85..00000000
--- a/openai/resources/fine_tuning/fine_tuning.py
+++ /dev/null
@@ -1,80 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .jobs import (
-    Jobs,
-    AsyncJobs,
-    JobsWithRawResponse,
-    AsyncJobsWithRawResponse,
-    JobsWithStreamingResponse,
-    AsyncJobsWithStreamingResponse,
-)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-
-__all__ = ["FineTuning", "AsyncFineTuning"]
-
-
-class FineTuning(SyncAPIResource):
-    @cached_property
-    def jobs(self) -> Jobs:
-        return Jobs(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> FineTuningWithRawResponse:
-        return FineTuningWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> FineTuningWithStreamingResponse:
-        return FineTuningWithStreamingResponse(self)
-
-
-class AsyncFineTuning(AsyncAPIResource):
-    @cached_property
-    def jobs(self) -> AsyncJobs:
-        return AsyncJobs(self._client)
-
-    @cached_property
-    def with_raw_response(self) -> AsyncFineTuningWithRawResponse:
-        return AsyncFineTuningWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncFineTuningWithStreamingResponse:
-        return AsyncFineTuningWithStreamingResponse(self)
-
-
-class FineTuningWithRawResponse:
-    def __init__(self, fine_tuning: FineTuning) -> None:
-        self._fine_tuning = fine_tuning
-
-    @cached_property
-    def jobs(self) -> JobsWithRawResponse:
-        return JobsWithRawResponse(self._fine_tuning.jobs)
-
-
-class AsyncFineTuningWithRawResponse:
-    def __init__(self, fine_tuning: AsyncFineTuning) -> None:
-        self._fine_tuning = fine_tuning
-
-    @cached_property
-    def jobs(self) -> AsyncJobsWithRawResponse:
-        return AsyncJobsWithRawResponse(self._fine_tuning.jobs)
-
-
-class FineTuningWithStreamingResponse:
-    def __init__(self, fine_tuning: FineTuning) -> None:
-        self._fine_tuning = fine_tuning
-
-    @cached_property
-    def jobs(self) -> JobsWithStreamingResponse:
-        return JobsWithStreamingResponse(self._fine_tuning.jobs)
-
-
-class AsyncFineTuningWithStreamingResponse:
-    def __init__(self, fine_tuning: AsyncFineTuning) -> None:
-        self._fine_tuning = fine_tuning
-
-    @cached_property
-    def jobs(self) -> AsyncJobsWithStreamingResponse:
-        return AsyncJobsWithStreamingResponse(self._fine_tuning.jobs)
diff --git a/openai/resources/fine_tuning/jobs.py b/openai/resources/fine_tuning/jobs.py
deleted file mode 100644
index a0c3e24d..00000000
--- a/openai/resources/fine_tuning/jobs.py
+++ /dev/null
@@ -1,638 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Optional
-from typing_extensions import Literal
-
-import httpx
-
-from ... import _legacy_response
-from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from ..._utils import (
-    maybe_transform,
-    async_maybe_transform,
-)
-from ..._compat import cached_property
-from ..._resource import SyncAPIResource, AsyncAPIResource
-from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ...pagination import SyncCursorPage, AsyncCursorPage
-from ..._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-from ...types.fine_tuning import (
-    FineTuningJob,
-    FineTuningJobEvent,
-    job_list_params,
-    job_create_params,
-    job_list_events_params,
-)
-
-__all__ = ["Jobs", "AsyncJobs"]
-
-
-class Jobs(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> JobsWithRawResponse:
-        return JobsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> JobsWithStreamingResponse:
-        return JobsWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
-        training_file: str,
-        hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        validation_file: Optional[str] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FineTuningJob:
-        """
-        Creates a fine-tuning job which begins the process of creating a new model from
-        a given dataset.
-
-        Response includes details of the enqueued job including job status and the name
-        of the fine-tuned models once complete.
-
-        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
-
-        Args:
-          model: The name of the model to fine-tune. You can select one of the
-              [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned).
-
-          training_file: The ID of an uploaded file that contains training data.
-
-              See [upload file](https://platform.openai.com/docs/api-reference/files/upload)
-              for how to upload a file.
-
-              Your dataset must be formatted as a JSONL file. Additionally, you must upload
-              your file with the purpose `fine-tune`.
-
-              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
-              for more details.
-
-          hyperparameters: The hyperparameters used for the fine-tuning job.
-
-          suffix: A string of up to 18 characters that will be added to your fine-tuned model
-              name.
-
-              For example, a `suffix` of "custom-model-name" would produce a model name like
-              `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`.
-
-          validation_file: The ID of an uploaded file that contains validation data.
-
-              If you provide this file, the data is used to generate validation metrics
-              periodically during fine-tuning. These metrics can be viewed in the fine-tuning
-              results file. The same data should not be present in both train and validation
-              files.
-
-              Your dataset must be formatted as a JSONL file. You must upload your file with
-              the purpose `fine-tune`.
-
-              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
-              for more details.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return self._post(
-            "/fine_tuning/jobs",
-            body=maybe_transform(
-                {
-                    "model": model,
-                    "training_file": training_file,
-                    "hyperparameters": hyperparameters,
-                    "suffix": suffix,
-                    "validation_file": validation_file,
-                },
-                job_create_params.JobCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FineTuningJob,
-        )
-
-    def retrieve(
-        self,
-        fine_tuning_job_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FineTuningJob:
-        """
-        Get info about a fine-tuning job.
-
-        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not fine_tuning_job_id:
-            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
-        return self._get(
-            f"/fine_tuning/jobs/{fine_tuning_job_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FineTuningJob,
-        )
-
-    def list(
-        self,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncCursorPage[FineTuningJob]:
-        """
-        List your organization's fine-tuning jobs
-
-        Args:
-          after: Identifier for the last job from the previous pagination request.
-
-          limit: Number of fine-tuning jobs to retrieve.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return self._get_api_list(
-            "/fine_tuning/jobs",
-            page=SyncCursorPage[FineTuningJob],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "limit": limit,
-                    },
-                    job_list_params.JobListParams,
-                ),
-            ),
-            model=FineTuningJob,
-        )
-
-    def cancel(
-        self,
-        fine_tuning_job_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FineTuningJob:
-        """
-        Immediately cancel a fine-tune job.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not fine_tuning_job_id:
-            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
-        return self._post(
-            f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FineTuningJob,
-        )
-
-    def list_events(
-        self,
-        fine_tuning_job_id: str,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncCursorPage[FineTuningJobEvent]:
-        """
-        Get status updates for a fine-tuning job.
-
-        Args:
-          after: Identifier for the last event from the previous pagination request.
-
-          limit: Number of events to retrieve.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not fine_tuning_job_id:
-            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
-        return self._get_api_list(
-            f"/fine_tuning/jobs/{fine_tuning_job_id}/events",
-            page=SyncCursorPage[FineTuningJobEvent],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "limit": limit,
-                    },
-                    job_list_events_params.JobListEventsParams,
-                ),
-            ),
-            model=FineTuningJobEvent,
-        )
-
-
-class AsyncJobs(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncJobsWithRawResponse:
-        return AsyncJobsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncJobsWithStreamingResponse:
-        return AsyncJobsWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        model: Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]],
-        training_file: str,
-        hyperparameters: job_create_params.Hyperparameters | NotGiven = NOT_GIVEN,
-        suffix: Optional[str] | NotGiven = NOT_GIVEN,
-        validation_file: Optional[str] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FineTuningJob:
-        """
-        Creates a fine-tuning job which begins the process of creating a new model from
-        a given dataset.
-
-        Response includes details of the enqueued job including job status and the name
-        of the fine-tuned models once complete.
-
-        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
-
-        Args:
-          model: The name of the model to fine-tune. You can select one of the
-              [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned).
-
-          training_file: The ID of an uploaded file that contains training data.
-
-              See [upload file](https://platform.openai.com/docs/api-reference/files/upload)
-              for how to upload a file.
-
-              Your dataset must be formatted as a JSONL file. Additionally, you must upload
-              your file with the purpose `fine-tune`.
-
-              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
-              for more details.
-
-          hyperparameters: The hyperparameters used for the fine-tuning job.
-
-          suffix: A string of up to 18 characters that will be added to your fine-tuned model
-              name.
-
-              For example, a `suffix` of "custom-model-name" would produce a model name like
-              `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`.
-
-          validation_file: The ID of an uploaded file that contains validation data.
-
-              If you provide this file, the data is used to generate validation metrics
-              periodically during fine-tuning. These metrics can be viewed in the fine-tuning
-              results file. The same data should not be present in both train and validation
-              files.
-
-              Your dataset must be formatted as a JSONL file. You must upload your file with
-              the purpose `fine-tune`.
-
-              See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
-              for more details.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return await self._post(
-            "/fine_tuning/jobs",
-            body=await async_maybe_transform(
-                {
-                    "model": model,
-                    "training_file": training_file,
-                    "hyperparameters": hyperparameters,
-                    "suffix": suffix,
-                    "validation_file": validation_file,
-                },
-                job_create_params.JobCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FineTuningJob,
-        )
-
-    async def retrieve(
-        self,
-        fine_tuning_job_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FineTuningJob:
-        """
-        Get info about a fine-tuning job.
-
-        [Learn more about fine-tuning](https://platform.openai.com/docs/guides/fine-tuning)
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not fine_tuning_job_id:
-            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
-        return await self._get(
-            f"/fine_tuning/jobs/{fine_tuning_job_id}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FineTuningJob,
-        )
-
-    def list(
-        self,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[FineTuningJob, AsyncCursorPage[FineTuningJob]]:
-        """
-        List your organization's fine-tuning jobs
-
-        Args:
-          after: Identifier for the last job from the previous pagination request.
-
-          limit: Number of fine-tuning jobs to retrieve.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return self._get_api_list(
-            "/fine_tuning/jobs",
-            page=AsyncCursorPage[FineTuningJob],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "limit": limit,
-                    },
-                    job_list_params.JobListParams,
-                ),
-            ),
-            model=FineTuningJob,
-        )
-
-    async def cancel(
-        self,
-        fine_tuning_job_id: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> FineTuningJob:
-        """
-        Immediately cancel a fine-tune job.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not fine_tuning_job_id:
-            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
-        return await self._post(
-            f"/fine_tuning/jobs/{fine_tuning_job_id}/cancel",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=FineTuningJob,
-        )
-
-    def list_events(
-        self,
-        fine_tuning_job_id: str,
-        *,
-        after: str | NotGiven = NOT_GIVEN,
-        limit: int | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[FineTuningJobEvent, AsyncCursorPage[FineTuningJobEvent]]:
-        """
-        Get status updates for a fine-tuning job.
-
-        Args:
-          after: Identifier for the last event from the previous pagination request.
-
-          limit: Number of events to retrieve.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not fine_tuning_job_id:
-            raise ValueError(f"Expected a non-empty value for `fine_tuning_job_id` but received {fine_tuning_job_id!r}")
-        return self._get_api_list(
-            f"/fine_tuning/jobs/{fine_tuning_job_id}/events",
-            page=AsyncCursorPage[FineTuningJobEvent],
-            options=make_request_options(
-                extra_headers=extra_headers,
-                extra_query=extra_query,
-                extra_body=extra_body,
-                timeout=timeout,
-                query=maybe_transform(
-                    {
-                        "after": after,
-                        "limit": limit,
-                    },
-                    job_list_events_params.JobListEventsParams,
-                ),
-            ),
-            model=FineTuningJobEvent,
-        )
-
-
-class JobsWithRawResponse:
-    def __init__(self, jobs: Jobs) -> None:
-        self._jobs = jobs
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            jobs.create,
-        )
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            jobs.retrieve,
-        )
-        self.list = _legacy_response.to_raw_response_wrapper(
-            jobs.list,
-        )
-        self.cancel = _legacy_response.to_raw_response_wrapper(
-            jobs.cancel,
-        )
-        self.list_events = _legacy_response.to_raw_response_wrapper(
-            jobs.list_events,
-        )
-
-
-class AsyncJobsWithRawResponse:
-    def __init__(self, jobs: AsyncJobs) -> None:
-        self._jobs = jobs
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            jobs.create,
-        )
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            jobs.retrieve,
-        )
-        self.list = _legacy_response.async_to_raw_response_wrapper(
-            jobs.list,
-        )
-        self.cancel = _legacy_response.async_to_raw_response_wrapper(
-            jobs.cancel,
-        )
-        self.list_events = _legacy_response.async_to_raw_response_wrapper(
-            jobs.list_events,
-        )
-
-
-class JobsWithStreamingResponse:
-    def __init__(self, jobs: Jobs) -> None:
-        self._jobs = jobs
-
-        self.create = to_streamed_response_wrapper(
-            jobs.create,
-        )
-        self.retrieve = to_streamed_response_wrapper(
-            jobs.retrieve,
-        )
-        self.list = to_streamed_response_wrapper(
-            jobs.list,
-        )
-        self.cancel = to_streamed_response_wrapper(
-            jobs.cancel,
-        )
-        self.list_events = to_streamed_response_wrapper(
-            jobs.list_events,
-        )
-
-
-class AsyncJobsWithStreamingResponse:
-    def __init__(self, jobs: AsyncJobs) -> None:
-        self._jobs = jobs
-
-        self.create = async_to_streamed_response_wrapper(
-            jobs.create,
-        )
-        self.retrieve = async_to_streamed_response_wrapper(
-            jobs.retrieve,
-        )
-        self.list = async_to_streamed_response_wrapper(
-            jobs.list,
-        )
-        self.cancel = async_to_streamed_response_wrapper(
-            jobs.cancel,
-        )
-        self.list_events = async_to_streamed_response_wrapper(
-            jobs.list_events,
-        )
diff --git a/openai/resources/images.py b/openai/resources/images.py
deleted file mode 100644
index e12fa51b..00000000
--- a/openai/resources/images.py
+++ /dev/null
@@ -1,587 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Mapping, Optional, cast
-from typing_extensions import Literal
-
-import httpx
-
-from .. import _legacy_response
-from ..types import (
-    ImagesResponse,
-    image_edit_params,
-    image_generate_params,
-    image_create_variation_params,
-)
-from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
-from .._utils import (
-    extract_files,
-    maybe_transform,
-    deepcopy_minimal,
-    async_maybe_transform,
-)
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from .._base_client import (
-    make_request_options,
-)
-
-__all__ = ["Images", "AsyncImages"]
-
-
-class Images(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> ImagesWithRawResponse:
-        return ImagesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> ImagesWithStreamingResponse:
-        return ImagesWithStreamingResponse(self)
-
-    def create_variation(
-        self,
-        *,
-        image: FileTypes,
-        model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
-        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ImagesResponse:
-        """
-        Creates a variation of a given image.
-
-        Args:
-          image: The image to use as the basis for the variation(s). Must be a valid PNG file,
-              less than 4MB, and square.
-
-          model: The model to use for image generation. Only `dall-e-2` is supported at this
-              time.
-
-          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
-              `n=1` is supported.
-
-          response_format: The format in which the generated images are returned. Must be one of `url` or
-              `b64_json`. URLs are only valid for 60 minutes after the image has been
-              generated.
-
-          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
-              `1024x1024`.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "image": image,
-                "model": model,
-                "n": n,
-                "response_format": response_format,
-                "size": size,
-                "user": user,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["image"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return self._post(
-            "/images/variations",
-            body=maybe_transform(body, image_create_variation_params.ImageCreateVariationParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ImagesResponse,
-        )
-
-    def edit(
-        self,
-        *,
-        image: FileTypes,
-        prompt: str,
-        mask: FileTypes | NotGiven = NOT_GIVEN,
-        model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
-        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ImagesResponse:
-        """
-        Creates an edited or extended image given an original image and a prompt.
-
-        Args:
-          image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask
-              is not provided, image must have transparency, which will be used as the mask.
-
-          prompt: A text description of the desired image(s). The maximum length is 1000
-              characters.
-
-          mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
-              indicate where `image` should be edited. Must be a valid PNG file, less than
-              4MB, and have the same dimensions as `image`.
-
-          model: The model to use for image generation. Only `dall-e-2` is supported at this
-              time.
-
-          n: The number of images to generate. Must be between 1 and 10.
-
-          response_format: The format in which the generated images are returned. Must be one of `url` or
-              `b64_json`. URLs are only valid for 60 minutes after the image has been
-              generated.
-
-          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
-              `1024x1024`.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "image": image,
-                "prompt": prompt,
-                "mask": mask,
-                "model": model,
-                "n": n,
-                "response_format": response_format,
-                "size": size,
-                "user": user,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return self._post(
-            "/images/edits",
-            body=maybe_transform(body, image_edit_params.ImageEditParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ImagesResponse,
-        )
-
-    def generate(
-        self,
-        *,
-        prompt: str,
-        model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN,
-        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
-        size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN,
-        style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ImagesResponse:
-        """
-        Creates an image given a prompt.
-
-        Args:
-          prompt: A text description of the desired image(s). The maximum length is 1000
-              characters for `dall-e-2` and 4000 characters for `dall-e-3`.
-
-          model: The model to use for image generation.
-
-          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
-              `n=1` is supported.
-
-          quality: The quality of the image that will be generated. `hd` creates images with finer
-              details and greater consistency across the image. This param is only supported
-              for `dall-e-3`.
-
-          response_format: The format in which the generated images are returned. Must be one of `url` or
-              `b64_json`. URLs are only valid for 60 minutes after the image has been
-              generated.
-
-          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
-              `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or
-              `1024x1792` for `dall-e-3` models.
-
-          style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid
-              causes the model to lean towards generating hyper-real and dramatic images.
-              Natural causes the model to produce more natural, less hyper-real looking
-              images. This param is only supported for `dall-e-3`.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return self._post(
-            "/images/generations",
-            body=maybe_transform(
-                {
-                    "prompt": prompt,
-                    "model": model,
-                    "n": n,
-                    "quality": quality,
-                    "response_format": response_format,
-                    "size": size,
-                    "style": style,
-                    "user": user,
-                },
-                image_generate_params.ImageGenerateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ImagesResponse,
-        )
-
-
-class AsyncImages(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncImagesWithRawResponse:
-        return AsyncImagesWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncImagesWithStreamingResponse:
-        return AsyncImagesWithStreamingResponse(self)
-
-    async def create_variation(
-        self,
-        *,
-        image: FileTypes,
-        model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
-        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ImagesResponse:
-        """
-        Creates a variation of a given image.
-
-        Args:
-          image: The image to use as the basis for the variation(s). Must be a valid PNG file,
-              less than 4MB, and square.
-
-          model: The model to use for image generation. Only `dall-e-2` is supported at this
-              time.
-
-          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
-              `n=1` is supported.
-
-          response_format: The format in which the generated images are returned. Must be one of `url` or
-              `b64_json`. URLs are only valid for 60 minutes after the image has been
-              generated.
-
-          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
-              `1024x1024`.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "image": image,
-                "model": model,
-                "n": n,
-                "response_format": response_format,
-                "size": size,
-                "user": user,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["image"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return await self._post(
-            "/images/variations",
-            body=await async_maybe_transform(body, image_create_variation_params.ImageCreateVariationParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ImagesResponse,
-        )
-
-    async def edit(
-        self,
-        *,
-        image: FileTypes,
-        prompt: str,
-        mask: FileTypes | NotGiven = NOT_GIVEN,
-        model: Union[str, Literal["dall-e-2"], None] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
-        size: Optional[Literal["256x256", "512x512", "1024x1024"]] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ImagesResponse:
-        """
-        Creates an edited or extended image given an original image and a prompt.
-
-        Args:
-          image: The image to edit. Must be a valid PNG file, less than 4MB, and square. If mask
-              is not provided, image must have transparency, which will be used as the mask.
-
-          prompt: A text description of the desired image(s). The maximum length is 1000
-              characters.
-
-          mask: An additional image whose fully transparent areas (e.g. where alpha is zero)
-              indicate where `image` should be edited. Must be a valid PNG file, less than
-              4MB, and have the same dimensions as `image`.
-
-          model: The model to use for image generation. Only `dall-e-2` is supported at this
-              time.
-
-          n: The number of images to generate. Must be between 1 and 10.
-
-          response_format: The format in which the generated images are returned. Must be one of `url` or
-              `b64_json`. URLs are only valid for 60 minutes after the image has been
-              generated.
-
-          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
-              `1024x1024`.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        body = deepcopy_minimal(
-            {
-                "image": image,
-                "prompt": prompt,
-                "mask": mask,
-                "model": model,
-                "n": n,
-                "response_format": response_format,
-                "size": size,
-                "user": user,
-            }
-        )
-        files = extract_files(cast(Mapping[str, object], body), paths=[["image"], ["mask"]])
-        if files:
-            # It should be noted that the actual Content-Type header that will be
-            # sent to the server will contain a `boundary` parameter, e.g.
-            # multipart/form-data; boundary=---abc--
-            extra_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}
-        return await self._post(
-            "/images/edits",
-            body=await async_maybe_transform(body, image_edit_params.ImageEditParams),
-            files=files,
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ImagesResponse,
-        )
-
-    async def generate(
-        self,
-        *,
-        prompt: str,
-        model: Union[str, Literal["dall-e-2", "dall-e-3"], None] | NotGiven = NOT_GIVEN,
-        n: Optional[int] | NotGiven = NOT_GIVEN,
-        quality: Literal["standard", "hd"] | NotGiven = NOT_GIVEN,
-        response_format: Optional[Literal["url", "b64_json"]] | NotGiven = NOT_GIVEN,
-        size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]] | NotGiven = NOT_GIVEN,
-        style: Optional[Literal["vivid", "natural"]] | NotGiven = NOT_GIVEN,
-        user: str | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ImagesResponse:
-        """
-        Creates an image given a prompt.
-
-        Args:
-          prompt: A text description of the desired image(s). The maximum length is 1000
-              characters for `dall-e-2` and 4000 characters for `dall-e-3`.
-
-          model: The model to use for image generation.
-
-          n: The number of images to generate. Must be between 1 and 10. For `dall-e-3`, only
-              `n=1` is supported.
-
-          quality: The quality of the image that will be generated. `hd` creates images with finer
-              details and greater consistency across the image. This param is only supported
-              for `dall-e-3`.
-
-          response_format: The format in which the generated images are returned. Must be one of `url` or
-              `b64_json`. URLs are only valid for 60 minutes after the image has been
-              generated.
-
-          size: The size of the generated images. Must be one of `256x256`, `512x512`, or
-              `1024x1024` for `dall-e-2`. Must be one of `1024x1024`, `1792x1024`, or
-              `1024x1792` for `dall-e-3` models.
-
-          style: The style of the generated images. Must be one of `vivid` or `natural`. Vivid
-              causes the model to lean towards generating hyper-real and dramatic images.
-              Natural causes the model to produce more natural, less hyper-real looking
-              images. This param is only supported for `dall-e-3`.
-
-          user: A unique identifier representing your end-user, which can help OpenAI to monitor
-              and detect abuse.
-              [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return await self._post(
-            "/images/generations",
-            body=await async_maybe_transform(
-                {
-                    "prompt": prompt,
-                    "model": model,
-                    "n": n,
-                    "quality": quality,
-                    "response_format": response_format,
-                    "size": size,
-                    "style": style,
-                    "user": user,
-                },
-                image_generate_params.ImageGenerateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ImagesResponse,
-        )
-
-
-class ImagesWithRawResponse:
-    def __init__(self, images: Images) -> None:
-        self._images = images
-
-        self.create_variation = _legacy_response.to_raw_response_wrapper(
-            images.create_variation,
-        )
-        self.edit = _legacy_response.to_raw_response_wrapper(
-            images.edit,
-        )
-        self.generate = _legacy_response.to_raw_response_wrapper(
-            images.generate,
-        )
-
-
-class AsyncImagesWithRawResponse:
-    def __init__(self, images: AsyncImages) -> None:
-        self._images = images
-
-        self.create_variation = _legacy_response.async_to_raw_response_wrapper(
-            images.create_variation,
-        )
-        self.edit = _legacy_response.async_to_raw_response_wrapper(
-            images.edit,
-        )
-        self.generate = _legacy_response.async_to_raw_response_wrapper(
-            images.generate,
-        )
-
-
-class ImagesWithStreamingResponse:
-    def __init__(self, images: Images) -> None:
-        self._images = images
-
-        self.create_variation = to_streamed_response_wrapper(
-            images.create_variation,
-        )
-        self.edit = to_streamed_response_wrapper(
-            images.edit,
-        )
-        self.generate = to_streamed_response_wrapper(
-            images.generate,
-        )
-
-
-class AsyncImagesWithStreamingResponse:
-    def __init__(self, images: AsyncImages) -> None:
-        self._images = images
-
-        self.create_variation = async_to_streamed_response_wrapper(
-            images.create_variation,
-        )
-        self.edit = async_to_streamed_response_wrapper(
-            images.edit,
-        )
-        self.generate = async_to_streamed_response_wrapper(
-            images.generate,
-        )
diff --git a/openai/resources/models.py b/openai/resources/models.py
deleted file mode 100644
index 4e36e208..00000000
--- a/openai/resources/models.py
+++ /dev/null
@@ -1,283 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-import httpx
-
-from .. import _legacy_response
-from ..types import Model, ModelDeleted
-from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from ..pagination import SyncPage, AsyncPage
-from .._base_client import (
-    AsyncPaginator,
-    make_request_options,
-)
-
-__all__ = ["Models", "AsyncModels"]
-
-
-class Models(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> ModelsWithRawResponse:
-        return ModelsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> ModelsWithStreamingResponse:
-        return ModelsWithStreamingResponse(self)
-
-    def retrieve(
-        self,
-        model: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Model:
-        """
-        Retrieves a model instance, providing basic information about the model such as
-        the owner and permissioning.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not model:
-            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
-        return self._get(
-            f"/models/{model}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Model,
-        )
-
-    def list(
-        self,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> SyncPage[Model]:
-        """
-        Lists the currently available models, and provides basic information about each
-        one such as the owner and availability.
-        """
-        return self._get_api_list(
-            "/models",
-            page=SyncPage[Model],
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            model=Model,
-        )
-
-    def delete(
-        self,
-        model: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ModelDeleted:
-        """Delete a fine-tuned model.
-
-        You must have the Owner role in your organization to
-        delete a model.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not model:
-            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
-        return self._delete(
-            f"/models/{model}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ModelDeleted,
-        )
-
-
-class AsyncModels(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncModelsWithRawResponse:
-        return AsyncModelsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncModelsWithStreamingResponse:
-        return AsyncModelsWithStreamingResponse(self)
-
-    async def retrieve(
-        self,
-        model: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> Model:
-        """
-        Retrieves a model instance, providing basic information about the model such as
-        the owner and permissioning.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not model:
-            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
-        return await self._get(
-            f"/models/{model}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=Model,
-        )
-
-    def list(
-        self,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> AsyncPaginator[Model, AsyncPage[Model]]:
-        """
-        Lists the currently available models, and provides basic information about each
-        one such as the owner and availability.
-        """
-        return self._get_api_list(
-            "/models",
-            page=AsyncPage[Model],
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            model=Model,
-        )
-
-    async def delete(
-        self,
-        model: str,
-        *,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ModelDeleted:
-        """Delete a fine-tuned model.
-
-        You must have the Owner role in your organization to
-        delete a model.
-
-        Args:
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        if not model:
-            raise ValueError(f"Expected a non-empty value for `model` but received {model!r}")
-        return await self._delete(
-            f"/models/{model}",
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ModelDeleted,
-        )
-
-
-class ModelsWithRawResponse:
-    def __init__(self, models: Models) -> None:
-        self._models = models
-
-        self.retrieve = _legacy_response.to_raw_response_wrapper(
-            models.retrieve,
-        )
-        self.list = _legacy_response.to_raw_response_wrapper(
-            models.list,
-        )
-        self.delete = _legacy_response.to_raw_response_wrapper(
-            models.delete,
-        )
-
-
-class AsyncModelsWithRawResponse:
-    def __init__(self, models: AsyncModels) -> None:
-        self._models = models
-
-        self.retrieve = _legacy_response.async_to_raw_response_wrapper(
-            models.retrieve,
-        )
-        self.list = _legacy_response.async_to_raw_response_wrapper(
-            models.list,
-        )
-        self.delete = _legacy_response.async_to_raw_response_wrapper(
-            models.delete,
-        )
-
-
-class ModelsWithStreamingResponse:
-    def __init__(self, models: Models) -> None:
-        self._models = models
-
-        self.retrieve = to_streamed_response_wrapper(
-            models.retrieve,
-        )
-        self.list = to_streamed_response_wrapper(
-            models.list,
-        )
-        self.delete = to_streamed_response_wrapper(
-            models.delete,
-        )
-
-
-class AsyncModelsWithStreamingResponse:
-    def __init__(self, models: AsyncModels) -> None:
-        self._models = models
-
-        self.retrieve = async_to_streamed_response_wrapper(
-            models.retrieve,
-        )
-        self.list = async_to_streamed_response_wrapper(
-            models.list,
-        )
-        self.delete = async_to_streamed_response_wrapper(
-            models.delete,
-        )
diff --git a/openai/resources/moderations.py b/openai/resources/moderations.py
deleted file mode 100644
index 385b672f..00000000
--- a/openai/resources/moderations.py
+++ /dev/null
@@ -1,180 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Union
-from typing_extensions import Literal
-
-import httpx
-
-from .. import _legacy_response
-from ..types import ModerationCreateResponse, moderation_create_params
-from .._types import NOT_GIVEN, Body, Query, Headers, NotGiven
-from .._utils import (
-    maybe_transform,
-    async_maybe_transform,
-)
-from .._compat import cached_property
-from .._resource import SyncAPIResource, AsyncAPIResource
-from .._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
-from .._base_client import (
-    make_request_options,
-)
-
-__all__ = ["Moderations", "AsyncModerations"]
-
-
-class Moderations(SyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> ModerationsWithRawResponse:
-        return ModerationsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> ModerationsWithStreamingResponse:
-        return ModerationsWithStreamingResponse(self)
-
-    def create(
-        self,
-        *,
-        input: Union[str, List[str]],
-        model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ModerationCreateResponse:
-        """
-        Classifies if text is potentially harmful.
-
-        Args:
-          input: The input text to classify
-
-          model: Two content moderations models are available: `text-moderation-stable` and
-              `text-moderation-latest`.
-
-              The default is `text-moderation-latest` which will be automatically upgraded
-              over time. This ensures you are always using our most accurate model. If you use
-              `text-moderation-stable`, we will provide advanced notice before updating the
-              model. Accuracy of `text-moderation-stable` may be slightly lower than for
-              `text-moderation-latest`.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return self._post(
-            "/moderations",
-            body=maybe_transform(
-                {
-                    "input": input,
-                    "model": model,
-                },
-                moderation_create_params.ModerationCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ModerationCreateResponse,
-        )
-
-
-class AsyncModerations(AsyncAPIResource):
-    @cached_property
-    def with_raw_response(self) -> AsyncModerationsWithRawResponse:
-        return AsyncModerationsWithRawResponse(self)
-
-    @cached_property
-    def with_streaming_response(self) -> AsyncModerationsWithStreamingResponse:
-        return AsyncModerationsWithStreamingResponse(self)
-
-    async def create(
-        self,
-        *,
-        input: Union[str, List[str]],
-        model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]] | NotGiven = NOT_GIVEN,
-        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
-        # The extra values given here take precedence over values defined on the client or passed to this method.
-        extra_headers: Headers | None = None,
-        extra_query: Query | None = None,
-        extra_body: Body | None = None,
-        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
-    ) -> ModerationCreateResponse:
-        """
-        Classifies if text is potentially harmful.
-
-        Args:
-          input: The input text to classify
-
-          model: Two content moderations models are available: `text-moderation-stable` and
-              `text-moderation-latest`.
-
-              The default is `text-moderation-latest` which will be automatically upgraded
-              over time. This ensures you are always using our most accurate model. If you use
-              `text-moderation-stable`, we will provide advanced notice before updating the
-              model. Accuracy of `text-moderation-stable` may be slightly lower than for
-              `text-moderation-latest`.
-
-          extra_headers: Send extra headers
-
-          extra_query: Add additional query parameters to the request
-
-          extra_body: Add additional JSON properties to the request
-
-          timeout: Override the client-level default timeout for this request, in seconds
-        """
-        return await self._post(
-            "/moderations",
-            body=await async_maybe_transform(
-                {
-                    "input": input,
-                    "model": model,
-                },
-                moderation_create_params.ModerationCreateParams,
-            ),
-            options=make_request_options(
-                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
-            ),
-            cast_to=ModerationCreateResponse,
-        )
-
-
-class ModerationsWithRawResponse:
-    def __init__(self, moderations: Moderations) -> None:
-        self._moderations = moderations
-
-        self.create = _legacy_response.to_raw_response_wrapper(
-            moderations.create,
-        )
-
-
-class AsyncModerationsWithRawResponse:
-    def __init__(self, moderations: AsyncModerations) -> None:
-        self._moderations = moderations
-
-        self.create = _legacy_response.async_to_raw_response_wrapper(
-            moderations.create,
-        )
-
-
-class ModerationsWithStreamingResponse:
-    def __init__(self, moderations: Moderations) -> None:
-        self._moderations = moderations
-
-        self.create = to_streamed_response_wrapper(
-            moderations.create,
-        )
-
-
-class AsyncModerationsWithStreamingResponse:
-    def __init__(self, moderations: AsyncModerations) -> None:
-        self._moderations = moderations
-
-        self.create = async_to_streamed_response_wrapper(
-            moderations.create,
-        )
diff --git a/openai/types/__init__.py b/openai/types/__init__.py
deleted file mode 100644
index 0917e22a..00000000
--- a/openai/types/__init__.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .image import Image as Image
-from .model import Model as Model
-from .shared import (
-    ErrorObject as ErrorObject,
-    FunctionDefinition as FunctionDefinition,
-    FunctionParameters as FunctionParameters,
-)
-from .embedding import Embedding as Embedding
-from .completion import Completion as Completion
-from .moderation import Moderation as Moderation
-from .file_object import FileObject as FileObject
-from .file_content import FileContent as FileContent
-from .file_deleted import FileDeleted as FileDeleted
-from .model_deleted import ModelDeleted as ModelDeleted
-from .images_response import ImagesResponse as ImagesResponse
-from .completion_usage import CompletionUsage as CompletionUsage
-from .file_list_params import FileListParams as FileListParams
-from .completion_choice import CompletionChoice as CompletionChoice
-from .image_edit_params import ImageEditParams as ImageEditParams
-from .file_create_params import FileCreateParams as FileCreateParams
-from .image_generate_params import ImageGenerateParams as ImageGenerateParams
-from .embedding_create_params import EmbeddingCreateParams as EmbeddingCreateParams
-from .completion_create_params import CompletionCreateParams as CompletionCreateParams
-from .moderation_create_params import ModerationCreateParams as ModerationCreateParams
-from .create_embedding_response import CreateEmbeddingResponse as CreateEmbeddingResponse
-from .moderation_create_response import ModerationCreateResponse as ModerationCreateResponse
-from .image_create_variation_params import ImageCreateVariationParams as ImageCreateVariationParams
diff --git a/openai/types/audio/__init__.py b/openai/types/audio/__init__.py
deleted file mode 100644
index 8d2c44c8..00000000
--- a/openai/types/audio/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .translation import Translation as Translation
-from .transcription import Transcription as Transcription
-from .speech_create_params import SpeechCreateParams as SpeechCreateParams
-from .translation_create_params import TranslationCreateParams as TranslationCreateParams
-from .transcription_create_params import TranscriptionCreateParams as TranscriptionCreateParams
diff --git a/openai/types/audio/speech_create_params.py b/openai/types/audio/speech_create_params.py
deleted file mode 100644
index 8d75ec4c..00000000
--- a/openai/types/audio/speech_create_params.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["SpeechCreateParams"]
-
-
-class SpeechCreateParams(TypedDict, total=False):
-    input: Required[str]
-    """The text to generate audio for. The maximum length is 4096 characters."""
-
-    model: Required[Union[str, Literal["tts-1", "tts-1-hd"]]]
-    """
-    One of the available [TTS models](https://platform.openai.com/docs/models/tts):
-    `tts-1` or `tts-1-hd`
-    """
-
-    voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]]
-    """The voice to use when generating the audio.
-
-    Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`.
-    Previews of the voices are available in the
-    [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech/voice-options).
-    """
-
-    response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"]
-    """The format to audio in.
-
-    Supported formats are `mp3`, `opus`, `aac`, `flac`, `wav`, and `pcm`.
-    """
-
-    speed: float
-    """The speed of the generated audio.
-
-    Select a value from `0.25` to `4.0`. `1.0` is the default.
-    """
diff --git a/openai/types/audio/transcription.py b/openai/types/audio/transcription.py
deleted file mode 100644
index fa512e27..00000000
--- a/openai/types/audio/transcription.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-
-__all__ = ["Transcription"]
-
-
-class Transcription(BaseModel):
-    text: str
-    """The transcribed text."""
diff --git a/openai/types/audio/transcription_create_params.py b/openai/types/audio/transcription_create_params.py
deleted file mode 100644
index 6b2d5bae..00000000
--- a/openai/types/audio/transcription_create_params.py
+++ /dev/null
@@ -1,65 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Union
-from typing_extensions import Literal, Required, TypedDict
-
-from ..._types import FileTypes
-
-__all__ = ["TranscriptionCreateParams"]
-
-
-class TranscriptionCreateParams(TypedDict, total=False):
-    file: Required[FileTypes]
-    """
-    The audio file object (not file name) to transcribe, in one of these formats:
-    flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
-    """
-
-    model: Required[Union[str, Literal["whisper-1"]]]
-    """ID of the model to use.
-
-    Only `whisper-1` (which is powered by our open source Whisper V2 model) is
-    currently available.
-    """
-
-    language: str
-    """The language of the input audio.
-
-    Supplying the input language in
-    [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) format will
-    improve accuracy and latency.
-    """
-
-    prompt: str
-    """An optional text to guide the model's style or continue a previous audio
-    segment.
-
-    The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting)
-    should match the audio language.
-    """
-
-    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"]
-    """
-    The format of the transcript output, in one of these options: `json`, `text`,
-    `srt`, `verbose_json`, or `vtt`.
-    """
-
-    temperature: float
-    """The sampling temperature, between 0 and 1.
-
-    Higher values like 0.8 will make the output more random, while lower values like
-    0.2 will make it more focused and deterministic. If set to 0, the model will use
-    [log probability](https://en.wikipedia.org/wiki/Log_probability) to
-    automatically increase the temperature until certain thresholds are hit.
-    """
-
-    timestamp_granularities: List[Literal["word", "segment"]]
-    """The timestamp granularities to populate for this transcription.
-
-    `response_format` must be set `verbose_json` to use timestamp granularities.
-    Either or both of these options are supported: `word`, or `segment`. Note: There
-    is no additional latency for segment timestamps, but generating word timestamps
-    incurs additional latency.
-    """
diff --git a/openai/types/audio/translation.py b/openai/types/audio/translation.py
deleted file mode 100644
index efc56f7f..00000000
--- a/openai/types/audio/translation.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ..._models import BaseModel
-
-__all__ = ["Translation"]
-
-
-class Translation(BaseModel):
-    text: str
diff --git a/openai/types/audio/translation_create_params.py b/openai/types/audio/translation_create_params.py
deleted file mode 100644
index f23a41ed..00000000
--- a/openai/types/audio/translation_create_params.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-from typing_extensions import Literal, Required, TypedDict
-
-from ..._types import FileTypes
-
-__all__ = ["TranslationCreateParams"]
-
-
-class TranslationCreateParams(TypedDict, total=False):
-    file: Required[FileTypes]
-    """
-    The audio file object (not file name) translate, in one of these formats: flac,
-    mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.
-    """
-
-    model: Required[Union[str, Literal["whisper-1"]]]
-    """ID of the model to use.
-
-    Only `whisper-1` (which is powered by our open source Whisper V2 model) is
-    currently available.
-    """
-
-    prompt: str
-    """An optional text to guide the model's style or continue a previous audio
-    segment.
-
-    The [prompt](https://platform.openai.com/docs/guides/speech-to-text/prompting)
-    should be in English.
-    """
-
-    response_format: str
-    """
-    The format of the transcript output, in one of these options: `json`, `text`,
-    `srt`, `verbose_json`, or `vtt`.
-    """
-
-    temperature: float
-    """The sampling temperature, between 0 and 1.
-
-    Higher values like 0.8 will make the output more random, while lower values like
-    0.2 will make it more focused and deterministic. If set to 0, the model will use
-    [log probability](https://en.wikipedia.org/wiki/Log_probability) to
-    automatically increase the temperature until certain thresholds are hit.
-    """
diff --git a/openai/types/beta/__init__.py b/openai/types/beta/__init__.py
deleted file mode 100644
index a7de0272..00000000
--- a/openai/types/beta/__init__.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .thread import Thread as Thread
-from .assistant import Assistant as Assistant
-from .function_tool import FunctionTool as FunctionTool
-from .assistant_tool import AssistantTool as AssistantTool
-from .retrieval_tool import RetrievalTool as RetrievalTool
-from .thread_deleted import ThreadDeleted as ThreadDeleted
-from .assistant_deleted import AssistantDeleted as AssistantDeleted
-from .function_tool_param import FunctionToolParam as FunctionToolParam
-from .assistant_tool_param import AssistantToolParam as AssistantToolParam
-from .retrieval_tool_param import RetrievalToolParam as RetrievalToolParam
-from .thread_create_params import ThreadCreateParams as ThreadCreateParams
-from .thread_update_params import ThreadUpdateParams as ThreadUpdateParams
-from .assistant_list_params import AssistantListParams as AssistantListParams
-from .code_interpreter_tool import CodeInterpreterTool as CodeInterpreterTool
-from .assistant_stream_event import AssistantStreamEvent as AssistantStreamEvent
-from .assistant_create_params import AssistantCreateParams as AssistantCreateParams
-from .assistant_update_params import AssistantUpdateParams as AssistantUpdateParams
-from .code_interpreter_tool_param import CodeInterpreterToolParam as CodeInterpreterToolParam
-from .thread_create_and_run_params import ThreadCreateAndRunParams as ThreadCreateAndRunParams
diff --git a/openai/types/beta/assistant.py b/openai/types/beta/assistant.py
deleted file mode 100644
index 32561a9a..00000000
--- a/openai/types/beta/assistant.py
+++ /dev/null
@@ -1,64 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .assistant_tool import AssistantTool
-
-__all__ = ["Assistant"]
-
-
-class Assistant(BaseModel):
-    id: str
-    """The identifier, which can be referenced in API endpoints."""
-
-    created_at: int
-    """The Unix timestamp (in seconds) for when the assistant was created."""
-
-    description: Optional[str] = None
-    """The description of the assistant. The maximum length is 512 characters."""
-
-    file_ids: List[str]
-    """
-    A list of [file](https://platform.openai.com/docs/api-reference/files) IDs
-    attached to this assistant. There can be a maximum of 20 files attached to the
-    assistant. Files are ordered by their creation date in ascending order.
-    """
-
-    instructions: Optional[str] = None
-    """The system instructions that the assistant uses.
-
-    The maximum length is 32768 characters.
-    """
-
-    metadata: Optional[object] = None
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-    model: str
-    """ID of the model to use.
-
-    You can use the
-    [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-    see all of your available models, or see our
-    [Model overview](https://platform.openai.com/docs/models/overview) for
-    descriptions of them.
-    """
-
-    name: Optional[str] = None
-    """The name of the assistant. The maximum length is 256 characters."""
-
-    object: Literal["assistant"]
-    """The object type, which is always `assistant`."""
-
-    tools: List[AssistantTool]
-    """A list of tool enabled on the assistant.
-
-    There can be a maximum of 128 tools per assistant. Tools can be of types
-    `code_interpreter`, `retrieval`, or `function`.
-    """
diff --git a/openai/types/beta/assistant_create_params.py b/openai/types/beta/assistant_create_params.py
deleted file mode 100644
index 8bad3236..00000000
--- a/openai/types/beta/assistant_create_params.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Iterable, Optional
-from typing_extensions import Required, TypedDict
-
-from .assistant_tool_param import AssistantToolParam
-
-__all__ = ["AssistantCreateParams"]
-
-
-class AssistantCreateParams(TypedDict, total=False):
-    model: Required[str]
-    """ID of the model to use.
-
-    You can use the
-    [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-    see all of your available models, or see our
-    [Model overview](https://platform.openai.com/docs/models/overview) for
-    descriptions of them.
-    """
-
-    description: Optional[str]
-    """The description of the assistant. The maximum length is 512 characters."""
-
-    file_ids: List[str]
-    """
-    A list of [file](https://platform.openai.com/docs/api-reference/files) IDs
-    attached to this assistant. There can be a maximum of 20 files attached to the
-    assistant. Files are ordered by their creation date in ascending order.
-    """
-
-    instructions: Optional[str]
-    """The system instructions that the assistant uses.
-
-    The maximum length is 32768 characters.
-    """
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-    name: Optional[str]
-    """The name of the assistant. The maximum length is 256 characters."""
-
-    tools: Iterable[AssistantToolParam]
-    """A list of tool enabled on the assistant.
-
-    There can be a maximum of 128 tools per assistant. Tools can be of types
-    `code_interpreter`, `retrieval`, or `function`.
-    """
diff --git a/openai/types/beta/assistant_deleted.py b/openai/types/beta/assistant_deleted.py
deleted file mode 100644
index 3be40cd6..00000000
--- a/openai/types/beta/assistant_deleted.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["AssistantDeleted"]
-
-
-class AssistantDeleted(BaseModel):
-    id: str
-
-    deleted: bool
-
-    object: Literal["assistant.deleted"]
diff --git a/openai/types/beta/assistant_list_params.py b/openai/types/beta/assistant_list_params.py
deleted file mode 100644
index f54f6312..00000000
--- a/openai/types/beta/assistant_list_params.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, TypedDict
-
-__all__ = ["AssistantListParams"]
-
-
-class AssistantListParams(TypedDict, total=False):
-    after: str
-    """A cursor for use in pagination.
-
-    `after` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include after=obj_foo in order to fetch the next page of the
-    list.
-    """
-
-    before: str
-    """A cursor for use in pagination.
-
-    `before` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include before=obj_foo in order to fetch the previous page
-    of the list.
-    """
-
-    limit: int
-    """A limit on the number of objects to be returned.
-
-    Limit can range between 1 and 100, and the default is 20.
-    """
-
-    order: Literal["asc", "desc"]
-    """Sort order by the `created_at` timestamp of the objects.
-
-    `asc` for ascending order and `desc` for descending order.
-    """
diff --git a/openai/types/beta/assistant_stream_event.py b/openai/types/beta/assistant_stream_event.py
deleted file mode 100644
index 90471f7d..00000000
--- a/openai/types/beta/assistant_stream_event.py
+++ /dev/null
@@ -1,276 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Literal, Annotated
-
-from .thread import Thread
-from ..shared import ErrorObject
-from .threads import Run, Message, MessageDeltaEvent
-from ..._utils import PropertyInfo
-from ..._models import BaseModel
-from .threads.runs import RunStep, RunStepDeltaEvent
-
-__all__ = [
-    "AssistantStreamEvent",
-    "ThreadCreated",
-    "ThreadRunCreated",
-    "ThreadRunQueued",
-    "ThreadRunInProgress",
-    "ThreadRunRequiresAction",
-    "ThreadRunCompleted",
-    "ThreadRunFailed",
-    "ThreadRunCancelling",
-    "ThreadRunCancelled",
-    "ThreadRunExpired",
-    "ThreadRunStepCreated",
-    "ThreadRunStepInProgress",
-    "ThreadRunStepDelta",
-    "ThreadRunStepCompleted",
-    "ThreadRunStepFailed",
-    "ThreadRunStepCancelled",
-    "ThreadRunStepExpired",
-    "ThreadMessageCreated",
-    "ThreadMessageInProgress",
-    "ThreadMessageDelta",
-    "ThreadMessageCompleted",
-    "ThreadMessageIncomplete",
-    "ErrorEvent",
-]
-
-
-class ThreadCreated(BaseModel):
-    data: Thread
-    """
-    Represents a thread that contains
-    [messages](https://platform.openai.com/docs/api-reference/messages).
-    """
-
-    event: Literal["thread.created"]
-
-
-class ThreadRunCreated(BaseModel):
-    data: Run
-    """
-    Represents an execution run on a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.run.created"]
-
-
-class ThreadRunQueued(BaseModel):
-    data: Run
-    """
-    Represents an execution run on a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.run.queued"]
-
-
-class ThreadRunInProgress(BaseModel):
-    data: Run
-    """
-    Represents an execution run on a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.run.in_progress"]
-
-
-class ThreadRunRequiresAction(BaseModel):
-    data: Run
-    """
-    Represents an execution run on a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.run.requires_action"]
-
-
-class ThreadRunCompleted(BaseModel):
-    data: Run
-    """
-    Represents an execution run on a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.run.completed"]
-
-
-class ThreadRunFailed(BaseModel):
-    data: Run
-    """
-    Represents an execution run on a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.run.failed"]
-
-
-class ThreadRunCancelling(BaseModel):
-    data: Run
-    """
-    Represents an execution run on a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.run.cancelling"]
-
-
-class ThreadRunCancelled(BaseModel):
-    data: Run
-    """
-    Represents an execution run on a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.run.cancelled"]
-
-
-class ThreadRunExpired(BaseModel):
-    data: Run
-    """
-    Represents an execution run on a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.run.expired"]
-
-
-class ThreadRunStepCreated(BaseModel):
-    data: RunStep
-    """Represents a step in execution of a run."""
-
-    event: Literal["thread.run.step.created"]
-
-
-class ThreadRunStepInProgress(BaseModel):
-    data: RunStep
-    """Represents a step in execution of a run."""
-
-    event: Literal["thread.run.step.in_progress"]
-
-
-class ThreadRunStepDelta(BaseModel):
-    data: RunStepDeltaEvent
-    """Represents a run step delta i.e.
-
-    any changed fields on a run step during streaming.
-    """
-
-    event: Literal["thread.run.step.delta"]
-
-
-class ThreadRunStepCompleted(BaseModel):
-    data: RunStep
-    """Represents a step in execution of a run."""
-
-    event: Literal["thread.run.step.completed"]
-
-
-class ThreadRunStepFailed(BaseModel):
-    data: RunStep
-    """Represents a step in execution of a run."""
-
-    event: Literal["thread.run.step.failed"]
-
-
-class ThreadRunStepCancelled(BaseModel):
-    data: RunStep
-    """Represents a step in execution of a run."""
-
-    event: Literal["thread.run.step.cancelled"]
-
-
-class ThreadRunStepExpired(BaseModel):
-    data: RunStep
-    """Represents a step in execution of a run."""
-
-    event: Literal["thread.run.step.expired"]
-
-
-class ThreadMessageCreated(BaseModel):
-    data: Message
-    """
-    Represents a message within a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.message.created"]
-
-
-class ThreadMessageInProgress(BaseModel):
-    data: Message
-    """
-    Represents a message within a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.message.in_progress"]
-
-
-class ThreadMessageDelta(BaseModel):
-    data: MessageDeltaEvent
-    """Represents a message delta i.e.
-
-    any changed fields on a message during streaming.
-    """
-
-    event: Literal["thread.message.delta"]
-
-
-class ThreadMessageCompleted(BaseModel):
-    data: Message
-    """
-    Represents a message within a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.message.completed"]
-
-
-class ThreadMessageIncomplete(BaseModel):
-    data: Message
-    """
-    Represents a message within a
-    [thread](https://platform.openai.com/docs/api-reference/threads).
-    """
-
-    event: Literal["thread.message.incomplete"]
-
-
-class ErrorEvent(BaseModel):
-    data: ErrorObject
-
-    event: Literal["error"]
-
-
-AssistantStreamEvent = Annotated[
-    Union[
-        ThreadCreated,
-        ThreadRunCreated,
-        ThreadRunQueued,
-        ThreadRunInProgress,
-        ThreadRunRequiresAction,
-        ThreadRunCompleted,
-        ThreadRunFailed,
-        ThreadRunCancelling,
-        ThreadRunCancelled,
-        ThreadRunExpired,
-        ThreadRunStepCreated,
-        ThreadRunStepInProgress,
-        ThreadRunStepDelta,
-        ThreadRunStepCompleted,
-        ThreadRunStepFailed,
-        ThreadRunStepCancelled,
-        ThreadRunStepExpired,
-        ThreadMessageCreated,
-        ThreadMessageInProgress,
-        ThreadMessageDelta,
-        ThreadMessageCompleted,
-        ThreadMessageIncomplete,
-        ErrorEvent,
-    ],
-    PropertyInfo(discriminator="event"),
-]
diff --git a/openai/types/beta/assistant_tool.py b/openai/types/beta/assistant_tool.py
deleted file mode 100644
index a4420385..00000000
--- a/openai/types/beta/assistant_tool.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated
-
-from ..._utils import PropertyInfo
-from .function_tool import FunctionTool
-from .retrieval_tool import RetrievalTool
-from .code_interpreter_tool import CodeInterpreterTool
-
-__all__ = ["AssistantTool"]
-
-AssistantTool = Annotated[Union[CodeInterpreterTool, RetrievalTool, FunctionTool], PropertyInfo(discriminator="type")]
diff --git a/openai/types/beta/assistant_tool_param.py b/openai/types/beta/assistant_tool_param.py
deleted file mode 100644
index d5758f16..00000000
--- a/openai/types/beta/assistant_tool_param.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-
-from .function_tool_param import FunctionToolParam
-from .retrieval_tool_param import RetrievalToolParam
-from .code_interpreter_tool_param import CodeInterpreterToolParam
-
-__all__ = ["AssistantToolParam"]
-
-AssistantToolParam = Union[CodeInterpreterToolParam, RetrievalToolParam, FunctionToolParam]
diff --git a/openai/types/beta/assistant_update_params.py b/openai/types/beta/assistant_update_params.py
deleted file mode 100644
index 7c96aca8..00000000
--- a/openai/types/beta/assistant_update_params.py
+++ /dev/null
@@ -1,58 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Iterable, Optional
-from typing_extensions import TypedDict
-
-from .assistant_tool_param import AssistantToolParam
-
-__all__ = ["AssistantUpdateParams"]
-
-
-class AssistantUpdateParams(TypedDict, total=False):
-    description: Optional[str]
-    """The description of the assistant. The maximum length is 512 characters."""
-
-    file_ids: List[str]
-    """
-    A list of [File](https://platform.openai.com/docs/api-reference/files) IDs
-    attached to this assistant. There can be a maximum of 20 files attached to the
-    assistant. Files are ordered by their creation date in ascending order. If a
-    file was previously attached to the list but does not show up in the list, it
-    will be deleted from the assistant.
-    """
-
-    instructions: Optional[str]
-    """The system instructions that the assistant uses.
-
-    The maximum length is 32768 characters.
-    """
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-    model: str
-    """ID of the model to use.
-
-    You can use the
-    [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-    see all of your available models, or see our
-    [Model overview](https://platform.openai.com/docs/models/overview) for
-    descriptions of them.
-    """
-
-    name: Optional[str]
-    """The name of the assistant. The maximum length is 256 characters."""
-
-    tools: Iterable[AssistantToolParam]
-    """A list of tool enabled on the assistant.
-
-    There can be a maximum of 128 tools per assistant. Tools can be of types
-    `code_interpreter`, `retrieval`, or `function`.
-    """
diff --git a/openai/types/beta/assistants/__init__.py b/openai/types/beta/assistants/__init__.py
deleted file mode 100644
index d4dd2de0..00000000
--- a/openai/types/beta/assistants/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .assistant_file import AssistantFile as AssistantFile
-from .file_list_params import FileListParams as FileListParams
-from .file_create_params import FileCreateParams as FileCreateParams
-from .file_delete_response import FileDeleteResponse as FileDeleteResponse
diff --git a/openai/types/beta/assistants/assistant_file.py b/openai/types/beta/assistants/assistant_file.py
deleted file mode 100644
index 25aec07b..00000000
--- a/openai/types/beta/assistants/assistant_file.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-
-__all__ = ["AssistantFile"]
-
-
-class AssistantFile(BaseModel):
-    id: str
-    """The identifier, which can be referenced in API endpoints."""
-
-    assistant_id: str
-    """The assistant ID that the file is attached to."""
-
-    created_at: int
-    """The Unix timestamp (in seconds) for when the assistant file was created."""
-
-    object: Literal["assistant.file"]
-    """The object type, which is always `assistant.file`."""
diff --git a/openai/types/beta/assistants/file_create_params.py b/openai/types/beta/assistants/file_create_params.py
deleted file mode 100644
index 55f0e8cd..00000000
--- a/openai/types/beta/assistants/file_create_params.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-__all__ = ["FileCreateParams"]
-
-
-class FileCreateParams(TypedDict, total=False):
-    file_id: Required[str]
-    """
-    A [File](https://platform.openai.com/docs/api-reference/files) ID (with
-    `purpose="assistants"`) that the assistant should use. Useful for tools like
-    `retrieval` and `code_interpreter` that can access files.
-    """
diff --git a/openai/types/beta/assistants/file_delete_response.py b/openai/types/beta/assistants/file_delete_response.py
deleted file mode 100644
index 685fb2a7..00000000
--- a/openai/types/beta/assistants/file_delete_response.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-
-__all__ = ["FileDeleteResponse"]
-
-
-class FileDeleteResponse(BaseModel):
-    id: str
-
-    deleted: bool
-
-    object: Literal["assistant.file.deleted"]
diff --git a/openai/types/beta/assistants/file_list_params.py b/openai/types/beta/assistants/file_list_params.py
deleted file mode 100644
index 53c493b3..00000000
--- a/openai/types/beta/assistants/file_list_params.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, TypedDict
-
-__all__ = ["FileListParams"]
-
-
-class FileListParams(TypedDict, total=False):
-    after: str
-    """A cursor for use in pagination.
-
-    `after` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include after=obj_foo in order to fetch the next page of the
-    list.
-    """
-
-    before: str
-    """A cursor for use in pagination.
-
-    `before` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include before=obj_foo in order to fetch the previous page
-    of the list.
-    """
-
-    limit: int
-    """A limit on the number of objects to be returned.
-
-    Limit can range between 1 and 100, and the default is 20.
-    """
-
-    order: Literal["asc", "desc"]
-    """Sort order by the `created_at` timestamp of the objects.
-
-    `asc` for ascending order and `desc` for descending order.
-    """
diff --git a/openai/types/beta/chat/__init__.py b/openai/types/beta/chat/__init__.py
deleted file mode 100644
index f8ee8b14..00000000
--- a/openai/types/beta/chat/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
diff --git a/openai/types/beta/code_interpreter_tool.py b/openai/types/beta/code_interpreter_tool.py
deleted file mode 100644
index 17ab3de6..00000000
--- a/openai/types/beta/code_interpreter_tool.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["CodeInterpreterTool"]
-
-
-class CodeInterpreterTool(BaseModel):
-    type: Literal["code_interpreter"]
-    """The type of tool being defined: `code_interpreter`"""
diff --git a/openai/types/beta/code_interpreter_tool_param.py b/openai/types/beta/code_interpreter_tool_param.py
deleted file mode 100644
index 4f6916d7..00000000
--- a/openai/types/beta/code_interpreter_tool_param.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["CodeInterpreterToolParam"]
-
-
-class CodeInterpreterToolParam(TypedDict, total=False):
-    type: Required[Literal["code_interpreter"]]
-    """The type of tool being defined: `code_interpreter`"""
diff --git a/openai/types/beta/function_tool.py b/openai/types/beta/function_tool.py
deleted file mode 100644
index 5d278e74..00000000
--- a/openai/types/beta/function_tool.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..shared import FunctionDefinition
-from ..._models import BaseModel
-
-__all__ = ["FunctionTool"]
-
-
-class FunctionTool(BaseModel):
-    function: FunctionDefinition
-
-    type: Literal["function"]
-    """The type of tool being defined: `function`"""
diff --git a/openai/types/beta/function_tool_param.py b/openai/types/beta/function_tool_param.py
deleted file mode 100644
index b44c0d47..00000000
--- a/openai/types/beta/function_tool_param.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-from ...types import shared_params
-
-__all__ = ["FunctionToolParam"]
-
-
-class FunctionToolParam(TypedDict, total=False):
-    function: Required[shared_params.FunctionDefinition]
-
-    type: Required[Literal["function"]]
-    """The type of tool being defined: `function`"""
diff --git a/openai/types/beta/retrieval_tool.py b/openai/types/beta/retrieval_tool.py
deleted file mode 100644
index b07b785c..00000000
--- a/openai/types/beta/retrieval_tool.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["RetrievalTool"]
-
-
-class RetrievalTool(BaseModel):
-    type: Literal["retrieval"]
-    """The type of tool being defined: `retrieval`"""
diff --git a/openai/types/beta/retrieval_tool_param.py b/openai/types/beta/retrieval_tool_param.py
deleted file mode 100644
index d76c0bee..00000000
--- a/openai/types/beta/retrieval_tool_param.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["RetrievalToolParam"]
-
-
-class RetrievalToolParam(TypedDict, total=False):
-    type: Required[Literal["retrieval"]]
-    """The type of tool being defined: `retrieval`"""
diff --git a/openai/types/beta/thread.py b/openai/types/beta/thread.py
deleted file mode 100644
index 8fd14230..00000000
--- a/openai/types/beta/thread.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["Thread"]
-
-
-class Thread(BaseModel):
-    id: str
-    """The identifier, which can be referenced in API endpoints."""
-
-    created_at: int
-    """The Unix timestamp (in seconds) for when the thread was created."""
-
-    metadata: Optional[object] = None
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-    object: Literal["thread"]
-    """The object type, which is always `thread`."""
diff --git a/openai/types/beta/thread_create_and_run_params.py b/openai/types/beta/thread_create_and_run_params.py
deleted file mode 100644
index d4266fc4..00000000
--- a/openai/types/beta/thread_create_and_run_params.py
+++ /dev/null
@@ -1,136 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-from .function_tool_param import FunctionToolParam
-from .retrieval_tool_param import RetrievalToolParam
-from .code_interpreter_tool_param import CodeInterpreterToolParam
-
-__all__ = [
-    "ThreadCreateAndRunParamsBase",
-    "Thread",
-    "ThreadMessage",
-    "Tool",
-    "ThreadCreateAndRunParamsNonStreaming",
-    "ThreadCreateAndRunParamsStreaming",
-]
-
-
-class ThreadCreateAndRunParamsBase(TypedDict, total=False):
-    assistant_id: Required[str]
-    """
-    The ID of the
-    [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-    execute this run.
-    """
-
-    instructions: Optional[str]
-    """Override the default system message of the assistant.
-
-    This is useful for modifying the behavior on a per-run basis.
-    """
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-    model: Optional[str]
-    """
-    The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-    be used to execute this run. If a value is provided here, it will override the
-    model associated with the assistant. If not, the model associated with the
-    assistant will be used.
-    """
-
-    temperature: Optional[float]
-    """What sampling temperature to use, between 0 and 2.
-
-    Higher values like 0.8 will make the output more random, while lower values like
-    0.2 will make it more focused and deterministic.
-    """
-
-    thread: Thread
-    """If no thread is provided, an empty thread will be created."""
-
-    tools: Optional[Iterable[Tool]]
-    """Override the tools the assistant can use for this run.
-
-    This is useful for modifying the behavior on a per-run basis.
-    """
-
-
-class ThreadMessage(TypedDict, total=False):
-    content: Required[str]
-    """The content of the message."""
-
-    role: Required[Literal["user", "assistant"]]
-    """The role of the entity that is creating the message. Allowed values include:
-
-    - `user`: Indicates the message is sent by an actual user and should be used in
-      most cases to represent user-generated messages.
-    - `assistant`: Indicates the message is generated by the assistant. Use this
-      value to insert messages from the assistant into the conversation.
-    """
-
-    file_ids: List[str]
-    """
-    A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-    the message should use. There can be a maximum of 10 files attached to a
-    message. Useful for tools like `retrieval` and `code_interpreter` that can
-    access and use files.
-    """
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-
-class Thread(TypedDict, total=False):
-    messages: Iterable[ThreadMessage]
-    """
-    A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
-    start the thread with.
-    """
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-
-Tool = Union[CodeInterpreterToolParam, RetrievalToolParam, FunctionToolParam]
-
-
-class ThreadCreateAndRunParamsNonStreaming(ThreadCreateAndRunParamsBase):
-    stream: Optional[Literal[False]]
-    """
-    If `true`, returns a stream of events that happen during the Run as server-sent
-    events, terminating when the Run enters a terminal state with a `data: [DONE]`
-    message.
-    """
-
-
-class ThreadCreateAndRunParamsStreaming(ThreadCreateAndRunParamsBase):
-    stream: Required[Literal[True]]
-    """
-    If `true`, returns a stream of events that happen during the Run as server-sent
-    events, terminating when the Run enters a terminal state with a `data: [DONE]`
-    message.
-    """
-
-
-ThreadCreateAndRunParams = Union[ThreadCreateAndRunParamsNonStreaming, ThreadCreateAndRunParamsStreaming]
diff --git a/openai/types/beta/thread_create_params.py b/openai/types/beta/thread_create_params.py
deleted file mode 100644
index 1b382186..00000000
--- a/openai/types/beta/thread_create_params.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Iterable, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ThreadCreateParams", "Message"]
-
-
-class ThreadCreateParams(TypedDict, total=False):
-    messages: Iterable[Message]
-    """
-    A list of [messages](https://platform.openai.com/docs/api-reference/messages) to
-    start the thread with.
-    """
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-
-class Message(TypedDict, total=False):
-    content: Required[str]
-    """The content of the message."""
-
-    role: Required[Literal["user", "assistant"]]
-    """The role of the entity that is creating the message. Allowed values include:
-
-    - `user`: Indicates the message is sent by an actual user and should be used in
-      most cases to represent user-generated messages.
-    - `assistant`: Indicates the message is generated by the assistant. Use this
-      value to insert messages from the assistant into the conversation.
-    """
-
-    file_ids: List[str]
-    """
-    A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-    the message should use. There can be a maximum of 10 files attached to a
-    message. Useful for tools like `retrieval` and `code_interpreter` that can
-    access and use files.
-    """
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
diff --git a/openai/types/beta/thread_deleted.py b/openai/types/beta/thread_deleted.py
deleted file mode 100644
index d3856263..00000000
--- a/openai/types/beta/thread_deleted.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["ThreadDeleted"]
-
-
-class ThreadDeleted(BaseModel):
-    id: str
-
-    deleted: bool
-
-    object: Literal["thread.deleted"]
diff --git a/openai/types/beta/thread_update_params.py b/openai/types/beta/thread_update_params.py
deleted file mode 100644
index 94f1b1e2..00000000
--- a/openai/types/beta/thread_update_params.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Optional
-from typing_extensions import TypedDict
-
-__all__ = ["ThreadUpdateParams"]
-
-
-class ThreadUpdateParams(TypedDict, total=False):
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
diff --git a/openai/types/beta/threads/__init__.py b/openai/types/beta/threads/__init__.py
deleted file mode 100644
index b57ebccb..00000000
--- a/openai/types/beta/threads/__init__.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .run import Run as Run
-from .text import Text as Text
-from .message import Message as Message
-from .annotation import Annotation as Annotation
-from .image_file import ImageFile as ImageFile
-from .run_status import RunStatus as RunStatus
-from .text_delta import TextDelta as TextDelta
-from .message_delta import MessageDelta as MessageDelta
-from .message_content import MessageContent as MessageContent
-from .run_list_params import RunListParams as RunListParams
-from .annotation_delta import AnnotationDelta as AnnotationDelta
-from .image_file_delta import ImageFileDelta as ImageFileDelta
-from .text_delta_block import TextDeltaBlock as TextDeltaBlock
-from .run_create_params import RunCreateParams as RunCreateParams
-from .run_update_params import RunUpdateParams as RunUpdateParams
-from .text_content_block import TextContentBlock as TextContentBlock
-from .message_delta_event import MessageDeltaEvent as MessageDeltaEvent
-from .message_list_params import MessageListParams as MessageListParams
-from .file_path_annotation import FilePathAnnotation as FilePathAnnotation
-from .message_content_delta import MessageContentDelta as MessageContentDelta
-from .message_create_params import MessageCreateParams as MessageCreateParams
-from .message_update_params import MessageUpdateParams as MessageUpdateParams
-from .image_file_delta_block import ImageFileDeltaBlock as ImageFileDeltaBlock
-from .file_citation_annotation import FileCitationAnnotation as FileCitationAnnotation
-from .image_file_content_block import ImageFileContentBlock as ImageFileContentBlock
-from .file_path_delta_annotation import FilePathDeltaAnnotation as FilePathDeltaAnnotation
-from .file_citation_delta_annotation import FileCitationDeltaAnnotation as FileCitationDeltaAnnotation
-from .run_submit_tool_outputs_params import RunSubmitToolOutputsParams as RunSubmitToolOutputsParams
-from .required_action_function_tool_call import RequiredActionFunctionToolCall as RequiredActionFunctionToolCall
diff --git a/openai/types/beta/threads/annotation.py b/openai/types/beta/threads/annotation.py
deleted file mode 100644
index 31e228c8..00000000
--- a/openai/types/beta/threads/annotation.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated
-
-from ...._utils import PropertyInfo
-from .file_path_annotation import FilePathAnnotation
-from .file_citation_annotation import FileCitationAnnotation
-
-__all__ = ["Annotation"]
-
-Annotation = Annotated[Union[FileCitationAnnotation, FilePathAnnotation], PropertyInfo(discriminator="type")]
diff --git a/openai/types/beta/threads/annotation_delta.py b/openai/types/beta/threads/annotation_delta.py
deleted file mode 100644
index 91242967..00000000
--- a/openai/types/beta/threads/annotation_delta.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated
-
-from ...._utils import PropertyInfo
-from .file_path_delta_annotation import FilePathDeltaAnnotation
-from .file_citation_delta_annotation import FileCitationDeltaAnnotation
-
-__all__ = ["AnnotationDelta"]
-
-AnnotationDelta = Annotated[
-    Union[FileCitationDeltaAnnotation, FilePathDeltaAnnotation], PropertyInfo(discriminator="type")
-]
diff --git a/openai/types/beta/threads/file_citation_annotation.py b/openai/types/beta/threads/file_citation_annotation.py
deleted file mode 100644
index 68571cd4..00000000
--- a/openai/types/beta/threads/file_citation_annotation.py
+++ /dev/null
@@ -1,29 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-
-__all__ = ["FileCitationAnnotation", "FileCitation"]
-
-
-class FileCitation(BaseModel):
-    file_id: str
-    """The ID of the specific File the citation is from."""
-
-    quote: str
-    """The specific quote in the file."""
-
-
-class FileCitationAnnotation(BaseModel):
-    end_index: int
-
-    file_citation: FileCitation
-
-    start_index: int
-
-    text: str
-    """The text in the message content that needs to be replaced."""
-
-    type: Literal["file_citation"]
-    """Always `file_citation`."""
diff --git a/openai/types/beta/threads/file_citation_delta_annotation.py b/openai/types/beta/threads/file_citation_delta_annotation.py
deleted file mode 100644
index b40c0d12..00000000
--- a/openai/types/beta/threads/file_citation_delta_annotation.py
+++ /dev/null
@@ -1,33 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-
-__all__ = ["FileCitationDeltaAnnotation", "FileCitation"]
-
-
-class FileCitation(BaseModel):
-    file_id: Optional[str] = None
-    """The ID of the specific File the citation is from."""
-
-    quote: Optional[str] = None
-    """The specific quote in the file."""
-
-
-class FileCitationDeltaAnnotation(BaseModel):
-    index: int
-    """The index of the annotation in the text content part."""
-
-    type: Literal["file_citation"]
-    """Always `file_citation`."""
-
-    end_index: Optional[int] = None
-
-    file_citation: Optional[FileCitation] = None
-
-    start_index: Optional[int] = None
-
-    text: Optional[str] = None
-    """The text in the message content that needs to be replaced."""
diff --git a/openai/types/beta/threads/file_path_annotation.py b/openai/types/beta/threads/file_path_annotation.py
deleted file mode 100644
index 9812737e..00000000
--- a/openai/types/beta/threads/file_path_annotation.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-
-__all__ = ["FilePathAnnotation", "FilePath"]
-
-
-class FilePath(BaseModel):
-    file_id: str
-    """The ID of the file that was generated."""
-
-
-class FilePathAnnotation(BaseModel):
-    end_index: int
-
-    file_path: FilePath
-
-    start_index: int
-
-    text: str
-    """The text in the message content that needs to be replaced."""
-
-    type: Literal["file_path"]
-    """Always `file_path`."""
diff --git a/openai/types/beta/threads/file_path_delta_annotation.py b/openai/types/beta/threads/file_path_delta_annotation.py
deleted file mode 100644
index 0cbb445e..00000000
--- a/openai/types/beta/threads/file_path_delta_annotation.py
+++ /dev/null
@@ -1,30 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-
-__all__ = ["FilePathDeltaAnnotation", "FilePath"]
-
-
-class FilePath(BaseModel):
-    file_id: Optional[str] = None
-    """The ID of the file that was generated."""
-
-
-class FilePathDeltaAnnotation(BaseModel):
-    index: int
-    """The index of the annotation in the text content part."""
-
-    type: Literal["file_path"]
-    """Always `file_path`."""
-
-    end_index: Optional[int] = None
-
-    file_path: Optional[FilePath] = None
-
-    start_index: Optional[int] = None
-
-    text: Optional[str] = None
-    """The text in the message content that needs to be replaced."""
diff --git a/openai/types/beta/threads/image_file.py b/openai/types/beta/threads/image_file.py
deleted file mode 100644
index db0d6e82..00000000
--- a/openai/types/beta/threads/image_file.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from ...._models import BaseModel
-
-__all__ = ["ImageFile"]
-
-
-class ImageFile(BaseModel):
-    file_id: str
-    """
-    The [File](https://platform.openai.com/docs/api-reference/files) ID of the image
-    in the message content.
-    """
diff --git a/openai/types/beta/threads/image_file_content_block.py b/openai/types/beta/threads/image_file_content_block.py
deleted file mode 100644
index a9099990..00000000
--- a/openai/types/beta/threads/image_file_content_block.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-from .image_file import ImageFile
-
-__all__ = ["ImageFileContentBlock"]
-
-
-class ImageFileContentBlock(BaseModel):
-    image_file: ImageFile
-
-    type: Literal["image_file"]
-    """Always `image_file`."""
diff --git a/openai/types/beta/threads/image_file_delta.py b/openai/types/beta/threads/image_file_delta.py
deleted file mode 100644
index b0b1d32f..00000000
--- a/openai/types/beta/threads/image_file_delta.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-
-from ...._models import BaseModel
-
-__all__ = ["ImageFileDelta"]
-
-
-class ImageFileDelta(BaseModel):
-    file_id: Optional[str] = None
-    """
-    The [File](https://platform.openai.com/docs/api-reference/files) ID of the image
-    in the message content.
-    """
diff --git a/openai/types/beta/threads/image_file_delta_block.py b/openai/types/beta/threads/image_file_delta_block.py
deleted file mode 100644
index 0a5a2e8a..00000000
--- a/openai/types/beta/threads/image_file_delta_block.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-from .image_file_delta import ImageFileDelta
-
-__all__ = ["ImageFileDeltaBlock"]
-
-
-class ImageFileDeltaBlock(BaseModel):
-    index: int
-    """The index of the content part in the message."""
-
-    type: Literal["image_file"]
-    """Always `image_file`."""
-
-    image_file: Optional[ImageFileDelta] = None
diff --git a/openai/types/beta/threads/message.py b/openai/types/beta/threads/message.py
deleted file mode 100644
index bde02639..00000000
--- a/openai/types/beta/threads/message.py
+++ /dev/null
@@ -1,81 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-from .message_content import MessageContent
-
-__all__ = ["Message", "IncompleteDetails"]
-
-
-class IncompleteDetails(BaseModel):
-    reason: Literal["content_filter", "max_tokens", "run_cancelled", "run_expired", "run_failed"]
-    """The reason the message is incomplete."""
-
-
-class Message(BaseModel):
-    id: str
-    """The identifier, which can be referenced in API endpoints."""
-
-    assistant_id: Optional[str] = None
-    """
-    If applicable, the ID of the
-    [assistant](https://platform.openai.com/docs/api-reference/assistants) that
-    authored this message.
-    """
-
-    completed_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the message was completed."""
-
-    content: List[MessageContent]
-    """The content of the message in array of text and/or images."""
-
-    created_at: int
-    """The Unix timestamp (in seconds) for when the message was created."""
-
-    file_ids: List[str]
-    """
-    A list of [file](https://platform.openai.com/docs/api-reference/files) IDs that
-    the assistant should use. Useful for tools like retrieval and code_interpreter
-    that can access files. A maximum of 10 files can be attached to a message.
-    """
-
-    incomplete_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the message was marked as incomplete."""
-
-    incomplete_details: Optional[IncompleteDetails] = None
-    """On an incomplete message, details about why the message is incomplete."""
-
-    metadata: Optional[object] = None
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-    object: Literal["thread.message"]
-    """The object type, which is always `thread.message`."""
-
-    role: Literal["user", "assistant"]
-    """The entity that produced the message. One of `user` or `assistant`."""
-
-    run_id: Optional[str] = None
-    """
-    The ID of the [run](https://platform.openai.com/docs/api-reference/runs)
-    associated with the creation of this message. Value is `null` when messages are
-    created manually using the create message or create thread endpoints.
-    """
-
-    status: Literal["in_progress", "incomplete", "completed"]
-    """
-    The status of the message, which can be either `in_progress`, `incomplete`, or
-    `completed`.
-    """
-
-    thread_id: str
-    """
-    The [thread](https://platform.openai.com/docs/api-reference/threads) ID that
-    this message belongs to.
-    """
diff --git a/openai/types/beta/threads/message_content.py b/openai/types/beta/threads/message_content.py
deleted file mode 100644
index bc79b39f..00000000
--- a/openai/types/beta/threads/message_content.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated
-
-from ...._utils import PropertyInfo
-from .text_content_block import TextContentBlock
-from .image_file_content_block import ImageFileContentBlock
-
-__all__ = ["MessageContent"]
-
-MessageContent = Annotated[Union[ImageFileContentBlock, TextContentBlock], PropertyInfo(discriminator="type")]
diff --git a/openai/types/beta/threads/message_content_delta.py b/openai/types/beta/threads/message_content_delta.py
deleted file mode 100644
index 3cbc22c9..00000000
--- a/openai/types/beta/threads/message_content_delta.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated
-
-from ...._utils import PropertyInfo
-from .text_delta_block import TextDeltaBlock
-from .image_file_delta_block import ImageFileDeltaBlock
-
-__all__ = ["MessageContentDelta"]
-
-MessageContentDelta = Annotated[Union[ImageFileDeltaBlock, TextDeltaBlock], PropertyInfo(discriminator="type")]
diff --git a/openai/types/beta/threads/message_create_params.py b/openai/types/beta/threads/message_create_params.py
deleted file mode 100644
index 9b9467ef..00000000
--- a/openai/types/beta/threads/message_create_params.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["MessageCreateParams"]
-
-
-class MessageCreateParams(TypedDict, total=False):
-    content: Required[str]
-    """The content of the message."""
-
-    role: Required[Literal["user", "assistant"]]
-    """The role of the entity that is creating the message. Allowed values include:
-
-    - `user`: Indicates the message is sent by an actual user and should be used in
-      most cases to represent user-generated messages.
-    - `assistant`: Indicates the message is generated by the assistant. Use this
-      value to insert messages from the assistant into the conversation.
-    """
-
-    file_ids: List[str]
-    """
-    A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
-    the message should use. There can be a maximum of 10 files attached to a
-    message. Useful for tools like `retrieval` and `code_interpreter` that can
-    access and use files.
-    """
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
diff --git a/openai/types/beta/threads/message_delta.py b/openai/types/beta/threads/message_delta.py
deleted file mode 100644
index 3a55e144..00000000
--- a/openai/types/beta/threads/message_delta.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-from .message_content_delta import MessageContentDelta
-
-__all__ = ["MessageDelta"]
-
-
-class MessageDelta(BaseModel):
-    content: Optional[List[MessageContentDelta]] = None
-    """The content of the message in array of text and/or images."""
-
-    file_ids: Optional[List[str]] = None
-    """
-    A list of [file](https://platform.openai.com/docs/api-reference/files) IDs that
-    the assistant should use. Useful for tools like retrieval and code_interpreter
-    that can access files. A maximum of 10 files can be attached to a message.
-    """
-
-    role: Optional[Literal["user", "assistant"]] = None
-    """The entity that produced the message. One of `user` or `assistant`."""
diff --git a/openai/types/beta/threads/message_delta_event.py b/openai/types/beta/threads/message_delta_event.py
deleted file mode 100644
index 3811cef6..00000000
--- a/openai/types/beta/threads/message_delta_event.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-from .message_delta import MessageDelta
-
-__all__ = ["MessageDeltaEvent"]
-
-
-class MessageDeltaEvent(BaseModel):
-    id: str
-    """The identifier of the message, which can be referenced in API endpoints."""
-
-    delta: MessageDelta
-    """The delta containing the fields that have changed on the Message."""
-
-    object: Literal["thread.message.delta"]
-    """The object type, which is always `thread.message.delta`."""
diff --git a/openai/types/beta/threads/message_list_params.py b/openai/types/beta/threads/message_list_params.py
deleted file mode 100644
index 18c2442f..00000000
--- a/openai/types/beta/threads/message_list_params.py
+++ /dev/null
@@ -1,42 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, TypedDict
-
-__all__ = ["MessageListParams"]
-
-
-class MessageListParams(TypedDict, total=False):
-    after: str
-    """A cursor for use in pagination.
-
-    `after` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include after=obj_foo in order to fetch the next page of the
-    list.
-    """
-
-    before: str
-    """A cursor for use in pagination.
-
-    `before` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include before=obj_foo in order to fetch the previous page
-    of the list.
-    """
-
-    limit: int
-    """A limit on the number of objects to be returned.
-
-    Limit can range between 1 and 100, and the default is 20.
-    """
-
-    order: Literal["asc", "desc"]
-    """Sort order by the `created_at` timestamp of the objects.
-
-    `asc` for ascending order and `desc` for descending order.
-    """
-
-    run_id: str
-    """Filter messages by the run ID that generated them."""
diff --git a/openai/types/beta/threads/message_update_params.py b/openai/types/beta/threads/message_update_params.py
deleted file mode 100644
index 7000f331..00000000
--- a/openai/types/beta/threads/message_update_params.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Optional
-from typing_extensions import Required, TypedDict
-
-__all__ = ["MessageUpdateParams"]
-
-
-class MessageUpdateParams(TypedDict, total=False):
-    thread_id: Required[str]
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
diff --git a/openai/types/beta/threads/messages/__init__.py b/openai/types/beta/threads/messages/__init__.py
deleted file mode 100644
index d1292976..00000000
--- a/openai/types/beta/threads/messages/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .message_file import MessageFile as MessageFile
-from .file_list_params import FileListParams as FileListParams
diff --git a/openai/types/beta/threads/messages/file_list_params.py b/openai/types/beta/threads/messages/file_list_params.py
deleted file mode 100644
index 7e2d6136..00000000
--- a/openai/types/beta/threads/messages/file_list_params.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["FileListParams"]
-
-
-class FileListParams(TypedDict, total=False):
-    thread_id: Required[str]
-
-    after: str
-    """A cursor for use in pagination.
-
-    `after` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include after=obj_foo in order to fetch the next page of the
-    list.
-    """
-
-    before: str
-    """A cursor for use in pagination.
-
-    `before` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include before=obj_foo in order to fetch the previous page
-    of the list.
-    """
-
-    limit: int
-    """A limit on the number of objects to be returned.
-
-    Limit can range between 1 and 100, and the default is 20.
-    """
-
-    order: Literal["asc", "desc"]
-    """Sort order by the `created_at` timestamp of the objects.
-
-    `asc` for ascending order and `desc` for descending order.
-    """
diff --git a/openai/types/beta/threads/messages/message_file.py b/openai/types/beta/threads/messages/message_file.py
deleted file mode 100644
index 342479ab..00000000
--- a/openai/types/beta/threads/messages/message_file.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-
-__all__ = ["MessageFile"]
-
-
-class MessageFile(BaseModel):
-    id: str
-    """The identifier, which can be referenced in API endpoints."""
-
-    created_at: int
-    """The Unix timestamp (in seconds) for when the message file was created."""
-
-    message_id: str
-    """
-    The ID of the [message](https://platform.openai.com/docs/api-reference/messages)
-    that the [File](https://platform.openai.com/docs/api-reference/files) is
-    attached to.
-    """
-
-    object: Literal["thread.message.file"]
-    """The object type, which is always `thread.message.file`."""
diff --git a/openai/types/beta/threads/required_action_function_tool_call.py b/openai/types/beta/threads/required_action_function_tool_call.py
deleted file mode 100644
index a24dfd06..00000000
--- a/openai/types/beta/threads/required_action_function_tool_call.py
+++ /dev/null
@@ -1,34 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-
-__all__ = ["RequiredActionFunctionToolCall", "Function"]
-
-
-class Function(BaseModel):
-    arguments: str
-    """The arguments that the model expects you to pass to the function."""
-
-    name: str
-    """The name of the function."""
-
-
-class RequiredActionFunctionToolCall(BaseModel):
-    id: str
-    """The ID of the tool call.
-
-    This ID must be referenced when you submit the tool outputs in using the
-    [Submit tool outputs to run](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs)
-    endpoint.
-    """
-
-    function: Function
-    """The function definition."""
-
-    type: Literal["function"]
-    """The type of tool call the output is required for.
-
-    For now, this is always `function`.
-    """
diff --git a/openai/types/beta/threads/run.py b/openai/types/beta/threads/run.py
deleted file mode 100644
index 3ab27624..00000000
--- a/openai/types/beta/threads/run.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-from .run_status import RunStatus
-from ..assistant_tool import AssistantTool
-from .required_action_function_tool_call import RequiredActionFunctionToolCall
-
-__all__ = ["Run", "LastError", "RequiredAction", "RequiredActionSubmitToolOutputs", "Usage"]
-
-
-class LastError(BaseModel):
-    code: Literal["server_error", "rate_limit_exceeded", "invalid_prompt"]
-    """One of `server_error`, `rate_limit_exceeded`, or `invalid_prompt`."""
-
-    message: str
-    """A human-readable description of the error."""
-
-
-class RequiredActionSubmitToolOutputs(BaseModel):
-    tool_calls: List[RequiredActionFunctionToolCall]
-    """A list of the relevant tool calls."""
-
-
-class RequiredAction(BaseModel):
-    submit_tool_outputs: RequiredActionSubmitToolOutputs
-    """Details on the tool outputs needed for this run to continue."""
-
-    type: Literal["submit_tool_outputs"]
-    """For now, this is always `submit_tool_outputs`."""
-
-
-class Usage(BaseModel):
-    completion_tokens: int
-    """Number of completion tokens used over the course of the run."""
-
-    prompt_tokens: int
-    """Number of prompt tokens used over the course of the run."""
-
-    total_tokens: int
-    """Total number of tokens used (prompt + completion)."""
-
-
-class Run(BaseModel):
-    id: str
-    """The identifier, which can be referenced in API endpoints."""
-
-    assistant_id: str
-    """
-    The ID of the
-    [assistant](https://platform.openai.com/docs/api-reference/assistants) used for
-    execution of this run.
-    """
-
-    cancelled_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the run was cancelled."""
-
-    completed_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the run was completed."""
-
-    created_at: int
-    """The Unix timestamp (in seconds) for when the run was created."""
-
-    expires_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the run will expire."""
-
-    failed_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the run failed."""
-
-    file_ids: List[str]
-    """
-    The list of [File](https://platform.openai.com/docs/api-reference/files) IDs the
-    [assistant](https://platform.openai.com/docs/api-reference/assistants) used for
-    this run.
-    """
-
-    instructions: str
-    """
-    The instructions that the
-    [assistant](https://platform.openai.com/docs/api-reference/assistants) used for
-    this run.
-    """
-
-    last_error: Optional[LastError] = None
-    """The last error associated with this run. Will be `null` if there are no errors."""
-
-    metadata: Optional[object] = None
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-    model: str
-    """
-    The model that the
-    [assistant](https://platform.openai.com/docs/api-reference/assistants) used for
-    this run.
-    """
-
-    object: Literal["thread.run"]
-    """The object type, which is always `thread.run`."""
-
-    required_action: Optional[RequiredAction] = None
-    """Details on the action required to continue the run.
-
-    Will be `null` if no action is required.
-    """
-
-    started_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the run was started."""
-
-    status: RunStatus
-    """
-    The status of the run, which can be either `queued`, `in_progress`,
-    `requires_action`, `cancelling`, `cancelled`, `failed`, `completed`, or
-    `expired`.
-    """
-
-    thread_id: str
-    """
-    The ID of the [thread](https://platform.openai.com/docs/api-reference/threads)
-    that was executed on as a part of this run.
-    """
-
-    tools: List[AssistantTool]
-    """
-    The list of tools that the
-    [assistant](https://platform.openai.com/docs/api-reference/assistants) used for
-    this run.
-    """
-
-    usage: Optional[Usage] = None
-    """Usage statistics related to the run.
-
-    This value will be `null` if the run is not in a terminal state (i.e.
-    `in_progress`, `queued`, etc.).
-    """
-
-    temperature: Optional[float] = None
-    """The sampling temperature used for this run. If not set, defaults to 1."""
diff --git a/openai/types/beta/threads/run_create_params.py b/openai/types/beta/threads/run_create_params.py
deleted file mode 100644
index ac185973..00000000
--- a/openai/types/beta/threads/run_create_params.py
+++ /dev/null
@@ -1,83 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-from ..assistant_tool_param import AssistantToolParam
-
-__all__ = ["RunCreateParamsBase", "RunCreateParamsNonStreaming", "RunCreateParamsStreaming"]
-
-
-class RunCreateParamsBase(TypedDict, total=False):
-    assistant_id: Required[str]
-    """
-    The ID of the
-    [assistant](https://platform.openai.com/docs/api-reference/assistants) to use to
-    execute this run.
-    """
-
-    additional_instructions: Optional[str]
-    """Appends additional instructions at the end of the instructions for the run.
-
-    This is useful for modifying the behavior on a per-run basis without overriding
-    other instructions.
-    """
-
-    instructions: Optional[str]
-    """
-    Overrides the
-    [instructions](https://platform.openai.com/docs/api-reference/assistants/createAssistant)
-    of the assistant. This is useful for modifying the behavior on a per-run basis.
-    """
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-    model: Optional[str]
-    """
-    The ID of the [Model](https://platform.openai.com/docs/api-reference/models) to
-    be used to execute this run. If a value is provided here, it will override the
-    model associated with the assistant. If not, the model associated with the
-    assistant will be used.
-    """
-
-    temperature: Optional[float]
-    """What sampling temperature to use, between 0 and 2.
-
-    Higher values like 0.8 will make the output more random, while lower values like
-    0.2 will make it more focused and deterministic.
-    """
-
-    tools: Optional[Iterable[AssistantToolParam]]
-    """Override the tools the assistant can use for this run.
-
-    This is useful for modifying the behavior on a per-run basis.
-    """
-
-
-class RunCreateParamsNonStreaming(RunCreateParamsBase):
-    stream: Optional[Literal[False]]
-    """
-    If `true`, returns a stream of events that happen during the Run as server-sent
-    events, terminating when the Run enters a terminal state with a `data: [DONE]`
-    message.
-    """
-
-
-class RunCreateParamsStreaming(RunCreateParamsBase):
-    stream: Required[Literal[True]]
-    """
-    If `true`, returns a stream of events that happen during the Run as server-sent
-    events, terminating when the Run enters a terminal state with a `data: [DONE]`
-    message.
-    """
-
-
-RunCreateParams = Union[RunCreateParamsNonStreaming, RunCreateParamsStreaming]
diff --git a/openai/types/beta/threads/run_list_params.py b/openai/types/beta/threads/run_list_params.py
deleted file mode 100644
index 1e32bca4..00000000
--- a/openai/types/beta/threads/run_list_params.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, TypedDict
-
-__all__ = ["RunListParams"]
-
-
-class RunListParams(TypedDict, total=False):
-    after: str
-    """A cursor for use in pagination.
-
-    `after` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include after=obj_foo in order to fetch the next page of the
-    list.
-    """
-
-    before: str
-    """A cursor for use in pagination.
-
-    `before` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include before=obj_foo in order to fetch the previous page
-    of the list.
-    """
-
-    limit: int
-    """A limit on the number of objects to be returned.
-
-    Limit can range between 1 and 100, and the default is 20.
-    """
-
-    order: Literal["asc", "desc"]
-    """Sort order by the `created_at` timestamp of the objects.
-
-    `asc` for ascending order and `desc` for descending order.
-    """
diff --git a/openai/types/beta/threads/run_status.py b/openai/types/beta/threads/run_status.py
deleted file mode 100644
index bf9b4e7b..00000000
--- a/openai/types/beta/threads/run_status.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-__all__ = ["RunStatus"]
-
-RunStatus = Literal[
-    "queued", "in_progress", "requires_action", "cancelling", "cancelled", "failed", "completed", "expired"
-]
diff --git a/openai/types/beta/threads/run_submit_tool_outputs_params.py b/openai/types/beta/threads/run_submit_tool_outputs_params.py
deleted file mode 100644
index ccb5e5e9..00000000
--- a/openai/types/beta/threads/run_submit_tool_outputs_params.py
+++ /dev/null
@@ -1,52 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = [
-    "RunSubmitToolOutputsParamsBase",
-    "ToolOutput",
-    "RunSubmitToolOutputsParamsNonStreaming",
-    "RunSubmitToolOutputsParamsStreaming",
-]
-
-
-class RunSubmitToolOutputsParamsBase(TypedDict, total=False):
-    thread_id: Required[str]
-
-    tool_outputs: Required[Iterable[ToolOutput]]
-    """A list of tools for which the outputs are being submitted."""
-
-
-class ToolOutput(TypedDict, total=False):
-    output: str
-    """The output of the tool call to be submitted to continue the run."""
-
-    tool_call_id: str
-    """
-    The ID of the tool call in the `required_action` object within the run object
-    the output is being submitted for.
-    """
-
-
-class RunSubmitToolOutputsParamsNonStreaming(RunSubmitToolOutputsParamsBase):
-    stream: Optional[Literal[False]]
-    """
-    If `true`, returns a stream of events that happen during the Run as server-sent
-    events, terminating when the Run enters a terminal state with a `data: [DONE]`
-    message.
-    """
-
-
-class RunSubmitToolOutputsParamsStreaming(RunSubmitToolOutputsParamsBase):
-    stream: Required[Literal[True]]
-    """
-    If `true`, returns a stream of events that happen during the Run as server-sent
-    events, terminating when the Run enters a terminal state with a `data: [DONE]`
-    message.
-    """
-
-
-RunSubmitToolOutputsParams = Union[RunSubmitToolOutputsParamsNonStreaming, RunSubmitToolOutputsParamsStreaming]
diff --git a/openai/types/beta/threads/run_update_params.py b/openai/types/beta/threads/run_update_params.py
deleted file mode 100644
index e595eac8..00000000
--- a/openai/types/beta/threads/run_update_params.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Optional
-from typing_extensions import Required, TypedDict
-
-__all__ = ["RunUpdateParams"]
-
-
-class RunUpdateParams(TypedDict, total=False):
-    thread_id: Required[str]
-
-    metadata: Optional[object]
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
diff --git a/openai/types/beta/threads/runs/__init__.py b/openai/types/beta/threads/runs/__init__.py
deleted file mode 100644
index 256510dc..00000000
--- a/openai/types/beta/threads/runs/__init__.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .run_step import RunStep as RunStep
-from .tool_call import ToolCall as ToolCall
-from .run_step_delta import RunStepDelta as RunStepDelta
-from .tool_call_delta import ToolCallDelta as ToolCallDelta
-from .step_list_params import StepListParams as StepListParams
-from .function_tool_call import FunctionToolCall as FunctionToolCall
-from .retrieval_tool_call import RetrievalToolCall as RetrievalToolCall
-from .run_step_delta_event import RunStepDeltaEvent as RunStepDeltaEvent
-from .code_interpreter_logs import CodeInterpreterLogs as CodeInterpreterLogs
-from .tool_call_delta_object import ToolCallDeltaObject as ToolCallDeltaObject
-from .tool_calls_step_details import ToolCallsStepDetails as ToolCallsStepDetails
-from .function_tool_call_delta import FunctionToolCallDelta as FunctionToolCallDelta
-from .retrieval_tool_call_delta import RetrievalToolCallDelta as RetrievalToolCallDelta
-from .code_interpreter_tool_call import CodeInterpreterToolCall as CodeInterpreterToolCall
-from .run_step_delta_message_delta import RunStepDeltaMessageDelta as RunStepDeltaMessageDelta
-from .code_interpreter_output_image import CodeInterpreterOutputImage as CodeInterpreterOutputImage
-from .message_creation_step_details import MessageCreationStepDetails as MessageCreationStepDetails
-from .code_interpreter_tool_call_delta import CodeInterpreterToolCallDelta as CodeInterpreterToolCallDelta
diff --git a/openai/types/beta/threads/runs/code_interpreter_logs.py b/openai/types/beta/threads/runs/code_interpreter_logs.py
deleted file mode 100644
index 0bf8c1da..00000000
--- a/openai/types/beta/threads/runs/code_interpreter_logs.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-
-__all__ = ["CodeInterpreterLogs"]
-
-
-class CodeInterpreterLogs(BaseModel):
-    index: int
-    """The index of the output in the outputs array."""
-
-    type: Literal["logs"]
-    """Always `logs`."""
-
-    logs: Optional[str] = None
-    """The text output from the Code Interpreter tool call."""
diff --git a/openai/types/beta/threads/runs/code_interpreter_output_image.py b/openai/types/beta/threads/runs/code_interpreter_output_image.py
deleted file mode 100644
index 2257f37e..00000000
--- a/openai/types/beta/threads/runs/code_interpreter_output_image.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-
-__all__ = ["CodeInterpreterOutputImage", "Image"]
-
-
-class Image(BaseModel):
-    file_id: Optional[str] = None
-    """
-    The [file](https://platform.openai.com/docs/api-reference/files) ID of the
-    image.
-    """
-
-
-class CodeInterpreterOutputImage(BaseModel):
-    index: int
-    """The index of the output in the outputs array."""
-
-    type: Literal["image"]
-    """Always `image`."""
-
-    image: Optional[Image] = None
diff --git a/openai/types/beta/threads/runs/code_interpreter_tool_call.py b/openai/types/beta/threads/runs/code_interpreter_tool_call.py
deleted file mode 100644
index 2f072436..00000000
--- a/openai/types/beta/threads/runs/code_interpreter_tool_call.py
+++ /dev/null
@@ -1,70 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Union
-from typing_extensions import Literal, Annotated
-
-from ....._utils import PropertyInfo
-from ....._models import BaseModel
-
-__all__ = [
-    "CodeInterpreterToolCall",
-    "CodeInterpreter",
-    "CodeInterpreterOutput",
-    "CodeInterpreterOutputLogs",
-    "CodeInterpreterOutputImage",
-    "CodeInterpreterOutputImageImage",
-]
-
-
-class CodeInterpreterOutputLogs(BaseModel):
-    logs: str
-    """The text output from the Code Interpreter tool call."""
-
-    type: Literal["logs"]
-    """Always `logs`."""
-
-
-class CodeInterpreterOutputImageImage(BaseModel):
-    file_id: str
-    """
-    The [file](https://platform.openai.com/docs/api-reference/files) ID of the
-    image.
-    """
-
-
-class CodeInterpreterOutputImage(BaseModel):
-    image: CodeInterpreterOutputImageImage
-
-    type: Literal["image"]
-    """Always `image`."""
-
-
-CodeInterpreterOutput = Annotated[
-    Union[CodeInterpreterOutputLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type")
-]
-
-
-class CodeInterpreter(BaseModel):
-    input: str
-    """The input to the Code Interpreter tool call."""
-
-    outputs: List[CodeInterpreterOutput]
-    """The outputs from the Code Interpreter tool call.
-
-    Code Interpreter can output one or more items, including text (`logs`) or images
-    (`image`). Each of these are represented by a different object type.
-    """
-
-
-class CodeInterpreterToolCall(BaseModel):
-    id: str
-    """The ID of the tool call."""
-
-    code_interpreter: CodeInterpreter
-    """The Code Interpreter tool call definition."""
-
-    type: Literal["code_interpreter"]
-    """The type of tool call.
-
-    This is always going to be `code_interpreter` for this type of tool call.
-    """
diff --git a/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py b/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py
deleted file mode 100644
index eff76355..00000000
--- a/openai/types/beta/threads/runs/code_interpreter_tool_call_delta.py
+++ /dev/null
@@ -1,44 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Union, Optional
-from typing_extensions import Literal, Annotated
-
-from ....._utils import PropertyInfo
-from ....._models import BaseModel
-from .code_interpreter_logs import CodeInterpreterLogs
-from .code_interpreter_output_image import CodeInterpreterOutputImage
-
-__all__ = ["CodeInterpreterToolCallDelta", "CodeInterpreter", "CodeInterpreterOutput"]
-
-CodeInterpreterOutput = Annotated[
-    Union[CodeInterpreterLogs, CodeInterpreterOutputImage], PropertyInfo(discriminator="type")
-]
-
-
-class CodeInterpreter(BaseModel):
-    input: Optional[str] = None
-    """The input to the Code Interpreter tool call."""
-
-    outputs: Optional[List[CodeInterpreterOutput]] = None
-    """The outputs from the Code Interpreter tool call.
-
-    Code Interpreter can output one or more items, including text (`logs`) or images
-    (`image`). Each of these are represented by a different object type.
-    """
-
-
-class CodeInterpreterToolCallDelta(BaseModel):
-    index: int
-    """The index of the tool call in the tool calls array."""
-
-    type: Literal["code_interpreter"]
-    """The type of tool call.
-
-    This is always going to be `code_interpreter` for this type of tool call.
-    """
-
-    id: Optional[str] = None
-    """The ID of the tool call."""
-
-    code_interpreter: Optional[CodeInterpreter] = None
-    """The Code Interpreter tool call definition."""
diff --git a/openai/types/beta/threads/runs/function_tool_call.py b/openai/types/beta/threads/runs/function_tool_call.py
deleted file mode 100644
index b1d354f8..00000000
--- a/openai/types/beta/threads/runs/function_tool_call.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-
-__all__ = ["FunctionToolCall", "Function"]
-
-
-class Function(BaseModel):
-    arguments: str
-    """The arguments passed to the function."""
-
-    name: str
-    """The name of the function."""
-
-    output: Optional[str] = None
-    """The output of the function.
-
-    This will be `null` if the outputs have not been
-    [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs)
-    yet.
-    """
-
-
-class FunctionToolCall(BaseModel):
-    id: str
-    """The ID of the tool call object."""
-
-    function: Function
-    """The definition of the function that was called."""
-
-    type: Literal["function"]
-    """The type of tool call.
-
-    This is always going to be `function` for this type of tool call.
-    """
diff --git a/openai/types/beta/threads/runs/function_tool_call_delta.py b/openai/types/beta/threads/runs/function_tool_call_delta.py
deleted file mode 100644
index faaf026f..00000000
--- a/openai/types/beta/threads/runs/function_tool_call_delta.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-
-__all__ = ["FunctionToolCallDelta", "Function"]
-
-
-class Function(BaseModel):
-    arguments: Optional[str] = None
-    """The arguments passed to the function."""
-
-    name: Optional[str] = None
-    """The name of the function."""
-
-    output: Optional[str] = None
-    """The output of the function.
-
-    This will be `null` if the outputs have not been
-    [submitted](https://platform.openai.com/docs/api-reference/runs/submitToolOutputs)
-    yet.
-    """
-
-
-class FunctionToolCallDelta(BaseModel):
-    index: int
-    """The index of the tool call in the tool calls array."""
-
-    type: Literal["function"]
-    """The type of tool call.
-
-    This is always going to be `function` for this type of tool call.
-    """
-
-    id: Optional[str] = None
-    """The ID of the tool call object."""
-
-    function: Optional[Function] = None
-    """The definition of the function that was called."""
diff --git a/openai/types/beta/threads/runs/message_creation_step_details.py b/openai/types/beta/threads/runs/message_creation_step_details.py
deleted file mode 100644
index 73439079..00000000
--- a/openai/types/beta/threads/runs/message_creation_step_details.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-
-__all__ = ["MessageCreationStepDetails", "MessageCreation"]
-
-
-class MessageCreation(BaseModel):
-    message_id: str
-    """The ID of the message that was created by this run step."""
-
-
-class MessageCreationStepDetails(BaseModel):
-    message_creation: MessageCreation
-
-    type: Literal["message_creation"]
-    """Always `message_creation`."""
diff --git a/openai/types/beta/threads/runs/retrieval_tool_call.py b/openai/types/beta/threads/runs/retrieval_tool_call.py
deleted file mode 100644
index 48704ed3..00000000
--- a/openai/types/beta/threads/runs/retrieval_tool_call.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-
-__all__ = ["RetrievalToolCall"]
-
-
-class RetrievalToolCall(BaseModel):
-    id: str
-    """The ID of the tool call object."""
-
-    retrieval: object
-    """For now, this is always going to be an empty object."""
-
-    type: Literal["retrieval"]
-    """The type of tool call.
-
-    This is always going to be `retrieval` for this type of tool call.
-    """
diff --git a/openai/types/beta/threads/runs/retrieval_tool_call_delta.py b/openai/types/beta/threads/runs/retrieval_tool_call_delta.py
deleted file mode 100644
index 33100793..00000000
--- a/openai/types/beta/threads/runs/retrieval_tool_call_delta.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-
-__all__ = ["RetrievalToolCallDelta"]
-
-
-class RetrievalToolCallDelta(BaseModel):
-    index: int
-    """The index of the tool call in the tool calls array."""
-
-    type: Literal["retrieval"]
-    """The type of tool call.
-
-    This is always going to be `retrieval` for this type of tool call.
-    """
-
-    id: Optional[str] = None
-    """The ID of the tool call object."""
-
-    retrieval: Optional[object] = None
-    """For now, this is always going to be an empty object."""
diff --git a/openai/types/beta/threads/runs/run_step.py b/openai/types/beta/threads/runs/run_step.py
deleted file mode 100644
index 7c81dcac..00000000
--- a/openai/types/beta/threads/runs/run_step.py
+++ /dev/null
@@ -1,110 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union, Optional
-from typing_extensions import Literal, Annotated
-
-from ....._utils import PropertyInfo
-from ....._models import BaseModel
-from .tool_calls_step_details import ToolCallsStepDetails
-from .message_creation_step_details import MessageCreationStepDetails
-
-__all__ = ["RunStep", "LastError", "StepDetails", "Usage"]
-
-
-class LastError(BaseModel):
-    code: Literal["server_error", "rate_limit_exceeded"]
-    """One of `server_error` or `rate_limit_exceeded`."""
-
-    message: str
-    """A human-readable description of the error."""
-
-
-StepDetails = Annotated[Union[MessageCreationStepDetails, ToolCallsStepDetails], PropertyInfo(discriminator="type")]
-
-
-class Usage(BaseModel):
-    completion_tokens: int
-    """Number of completion tokens used over the course of the run step."""
-
-    prompt_tokens: int
-    """Number of prompt tokens used over the course of the run step."""
-
-    total_tokens: int
-    """Total number of tokens used (prompt + completion)."""
-
-
-class RunStep(BaseModel):
-    id: str
-    """The identifier of the run step, which can be referenced in API endpoints."""
-
-    assistant_id: str
-    """
-    The ID of the
-    [assistant](https://platform.openai.com/docs/api-reference/assistants)
-    associated with the run step.
-    """
-
-    cancelled_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the run step was cancelled."""
-
-    completed_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the run step completed."""
-
-    created_at: int
-    """The Unix timestamp (in seconds) for when the run step was created."""
-
-    expired_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the run step expired.
-
-    A step is considered expired if the parent run is expired.
-    """
-
-    failed_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the run step failed."""
-
-    last_error: Optional[LastError] = None
-    """The last error associated with this run step.
-
-    Will be `null` if there are no errors.
-    """
-
-    metadata: Optional[object] = None
-    """Set of 16 key-value pairs that can be attached to an object.
-
-    This can be useful for storing additional information about the object in a
-    structured format. Keys can be a maximum of 64 characters long and values can be
-    a maxium of 512 characters long.
-    """
-
-    object: Literal["thread.run.step"]
-    """The object type, which is always `thread.run.step`."""
-
-    run_id: str
-    """
-    The ID of the [run](https://platform.openai.com/docs/api-reference/runs) that
-    this run step is a part of.
-    """
-
-    status: Literal["in_progress", "cancelled", "failed", "completed", "expired"]
-    """
-    The status of the run step, which can be either `in_progress`, `cancelled`,
-    `failed`, `completed`, or `expired`.
-    """
-
-    step_details: StepDetails
-    """The details of the run step."""
-
-    thread_id: str
-    """
-    The ID of the [thread](https://platform.openai.com/docs/api-reference/threads)
-    that was run.
-    """
-
-    type: Literal["message_creation", "tool_calls"]
-    """The type of run step, which can be either `message_creation` or `tool_calls`."""
-
-    usage: Optional[Usage] = None
-    """Usage statistics related to the run step.
-
-    This value will be `null` while the run step's status is `in_progress`.
-    """
diff --git a/openai/types/beta/threads/runs/run_step_delta.py b/openai/types/beta/threads/runs/run_step_delta.py
deleted file mode 100644
index d6b4aefe..00000000
--- a/openai/types/beta/threads/runs/run_step_delta.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union, Optional
-from typing_extensions import Annotated
-
-from ....._utils import PropertyInfo
-from ....._models import BaseModel
-from .tool_call_delta_object import ToolCallDeltaObject
-from .run_step_delta_message_delta import RunStepDeltaMessageDelta
-
-__all__ = ["RunStepDelta", "StepDetails"]
-
-StepDetails = Annotated[Union[RunStepDeltaMessageDelta, ToolCallDeltaObject], PropertyInfo(discriminator="type")]
-
-
-class RunStepDelta(BaseModel):
-    step_details: Optional[StepDetails] = None
-    """The details of the run step."""
diff --git a/openai/types/beta/threads/runs/run_step_delta_event.py b/openai/types/beta/threads/runs/run_step_delta_event.py
deleted file mode 100644
index 7f3f92aa..00000000
--- a/openai/types/beta/threads/runs/run_step_delta_event.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-from .run_step_delta import RunStepDelta
-
-__all__ = ["RunStepDeltaEvent"]
-
-
-class RunStepDeltaEvent(BaseModel):
-    id: str
-    """The identifier of the run step, which can be referenced in API endpoints."""
-
-    delta: RunStepDelta
-    """The delta containing the fields that have changed on the run step."""
-
-    object: Literal["thread.run.step.delta"]
-    """The object type, which is always `thread.run.step.delta`."""
diff --git a/openai/types/beta/threads/runs/run_step_delta_message_delta.py b/openai/types/beta/threads/runs/run_step_delta_message_delta.py
deleted file mode 100644
index f58ed3d9..00000000
--- a/openai/types/beta/threads/runs/run_step_delta_message_delta.py
+++ /dev/null
@@ -1,20 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-
-__all__ = ["RunStepDeltaMessageDelta", "MessageCreation"]
-
-
-class MessageCreation(BaseModel):
-    message_id: Optional[str] = None
-    """The ID of the message that was created by this run step."""
-
-
-class RunStepDeltaMessageDelta(BaseModel):
-    type: Literal["message_creation"]
-    """Always `message_creation`."""
-
-    message_creation: Optional[MessageCreation] = None
diff --git a/openai/types/beta/threads/runs/step_list_params.py b/openai/types/beta/threads/runs/step_list_params.py
deleted file mode 100644
index 606d4445..00000000
--- a/openai/types/beta/threads/runs/step_list_params.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["StepListParams"]
-
-
-class StepListParams(TypedDict, total=False):
-    thread_id: Required[str]
-
-    after: str
-    """A cursor for use in pagination.
-
-    `after` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include after=obj_foo in order to fetch the next page of the
-    list.
-    """
-
-    before: str
-    """A cursor for use in pagination.
-
-    `before` is an object ID that defines your place in the list. For instance, if
-    you make a list request and receive 100 objects, ending with obj_foo, your
-    subsequent call can include before=obj_foo in order to fetch the previous page
-    of the list.
-    """
-
-    limit: int
-    """A limit on the number of objects to be returned.
-
-    Limit can range between 1 and 100, and the default is 20.
-    """
-
-    order: Literal["asc", "desc"]
-    """Sort order by the `created_at` timestamp of the objects.
-
-    `asc` for ascending order and `desc` for descending order.
-    """
diff --git a/openai/types/beta/threads/runs/tool_call.py b/openai/types/beta/threads/runs/tool_call.py
deleted file mode 100644
index dcca797b..00000000
--- a/openai/types/beta/threads/runs/tool_call.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated
-
-from ....._utils import PropertyInfo
-from .function_tool_call import FunctionToolCall
-from .retrieval_tool_call import RetrievalToolCall
-from .code_interpreter_tool_call import CodeInterpreterToolCall
-
-__all__ = ["ToolCall"]
-
-ToolCall = Annotated[
-    Union[CodeInterpreterToolCall, RetrievalToolCall, FunctionToolCall], PropertyInfo(discriminator="type")
-]
diff --git a/openai/types/beta/threads/runs/tool_call_delta.py b/openai/types/beta/threads/runs/tool_call_delta.py
deleted file mode 100644
index fc98981a..00000000
--- a/openai/types/beta/threads/runs/tool_call_delta.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Union
-from typing_extensions import Annotated
-
-from ....._utils import PropertyInfo
-from .function_tool_call_delta import FunctionToolCallDelta
-from .retrieval_tool_call_delta import RetrievalToolCallDelta
-from .code_interpreter_tool_call_delta import CodeInterpreterToolCallDelta
-
-__all__ = ["ToolCallDelta"]
-
-ToolCallDelta = Annotated[
-    Union[CodeInterpreterToolCallDelta, RetrievalToolCallDelta, FunctionToolCallDelta],
-    PropertyInfo(discriminator="type"),
-]
diff --git a/openai/types/beta/threads/runs/tool_call_delta_object.py b/openai/types/beta/threads/runs/tool_call_delta_object.py
deleted file mode 100644
index 9cd59a6e..00000000
--- a/openai/types/beta/threads/runs/tool_call_delta_object.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ....._models import BaseModel
-from .tool_call_delta import ToolCallDelta
-
-__all__ = ["ToolCallDeltaObject"]
-
-
-class ToolCallDeltaObject(BaseModel):
-    type: Literal["tool_calls"]
-    """Always `tool_calls`."""
-
-    tool_calls: Optional[List[ToolCallDelta]] = None
-    """An array of tool calls the run step was involved in.
-
-    These can be associated with one of three types of tools: `code_interpreter`,
-    `retrieval`, or `function`.
-    """
diff --git a/openai/types/beta/threads/runs/tool_calls_step_details.py b/openai/types/beta/threads/runs/tool_calls_step_details.py
deleted file mode 100644
index ca08fabd..00000000
--- a/openai/types/beta/threads/runs/tool_calls_step_details.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-from typing_extensions import Literal
-
-from .tool_call import ToolCall
-from ....._models import BaseModel
-
-__all__ = ["ToolCallsStepDetails"]
-
-
-class ToolCallsStepDetails(BaseModel):
-    tool_calls: List[ToolCall]
-    """An array of tool calls the run step was involved in.
-
-    These can be associated with one of three types of tools: `code_interpreter`,
-    `retrieval`, or `function`.
-    """
-
-    type: Literal["tool_calls"]
-    """Always `tool_calls`."""
diff --git a/openai/types/beta/threads/text.py b/openai/types/beta/threads/text.py
deleted file mode 100644
index 853bec29..00000000
--- a/openai/types/beta/threads/text.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-
-from ...._models import BaseModel
-from .annotation import Annotation
-
-__all__ = ["Text"]
-
-
-class Text(BaseModel):
-    annotations: List[Annotation]
-
-    value: str
-    """The data that makes up the text."""
diff --git a/openai/types/beta/threads/text_content_block.py b/openai/types/beta/threads/text_content_block.py
deleted file mode 100644
index 3706d6b9..00000000
--- a/openai/types/beta/threads/text_content_block.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from .text import Text
-from ...._models import BaseModel
-
-__all__ = ["TextContentBlock"]
-
-
-class TextContentBlock(BaseModel):
-    text: Text
-
-    type: Literal["text"]
-    """Always `text`."""
diff --git a/openai/types/beta/threads/text_delta.py b/openai/types/beta/threads/text_delta.py
deleted file mode 100644
index 09cd3570..00000000
--- a/openai/types/beta/threads/text_delta.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-
-from ...._models import BaseModel
-from .annotation_delta import AnnotationDelta
-
-__all__ = ["TextDelta"]
-
-
-class TextDelta(BaseModel):
-    annotations: Optional[List[AnnotationDelta]] = None
-
-    value: Optional[str] = None
-    """The data that makes up the text."""
diff --git a/openai/types/beta/threads/text_delta_block.py b/openai/types/beta/threads/text_delta_block.py
deleted file mode 100644
index 586116e0..00000000
--- a/openai/types/beta/threads/text_delta_block.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from ...._models import BaseModel
-from .text_delta import TextDelta
-
-__all__ = ["TextDeltaBlock"]
-
-
-class TextDeltaBlock(BaseModel):
-    index: int
-    """The index of the content part in the message."""
-
-    type: Literal["text"]
-    """Always `text`."""
-
-    text: Optional[TextDelta] = None
diff --git a/openai/types/chat/__init__.py b/openai/types/chat/__init__.py
deleted file mode 100644
index 5d122d20..00000000
--- a/openai/types/chat/__init__.py
+++ /dev/null
@@ -1,41 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .chat_completion import ChatCompletion as ChatCompletion
-from .chat_completion_role import ChatCompletionRole as ChatCompletionRole
-from .chat_completion_chunk import ChatCompletionChunk as ChatCompletionChunk
-from .chat_completion_message import ChatCompletionMessage as ChatCompletionMessage
-from .completion_create_params import CompletionCreateParams as CompletionCreateParams
-from .chat_completion_tool_param import ChatCompletionToolParam as ChatCompletionToolParam
-from .chat_completion_message_param import ChatCompletionMessageParam as ChatCompletionMessageParam
-from .chat_completion_token_logprob import ChatCompletionTokenLogprob as ChatCompletionTokenLogprob
-from .chat_completion_message_tool_call import ChatCompletionMessageToolCall as ChatCompletionMessageToolCall
-from .chat_completion_content_part_param import ChatCompletionContentPartParam as ChatCompletionContentPartParam
-from .chat_completion_tool_message_param import ChatCompletionToolMessageParam as ChatCompletionToolMessageParam
-from .chat_completion_user_message_param import ChatCompletionUserMessageParam as ChatCompletionUserMessageParam
-from .chat_completion_system_message_param import ChatCompletionSystemMessageParam as ChatCompletionSystemMessageParam
-from .chat_completion_function_message_param import (
-    ChatCompletionFunctionMessageParam as ChatCompletionFunctionMessageParam,
-)
-from .chat_completion_assistant_message_param import (
-    ChatCompletionAssistantMessageParam as ChatCompletionAssistantMessageParam,
-)
-from .chat_completion_content_part_text_param import (
-    ChatCompletionContentPartTextParam as ChatCompletionContentPartTextParam,
-)
-from .chat_completion_message_tool_call_param import (
-    ChatCompletionMessageToolCallParam as ChatCompletionMessageToolCallParam,
-)
-from .chat_completion_named_tool_choice_param import (
-    ChatCompletionNamedToolChoiceParam as ChatCompletionNamedToolChoiceParam,
-)
-from .chat_completion_content_part_image_param import (
-    ChatCompletionContentPartImageParam as ChatCompletionContentPartImageParam,
-)
-from .chat_completion_tool_choice_option_param import (
-    ChatCompletionToolChoiceOptionParam as ChatCompletionToolChoiceOptionParam,
-)
-from .chat_completion_function_call_option_param import (
-    ChatCompletionFunctionCallOptionParam as ChatCompletionFunctionCallOptionParam,
-)
diff --git a/openai/types/chat/chat_completion.py b/openai/types/chat/chat_completion.py
deleted file mode 100644
index 61a94a25..00000000
--- a/openai/types/chat/chat_completion.py
+++ /dev/null
@@ -1,67 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from ..completion_usage import CompletionUsage
-from .chat_completion_message import ChatCompletionMessage
-from .chat_completion_token_logprob import ChatCompletionTokenLogprob
-
-__all__ = ["ChatCompletion", "Choice", "ChoiceLogprobs"]
-
-
-class ChoiceLogprobs(BaseModel):
-    content: Optional[List[ChatCompletionTokenLogprob]] = None
-    """A list of message content tokens with log probability information."""
-
-
-class Choice(BaseModel):
-    finish_reason: Literal["stop", "length", "tool_calls", "content_filter", "function_call"]
-    """The reason the model stopped generating tokens.
-
-    This will be `stop` if the model hit a natural stop point or a provided stop
-    sequence, `length` if the maximum number of tokens specified in the request was
-    reached, `content_filter` if content was omitted due to a flag from our content
-    filters, `tool_calls` if the model called a tool, or `function_call`
-    (deprecated) if the model called a function.
-    """
-
-    index: int
-    """The index of the choice in the list of choices."""
-
-    logprobs: Optional[ChoiceLogprobs] = None
-    """Log probability information for the choice."""
-
-    message: ChatCompletionMessage
-    """A chat completion message generated by the model."""
-
-
-class ChatCompletion(BaseModel):
-    id: str
-    """A unique identifier for the chat completion."""
-
-    choices: List[Choice]
-    """A list of chat completion choices.
-
-    Can be more than one if `n` is greater than 1.
-    """
-
-    created: int
-    """The Unix timestamp (in seconds) of when the chat completion was created."""
-
-    model: str
-    """The model used for the chat completion."""
-
-    object: Literal["chat.completion"]
-    """The object type, which is always `chat.completion`."""
-
-    system_fingerprint: Optional[str] = None
-    """This fingerprint represents the backend configuration that the model runs with.
-
-    Can be used in conjunction with the `seed` request parameter to understand when
-    backend changes have been made that might impact determinism.
-    """
-
-    usage: Optional[CompletionUsage] = None
-    """Usage statistics for the completion request."""
diff --git a/openai/types/chat/chat_completion_assistant_message_param.py b/openai/types/chat/chat_completion_assistant_message_param.py
deleted file mode 100644
index e1e39948..00000000
--- a/openai/types/chat/chat_completion_assistant_message_param.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Iterable, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-from .chat_completion_message_tool_call_param import ChatCompletionMessageToolCallParam
-
-__all__ = ["ChatCompletionAssistantMessageParam", "FunctionCall"]
-
-
-class FunctionCall(TypedDict, total=False):
-    arguments: Required[str]
-    """
-    The arguments to call the function with, as generated by the model in JSON
-    format. Note that the model does not always generate valid JSON, and may
-    hallucinate parameters not defined by your function schema. Validate the
-    arguments in your code before calling your function.
-    """
-
-    name: Required[str]
-    """The name of the function to call."""
-
-
-class ChatCompletionAssistantMessageParam(TypedDict, total=False):
-    role: Required[Literal["assistant"]]
-    """The role of the messages author, in this case `assistant`."""
-
-    content: Optional[str]
-    """The contents of the assistant message.
-
-    Required unless `tool_calls` or `function_call` is specified.
-    """
-
-    function_call: FunctionCall
-    """Deprecated and replaced by `tool_calls`.
-
-    The name and arguments of a function that should be called, as generated by the
-    model.
-    """
-
-    name: str
-    """An optional name for the participant.
-
-    Provides the model information to differentiate between participants of the same
-    role.
-    """
-
-    tool_calls: Iterable[ChatCompletionMessageToolCallParam]
-    """The tool calls generated by the model, such as function calls."""
diff --git a/openai/types/chat/chat_completion_chunk.py b/openai/types/chat/chat_completion_chunk.py
deleted file mode 100644
index c2f18bcb..00000000
--- a/openai/types/chat/chat_completion_chunk.py
+++ /dev/null
@@ -1,128 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .chat_completion_token_logprob import ChatCompletionTokenLogprob
-
-__all__ = [
-    "ChatCompletionChunk",
-    "Choice",
-    "ChoiceDelta",
-    "ChoiceDeltaFunctionCall",
-    "ChoiceDeltaToolCall",
-    "ChoiceDeltaToolCallFunction",
-    "ChoiceLogprobs",
-]
-
-
-class ChoiceDeltaFunctionCall(BaseModel):
-    arguments: Optional[str] = None
-    """
-    The arguments to call the function with, as generated by the model in JSON
-    format. Note that the model does not always generate valid JSON, and may
-    hallucinate parameters not defined by your function schema. Validate the
-    arguments in your code before calling your function.
-    """
-
-    name: Optional[str] = None
-    """The name of the function to call."""
-
-
-class ChoiceDeltaToolCallFunction(BaseModel):
-    arguments: Optional[str] = None
-    """
-    The arguments to call the function with, as generated by the model in JSON
-    format. Note that the model does not always generate valid JSON, and may
-    hallucinate parameters not defined by your function schema. Validate the
-    arguments in your code before calling your function.
-    """
-
-    name: Optional[str] = None
-    """The name of the function to call."""
-
-
-class ChoiceDeltaToolCall(BaseModel):
-    index: int
-
-    id: Optional[str] = None
-    """The ID of the tool call."""
-
-    function: Optional[ChoiceDeltaToolCallFunction] = None
-
-    type: Optional[Literal["function"]] = None
-    """The type of the tool. Currently, only `function` is supported."""
-
-
-class ChoiceDelta(BaseModel):
-    content: Optional[str] = None
-    """The contents of the chunk message."""
-
-    function_call: Optional[ChoiceDeltaFunctionCall] = None
-    """Deprecated and replaced by `tool_calls`.
-
-    The name and arguments of a function that should be called, as generated by the
-    model.
-    """
-
-    role: Optional[Literal["system", "user", "assistant", "tool"]] = None
-    """The role of the author of this message."""
-
-    tool_calls: Optional[List[ChoiceDeltaToolCall]] = None
-
-
-class ChoiceLogprobs(BaseModel):
-    content: Optional[List[ChatCompletionTokenLogprob]] = None
-    """A list of message content tokens with log probability information."""
-
-
-class Choice(BaseModel):
-    delta: ChoiceDelta
-    """A chat completion delta generated by streamed model responses."""
-
-    finish_reason: Optional[Literal["stop", "length", "tool_calls", "content_filter", "function_call"]] = None
-    """The reason the model stopped generating tokens.
-
-    This will be `stop` if the model hit a natural stop point or a provided stop
-    sequence, `length` if the maximum number of tokens specified in the request was
-    reached, `content_filter` if content was omitted due to a flag from our content
-    filters, `tool_calls` if the model called a tool, or `function_call`
-    (deprecated) if the model called a function.
-    """
-
-    index: int
-    """The index of the choice in the list of choices."""
-
-    logprobs: Optional[ChoiceLogprobs] = None
-    """Log probability information for the choice."""
-
-
-class ChatCompletionChunk(BaseModel):
-    id: str
-    """A unique identifier for the chat completion. Each chunk has the same ID."""
-
-    choices: List[Choice]
-    """A list of chat completion choices.
-
-    Can be more than one if `n` is greater than 1.
-    """
-
-    created: int
-    """The Unix timestamp (in seconds) of when the chat completion was created.
-
-    Each chunk has the same timestamp.
-    """
-
-    model: str
-    """The model to generate the completion."""
-
-    object: Literal["chat.completion.chunk"]
-    """The object type, which is always `chat.completion.chunk`."""
-
-    system_fingerprint: Optional[str] = None
-    """
-    This fingerprint represents the backend configuration that the model runs with.
-    Can be used in conjunction with the `seed` request parameter to understand when
-    backend changes have been made that might impact determinism.
-    """
diff --git a/openai/types/chat/chat_completion_content_part_image_param.py b/openai/types/chat/chat_completion_content_part_image_param.py
deleted file mode 100644
index b1a186aa..00000000
--- a/openai/types/chat/chat_completion_content_part_image_param.py
+++ /dev/null
@@ -1,26 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ChatCompletionContentPartImageParam", "ImageURL"]
-
-
-class ImageURL(TypedDict, total=False):
-    url: Required[str]
-    """Either a URL of the image or the base64 encoded image data."""
-
-    detail: Literal["auto", "low", "high"]
-    """Specifies the detail level of the image.
-
-    Learn more in the
-    [Vision guide](https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding).
-    """
-
-
-class ChatCompletionContentPartImageParam(TypedDict, total=False):
-    image_url: Required[ImageURL]
-
-    type: Required[Literal["image_url"]]
-    """The type of the content part."""
diff --git a/openai/types/chat/chat_completion_content_part_param.py b/openai/types/chat/chat_completion_content_part_param.py
deleted file mode 100644
index f9b5f71e..00000000
--- a/openai/types/chat/chat_completion_content_part_param.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-
-from .chat_completion_content_part_text_param import ChatCompletionContentPartTextParam
-from .chat_completion_content_part_image_param import ChatCompletionContentPartImageParam
-
-__all__ = ["ChatCompletionContentPartParam"]
-
-ChatCompletionContentPartParam = Union[ChatCompletionContentPartTextParam, ChatCompletionContentPartImageParam]
diff --git a/openai/types/chat/chat_completion_content_part_text_param.py b/openai/types/chat/chat_completion_content_part_text_param.py
deleted file mode 100644
index a2707444..00000000
--- a/openai/types/chat/chat_completion_content_part_text_param.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ChatCompletionContentPartTextParam"]
-
-
-class ChatCompletionContentPartTextParam(TypedDict, total=False):
-    text: Required[str]
-    """The text content."""
-
-    type: Required[Literal["text"]]
-    """The type of the content part."""
diff --git a/openai/types/chat/chat_completion_function_call_option_param.py b/openai/types/chat/chat_completion_function_call_option_param.py
deleted file mode 100644
index 2bc014af..00000000
--- a/openai/types/chat/chat_completion_function_call_option_param.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-__all__ = ["ChatCompletionFunctionCallOptionParam"]
-
-
-class ChatCompletionFunctionCallOptionParam(TypedDict, total=False):
-    name: Required[str]
-    """The name of the function to call."""
diff --git a/openai/types/chat/chat_completion_function_message_param.py b/openai/types/chat/chat_completion_function_message_param.py
deleted file mode 100644
index 5af12bf9..00000000
--- a/openai/types/chat/chat_completion_function_message_param.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Optional
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ChatCompletionFunctionMessageParam"]
-
-
-class ChatCompletionFunctionMessageParam(TypedDict, total=False):
-    content: Required[Optional[str]]
-    """The contents of the function message."""
-
-    name: Required[str]
-    """The name of the function to call."""
-
-    role: Required[Literal["function"]]
-    """The role of the messages author, in this case `function`."""
diff --git a/openai/types/chat/chat_completion_message.py b/openai/types/chat/chat_completion_message.py
deleted file mode 100644
index 8db7d17d..00000000
--- a/openai/types/chat/chat_completion_message.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-from .chat_completion_message_tool_call import ChatCompletionMessageToolCall
-
-__all__ = ["ChatCompletionMessage", "FunctionCall"]
-
-
-class FunctionCall(BaseModel):
-    arguments: str
-    """
-    The arguments to call the function with, as generated by the model in JSON
-    format. Note that the model does not always generate valid JSON, and may
-    hallucinate parameters not defined by your function schema. Validate the
-    arguments in your code before calling your function.
-    """
-
-    name: str
-    """The name of the function to call."""
-
-
-class ChatCompletionMessage(BaseModel):
-    content: Optional[str] = None
-    """The contents of the message."""
-
-    role: Literal["assistant"]
-    """The role of the author of this message."""
-
-    function_call: Optional[FunctionCall] = None
-    """Deprecated and replaced by `tool_calls`.
-
-    The name and arguments of a function that should be called, as generated by the
-    model.
-    """
-
-    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None
-    """The tool calls generated by the model, such as function calls."""
diff --git a/openai/types/chat/chat_completion_message_param.py b/openai/types/chat/chat_completion_message_param.py
deleted file mode 100644
index a3644a53..00000000
--- a/openai/types/chat/chat_completion_message_param.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-
-from .chat_completion_tool_message_param import ChatCompletionToolMessageParam
-from .chat_completion_user_message_param import ChatCompletionUserMessageParam
-from .chat_completion_system_message_param import ChatCompletionSystemMessageParam
-from .chat_completion_function_message_param import ChatCompletionFunctionMessageParam
-from .chat_completion_assistant_message_param import ChatCompletionAssistantMessageParam
-
-__all__ = ["ChatCompletionMessageParam"]
-
-ChatCompletionMessageParam = Union[
-    ChatCompletionSystemMessageParam,
-    ChatCompletionUserMessageParam,
-    ChatCompletionAssistantMessageParam,
-    ChatCompletionToolMessageParam,
-    ChatCompletionFunctionMessageParam,
-]
diff --git a/openai/types/chat/chat_completion_message_tool_call.py b/openai/types/chat/chat_completion_message_tool_call.py
deleted file mode 100644
index 4fec6670..00000000
--- a/openai/types/chat/chat_completion_message_tool_call.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["ChatCompletionMessageToolCall", "Function"]
-
-
-class Function(BaseModel):
-    arguments: str
-    """
-    The arguments to call the function with, as generated by the model in JSON
-    format. Note that the model does not always generate valid JSON, and may
-    hallucinate parameters not defined by your function schema. Validate the
-    arguments in your code before calling your function.
-    """
-
-    name: str
-    """The name of the function to call."""
-
-
-class ChatCompletionMessageToolCall(BaseModel):
-    id: str
-    """The ID of the tool call."""
-
-    function: Function
-    """The function that the model called."""
-
-    type: Literal["function"]
-    """The type of the tool. Currently, only `function` is supported."""
diff --git a/openai/types/chat/chat_completion_message_tool_call_param.py b/openai/types/chat/chat_completion_message_tool_call_param.py
deleted file mode 100644
index f616c363..00000000
--- a/openai/types/chat/chat_completion_message_tool_call_param.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ChatCompletionMessageToolCallParam", "Function"]
-
-
-class Function(TypedDict, total=False):
-    arguments: Required[str]
-    """
-    The arguments to call the function with, as generated by the model in JSON
-    format. Note that the model does not always generate valid JSON, and may
-    hallucinate parameters not defined by your function schema. Validate the
-    arguments in your code before calling your function.
-    """
-
-    name: Required[str]
-    """The name of the function to call."""
-
-
-class ChatCompletionMessageToolCallParam(TypedDict, total=False):
-    id: Required[str]
-    """The ID of the tool call."""
-
-    function: Required[Function]
-    """The function that the model called."""
-
-    type: Required[Literal["function"]]
-    """The type of the tool. Currently, only `function` is supported."""
diff --git a/openai/types/chat/chat_completion_named_tool_choice_param.py b/openai/types/chat/chat_completion_named_tool_choice_param.py
deleted file mode 100644
index 369f8b42..00000000
--- a/openai/types/chat/chat_completion_named_tool_choice_param.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ChatCompletionNamedToolChoiceParam", "Function"]
-
-
-class Function(TypedDict, total=False):
-    name: Required[str]
-    """The name of the function to call."""
-
-
-class ChatCompletionNamedToolChoiceParam(TypedDict, total=False):
-    function: Required[Function]
-
-    type: Required[Literal["function"]]
-    """The type of the tool. Currently, only `function` is supported."""
diff --git a/openai/types/chat/chat_completion_role.py b/openai/types/chat/chat_completion_role.py
deleted file mode 100644
index 1fd83888..00000000
--- a/openai/types/chat/chat_completion_role.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-__all__ = ["ChatCompletionRole"]
-
-ChatCompletionRole = Literal["system", "user", "assistant", "tool", "function"]
diff --git a/openai/types/chat/chat_completion_system_message_param.py b/openai/types/chat/chat_completion_system_message_param.py
deleted file mode 100644
index 94bb3f63..00000000
--- a/openai/types/chat/chat_completion_system_message_param.py
+++ /dev/null
@@ -1,22 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ChatCompletionSystemMessageParam"]
-
-
-class ChatCompletionSystemMessageParam(TypedDict, total=False):
-    content: Required[str]
-    """The contents of the system message."""
-
-    role: Required[Literal["system"]]
-    """The role of the messages author, in this case `system`."""
-
-    name: str
-    """An optional name for the participant.
-
-    Provides the model information to differentiate between participants of the same
-    role.
-    """
diff --git a/openai/types/chat/chat_completion_token_logprob.py b/openai/types/chat/chat_completion_token_logprob.py
deleted file mode 100644
index c69e2589..00000000
--- a/openai/types/chat/chat_completion_token_logprob.py
+++ /dev/null
@@ -1,57 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-
-from ..._models import BaseModel
-
-__all__ = ["ChatCompletionTokenLogprob", "TopLogprob"]
-
-
-class TopLogprob(BaseModel):
-    token: str
-    """The token."""
-
-    bytes: Optional[List[int]] = None
-    """A list of integers representing the UTF-8 bytes representation of the token.
-
-    Useful in instances where characters are represented by multiple tokens and
-    their byte representations must be combined to generate the correct text
-    representation. Can be `null` if there is no bytes representation for the token.
-    """
-
-    logprob: float
-    """The log probability of this token, if it is within the top 20 most likely
-    tokens.
-
-    Otherwise, the value `-9999.0` is used to signify that the token is very
-    unlikely.
-    """
-
-
-class ChatCompletionTokenLogprob(BaseModel):
-    token: str
-    """The token."""
-
-    bytes: Optional[List[int]] = None
-    """A list of integers representing the UTF-8 bytes representation of the token.
-
-    Useful in instances where characters are represented by multiple tokens and
-    their byte representations must be combined to generate the correct text
-    representation. Can be `null` if there is no bytes representation for the token.
-    """
-
-    logprob: float
-    """The log probability of this token, if it is within the top 20 most likely
-    tokens.
-
-    Otherwise, the value `-9999.0` is used to signify that the token is very
-    unlikely.
-    """
-
-    top_logprobs: List[TopLogprob]
-    """List of the most likely tokens and their log probability, at this token
-    position.
-
-    In rare cases, there may be fewer than the number of requested `top_logprobs`
-    returned.
-    """
diff --git a/openai/types/chat/chat_completion_tool_choice_option_param.py b/openai/types/chat/chat_completion_tool_choice_option_param.py
deleted file mode 100644
index 9c0ae225..00000000
--- a/openai/types/chat/chat_completion_tool_choice_option_param.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union
-from typing_extensions import Literal
-
-from .chat_completion_named_tool_choice_param import ChatCompletionNamedToolChoiceParam
-
-__all__ = ["ChatCompletionToolChoiceOptionParam"]
-
-ChatCompletionToolChoiceOptionParam = Union[Literal["none", "auto"], ChatCompletionNamedToolChoiceParam]
diff --git a/openai/types/chat/chat_completion_tool_message_param.py b/openai/types/chat/chat_completion_tool_message_param.py
deleted file mode 100644
index 5c590e03..00000000
--- a/openai/types/chat/chat_completion_tool_message_param.py
+++ /dev/null
@@ -1,18 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ChatCompletionToolMessageParam"]
-
-
-class ChatCompletionToolMessageParam(TypedDict, total=False):
-    content: Required[str]
-    """The contents of the tool message."""
-
-    role: Required[Literal["tool"]]
-    """The role of the messages author, in this case `tool`."""
-
-    tool_call_id: Required[str]
-    """Tool call that this message is responding to."""
diff --git a/openai/types/chat/chat_completion_tool_param.py b/openai/types/chat/chat_completion_tool_param.py
deleted file mode 100644
index 0cf6ea72..00000000
--- a/openai/types/chat/chat_completion_tool_param.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-from ...types import shared_params
-
-__all__ = ["ChatCompletionToolParam"]
-
-
-class ChatCompletionToolParam(TypedDict, total=False):
-    function: Required[shared_params.FunctionDefinition]
-
-    type: Required[Literal["function"]]
-    """The type of the tool. Currently, only `function` is supported."""
diff --git a/openai/types/chat/chat_completion_user_message_param.py b/openai/types/chat/chat_completion_user_message_param.py
deleted file mode 100644
index 5c15322a..00000000
--- a/openai/types/chat/chat_completion_user_message_param.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-from .chat_completion_content_part_param import ChatCompletionContentPartParam
-
-__all__ = ["ChatCompletionUserMessageParam"]
-
-
-class ChatCompletionUserMessageParam(TypedDict, total=False):
-    content: Required[Union[str, Iterable[ChatCompletionContentPartParam]]]
-    """The contents of the user message."""
-
-    role: Required[Literal["user"]]
-    """The role of the messages author, in this case `user`."""
-
-    name: str
-    """An optional name for the participant.
-
-    Provides the model information to differentiate between participants of the same
-    role.
-    """
diff --git a/openai/types/chat/completion_create_params.py b/openai/types/chat/completion_create_params.py
deleted file mode 100644
index ab6a7470..00000000
--- a/openai/types/chat/completion_create_params.py
+++ /dev/null
@@ -1,280 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, List, Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-from ...types import shared_params
-from .chat_completion_tool_param import ChatCompletionToolParam
-from .chat_completion_message_param import ChatCompletionMessageParam
-from .chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam
-from .chat_completion_function_call_option_param import ChatCompletionFunctionCallOptionParam
-
-__all__ = [
-    "CompletionCreateParamsBase",
-    "FunctionCall",
-    "Function",
-    "ResponseFormat",
-    "CompletionCreateParamsNonStreaming",
-    "CompletionCreateParamsStreaming",
-]
-
-
-class CompletionCreateParamsBase(TypedDict, total=False):
-    messages: Required[Iterable[ChatCompletionMessageParam]]
-    """A list of messages comprising the conversation so far.
-
-    [Example Python code](https://cookbook.openai.com/examples/how_to_format_inputs_to_chatgpt_models).
-    """
-
-    model: Required[
-        Union[
-            str,
-            Literal[
-                "gpt-4-0125-preview",
-                "gpt-4-turbo-preview",
-                "gpt-4-1106-preview",
-                "gpt-4-vision-preview",
-                "gpt-4",
-                "gpt-4-0314",
-                "gpt-4-0613",
-                "gpt-4-32k",
-                "gpt-4-32k-0314",
-                "gpt-4-32k-0613",
-                "gpt-3.5-turbo",
-                "gpt-3.5-turbo-16k",
-                "gpt-3.5-turbo-0301",
-                "gpt-3.5-turbo-0613",
-                "gpt-3.5-turbo-1106",
-                "gpt-3.5-turbo-0125",
-                "gpt-3.5-turbo-16k-0613",
-            ],
-        ]
-    ]
-    """ID of the model to use.
-
-    See the
-    [model endpoint compatibility](https://platform.openai.com/docs/models/model-endpoint-compatibility)
-    table for details on which models work with the Chat API.
-    """
-
-    frequency_penalty: Optional[float]
-    """Number between -2.0 and 2.0.
-
-    Positive values penalize new tokens based on their existing frequency in the
-    text so far, decreasing the model's likelihood to repeat the same line verbatim.
-
-    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-    """
-
-    function_call: FunctionCall
-    """Deprecated in favor of `tool_choice`.
-
-    Controls which (if any) function is called by the model. `none` means the model
-    will not call a function and instead generates a message. `auto` means the model
-    can pick between generating a message or calling a function. Specifying a
-    particular function via `{"name": "my_function"}` forces the model to call that
-    function.
-
-    `none` is the default when no functions are present. `auto` is the default if
-    functions are present.
-    """
-
-    functions: Iterable[Function]
-    """Deprecated in favor of `tools`.
-
-    A list of functions the model may generate JSON inputs for.
-    """
-
-    logit_bias: Optional[Dict[str, int]]
-    """Modify the likelihood of specified tokens appearing in the completion.
-
-    Accepts a JSON object that maps tokens (specified by their token ID in the
-    tokenizer) to an associated bias value from -100 to 100. Mathematically, the
-    bias is added to the logits generated by the model prior to sampling. The exact
-    effect will vary per model, but values between -1 and 1 should decrease or
-    increase likelihood of selection; values like -100 or 100 should result in a ban
-    or exclusive selection of the relevant token.
-    """
-
-    logprobs: Optional[bool]
-    """Whether to return log probabilities of the output tokens or not.
-
-    If true, returns the log probabilities of each output token returned in the
-    `content` of `message`. This option is currently not available on the
-    `gpt-4-vision-preview` model.
-    """
-
-    max_tokens: Optional[int]
-    """
-    The maximum number of [tokens](/tokenizer) that can be generated in the chat
-    completion.
-
-    The total length of input tokens and generated tokens is limited by the model's
-    context length.
-    [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-    for counting tokens.
-    """
-
-    n: Optional[int]
-    """How many chat completion choices to generate for each input message.
-
-    Note that you will be charged based on the number of generated tokens across all
-    of the choices. Keep `n` as `1` to minimize costs.
-    """
-
-    presence_penalty: Optional[float]
-    """Number between -2.0 and 2.0.
-
-    Positive values penalize new tokens based on whether they appear in the text so
-    far, increasing the model's likelihood to talk about new topics.
-
-    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-    """
-
-    response_format: ResponseFormat
-    """An object specifying the format that the model must output.
-
-    Compatible with
-    [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo) and
-    all GPT-3.5 Turbo models newer than `gpt-3.5-turbo-1106`.
-
-    Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees the
-    message the model generates is valid JSON.
-
-    **Important:** when using JSON mode, you **must** also instruct the model to
-    produce JSON yourself via a system or user message. Without this, the model may
-    generate an unending stream of whitespace until the generation reaches the token
-    limit, resulting in a long-running and seemingly "stuck" request. Also note that
-    the message content may be partially cut off if `finish_reason="length"`, which
-    indicates the generation exceeded `max_tokens` or the conversation exceeded the
-    max context length.
-    """
-
-    seed: Optional[int]
-    """
-    This feature is in Beta. If specified, our system will make a best effort to
-    sample deterministically, such that repeated requests with the same `seed` and
-    parameters should return the same result. Determinism is not guaranteed, and you
-    should refer to the `system_fingerprint` response parameter to monitor changes
-    in the backend.
-    """
-
-    stop: Union[Optional[str], List[str]]
-    """Up to 4 sequences where the API will stop generating further tokens."""
-
-    temperature: Optional[float]
-    """What sampling temperature to use, between 0 and 2.
-
-    Higher values like 0.8 will make the output more random, while lower values like
-    0.2 will make it more focused and deterministic.
-
-    We generally recommend altering this or `top_p` but not both.
-    """
-
-    tool_choice: ChatCompletionToolChoiceOptionParam
-    """
-    Controls which (if any) function is called by the model. `none` means the model
-    will not call a function and instead generates a message. `auto` means the model
-    can pick between generating a message or calling a function. Specifying a
-    particular function via
-    `{"type": "function", "function": {"name": "my_function"}}` forces the model to
-    call that function.
-
-    `none` is the default when no functions are present. `auto` is the default if
-    functions are present.
-    """
-
-    tools: Iterable[ChatCompletionToolParam]
-    """A list of tools the model may call.
-
-    Currently, only functions are supported as a tool. Use this to provide a list of
-    functions the model may generate JSON inputs for. A max of 128 functions are
-    supported.
-    """
-
-    top_logprobs: Optional[int]
-    """
-    An integer between 0 and 20 specifying the number of most likely tokens to
-    return at each token position, each with an associated log probability.
-    `logprobs` must be set to `true` if this parameter is used.
-    """
-
-    top_p: Optional[float]
-    """
-    An alternative to sampling with temperature, called nucleus sampling, where the
-    model considers the results of the tokens with top_p probability mass. So 0.1
-    means only the tokens comprising the top 10% probability mass are considered.
-
-    We generally recommend altering this or `temperature` but not both.
-    """
-
-    user: str
-    """
-    A unique identifier representing your end-user, which can help OpenAI to monitor
-    and detect abuse.
-    [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-    """
-
-
-FunctionCall = Union[Literal["none", "auto"], ChatCompletionFunctionCallOptionParam]
-
-
-class Function(TypedDict, total=False):
-    name: Required[str]
-    """The name of the function to be called.
-
-    Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length
-    of 64.
-    """
-
-    description: str
-    """
-    A description of what the function does, used by the model to choose when and
-    how to call the function.
-    """
-
-    parameters: shared_params.FunctionParameters
-    """The parameters the functions accepts, described as a JSON Schema object.
-
-    See the
-    [guide](https://platform.openai.com/docs/guides/text-generation/function-calling)
-    for examples, and the
-    [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for
-    documentation about the format.
-
-    Omitting `parameters` defines a function with an empty parameter list.
-    """
-
-
-class ResponseFormat(TypedDict, total=False):
-    type: Literal["text", "json_object"]
-    """Must be one of `text` or `json_object`."""
-
-
-class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase):
-    stream: Optional[Literal[False]]
-    """If set, partial message deltas will be sent, like in ChatGPT.
-
-    Tokens will be sent as data-only
-    [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-    as they become available, with the stream terminated by a `data: [DONE]`
-    message.
-    [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-    """
-
-
-class CompletionCreateParamsStreaming(CompletionCreateParamsBase):
-    stream: Required[Literal[True]]
-    """If set, partial message deltas will be sent, like in ChatGPT.
-
-    Tokens will be sent as data-only
-    [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-    as they become available, with the stream terminated by a `data: [DONE]`
-    message.
-    [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-    """
-
-
-CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming]
diff --git a/openai/types/completion.py b/openai/types/completion.py
deleted file mode 100644
index d3b3102a..00000000
--- a/openai/types/completion.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Optional
-from typing_extensions import Literal
-
-from .._models import BaseModel
-from .completion_usage import CompletionUsage
-from .completion_choice import CompletionChoice
-
-__all__ = ["Completion"]
-
-
-class Completion(BaseModel):
-    id: str
-    """A unique identifier for the completion."""
-
-    choices: List[CompletionChoice]
-    """The list of completion choices the model generated for the input prompt."""
-
-    created: int
-    """The Unix timestamp (in seconds) of when the completion was created."""
-
-    model: str
-    """The model used for completion."""
-
-    object: Literal["text_completion"]
-    """The object type, which is always "text_completion" """
-
-    system_fingerprint: Optional[str] = None
-    """This fingerprint represents the backend configuration that the model runs with.
-
-    Can be used in conjunction with the `seed` request parameter to understand when
-    backend changes have been made that might impact determinism.
-    """
-
-    usage: Optional[CompletionUsage] = None
-    """Usage statistics for the completion request."""
diff --git a/openai/types/completion_choice.py b/openai/types/completion_choice.py
deleted file mode 100644
index d948ebc9..00000000
--- a/openai/types/completion_choice.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict, List, Optional
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["CompletionChoice", "Logprobs"]
-
-
-class Logprobs(BaseModel):
-    text_offset: Optional[List[int]] = None
-
-    token_logprobs: Optional[List[float]] = None
-
-    tokens: Optional[List[str]] = None
-
-    top_logprobs: Optional[List[Dict[str, float]]] = None
-
-
-class CompletionChoice(BaseModel):
-    finish_reason: Literal["stop", "length", "content_filter"]
-    """The reason the model stopped generating tokens.
-
-    This will be `stop` if the model hit a natural stop point or a provided stop
-    sequence, `length` if the maximum number of tokens specified in the request was
-    reached, or `content_filter` if content was omitted due to a flag from our
-    content filters.
-    """
-
-    index: int
-
-    logprobs: Optional[Logprobs] = None
-
-    text: str
diff --git a/openai/types/completion_create_params.py b/openai/types/completion_create_params.py
deleted file mode 100644
index 36267e90..00000000
--- a/openai/types/completion_create_params.py
+++ /dev/null
@@ -1,182 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict, List, Union, Iterable, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["CompletionCreateParamsBase", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming"]
-
-
-class CompletionCreateParamsBase(TypedDict, total=False):
-    model: Required[Union[str, Literal["gpt-3.5-turbo-instruct", "davinci-002", "babbage-002"]]]
-    """ID of the model to use.
-
-    You can use the
-    [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-    see all of your available models, or see our
-    [Model overview](https://platform.openai.com/docs/models/overview) for
-    descriptions of them.
-    """
-
-    prompt: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]], None]]
-    """
-    The prompt(s) to generate completions for, encoded as a string, array of
-    strings, array of tokens, or array of token arrays.
-
-    Note that <|endoftext|> is the document separator that the model sees during
-    training, so if a prompt is not specified the model will generate as if from the
-    beginning of a new document.
-    """
-
-    best_of: Optional[int]
-    """
-    Generates `best_of` completions server-side and returns the "best" (the one with
-    the highest log probability per token). Results cannot be streamed.
-
-    When used with `n`, `best_of` controls the number of candidate completions and
-    `n` specifies how many to return – `best_of` must be greater than `n`.
-
-    **Note:** Because this parameter generates many completions, it can quickly
-    consume your token quota. Use carefully and ensure that you have reasonable
-    settings for `max_tokens` and `stop`.
-    """
-
-    echo: Optional[bool]
-    """Echo back the prompt in addition to the completion"""
-
-    frequency_penalty: Optional[float]
-    """Number between -2.0 and 2.0.
-
-    Positive values penalize new tokens based on their existing frequency in the
-    text so far, decreasing the model's likelihood to repeat the same line verbatim.
-
-    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-    """
-
-    logit_bias: Optional[Dict[str, int]]
-    """Modify the likelihood of specified tokens appearing in the completion.
-
-    Accepts a JSON object that maps tokens (specified by their token ID in the GPT
-    tokenizer) to an associated bias value from -100 to 100. You can use this
-    [tokenizer tool](/tokenizer?view=bpe) to convert text to token IDs.
-    Mathematically, the bias is added to the logits generated by the model prior to
-    sampling. The exact effect will vary per model, but values between -1 and 1
-    should decrease or increase likelihood of selection; values like -100 or 100
-    should result in a ban or exclusive selection of the relevant token.
-
-    As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token
-    from being generated.
-    """
-
-    logprobs: Optional[int]
-    """
-    Include the log probabilities on the `logprobs` most likely output tokens, as
-    well the chosen tokens. For example, if `logprobs` is 5, the API will return a
-    list of the 5 most likely tokens. The API will always return the `logprob` of
-    the sampled token, so there may be up to `logprobs+1` elements in the response.
-
-    The maximum value for `logprobs` is 5.
-    """
-
-    max_tokens: Optional[int]
-    """
-    The maximum number of [tokens](/tokenizer) that can be generated in the
-    completion.
-
-    The token count of your prompt plus `max_tokens` cannot exceed the model's
-    context length.
-    [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-    for counting tokens.
-    """
-
-    n: Optional[int]
-    """How many completions to generate for each prompt.
-
-    **Note:** Because this parameter generates many completions, it can quickly
-    consume your token quota. Use carefully and ensure that you have reasonable
-    settings for `max_tokens` and `stop`.
-    """
-
-    presence_penalty: Optional[float]
-    """Number between -2.0 and 2.0.
-
-    Positive values penalize new tokens based on whether they appear in the text so
-    far, increasing the model's likelihood to talk about new topics.
-
-    [See more information about frequency and presence penalties.](https://platform.openai.com/docs/guides/text-generation/parameter-details)
-    """
-
-    seed: Optional[int]
-    """
-    If specified, our system will make a best effort to sample deterministically,
-    such that repeated requests with the same `seed` and parameters should return
-    the same result.
-
-    Determinism is not guaranteed, and you should refer to the `system_fingerprint`
-    response parameter to monitor changes in the backend.
-    """
-
-    stop: Union[Optional[str], List[str], None]
-    """Up to 4 sequences where the API will stop generating further tokens.
-
-    The returned text will not contain the stop sequence.
-    """
-
-    suffix: Optional[str]
-    """The suffix that comes after a completion of inserted text.
-
-    This parameter is only supported for `gpt-3.5-turbo-instruct`.
-    """
-
-    temperature: Optional[float]
-    """What sampling temperature to use, between 0 and 2.
-
-    Higher values like 0.8 will make the output more random, while lower values like
-    0.2 will make it more focused and deterministic.
-
-    We generally recommend altering this or `top_p` but not both.
-    """
-
-    top_p: Optional[float]
-    """
-    An alternative to sampling with temperature, called nucleus sampling, where the
-    model considers the results of the tokens with top_p probability mass. So 0.1
-    means only the tokens comprising the top 10% probability mass are considered.
-
-    We generally recommend altering this or `temperature` but not both.
-    """
-
-    user: str
-    """
-    A unique identifier representing your end-user, which can help OpenAI to monitor
-    and detect abuse.
-    [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-    """
-
-
-class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase):
-    stream: Optional[Literal[False]]
-    """Whether to stream back partial progress.
-
-    If set, tokens will be sent as data-only
-    [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-    as they become available, with the stream terminated by a `data: [DONE]`
-    message.
-    [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-    """
-
-
-class CompletionCreateParamsStreaming(CompletionCreateParamsBase):
-    stream: Required[Literal[True]]
-    """Whether to stream back partial progress.
-
-    If set, tokens will be sent as data-only
-    [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
-    as they become available, with the stream terminated by a `data: [DONE]`
-    message.
-    [Example Python code](https://cookbook.openai.com/examples/how_to_stream_completions).
-    """
-
-
-CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming]
diff --git a/openai/types/completion_usage.py b/openai/types/completion_usage.py
deleted file mode 100644
index e185a5cc..00000000
--- a/openai/types/completion_usage.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .._models import BaseModel
-
-__all__ = ["CompletionUsage"]
-
-
-class CompletionUsage(BaseModel):
-    completion_tokens: int
-    """Number of tokens in the generated completion."""
-
-    prompt_tokens: int
-    """Number of tokens in the prompt."""
-
-    total_tokens: int
-    """Total number of tokens used in the request (prompt + completion)."""
diff --git a/openai/types/create_embedding_response.py b/openai/types/create_embedding_response.py
deleted file mode 100644
index eff247a1..00000000
--- a/openai/types/create_embedding_response.py
+++ /dev/null
@@ -1,31 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-from typing_extensions import Literal
-
-from .._models import BaseModel
-from .embedding import Embedding
-
-__all__ = ["CreateEmbeddingResponse", "Usage"]
-
-
-class Usage(BaseModel):
-    prompt_tokens: int
-    """The number of tokens used by the prompt."""
-
-    total_tokens: int
-    """The total number of tokens used by the request."""
-
-
-class CreateEmbeddingResponse(BaseModel):
-    data: List[Embedding]
-    """The list of embeddings generated by the model."""
-
-    model: str
-    """The name of the model used to generate the embedding."""
-
-    object: Literal["list"]
-    """The object type, which is always "list"."""
-
-    usage: Usage
-    """The usage information for the request."""
diff --git a/openai/types/embedding.py b/openai/types/embedding.py
deleted file mode 100644
index 769b1d16..00000000
--- a/openai/types/embedding.py
+++ /dev/null
@@ -1,23 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["Embedding"]
-
-
-class Embedding(BaseModel):
-    embedding: List[float]
-    """The embedding vector, which is a list of floats.
-
-    The length of vector depends on the model as listed in the
-    [embedding guide](https://platform.openai.com/docs/guides/embeddings).
-    """
-
-    index: int
-    """The index of the embedding in the list of embeddings."""
-
-    object: Literal["embedding"]
-    """The object type, which is always "embedding"."""
diff --git a/openai/types/embedding_create_params.py b/openai/types/embedding_create_params.py
deleted file mode 100644
index 930b3b79..00000000
--- a/openai/types/embedding_create_params.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["EmbeddingCreateParams"]
-
-
-class EmbeddingCreateParams(TypedDict, total=False):
-    input: Required[Union[str, List[str], Iterable[int], Iterable[Iterable[int]]]]
-    """Input text to embed, encoded as a string or array of tokens.
-
-    To embed multiple inputs in a single request, pass an array of strings or array
-    of token arrays. The input must not exceed the max input tokens for the model
-    (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any
-    array must be 2048 dimensions or less.
-    [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-    for counting tokens.
-    """
-
-    model: Required[Union[str, Literal["text-embedding-ada-002", "text-embedding-3-small", "text-embedding-3-large"]]]
-    """ID of the model to use.
-
-    You can use the
-    [List models](https://platform.openai.com/docs/api-reference/models/list) API to
-    see all of your available models, or see our
-    [Model overview](https://platform.openai.com/docs/models/overview) for
-    descriptions of them.
-    """
-
-    dimensions: int
-    """The number of dimensions the resulting output embeddings should have.
-
-    Only supported in `text-embedding-3` and later models.
-    """
-
-    encoding_format: Literal["float", "base64"]
-    """The format to return the embeddings in.
-
-    Can be either `float` or [`base64`](https://pypi.org/project/pybase64/).
-    """
-
-    user: str
-    """
-    A unique identifier representing your end-user, which can help OpenAI to monitor
-    and detect abuse.
-    [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-    """
diff --git a/openai/types/file_content.py b/openai/types/file_content.py
deleted file mode 100644
index b4aa08a9..00000000
--- a/openai/types/file_content.py
+++ /dev/null
@@ -1,6 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-
-__all__ = ["FileContent"]
-
-FileContent = str
diff --git a/openai/types/file_create_params.py b/openai/types/file_create_params.py
deleted file mode 100644
index 26e2da33..00000000
--- a/openai/types/file_create_params.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Literal, Required, TypedDict
-
-from .._types import FileTypes
-
-__all__ = ["FileCreateParams"]
-
-
-class FileCreateParams(TypedDict, total=False):
-    file: Required[FileTypes]
-    """The File object (not file name) to be uploaded."""
-
-    purpose: Required[Literal["fine-tune", "assistants"]]
-    """The intended purpose of the uploaded file.
-
-    Use "fine-tune" for
-    [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) and
-    "assistants" for
-    [Assistants](https://platform.openai.com/docs/api-reference/assistants) and
-    [Messages](https://platform.openai.com/docs/api-reference/messages). This allows
-    us to validate the format of the uploaded file is correct for fine-tuning.
-    """
diff --git a/openai/types/file_deleted.py b/openai/types/file_deleted.py
deleted file mode 100644
index f25fa87a..00000000
--- a/openai/types/file_deleted.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["FileDeleted"]
-
-
-class FileDeleted(BaseModel):
-    id: str
-
-    deleted: bool
-
-    object: Literal["file"]
diff --git a/openai/types/file_list_params.py b/openai/types/file_list_params.py
deleted file mode 100644
index 212eca13..00000000
--- a/openai/types/file_list_params.py
+++ /dev/null
@@ -1,12 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import TypedDict
-
-__all__ = ["FileListParams"]
-
-
-class FileListParams(TypedDict, total=False):
-    purpose: str
-    """Only return files with the given purpose."""
diff --git a/openai/types/file_object.py b/openai/types/file_object.py
deleted file mode 100644
index 589a1faf..00000000
--- a/openai/types/file_object.py
+++ /dev/null
@@ -1,46 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["FileObject"]
-
-
-class FileObject(BaseModel):
-    id: str
-    """The file identifier, which can be referenced in the API endpoints."""
-
-    bytes: int
-    """The size of the file, in bytes."""
-
-    created_at: int
-    """The Unix timestamp (in seconds) for when the file was created."""
-
-    filename: str
-    """The name of the file."""
-
-    object: Literal["file"]
-    """The object type, which is always `file`."""
-
-    purpose: Literal["fine-tune", "fine-tune-results", "assistants", "assistants_output"]
-    """The intended purpose of the file.
-
-    Supported values are `fine-tune`, `fine-tune-results`, `assistants`, and
-    `assistants_output`.
-    """
-
-    status: Literal["uploaded", "processed", "error"]
-    """Deprecated.
-
-    The current status of the file, which can be either `uploaded`, `processed`, or
-    `error`.
-    """
-
-    status_details: Optional[str] = None
-    """Deprecated.
-
-    For details on why a fine-tuning training file failed validation, see the
-    `error` field on `fine_tuning.job`.
-    """
diff --git a/openai/types/fine_tuning/__init__.py b/openai/types/fine_tuning/__init__.py
deleted file mode 100644
index 0bb2b904..00000000
--- a/openai/types/fine_tuning/__init__.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from .fine_tuning_job import FineTuningJob as FineTuningJob
-from .job_list_params import JobListParams as JobListParams
-from .job_create_params import JobCreateParams as JobCreateParams
-from .fine_tuning_job_event import FineTuningJobEvent as FineTuningJobEvent
-from .job_list_events_params import JobListEventsParams as JobListEventsParams
diff --git a/openai/types/fine_tuning/fine_tuning_job.py b/openai/types/fine_tuning/fine_tuning_job.py
deleted file mode 100644
index 23fe96d1..00000000
--- a/openai/types/fine_tuning/fine_tuning_job.py
+++ /dev/null
@@ -1,107 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List, Union, Optional
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["FineTuningJob", "Error", "Hyperparameters"]
-
-
-class Error(BaseModel):
-    code: str
-    """A machine-readable error code."""
-
-    message: str
-    """A human-readable error message."""
-
-    param: Optional[str] = None
-    """The parameter that was invalid, usually `training_file` or `validation_file`.
-
-    This field will be null if the failure was not parameter-specific.
-    """
-
-
-class Hyperparameters(BaseModel):
-    n_epochs: Union[Literal["auto"], int]
-    """The number of epochs to train the model for.
-
-    An epoch refers to one full cycle through the training dataset. "auto" decides
-    the optimal number of epochs based on the size of the dataset. If setting the
-    number manually, we support any number between 1 and 50 epochs.
-    """
-
-
-class FineTuningJob(BaseModel):
-    id: str
-    """The object identifier, which can be referenced in the API endpoints."""
-
-    created_at: int
-    """The Unix timestamp (in seconds) for when the fine-tuning job was created."""
-
-    error: Optional[Error] = None
-    """
-    For fine-tuning jobs that have `failed`, this will contain more information on
-    the cause of the failure.
-    """
-
-    fine_tuned_model: Optional[str] = None
-    """The name of the fine-tuned model that is being created.
-
-    The value will be null if the fine-tuning job is still running.
-    """
-
-    finished_at: Optional[int] = None
-    """The Unix timestamp (in seconds) for when the fine-tuning job was finished.
-
-    The value will be null if the fine-tuning job is still running.
-    """
-
-    hyperparameters: Hyperparameters
-    """The hyperparameters used for the fine-tuning job.
-
-    See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
-    for more details.
-    """
-
-    model: str
-    """The base model that is being fine-tuned."""
-
-    object: Literal["fine_tuning.job"]
-    """The object type, which is always "fine_tuning.job"."""
-
-    organization_id: str
-    """The organization that owns the fine-tuning job."""
-
-    result_files: List[str]
-    """The compiled results file ID(s) for the fine-tuning job.
-
-    You can retrieve the results with the
-    [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
-    """
-
-    status: Literal["validating_files", "queued", "running", "succeeded", "failed", "cancelled"]
-    """
-    The current status of the fine-tuning job, which can be either
-    `validating_files`, `queued`, `running`, `succeeded`, `failed`, or `cancelled`.
-    """
-
-    trained_tokens: Optional[int] = None
-    """The total number of billable tokens processed by this fine-tuning job.
-
-    The value will be null if the fine-tuning job is still running.
-    """
-
-    training_file: str
-    """The file ID used for training.
-
-    You can retrieve the training data with the
-    [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
-    """
-
-    validation_file: Optional[str] = None
-    """The file ID used for validation.
-
-    You can retrieve the validation results with the
-    [Files API](https://platform.openai.com/docs/api-reference/files/retrieve-contents).
-    """
diff --git a/openai/types/fine_tuning/fine_tuning_job_event.py b/openai/types/fine_tuning/fine_tuning_job_event.py
deleted file mode 100644
index 2d204bb9..00000000
--- a/openai/types/fine_tuning/fine_tuning_job_event.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from ..._models import BaseModel
-
-__all__ = ["FineTuningJobEvent"]
-
-
-class FineTuningJobEvent(BaseModel):
-    id: str
-
-    created_at: int
-
-    level: Literal["info", "warn", "error"]
-
-    message: str
-
-    object: Literal["fine_tuning.job.event"]
diff --git a/openai/types/fine_tuning/job_create_params.py b/openai/types/fine_tuning/job_create_params.py
deleted file mode 100644
index 79e0b67e..00000000
--- a/openai/types/fine_tuning/job_create_params.py
+++ /dev/null
@@ -1,78 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["JobCreateParams", "Hyperparameters"]
-
-
-class JobCreateParams(TypedDict, total=False):
-    model: Required[Union[str, Literal["babbage-002", "davinci-002", "gpt-3.5-turbo"]]]
-    """The name of the model to fine-tune.
-
-    You can select one of the
-    [supported models](https://platform.openai.com/docs/guides/fine-tuning/what-models-can-be-fine-tuned).
-    """
-
-    training_file: Required[str]
-    """The ID of an uploaded file that contains training data.
-
-    See [upload file](https://platform.openai.com/docs/api-reference/files/upload)
-    for how to upload a file.
-
-    Your dataset must be formatted as a JSONL file. Additionally, you must upload
-    your file with the purpose `fine-tune`.
-
-    See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
-    for more details.
-    """
-
-    hyperparameters: Hyperparameters
-    """The hyperparameters used for the fine-tuning job."""
-
-    suffix: Optional[str]
-    """
-    A string of up to 18 characters that will be added to your fine-tuned model
-    name.
-
-    For example, a `suffix` of "custom-model-name" would produce a model name like
-    `ft:gpt-3.5-turbo:openai:custom-model-name:7p4lURel`.
-    """
-
-    validation_file: Optional[str]
-    """The ID of an uploaded file that contains validation data.
-
-    If you provide this file, the data is used to generate validation metrics
-    periodically during fine-tuning. These metrics can be viewed in the fine-tuning
-    results file. The same data should not be present in both train and validation
-    files.
-
-    Your dataset must be formatted as a JSONL file. You must upload your file with
-    the purpose `fine-tune`.
-
-    See the [fine-tuning guide](https://platform.openai.com/docs/guides/fine-tuning)
-    for more details.
-    """
-
-
-class Hyperparameters(TypedDict, total=False):
-    batch_size: Union[Literal["auto"], int]
-    """Number of examples in each batch.
-
-    A larger batch size means that model parameters are updated less frequently, but
-    with lower variance.
-    """
-
-    learning_rate_multiplier: Union[Literal["auto"], float]
-    """Scaling factor for the learning rate.
-
-    A smaller learning rate may be useful to avoid overfitting.
-    """
-
-    n_epochs: Union[Literal["auto"], int]
-    """The number of epochs to train the model for.
-
-    An epoch refers to one full cycle through the training dataset.
-    """
diff --git a/openai/types/fine_tuning/job_list_events_params.py b/openai/types/fine_tuning/job_list_events_params.py
deleted file mode 100644
index e1c9a64d..00000000
--- a/openai/types/fine_tuning/job_list_events_params.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import TypedDict
-
-__all__ = ["JobListEventsParams"]
-
-
-class JobListEventsParams(TypedDict, total=False):
-    after: str
-    """Identifier for the last event from the previous pagination request."""
-
-    limit: int
-    """Number of events to retrieve."""
diff --git a/openai/types/fine_tuning/job_list_params.py b/openai/types/fine_tuning/job_list_params.py
deleted file mode 100644
index 5c075ca3..00000000
--- a/openai/types/fine_tuning/job_list_params.py
+++ /dev/null
@@ -1,15 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import TypedDict
-
-__all__ = ["JobListParams"]
-
-
-class JobListParams(TypedDict, total=False):
-    after: str
-    """Identifier for the last job from the previous pagination request."""
-
-    limit: int
-    """Number of fine-tuning jobs to retrieve."""
diff --git a/openai/types/image.py b/openai/types/image.py
deleted file mode 100644
index f48aa2c7..00000000
--- a/openai/types/image.py
+++ /dev/null
@@ -1,24 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-
-from .._models import BaseModel
-
-__all__ = ["Image"]
-
-
-class Image(BaseModel):
-    b64_json: Optional[str] = None
-    """
-    The base64-encoded JSON of the generated image, if `response_format` is
-    `b64_json`.
-    """
-
-    revised_prompt: Optional[str] = None
-    """
-    The prompt that was used to generate the image, if there was any revision to the
-    prompt.
-    """
-
-    url: Optional[str] = None
-    """The URL of the generated image, if `response_format` is `url` (default)."""
diff --git a/openai/types/image_create_variation_params.py b/openai/types/image_create_variation_params.py
deleted file mode 100644
index 25493073..00000000
--- a/openai/types/image_create_variation_params.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-from .._types import FileTypes
-
-__all__ = ["ImageCreateVariationParams"]
-
-
-class ImageCreateVariationParams(TypedDict, total=False):
-    image: Required[FileTypes]
-    """The image to use as the basis for the variation(s).
-
-    Must be a valid PNG file, less than 4MB, and square.
-    """
-
-    model: Union[str, Literal["dall-e-2"], None]
-    """The model to use for image generation.
-
-    Only `dall-e-2` is supported at this time.
-    """
-
-    n: Optional[int]
-    """The number of images to generate.
-
-    Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
-    """
-
-    response_format: Optional[Literal["url", "b64_json"]]
-    """The format in which the generated images are returned.
-
-    Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the
-    image has been generated.
-    """
-
-    size: Optional[Literal["256x256", "512x512", "1024x1024"]]
-    """The size of the generated images.
-
-    Must be one of `256x256`, `512x512`, or `1024x1024`.
-    """
-
-    user: str
-    """
-    A unique identifier representing your end-user, which can help OpenAI to monitor
-    and detect abuse.
-    [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-    """
diff --git a/openai/types/image_edit_params.py b/openai/types/image_edit_params.py
deleted file mode 100644
index 073456e3..00000000
--- a/openai/types/image_edit_params.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-from .._types import FileTypes
-
-__all__ = ["ImageEditParams"]
-
-
-class ImageEditParams(TypedDict, total=False):
-    image: Required[FileTypes]
-    """The image to edit.
-
-    Must be a valid PNG file, less than 4MB, and square. If mask is not provided,
-    image must have transparency, which will be used as the mask.
-    """
-
-    prompt: Required[str]
-    """A text description of the desired image(s).
-
-    The maximum length is 1000 characters.
-    """
-
-    mask: FileTypes
-    """An additional image whose fully transparent areas (e.g.
-
-    where alpha is zero) indicate where `image` should be edited. Must be a valid
-    PNG file, less than 4MB, and have the same dimensions as `image`.
-    """
-
-    model: Union[str, Literal["dall-e-2"], None]
-    """The model to use for image generation.
-
-    Only `dall-e-2` is supported at this time.
-    """
-
-    n: Optional[int]
-    """The number of images to generate. Must be between 1 and 10."""
-
-    response_format: Optional[Literal["url", "b64_json"]]
-    """The format in which the generated images are returned.
-
-    Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the
-    image has been generated.
-    """
-
-    size: Optional[Literal["256x256", "512x512", "1024x1024"]]
-    """The size of the generated images.
-
-    Must be one of `256x256`, `512x512`, or `1024x1024`.
-    """
-
-    user: str
-    """
-    A unique identifier representing your end-user, which can help OpenAI to monitor
-    and detect abuse.
-    [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-    """
diff --git a/openai/types/image_generate_params.py b/openai/types/image_generate_params.py
deleted file mode 100644
index 18c56f8e..00000000
--- a/openai/types/image_generate_params.py
+++ /dev/null
@@ -1,63 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Union, Optional
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ImageGenerateParams"]
-
-
-class ImageGenerateParams(TypedDict, total=False):
-    prompt: Required[str]
-    """A text description of the desired image(s).
-
-    The maximum length is 1000 characters for `dall-e-2` and 4000 characters for
-    `dall-e-3`.
-    """
-
-    model: Union[str, Literal["dall-e-2", "dall-e-3"], None]
-    """The model to use for image generation."""
-
-    n: Optional[int]
-    """The number of images to generate.
-
-    Must be between 1 and 10. For `dall-e-3`, only `n=1` is supported.
-    """
-
-    quality: Literal["standard", "hd"]
-    """The quality of the image that will be generated.
-
-    `hd` creates images with finer details and greater consistency across the image.
-    This param is only supported for `dall-e-3`.
-    """
-
-    response_format: Optional[Literal["url", "b64_json"]]
-    """The format in which the generated images are returned.
-
-    Must be one of `url` or `b64_json`. URLs are only valid for 60 minutes after the
-    image has been generated.
-    """
-
-    size: Optional[Literal["256x256", "512x512", "1024x1024", "1792x1024", "1024x1792"]]
-    """The size of the generated images.
-
-    Must be one of `256x256`, `512x512`, or `1024x1024` for `dall-e-2`. Must be one
-    of `1024x1024`, `1792x1024`, or `1024x1792` for `dall-e-3` models.
-    """
-
-    style: Optional[Literal["vivid", "natural"]]
-    """The style of the generated images.
-
-    Must be one of `vivid` or `natural`. Vivid causes the model to lean towards
-    generating hyper-real and dramatic images. Natural causes the model to produce
-    more natural, less hyper-real looking images. This param is only supported for
-    `dall-e-3`.
-    """
-
-    user: str
-    """
-    A unique identifier representing your end-user, which can help OpenAI to monitor
-    and detect abuse.
-    [Learn more](https://platform.openai.com/docs/guides/safety-best-practices/end-user-ids).
-    """
diff --git a/openai/types/images_response.py b/openai/types/images_response.py
deleted file mode 100644
index 7cee8131..00000000
--- a/openai/types/images_response.py
+++ /dev/null
@@ -1,14 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-
-from .image import Image
-from .._models import BaseModel
-
-__all__ = ["ImagesResponse"]
-
-
-class ImagesResponse(BaseModel):
-    created: int
-
-    data: List[Image]
diff --git a/openai/types/model.py b/openai/types/model.py
deleted file mode 100644
index 2631ee8d..00000000
--- a/openai/types/model.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing_extensions import Literal
-
-from .._models import BaseModel
-
-__all__ = ["Model"]
-
-
-class Model(BaseModel):
-    id: str
-    """The model identifier, which can be referenced in the API endpoints."""
-
-    created: int
-    """The Unix timestamp (in seconds) when the model was created."""
-
-    object: Literal["model"]
-    """The object type, which is always "model"."""
-
-    owned_by: str
-    """The organization that owns the model."""
diff --git a/openai/types/model_deleted.py b/openai/types/model_deleted.py
deleted file mode 100644
index e7601f74..00000000
--- a/openai/types/model_deleted.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .._models import BaseModel
-
-__all__ = ["ModelDeleted"]
-
-
-class ModelDeleted(BaseModel):
-    id: str
-
-    deleted: bool
-
-    object: str
diff --git a/openai/types/moderation.py b/openai/types/moderation.py
deleted file mode 100644
index 2a2e5c5d..00000000
--- a/openai/types/moderation.py
+++ /dev/null
@@ -1,117 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from pydantic import Field as FieldInfo
-
-from .._models import BaseModel
-
-__all__ = ["Moderation", "Categories", "CategoryScores"]
-
-
-class Categories(BaseModel):
-    harassment: bool
-    """
-    Content that expresses, incites, or promotes harassing language towards any
-    target.
-    """
-
-    harassment_threatening: bool = FieldInfo(alias="harassment/threatening")
-    """
-    Harassment content that also includes violence or serious harm towards any
-    target.
-    """
-
-    hate: bool
-    """
-    Content that expresses, incites, or promotes hate based on race, gender,
-    ethnicity, religion, nationality, sexual orientation, disability status, or
-    caste. Hateful content aimed at non-protected groups (e.g., chess players) is
-    harassment.
-    """
-
-    hate_threatening: bool = FieldInfo(alias="hate/threatening")
-    """
-    Hateful content that also includes violence or serious harm towards the targeted
-    group based on race, gender, ethnicity, religion, nationality, sexual
-    orientation, disability status, or caste.
-    """
-
-    self_harm: bool = FieldInfo(alias="self-harm")
-    """
-    Content that promotes, encourages, or depicts acts of self-harm, such as
-    suicide, cutting, and eating disorders.
-    """
-
-    self_harm_instructions: bool = FieldInfo(alias="self-harm/instructions")
-    """
-    Content that encourages performing acts of self-harm, such as suicide, cutting,
-    and eating disorders, or that gives instructions or advice on how to commit such
-    acts.
-    """
-
-    self_harm_intent: bool = FieldInfo(alias="self-harm/intent")
-    """
-    Content where the speaker expresses that they are engaging or intend to engage
-    in acts of self-harm, such as suicide, cutting, and eating disorders.
-    """
-
-    sexual: bool
-    """
-    Content meant to arouse sexual excitement, such as the description of sexual
-    activity, or that promotes sexual services (excluding sex education and
-    wellness).
-    """
-
-    sexual_minors: bool = FieldInfo(alias="sexual/minors")
-    """Sexual content that includes an individual who is under 18 years old."""
-
-    violence: bool
-    """Content that depicts death, violence, or physical injury."""
-
-    violence_graphic: bool = FieldInfo(alias="violence/graphic")
-    """Content that depicts death, violence, or physical injury in graphic detail."""
-
-
-class CategoryScores(BaseModel):
-    harassment: float
-    """The score for the category 'harassment'."""
-
-    harassment_threatening: float = FieldInfo(alias="harassment/threatening")
-    """The score for the category 'harassment/threatening'."""
-
-    hate: float
-    """The score for the category 'hate'."""
-
-    hate_threatening: float = FieldInfo(alias="hate/threatening")
-    """The score for the category 'hate/threatening'."""
-
-    self_harm: float = FieldInfo(alias="self-harm")
-    """The score for the category 'self-harm'."""
-
-    self_harm_instructions: float = FieldInfo(alias="self-harm/instructions")
-    """The score for the category 'self-harm/instructions'."""
-
-    self_harm_intent: float = FieldInfo(alias="self-harm/intent")
-    """The score for the category 'self-harm/intent'."""
-
-    sexual: float
-    """The score for the category 'sexual'."""
-
-    sexual_minors: float = FieldInfo(alias="sexual/minors")
-    """The score for the category 'sexual/minors'."""
-
-    violence: float
-    """The score for the category 'violence'."""
-
-    violence_graphic: float = FieldInfo(alias="violence/graphic")
-    """The score for the category 'violence/graphic'."""
-
-
-class Moderation(BaseModel):
-    categories: Categories
-    """A list of the categories, and whether they are flagged or not."""
-
-    category_scores: CategoryScores
-    """A list of the categories along with their scores as predicted by model."""
-
-    flagged: bool
-    """Whether any of the below categories are flagged."""
diff --git a/openai/types/moderation_create_params.py b/openai/types/moderation_create_params.py
deleted file mode 100644
index d4608def..00000000
--- a/openai/types/moderation_create_params.py
+++ /dev/null
@@ -1,25 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import List, Union
-from typing_extensions import Literal, Required, TypedDict
-
-__all__ = ["ModerationCreateParams"]
-
-
-class ModerationCreateParams(TypedDict, total=False):
-    input: Required[Union[str, List[str]]]
-    """The input text to classify"""
-
-    model: Union[str, Literal["text-moderation-latest", "text-moderation-stable"]]
-    """
-    Two content moderations models are available: `text-moderation-stable` and
-    `text-moderation-latest`.
-
-    The default is `text-moderation-latest` which will be automatically upgraded
-    over time. This ensures you are always using our most accurate model. If you use
-    `text-moderation-stable`, we will provide advanced notice before updating the
-    model. Accuracy of `text-moderation-stable` may be slightly lower than for
-    `text-moderation-latest`.
-    """
diff --git a/openai/types/moderation_create_response.py b/openai/types/moderation_create_response.py
deleted file mode 100644
index 79684f8a..00000000
--- a/openai/types/moderation_create_response.py
+++ /dev/null
@@ -1,19 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import List
-
-from .._models import BaseModel
-from .moderation import Moderation
-
-__all__ = ["ModerationCreateResponse"]
-
-
-class ModerationCreateResponse(BaseModel):
-    id: str
-    """The unique identifier for the moderation request."""
-
-    model: str
-    """The model used to generate the moderation results."""
-
-    results: List[Moderation]
-    """A list of moderation objects."""
diff --git a/openai/types/shared/__init__.py b/openai/types/shared/__init__.py
deleted file mode 100644
index e085744e..00000000
--- a/openai/types/shared/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .error_object import ErrorObject as ErrorObject
-from .function_definition import FunctionDefinition as FunctionDefinition
-from .function_parameters import FunctionParameters as FunctionParameters
diff --git a/openai/types/shared/error_object.py b/openai/types/shared/error_object.py
deleted file mode 100644
index 32d7045e..00000000
--- a/openai/types/shared/error_object.py
+++ /dev/null
@@ -1,17 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-
-from ..._models import BaseModel
-
-__all__ = ["ErrorObject"]
-
-
-class ErrorObject(BaseModel):
-    code: Optional[str] = None
-
-    message: str
-
-    param: Optional[str] = None
-
-    type: str
diff --git a/openai/types/shared/function_definition.py b/openai/types/shared/function_definition.py
deleted file mode 100644
index a39116d6..00000000
--- a/openai/types/shared/function_definition.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Optional
-
-from ..._models import BaseModel
-from .function_parameters import FunctionParameters
-
-__all__ = ["FunctionDefinition"]
-
-
-class FunctionDefinition(BaseModel):
-    name: str
-    """The name of the function to be called.
-
-    Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length
-    of 64.
-    """
-
-    description: Optional[str] = None
-    """
-    A description of what the function does, used by the model to choose when and
-    how to call the function.
-    """
-
-    parameters: Optional[FunctionParameters] = None
-    """The parameters the functions accepts, described as a JSON Schema object.
-
-    See the
-    [guide](https://platform.openai.com/docs/guides/text-generation/function-calling)
-    for examples, and the
-    [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for
-    documentation about the format.
-
-    Omitting `parameters` defines a function with an empty parameter list.
-    """
diff --git a/openai/types/shared/function_parameters.py b/openai/types/shared/function_parameters.py
deleted file mode 100644
index c9524e4c..00000000
--- a/openai/types/shared/function_parameters.py
+++ /dev/null
@@ -1,7 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from typing import Dict
-
-__all__ = ["FunctionParameters"]
-
-FunctionParameters = Dict[str, object]
diff --git a/openai/types/shared_params/__init__.py b/openai/types/shared_params/__init__.py
deleted file mode 100644
index ef638cb2..00000000
--- a/openai/types/shared_params/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from .function_definition import FunctionDefinition as FunctionDefinition
-from .function_parameters import FunctionParameters as FunctionParameters
diff --git a/openai/types/shared_params/function_definition.py b/openai/types/shared_params/function_definition.py
deleted file mode 100644
index 58d0203b..00000000
--- a/openai/types/shared_params/function_definition.py
+++ /dev/null
@@ -1,36 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing_extensions import Required, TypedDict
-
-from ...types import shared_params
-
-__all__ = ["FunctionDefinition"]
-
-
-class FunctionDefinition(TypedDict, total=False):
-    name: Required[str]
-    """The name of the function to be called.
-
-    Must be a-z, A-Z, 0-9, or contain underscores and dashes, with a maximum length
-    of 64.
-    """
-
-    description: str
-    """
-    A description of what the function does, used by the model to choose when and
-    how to call the function.
-    """
-
-    parameters: shared_params.FunctionParameters
-    """The parameters the functions accepts, described as a JSON Schema object.
-
-    See the
-    [guide](https://platform.openai.com/docs/guides/text-generation/function-calling)
-    for examples, and the
-    [JSON Schema reference](https://json-schema.org/understanding-json-schema/) for
-    documentation about the format.
-
-    Omitting `parameters` defines a function with an empty parameter list.
-    """
diff --git a/openai/types/shared_params/function_parameters.py b/openai/types/shared_params/function_parameters.py
deleted file mode 100644
index 5b40efb7..00000000
--- a/openai/types/shared_params/function_parameters.py
+++ /dev/null
@@ -1,9 +0,0 @@
-# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-
-from __future__ import annotations
-
-from typing import Dict
-
-__all__ = ["FunctionParameters"]
-
-FunctionParameters = Dict[str, object]
diff --git a/openai/version.py b/openai/version.py
deleted file mode 100644
index 01a08ab5..00000000
--- a/openai/version.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from ._version import __version__
-
-VERSION: str = __version__

From e53c8e1ca57b1e13b4cff495b6c71661dd00a5cd Mon Sep 17 00:00:00 2001
From: snipe <72265661+notsniped@users.noreply.github.com>
Date: Sat, 20 Apr 2024 22:00:00 +0530
Subject: [PATCH 2/2] Add back support for `/chatgpt` and `/openai` isobot AI
 commands

Finally lets gooooo
---
 cogs/utils.py | 128 ++++++++++++++++++++++++--------------------------
 1 file changed, 62 insertions(+), 66 deletions(-)

diff --git a/cogs/utils.py b/cogs/utils.py
index b45e6617..7aaca102 100644
--- a/cogs/utils.py
+++ b/cogs/utils.py
@@ -4,7 +4,7 @@
 import os
 import math
 import psutil
-# import openai  # Removed because OpenAI commands were disabled, and OpenAI fails to load due to missing "PyDantic" dependency
+import openai
 import discord
 import json
 import time
@@ -23,7 +23,7 @@
 levelling = levelling.Levelling()
 _commands = cmds.Commands()
 # openai.api_key = os.getenv("chatgpt_API_KEY")
-# openai.api_key = auth.ext_token('chatgpt')
+openai.api_key = auth.ext_token('chatgpt')
 chatgpt_conversation = dict()
 _presence = Presence()
 
@@ -214,36 +214,34 @@ async def status(self, ctx: ApplicationContext):
     @commands.cooldown(1, 1, commands.BucketType.user)
     async def chatgpt(self, ctx: ApplicationContext, message: str):
         """Talk to ChatGPT and get a response back."""
-    #    if str(ctx.author.id) not in chatgpt_conversation:
-    #        chatgpt_conversation[str(ctx.author.id)] = [
-    #                {
-    #                    "role": "system",
-    #                    "content": "You are a intelligent assistant."
-    #                }
-    #            ]
-    #    await ctx.defer()
-    #    try:
-    #        chatgpt_conversation[str(ctx.author.id)].append({"role": "user", "content": message})
-    #        _chat = openai.ChatCompletion.create(
-    #            model="gpt-3.5-turbo",
-    #            messages=chatgpt_conversation[str(ctx.author.id)]
-    #        )
-    #        _reply = _chat.choices[0].message.content
-    #        chatgpt_conversation[str(ctx.author.id)].append({"role": "assistant", "content": _reply})
-    #    except openai.error.RateLimitError as e:
-    #        print(f"Rate limit for OpenAI exceeded: {e}")
-    #        return await ctx.respond("The OpenAI API is currently being rate-limited. Try again after some time.", ephemeral=True)
-    #    except openai.error.ServiceUnavailableError:
-    #        return await ctx.respond("The ChatGPT service is currently unavailable.\nTry again after some time, or check it's status at https://status.openai.com", ephemeral=True)
-    #    except openai.error.APIError:
-    #        return await ctx.respond("ChatGPT encountered an internal error. Please try again.", ephemeral=True)
-    #    except openai.error.Timeout:
-    #        return await ctx.respond("Your request timed out. Please try again, or wait for a while.", ephemeral=True)
-    #    localembed = discord.Embed(description=f"{_reply}", color=discord.Color.random())
-    #    localembed.set_author(name="ChatGPT", icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/0/04/ChatGPT_logo.svg/1200px-ChatGPT_logo.svg.png")
-    #    localembed.set_footer(text="Powered by OpenAI")
-    #    await ctx.respond(embed=localembed)
-        localembed = discord.Embed(title="Discontinuation of isobot AI commands", description="Thank you for showing your interest in the isobot AI commands!\nUnfortunately, due to prolonged issues with OpenAI integration, we are temporarily discontinuing all AI-related commands.\nDon't worry, because sometime, in the (not so distant) future, isobot AI commands will be making a sure return for everyone to enjoy.\n\n- NKA Development Team")
+        # Each user keeps a separate running conversation, seeded with a system prompt on first use.
+        if str(ctx.author.id) not in chatgpt_conversation:
+            chatgpt_conversation[str(ctx.author.id)] = [
+                {"role": "system", "content": "You are a intelligent assistant."}
+            ]
+        await ctx.defer()
+        # Only the four failure modes handled below are reported to the user; other errors propagate.
+        try:
+            chatgpt_conversation[str(ctx.author.id)].append({"role": "user", "content": message})
+            # Await the async variant so the HTTP round-trip does not stall the bot's event loop.
+            _chat = await openai.ChatCompletion.acreate(
+                model="gpt-3.5-turbo",
+                messages=chatgpt_conversation[str(ctx.author.id)]
+            )
+            _reply = _chat.choices[0].message.content
+            chatgpt_conversation[str(ctx.author.id)].append({"role": "assistant", "content": _reply})
+        except openai.error.RateLimitError as e:
+            print(f"Rate limit for OpenAI exceeded: {e}")
+            return await ctx.respond("The OpenAI API is currently being rate-limited. Try again after some time.", ephemeral=True)
+        except openai.error.ServiceUnavailableError:
+            return await ctx.respond("The ChatGPT service is currently unavailable.\nTry again after some time, or check it's status at https://status.openai.com", ephemeral=True)
+        except openai.error.APIError:
+            return await ctx.respond("ChatGPT encountered an internal error. Please try again.", ephemeral=True)
+        except openai.error.Timeout:
+            return await ctx.respond("Your request timed out. Please try again, or wait for a while.", ephemeral=True)
+        localembed = discord.Embed(description=f"{_reply}", color=discord.Color.random())
+        localembed.set_author(name="ChatGPT", icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/0/04/ChatGPT_logo.svg/1200px-ChatGPT_logo.svg.png")
+        localembed.set_footer(text="Powered by OpenAI")
         await ctx.respond(embed=localembed)
 
     @commands.slash_command(
@@ -255,40 +253,38 @@ async def chatgpt(self, ctx: ApplicationContext, message: str):
     @commands.cooldown(1, 10, commands.BucketType.user)
     async def generate_image(self, ctx: ApplicationContext, prompt: str, resolution: str = "512x512"):
         """Generate an image of your choice using the DALL-E modal."""
-    #    parsed_resolution: list = resolution.split("x")
-    #    max_index: int = 0
-    #    for index in parsed_resolution:
-    #        max_index += 1
-    #    if max_index < 2 or max_index > 2:
-    #        return await ctx.respond("Your resolution format is malformed. Please check it and try again.", ephemeral=True)
-    #    res_width = int(parsed_resolution[0])
-    #    res_height = int(parsed_resolution[1])
-    #    if res_width < 256 or res_height < 256:
-    #        return await ctx.respond("Your custom resolution needs to be at least 256p or higher.", ephermeral=True)
-    #    if res_width > 1024 or res_height > 1024:
-    #        return await ctx.respond("Your image output resolution cannot exceed 1024p.", ephemeral=True)
-    #    await ctx.defer()
-    #    try:
-    #        response = openai.Image.create(
-    #            prompt=prompt,
-    #            n=1,
-    #            size=resolution
-    #        )
-    #        generated_image_url = response['data'][0]['url']
-    #    except openai.error.RateLimitError:
-    #        return await ctx.respond("The OpenAI API is currently being rate-limited. Try again after some time.", ephemeral=True)
-    #    except openai.error.ServiceUnavailableError:
-    #        return await ctx.respond("The OpenAI service is currently unavailable.\nTry again after some time, or check it's status at https://status.openai.com", ephemeral=True)
-    #    except openai.error.APIError:
-    #        return await ctx.respond("DALL-E encountered an internal error. Please try again.", ephemeral=True)
-    #    except openai.error.Timeout:
-    #        return await ctx.respond("Your request timed out. Please try again, or wait for a while.", ephemeral=True)
-    #    localembed = discord.Embed(title="Here's an image generated using your prompt.", color=discord.Color.random())
-    #    localembed.set_image(url=generated_image_url)
-    #    localembed.set_author(name="DALL-E", icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/0/04/ChatGPT_logo.svg/1200px-ChatGPT_logo.svg.png")
-    #    localembed.set_footer(text="Powered by OpenAI")
-    #    await ctx.respond(embed=localembed)
-        localembed = discord.Embed(title="Discontinuation of isobot AI commands", description="Thank you for showing your interest in the isobot AI commands!\nUnfortunately, due to prolonged issues with OpenAI integration, we are temporarily discontinuing all AI-related commands.\nDon't worry, because sometime, in the (not so distant) future, isobot AI commands will be making a sure return for everyone to enjoy.\n\n- NKA Development Team")
+        # The size must look like "<width>x<height>" with integer parts; reject anything else up front.
+        parsed_resolution: list = resolution.split("x")
+        if len(parsed_resolution) != 2:
+            return await ctx.respond("Your resolution format is malformed. Please check it and try again.", ephemeral=True)
+        try:
+            res_width, res_height = int(parsed_resolution[0]), int(parsed_resolution[1])
+        except ValueError:
+            return await ctx.respond("Your resolution format is malformed. Please check it and try again.", ephemeral=True)
+        if res_width < 256 or res_height < 256:
+            return await ctx.respond("Your custom resolution needs to be at least 256p or higher.", ephemeral=True)
+        if res_width > 1024 or res_height > 1024:
+            return await ctx.respond("Your image output resolution cannot exceed 1024p.", ephemeral=True)
+        await ctx.defer()
+        try:
+            response = await openai.Image.acreate(  # async variant: does not stall the bot's event loop
+                prompt=prompt,
+                n=1,
+                size=resolution  # NOTE(review): dall-e-2 documents only 256x256, 512x512 and 1024x1024 - confirm other in-range sizes
+            )
+            generated_image_url = response['data'][0]['url']
+        except openai.error.RateLimitError:
+            return await ctx.respond("The OpenAI API is currently being rate-limited. Try again after some time.", ephemeral=True)
+        except openai.error.ServiceUnavailableError:
+            return await ctx.respond("The OpenAI service is currently unavailable.\nTry again after some time, or check it's status at https://status.openai.com", ephemeral=True)
+        except openai.error.APIError:
+            return await ctx.respond("DALL-E encountered an internal error. Please try again.", ephemeral=True)
+        except openai.error.Timeout:
+            return await ctx.respond("Your request timed out. Please try again, or wait for a while.", ephemeral=True)
+        localembed = discord.Embed(title="Here's an image generated using your prompt.", color=discord.Color.random())
+        localembed.set_image(url=generated_image_url)
+        localembed.set_author(name="DALL-E", icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/0/04/ChatGPT_logo.svg/1200px-ChatGPT_logo.svg.png")
+        localembed.set_footer(text="Powered by OpenAI")
         await ctx.respond(embed=localembed)
     
     @commands.slash_command(