This repository was archived by the owner on Dec 11, 2025. It is now read-only.
6 changes: 6 additions & 0 deletions notdiamond/llms/providers.py
@@ -29,6 +29,9 @@ class NDLLMProviders(Enum):
GPT_4_1_MINI_2025_04_14 (NDLLMProvider): refers to 'gpt-4.1-mini-2025-04-14' model by OpenAI
GPT_4_1_NANO (NDLLMProvider): refers to 'gpt-4.1-nano' model by OpenAI
GPT_4_1_NANO_2025_04_14 (NDLLMProvider): refers to 'gpt-4.1-nano-2025-04-14' model by OpenAI
GPT_5_2025_08_07 (NDLLMProvider): refers to 'gpt-5-2025-08-07' model by OpenAI
GPT_5_MINI_2025_08_07 (NDLLMProvider): refers to 'gpt-5-mini-2025-08-07' model by OpenAI
GPT_5_NANO_2025_08_07 (NDLLMProvider): refers to 'gpt-5-nano-2025-08-07' model by OpenAI
O1_PREVIEW (NDLLMProvider): refers to 'o1-preview' model by OpenAI
O1_PREVIEW_2024_09_12 (NDLLMProvider): refers to 'o1-preview-2024-09-12' model by OpenAI
O1_MINI (NDLLMProvider): refers to 'o1-mini' model by OpenAI
@@ -115,6 +118,9 @@ class NDLLMProviders(Enum):
GPT_4_1_MINI_2025_04_14 = ("openai", "gpt-4.1-mini-2025-04-14")
GPT_4_1_NANO = ("openai", "gpt-4.1-nano")
GPT_4_1_NANO_2025_04_14 = ("openai", "gpt-4.1-nano-2025-04-14")
GPT_5_2025_08_07 = ("openai", "gpt-5-2025-08-07")
GPT_5_MINI_2025_08_07 = ("openai", "gpt-5-mini-2025-08-07")
GPT_5_NANO_2025_08_07 = ("openai", "gpt-5-nano-2025-08-07")
O1_PREVIEW = ("openai", "o1-preview")
O1_PREVIEW_2024_09_12 = ("openai", "o1-preview-2024-09-12")
O1_MINI = ("openai", "o1-mini")
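For reference, a minimal sketch (not part of this PR) of how one of the newly added GPT-5 members could be referenced. It assumes the members carry the ("provider", "model") pairs declared in the diff; the real NDLLMProvider wrapper may expose richer attributes:

from notdiamond.llms.providers import NDLLMProviders

# Each new member is declared with a ("provider", "model") pair, so the enum
# name and value identify the vendor and the dated model string.
gpt5 = NDLLMProviders.GPT_5_2025_08_07
print(gpt5.name)   # GPT_5_2025_08_07
print(gpt5.value)  # expected to reflect ("openai", "gpt-5-2025-08-07")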
12 changes: 12 additions & 0 deletions notdiamond/settings.py
@@ -45,6 +45,9 @@
"gpt-4.1-mini-2025-04-14",
"gpt-4.1-nano",
"gpt-4.1-nano-2025-04-14",
"gpt-5-2025-08-07",
"gpt-5-mini-2025-08-07",
"gpt-5-nano-2025-08-07",
"o1-preview",
"o1-preview-2024-09-12",
"o1-mini",
@@ -73,6 +76,9 @@
"gpt-4.1-mini-2025-04-14",
"gpt-4.1-nano",
"gpt-4.1-nano-2025-04-14",
"gpt-5-2025-08-07",
"gpt-5-mini-2025-08-07",
"gpt-5-nano-2025-08-07",
],
"support_response_model": [
"gpt-3.5-turbo",
@@ -123,6 +129,9 @@
"gpt-4.1-mini-2025-04-14": "openai/gpt-4.1-mini-2025-04-14",
"gpt-4.1-nano": "openai/gpt-4.1-nano",
"gpt-4.1-nano-2025-04-14": "openai/gpt-4.1-nano-2025-04-14",
"gpt-5-2025-08-07": "openai/gpt-5-2025-08-07",
"gpt-5-mini-2025-08-07": "openai/gpt-5-mini-2025-08-07",
"gpt-5-nano-2025-08-07": "openai/gpt-5-nano-2025-08-07",
},
"price": {
"gpt-3.5-turbo": {"input": 0.5, "output": 1.5},
@@ -150,6 +159,9 @@
"gpt-4.1-2025-04-14": {"input": 2.0, "output": 8.0},
"gpt-4.1-mini-2025-04-14": {"input": 0.5, "output": 1.6},
"gpt-4.1-nano-2025-04-14": {"input": 0.1, "output": 0.4},
"gpt-5-2025-08-07": {"input": 2.0, "output": 8.0},
"gpt-5-mini-2025-08-07": {"input": 0.5, "output": 1.6},
"gpt-5-nano-2025-08-07": {"input": 0.1, "output": 0.4},
},
},
"anthropic": {
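The new price entries follow the same pattern as the other OpenAI models in settings.py. Below is a small sketch (not part of the SDK) of how such an input/output price pair could be turned into a per-request cost estimate; the helper name and the assumption that prices are USD per 1M tokens are mine, not taken from the settings module:

# Price pairs copied from the entries added above; units assumed to be USD per 1M tokens.
GPT5_PRICES = {
    "gpt-5-2025-08-07": {"input": 2.0, "output": 8.0},
    "gpt-5-mini-2025-08-07": {"input": 0.5, "output": 1.6},
    "gpt-5-nano-2025-08-07": {"input": 0.1, "output": 0.4},
}

def estimate_cost(model: str, prompt_tokens: int, completion_tokens: int) -> float:
    """Return the estimated cost of one request under the assumed units."""
    price = GPT5_PRICES[model]
    return (prompt_tokens * price["input"] + completion_tokens * price["output"]) / 1_000_000

# Example: 1,200 prompt tokens and 300 completion tokens on gpt-5-mini.
print(round(estimate_cost("gpt-5-mini-2025-08-07", 1200, 300), 6))  # 0.00108 under the assumed units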
3 changes: 2 additions & 1 deletion tests/conftest.py
@@ -241,12 +241,13 @@ def _before_record_request(request: Any) -> Any:

@pytest.fixture(scope="module")
def vcr_config():
record_mode = os.getenv("RECORD_MODE", "none")
return {
"filter_headers": ["authorization", "x-token"],
"allowed_hosts": ["testserver", "127.0.0.1"],
"before_record_response": _redact_xtoken_response,
"before_record_request": _before_record_request,
"ignore_localhost": True,
"record_mode": "none",
"record_mode": record_mode,
"decode_compressed_response": True,
}
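
The conftest.py change replaces the hard-coded "none" record mode with an environment-driven value, so cassettes can be re-recorded locally while CI stays replay-only. A brief sketch of the behaviour this enables (the RECORD_MODE values mentioned are standard VCR record modes, not something defined in this PR):

import os

def resolve_record_mode() -> str:
    # Mirrors the fixture: fall back to "none" (pure replay) when RECORD_MODE is unset.
    return os.getenv("RECORD_MODE", "none")

# `RECORD_MODE=once pytest` would yield "once" here, letting VCR record a cassette
# the first time a test runs, while a plain `pytest` run keeps replay-only behaviour.
print(resolve_record_mode())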

Large diffs for three additional files are not rendered by default.

New file (a VCR cassette; filename not shown in this view)
@@ -0,0 +1,173 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "Tell me a joke"}], "llm_providers":
[{"provider": "openai", "model": "chatgpt-4o-latest", "is_custom": false, "context_length":
null, "input_price": null, "output_price": null, "latency": null}], "metric":
"accuracy", "max_model_depth": 1, "hash_content": true}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, zstd
Connection:
- keep-alive
Content-Length:
- '304'
User-Agent:
- Python-SDK/0.4.4
content-type:
- application/json
method: POST
uri: https://api.notdiamond.ai/v2/modelRouter/modelSelect
response:
body:
string: '{"detail":"Could not find valid API key"}'
headers:
CF-RAY:
- 96fac3d02e1423ca-MIA
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 15 Aug 2025 18:30:44 GMT
Server:
- cloudflare
Transfer-Encoding:
- chunked
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
rndr-id:
- c099966b-d0d0-44bd
vary:
- Accept-Encoding
x-render-origin-server:
- uvicorn
x-request-id:
- c099966b-d0d0-44bd
status:
code: 401
message: Unauthorized
- request:
body: '{"messages":[{"content":"The output should be formatted as a JSON instance
that conforms to the JSON schema below.\n\nAs an example, for the schema {\"properties\":
{\"foo\": {\"title\": \"Foo\", \"description\": \"a list of strings\", \"type\":
\"array\", \"items\": {\"type\": \"string\"}}}, \"required\": [\"foo\"]}\nthe
object {\"foo\": [\"bar\", \"baz\"]} is a well-formatted instance of the schema.
The object {\"properties\": {\"foo\": [\"bar\", \"baz\"]}} is not well-formatted.\n\nHere
is the output schema:\n```\n{\"properties\": {\"setup\": {\"description\": \"question
to set up a joke\", \"title\": \"Setup\", \"type\": \"string\"}, \"punchline\":
{\"description\": \"answer to resolve the joke\", \"title\": \"Punchline\",
\"type\": \"string\"}}, \"required\": [\"setup\", \"punchline\"]}\n```\nTell
me a joke","role":"user"}],"model":"chatgpt-4o-latest","max_completion_tokens":200,"stream":false}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '912'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.99.9
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.99.9
x-stainless-read-timeout:
- '120.0'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.6
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: "{\n \"id\": \"chatcmpl-C4tUKzoB8SQJI2RJEpmVDjVwrFGsN\",\n \"object\":
\"chat.completion\",\n \"created\": 1755282644,\n \"model\": \"chatgpt-4o-latest\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"{\\n \\\"setup\\\": \\\"Why don't
skeletons fight each other?\\\",\\n \\\"punchline\\\": \\\"Because they don't
have the guts.\\\"\\n}\",\n \"refusal\": null,\n \"annotations\":
[]\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n
\ }\n ],\n \"usage\": {\n \"prompt_tokens\": 200,\n \"completion_tokens\":
29,\n \"total_tokens\": 229,\n \"prompt_tokens_details\": {\n \"cached_tokens\":
0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\":
0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\":
\"default\",\n \"system_fingerprint\": \"fp_736be7fece\"\n}\n"
headers:
CF-RAY:
- 96fac3d1eea6fdc9-MIA
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Fri, 15 Aug 2025 18:30:45 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=TOk2L458Q8vu8JIyxDmIlNv00Ka.C0BuO0vAci1tAlk-1755282645-1.0.1.1-3AVQlkqWQrCpLekJGP_9WuN0m5piiIcpCQ1VNT1D8ff5NdBmH_RSTCwWbaq.08Fynj1wRmtEOomPPGyEV399rDiPvGQg.7DIWBvFRolh._g;
path=/; expires=Fri, 15-Aug-25 19:00:45 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=HKJYwV2WABYQQwemXz9nwf5ZvW.y.LcdWqBwkqjFv_c-1755282645204-0.0.1.1-604800000;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
content-length:
- '921'
openai-organization:
- not-diamond-bdz8cg
openai-processing-ms:
- '324'
openai-project:
- proj_EvMLZJqcggqzjvd3BFXe4tfP
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '349'
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '30000000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '29999819'
x-ratelimit-reset-requests:
- 6ms
x-ratelimit-reset-tokens:
- 0s
x-request-id:
- req_058ab24b2cfc4896ae16b7b14d182517
status:
code: 200
message: OK
version: 1
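For readability, the JSON body of the first recorded interaction above, reconstructed as a Python dict; the values are copied from the cassette, not from API documentation. It shows what the SDK sent to the /v2/modelRouter/modelSelect endpoint in this test:

import json

# Payload of the recorded POST to https://api.notdiamond.ai/v2/modelRouter/modelSelect,
# transcribed from the cassette body above.
model_select_payload = {
    "messages": [{"role": "user", "content": "Tell me a joke"}],
    "llm_providers": [{
        "provider": "openai",
        "model": "chatgpt-4o-latest",
        "is_custom": False,
        "context_length": None,
        "input_price": None,
        "output_price": None,
        "latency": None,
    }],
    "metric": "accuracy",
    "max_model_depth": 1,
    "hash_content": True,
}

print(json.dumps(model_select_payload, indent=2))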