getModelInfoSync(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("Accept") String accept,
+ RequestOptions requestOptions, Context context);
+ }
+
+ /**
+ * Gets chat completions for the provided chat messages.
+ * Completions support a wide variety of tasks and generate text that continues from or "completes"
+ * provided prompt data. The method makes a REST API call to the `/chat/completions` route
+ * on the given endpoint.
+ *
+ * Header Parameters
+ * | Name | Type | Required | Description |
+ * | extra-parameters | String | No | Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header `extra-parameters`. Allowed values: "error", "drop", "pass-through". |
+ *
+ * You can add these to a request with {@link RequestOptions#addHeader}
+ * Request Body Schema
+ *
+ * {@code
+ * {
+ * messages (Required): [
+ * (Required){
+ * role: String(system/user/assistant/tool) (Required)
+ * }
+ * ]
+ * frequency_penalty: Double (Optional)
+ * stream: Boolean (Optional)
+ * presence_penalty: Double (Optional)
+ * temperature: Double (Optional)
+ * top_p: Double (Optional)
+ * max_tokens: Integer (Optional)
+ * response_format (Optional): {
+ * type: String (Required)
+ * }
+ * stop (Optional): [
+ * String (Optional)
+ * ]
+ * tools (Optional): [
+ * (Optional){
+ * type: String (Required)
+ * function (Required): {
+ * name: String (Required)
+ * description: String (Optional)
+ * parameters: Object (Optional)
+ * }
+ * }
+ * ]
+ * tool_choice: BinaryData (Optional)
+ * seed: Long (Optional)
+ * model: String (Optional)
+ * (Optional): {
+ * String: Object (Required)
+ * }
+ * }
+ * }
+ *
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * id: String (Required)
+ * created: long (Required)
+ * model: String (Required)
+ * usage (Required): {
+ * completion_tokens: int (Required)
+ * prompt_tokens: int (Required)
+ * total_tokens: int (Required)
+ * }
+ * choices (Required): [
+ * (Required){
+ * index: int (Required)
+ * finish_reason: String(stop/length/content_filter/tool_calls) (Required)
+ * message (Required): {
+ * role: String(system/user/assistant/tool) (Required)
+ * content: String (Required)
+ * tool_calls (Optional): [
+ * (Optional){
+ * id: String (Required)
+ * type: String (Required)
+ * function (Required): {
+ * name: String (Required)
+ * arguments: String (Required)
+ * }
+ * }
+ * ]
+ * }
+ * }
+ * ]
+ * }
+ * }
+ *
+ * @param completeRequest The completeRequest parameter.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return chat completions for the provided chat messages.
+ * Completions support a wide variety of tasks and generate text that continues from or "completes"
+ * provided prompt data along with {@link Response} on successful completion of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> completeWithResponseAsync(BinaryData completeRequest,
+ RequestOptions requestOptions) {
+ final String contentType = "application/json";
+ final String accept = "application/json";
+ return FluxUtil.withContext(context -> service.complete(this.getEndpoint(),
+ this.getServiceVersion().getVersion(), contentType, accept, completeRequest, requestOptions, context));
+ }
+
+ /**
+ * Gets chat completions for the provided chat messages.
+ * Completions support a wide variety of tasks and generate text that continues from or "completes"
+ * provided prompt data. The method makes a REST API call to the `/chat/completions` route
+ * on the given endpoint.
+ *
+ * Header Parameters
+ * | Name | Type | Required | Description |
+ * | extra-parameters | String | No | Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header `extra-parameters`. Allowed values: "error", "drop", "pass-through". |
+ *
+ * You can add these to a request with {@link RequestOptions#addHeader}
+ * Request Body Schema
+ *
+ * {@code
+ * {
+ * messages (Required): [
+ * (Required){
+ * role: String(system/user/assistant/tool) (Required)
+ * }
+ * ]
+ * frequency_penalty: Double (Optional)
+ * stream: Boolean (Optional)
+ * presence_penalty: Double (Optional)
+ * temperature: Double (Optional)
+ * top_p: Double (Optional)
+ * max_tokens: Integer (Optional)
+ * response_format (Optional): {
+ * type: String (Required)
+ * }
+ * stop (Optional): [
+ * String (Optional)
+ * ]
+ * tools (Optional): [
+ * (Optional){
+ * type: String (Required)
+ * function (Required): {
+ * name: String (Required)
+ * description: String (Optional)
+ * parameters: Object (Optional)
+ * }
+ * }
+ * ]
+ * tool_choice: BinaryData (Optional)
+ * seed: Long (Optional)
+ * model: String (Optional)
+ * (Optional): {
+ * String: Object (Required)
+ * }
+ * }
+ * }
+ *
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * id: String (Required)
+ * created: long (Required)
+ * model: String (Required)
+ * usage (Required): {
+ * completion_tokens: int (Required)
+ * prompt_tokens: int (Required)
+ * total_tokens: int (Required)
+ * }
+ * choices (Required): [
+ * (Required){
+ * index: int (Required)
+ * finish_reason: String(stop/length/content_filter/tool_calls) (Required)
+ * message (Required): {
+ * role: String(system/user/assistant/tool) (Required)
+ * content: String (Required)
+ * tool_calls (Optional): [
+ * (Optional){
+ * id: String (Required)
+ * type: String (Required)
+ * function (Required): {
+ * name: String (Required)
+ * arguments: String (Required)
+ * }
+ * }
+ * ]
+ * }
+ * }
+ * ]
+ * }
+ * }
+ *
+ * @param completeRequest The completeRequest parameter.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return chat completions for the provided chat messages.
+ * Completions support a wide variety of tasks and generate text that continues from or "completes"
+ * provided prompt data along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> completeWithResponse(BinaryData completeRequest, RequestOptions requestOptions) {
+ final String contentType = "application/json";
+ final String accept = "application/json";
+ return service.completeSync(this.getEndpoint(), this.getServiceVersion().getVersion(), contentType, accept,
+ completeRequest, requestOptions, Context.NONE);
+ }
+
+ /**
+ * Returns information about the AI model.
+ * The method makes a REST API call to the `/info` route on the given endpoint.
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * model_name: String (Required)
+ * model_type: String(embeddings/image_generation/text_generation/image_embeddings/audio_generation/chat) (Required)
+ * model_provider_name: String (Required)
+ * }
+ * }
+ *
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return represents some basic information about the AI model along with {@link Response} on successful completion
+ * of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getModelInfoWithResponseAsync(RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return FluxUtil.withContext(context -> service.getModelInfo(this.getEndpoint(),
+ this.getServiceVersion().getVersion(), accept, requestOptions, context));
+ }
+
+ /**
+ * Returns information about the AI model.
+ * The method makes a REST API call to the `/info` route on the given endpoint.
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * model_name: String (Required)
+ * model_type: String(embeddings/image_generation/text_generation/image_embeddings/audio_generation/chat) (Required)
+ * model_provider_name: String (Required)
+ * }
+ * }
+ *
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return represents some basic information about the AI model along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getModelInfoWithResponse(RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return service.getModelInfoSync(this.getEndpoint(), this.getServiceVersion().getVersion(), accept,
+ requestOptions, Context.NONE);
+ }
+}
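The protocol methods in the client above take the request body as raw `BinaryData`. As a reference point, here is a minimal sketch of assembling the documented `/chat/completions` request body with plain JDK strings; the class name and the `max_tokens` value are illustrative, while the field names come from the Request Body Schema in the Javadoc:

```java
// Sketch: builds the minimal JSON body from the documented Request Body
// Schema. Only "messages" is required; "max_tokens" is one of the optional
// fields, included for illustration. No JSON escaping is done on the prompt.
public class ChatRequestBodySketch {
    static String minimalBody(String userPrompt) {
        return "{"
            + "\"messages\":[{\"role\":\"user\",\"content\":\"" + userPrompt + "\"}],"
            + "\"max_tokens\":256"
            + "}";
    }

    public static void main(String[] args) {
        // A string like this is what BinaryData.fromString(...) would wrap
        // before being handed to completeWithResponse.
        System.out.println(minimalBody("What is the capital of France?"));
    }
}
```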
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/ChatCompletionsUtils.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/ChatCompletionsUtils.java
new file mode 100644
index 000000000000..72a7b387a107
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/ChatCompletionsUtils.java
@@ -0,0 +1,33 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.inference.implementation;
+
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatRequestMessage;
+
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.core.util.logging.ClientLogger;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/** This class contains convenience methods and constants for operations related to ChatCompletions. */
+public final class ChatCompletionsUtils {
+
+ private static final ClientLogger LOGGER = new ClientLogger(ChatCompletionsUtils.class);
+ private ChatCompletionsUtils() {
+ }
+
+ /**
+ * Convenience method for minimal initialization for the ChatCompletionsOptions class
+ * @param prompt from which ChatCompletions will be generated
+ * @return A ChatCompletionsOptions object
+ */
+ public static ChatCompletionsOptions defaultCompleteOptions(String prompt) {
+ List<ChatRequestMessage> messages = new ArrayList<>();
+ messages.add(new ChatRequestUserMessage(prompt));
+ return new ChatCompletionsOptions(messages);
+ }
+
+}
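The helper above wraps a single user prompt into a one-element message list before constructing the options. With stand-in types (the real `ChatRequestUserMessage`/`ChatCompletionsOptions` classes are not reproduced here), the pattern reduces to:

```java
import java.util.ArrayList;
import java.util.List;

// Stand-in message type mirroring the shape used by defaultCompleteOptions;
// the real SDK models carry more state than this.
class Message {
    final String role;
    final String content;
    Message(String role, String content) {
        this.role = role;
        this.content = content;
    }
}

public class DefaultOptionsSketch {
    // Mirrors defaultCompleteOptions: one "user" message built from one prompt.
    static List<Message> defaultMessages(String prompt) {
        List<Message> messages = new ArrayList<>();
        messages.add(new Message("user", prompt));
        return messages;
    }

    public static void main(String[] args) {
        List<Message> messages = defaultMessages("Tell me a joke");
        System.out.println(messages.size() + " message(s), role=" + messages.get(0).role);
    }
}
```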
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/EmbeddingsClientImpl.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/EmbeddingsClientImpl.java
new file mode 100644
index 000000000000..3982b0189cbc
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/EmbeddingsClientImpl.java
@@ -0,0 +1,379 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.implementation;
+
+import com.azure.ai.inference.ModelServiceVersion;
+import com.azure.core.annotation.BodyParam;
+import com.azure.core.annotation.ExpectedResponses;
+import com.azure.core.annotation.Get;
+import com.azure.core.annotation.HeaderParam;
+import com.azure.core.annotation.Host;
+import com.azure.core.annotation.HostParam;
+import com.azure.core.annotation.Post;
+import com.azure.core.annotation.QueryParam;
+import com.azure.core.annotation.ReturnType;
+import com.azure.core.annotation.ServiceInterface;
+import com.azure.core.annotation.ServiceMethod;
+import com.azure.core.annotation.UnexpectedResponseExceptionType;
+import com.azure.core.exception.ClientAuthenticationException;
+import com.azure.core.exception.HttpResponseException;
+import com.azure.core.exception.ResourceModifiedException;
+import com.azure.core.exception.ResourceNotFoundException;
+import com.azure.core.http.HttpPipeline;
+import com.azure.core.http.HttpPipelineBuilder;
+import com.azure.core.http.policy.RetryPolicy;
+import com.azure.core.http.policy.UserAgentPolicy;
+import com.azure.core.http.rest.RequestOptions;
+import com.azure.core.http.rest.Response;
+import com.azure.core.http.rest.RestProxy;
+import com.azure.core.util.BinaryData;
+import com.azure.core.util.Context;
+import com.azure.core.util.FluxUtil;
+import com.azure.core.util.serializer.JacksonAdapter;
+import com.azure.core.util.serializer.SerializerAdapter;
+import reactor.core.publisher.Mono;
+
+/**
+ * Initializes a new instance of the EmbeddingsClient type.
+ */
+public final class EmbeddingsClientImpl {
+ /**
+ * The proxy service used to perform REST calls.
+ */
+ private final EmbeddingsClientService service;
+
+ /**
+ * Service host.
+ */
+ private final String endpoint;
+
+ /**
+ * Gets Service host.
+ *
+ * @return the endpoint value.
+ */
+ public String getEndpoint() {
+ return this.endpoint;
+ }
+
+ /**
+ * Service version.
+ */
+ private final ModelServiceVersion serviceVersion;
+
+ /**
+ * Gets Service version.
+ *
+ * @return the serviceVersion value.
+ */
+ public ModelServiceVersion getServiceVersion() {
+ return this.serviceVersion;
+ }
+
+ /**
+ * The HTTP pipeline to send requests through.
+ */
+ private final HttpPipeline httpPipeline;
+
+ /**
+ * Gets The HTTP pipeline to send requests through.
+ *
+ * @return the httpPipeline value.
+ */
+ public HttpPipeline getHttpPipeline() {
+ return this.httpPipeline;
+ }
+
+ /**
+ * The serializer to serialize an object into a string.
+ */
+ private final SerializerAdapter serializerAdapter;
+
+ /**
+ * Gets The serializer to serialize an object into a string.
+ *
+ * @return the serializerAdapter value.
+ */
+ public SerializerAdapter getSerializerAdapter() {
+ return this.serializerAdapter;
+ }
+
+ /**
+ * Initializes an instance of EmbeddingsClient client.
+ *
+ * @param endpoint Service host.
+ * @param serviceVersion Service version.
+ */
+ public EmbeddingsClientImpl(String endpoint, ModelServiceVersion serviceVersion) {
+ this(new HttpPipelineBuilder().policies(new UserAgentPolicy(), new RetryPolicy()).build(),
+ JacksonAdapter.createDefaultSerializerAdapter(), endpoint, serviceVersion);
+ }
+
+ /**
+ * Initializes an instance of EmbeddingsClient client.
+ *
+ * @param httpPipeline The HTTP pipeline to send requests through.
+ * @param endpoint Service host.
+ * @param serviceVersion Service version.
+ */
+ public EmbeddingsClientImpl(HttpPipeline httpPipeline, String endpoint, ModelServiceVersion serviceVersion) {
+ this(httpPipeline, JacksonAdapter.createDefaultSerializerAdapter(), endpoint, serviceVersion);
+ }
+
+ /**
+ * Initializes an instance of EmbeddingsClient client.
+ *
+ * @param httpPipeline The HTTP pipeline to send requests through.
+ * @param serializerAdapter The serializer to serialize an object into a string.
+ * @param endpoint Service host.
+ * @param serviceVersion Service version.
+ */
+ public EmbeddingsClientImpl(HttpPipeline httpPipeline, SerializerAdapter serializerAdapter, String endpoint,
+ ModelServiceVersion serviceVersion) {
+ this.httpPipeline = httpPipeline;
+ this.serializerAdapter = serializerAdapter;
+ this.endpoint = endpoint;
+ this.serviceVersion = serviceVersion;
+ this.service = RestProxy.create(EmbeddingsClientService.class, this.httpPipeline, this.getSerializerAdapter());
+ }
+
+ /**
+ * The interface defining all the services for EmbeddingsClient to be used by the proxy service to perform REST
+ * calls.
+ */
+ @Host("{endpoint}")
+ @ServiceInterface(name = "EmbeddingsClient")
+ public interface EmbeddingsClientService {
+ @Post("/embeddings")
+ @ExpectedResponses({ 200 })
+ @UnexpectedResponseExceptionType(value = ClientAuthenticationException.class, code = { 401 })
+ @UnexpectedResponseExceptionType(value = ResourceNotFoundException.class, code = { 404 })
+ @UnexpectedResponseExceptionType(value = ResourceModifiedException.class, code = { 409 })
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Mono<Response<BinaryData>> embed(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("Content-Type") String contentType,
+ @HeaderParam("Accept") String accept, @BodyParam("application/json") BinaryData embedRequest,
+ RequestOptions requestOptions, Context context);
+
+ @Post("/embeddings")
+ @ExpectedResponses({ 200 })
+ @UnexpectedResponseExceptionType(value = ClientAuthenticationException.class, code = { 401 })
+ @UnexpectedResponseExceptionType(value = ResourceNotFoundException.class, code = { 404 })
+ @UnexpectedResponseExceptionType(value = ResourceModifiedException.class, code = { 409 })
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Response<BinaryData> embedSync(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("Content-Type") String contentType,
+ @HeaderParam("Accept") String accept, @BodyParam("application/json") BinaryData embedRequest,
+ RequestOptions requestOptions, Context context);
+
+ @Get("/info")
+ @ExpectedResponses({ 200 })
+ @UnexpectedResponseExceptionType(value = ClientAuthenticationException.class, code = { 401 })
+ @UnexpectedResponseExceptionType(value = ResourceNotFoundException.class, code = { 404 })
+ @UnexpectedResponseExceptionType(value = ResourceModifiedException.class, code = { 409 })
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Mono<Response<BinaryData>> getModelInfo(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("Accept") String accept,
+ RequestOptions requestOptions, Context context);
+
+ @Get("/info")
+ @ExpectedResponses({ 200 })
+ @UnexpectedResponseExceptionType(value = ClientAuthenticationException.class, code = { 401 })
+ @UnexpectedResponseExceptionType(value = ResourceNotFoundException.class, code = { 404 })
+ @UnexpectedResponseExceptionType(value = ResourceModifiedException.class, code = { 409 })
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Response<BinaryData> getModelInfoSync(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("Accept") String accept,
+ RequestOptions requestOptions, Context context);
+ }
+
+ /**
+ * Return the embedding vectors for given text prompts.
+ * The method makes a REST API call to the `/embeddings` route on the given endpoint.
+ *
+ * Header Parameters
+ * | Name | Type | Required | Description |
+ * | extra-parameters | String | No | Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header `extra-parameters`. Allowed values: "error", "drop", "pass-through". |
+ *
+ * You can add these to a request with {@link RequestOptions#addHeader}
+ * Request Body Schema
+ *
+ * {@code
+ * {
+ * input (Required): [
+ * String (Required)
+ * ]
+ * dimensions: Integer (Optional)
+ * encoding_format: String(base64/binary/float/int8/ubinary/uint8) (Optional)
+ * input_type: String(text/query/document) (Optional)
+ * model: String (Optional)
+ * (Optional): {
+ * String: Object (Required)
+ * }
+ * }
+ * }
+ *
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * data (Required): [
+ * (Required){
+ * embedding: BinaryData (Required)
+ * index: int (Required)
+ * }
+ * ]
+ * usage (Required): {
+ * prompt_tokens: int (Required)
+ * total_tokens: int (Required)
+ * }
+ * model: String (Required)
+ * }
+ * }
+ *
+ * @param embedRequest The embedRequest parameter.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return representation of the response data from an embeddings request.
+ * Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
+ * recommendations, and other similar scenarios along with {@link Response} on successful completion of
+ * {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> embedWithResponseAsync(BinaryData embedRequest, RequestOptions requestOptions) {
+ final String contentType = "application/json";
+ final String accept = "application/json";
+ return FluxUtil.withContext(context -> service.embed(this.getEndpoint(), this.getServiceVersion().getVersion(),
+ contentType, accept, embedRequest, requestOptions, context));
+ }
+
+ /**
+ * Return the embedding vectors for given text prompts.
+ * The method makes a REST API call to the `/embeddings` route on the given endpoint.
+ *
+ * Header Parameters
+ * | Name | Type | Required | Description |
+ * | extra-parameters | String | No | Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header `extra-parameters`. Allowed values: "error", "drop", "pass-through". |
+ *
+ * You can add these to a request with {@link RequestOptions#addHeader}
+ * Request Body Schema
+ *
+ * {@code
+ * {
+ * input (Required): [
+ * String (Required)
+ * ]
+ * dimensions: Integer (Optional)
+ * encoding_format: String(base64/binary/float/int8/ubinary/uint8) (Optional)
+ * input_type: String(text/query/document) (Optional)
+ * model: String (Optional)
+ * (Optional): {
+ * String: Object (Required)
+ * }
+ * }
+ * }
+ *
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * data (Required): [
+ * (Required){
+ * embedding: BinaryData (Required)
+ * index: int (Required)
+ * }
+ * ]
+ * usage (Required): {
+ * prompt_tokens: int (Required)
+ * total_tokens: int (Required)
+ * }
+ * model: String (Required)
+ * }
+ * }
+ *
+ * @param embedRequest The embedRequest parameter.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return representation of the response data from an embeddings request.
+ * Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
+ * recommendations, and other similar scenarios along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> embedWithResponse(BinaryData embedRequest, RequestOptions requestOptions) {
+ final String contentType = "application/json";
+ final String accept = "application/json";
+ return service.embedSync(this.getEndpoint(), this.getServiceVersion().getVersion(), contentType, accept,
+ embedRequest, requestOptions, Context.NONE);
+ }
+
+ /**
+ * Returns information about the AI model.
+ * The method makes a REST API call to the `/info` route on the given endpoint.
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * model_name: String (Required)
+ * model_type: String(embeddings/image_generation/text_generation/image_embeddings/audio_generation/chat) (Required)
+ * model_provider_name: String (Required)
+ * }
+ * }
+ *
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return represents some basic information about the AI model along with {@link Response} on successful completion
+ * of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getModelInfoWithResponseAsync(RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return FluxUtil.withContext(context -> service.getModelInfo(this.getEndpoint(),
+ this.getServiceVersion().getVersion(), accept, requestOptions, context));
+ }
+
+ /**
+ * Returns information about the AI model.
+ * The method makes a REST API call to the `/info` route on the given endpoint.
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * model_name: String (Required)
+ * model_type: String(embeddings/image_generation/text_generation/image_embeddings/audio_generation/chat) (Required)
+ * model_provider_name: String (Required)
+ * }
+ * }
+ *
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return represents some basic information about the AI model along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getModelInfoWithResponse(RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return service.getModelInfoSync(this.getEndpoint(), this.getServiceVersion().getVersion(), accept,
+ requestOptions, Context.NONE);
+ }
+}
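As with the chat client, `embedWithResponse` takes the request body as raw `BinaryData`. A minimal sketch of the documented `/embeddings` request body, populating only the required `input` array from the Request Body Schema above (the class name is illustrative):

```java
// Sketch: builds the minimal JSON body from the documented /embeddings
// Request Body Schema. Only the required "input" array is set; optional
// fields (dimensions, encoding_format, input_type, model) are omitted.
// Inputs are not JSON-escaped.
public class EmbedRequestBodySketch {
    static String minimalBody(String... inputs) {
        StringBuilder sb = new StringBuilder("{\"input\":[");
        for (int i = 0; i < inputs.length; i++) {
            if (i > 0) {
                sb.append(',');
            }
            sb.append('"').append(inputs[i]).append('"');
        }
        return sb.append("]}").toString();
    }

    public static void main(String[] args) {
        System.out.println(minimalBody("first phrase", "second phrase"));
        // → {"input":["first phrase","second phrase"]}
    }
}
```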
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/ImageEmbeddingsClientImpl.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/ImageEmbeddingsClientImpl.java
new file mode 100644
index 000000000000..ee5c60e02da9
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/ImageEmbeddingsClientImpl.java
@@ -0,0 +1,386 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.implementation;
+
+import com.azure.ai.inference.ModelServiceVersion;
+import com.azure.core.annotation.BodyParam;
+import com.azure.core.annotation.ExpectedResponses;
+import com.azure.core.annotation.Get;
+import com.azure.core.annotation.HeaderParam;
+import com.azure.core.annotation.Host;
+import com.azure.core.annotation.HostParam;
+import com.azure.core.annotation.Post;
+import com.azure.core.annotation.QueryParam;
+import com.azure.core.annotation.ReturnType;
+import com.azure.core.annotation.ServiceInterface;
+import com.azure.core.annotation.ServiceMethod;
+import com.azure.core.annotation.UnexpectedResponseExceptionType;
+import com.azure.core.exception.ClientAuthenticationException;
+import com.azure.core.exception.HttpResponseException;
+import com.azure.core.exception.ResourceModifiedException;
+import com.azure.core.exception.ResourceNotFoundException;
+import com.azure.core.http.HttpPipeline;
+import com.azure.core.http.HttpPipelineBuilder;
+import com.azure.core.http.policy.RetryPolicy;
+import com.azure.core.http.policy.UserAgentPolicy;
+import com.azure.core.http.rest.RequestOptions;
+import com.azure.core.http.rest.Response;
+import com.azure.core.http.rest.RestProxy;
+import com.azure.core.util.BinaryData;
+import com.azure.core.util.Context;
+import com.azure.core.util.FluxUtil;
+import com.azure.core.util.serializer.JacksonAdapter;
+import com.azure.core.util.serializer.SerializerAdapter;
+import reactor.core.publisher.Mono;
+
+/**
+ * Initializes a new instance of the ImageEmbeddingsClient type.
+ */
+public final class ImageEmbeddingsClientImpl {
+ /**
+ * The proxy service used to perform REST calls.
+ */
+ private final ImageEmbeddingsClientService service;
+
+ /**
+ * Service host.
+ */
+ private final String endpoint;
+
+ /**
+ * Gets Service host.
+ *
+ * @return the endpoint value.
+ */
+ public String getEndpoint() {
+ return this.endpoint;
+ }
+
+ /**
+ * Service version.
+ */
+ private final ModelServiceVersion serviceVersion;
+
+ /**
+ * Gets Service version.
+ *
+ * @return the serviceVersion value.
+ */
+ public ModelServiceVersion getServiceVersion() {
+ return this.serviceVersion;
+ }
+
+ /**
+ * The HTTP pipeline to send requests through.
+ */
+ private final HttpPipeline httpPipeline;
+
+ /**
+ * Gets The HTTP pipeline to send requests through.
+ *
+ * @return the httpPipeline value.
+ */
+ public HttpPipeline getHttpPipeline() {
+ return this.httpPipeline;
+ }
+
+ /**
+ * The serializer to serialize an object into a string.
+ */
+ private final SerializerAdapter serializerAdapter;
+
+ /**
+ * Gets The serializer to serialize an object into a string.
+ *
+ * @return the serializerAdapter value.
+ */
+ public SerializerAdapter getSerializerAdapter() {
+ return this.serializerAdapter;
+ }
+
+ /**
+ * Initializes an instance of ImageEmbeddingsClient client.
+ *
+ * @param endpoint Service host.
+ * @param serviceVersion Service version.
+ */
+ public ImageEmbeddingsClientImpl(String endpoint, ModelServiceVersion serviceVersion) {
+ this(new HttpPipelineBuilder().policies(new UserAgentPolicy(), new RetryPolicy()).build(),
+ JacksonAdapter.createDefaultSerializerAdapter(), endpoint, serviceVersion);
+ }
+
+ /**
+ * Initializes an instance of ImageEmbeddingsClient client.
+ *
+ * @param httpPipeline The HTTP pipeline to send requests through.
+ * @param endpoint Service host.
+ * @param serviceVersion Service version.
+ */
+ public ImageEmbeddingsClientImpl(HttpPipeline httpPipeline, String endpoint, ModelServiceVersion serviceVersion) {
+ this(httpPipeline, JacksonAdapter.createDefaultSerializerAdapter(), endpoint, serviceVersion);
+ }
+
+ /**
+ * Initializes an instance of ImageEmbeddingsClient client.
+ *
+ * @param httpPipeline The HTTP pipeline to send requests through.
+ * @param serializerAdapter The serializer to serialize an object into a string.
+ * @param endpoint Service host.
+ * @param serviceVersion Service version.
+ */
+ public ImageEmbeddingsClientImpl(HttpPipeline httpPipeline, SerializerAdapter serializerAdapter, String endpoint,
+ ModelServiceVersion serviceVersion) {
+ this.httpPipeline = httpPipeline;
+ this.serializerAdapter = serializerAdapter;
+ this.endpoint = endpoint;
+ this.serviceVersion = serviceVersion;
+ this.service
+ = RestProxy.create(ImageEmbeddingsClientService.class, this.httpPipeline, this.getSerializerAdapter());
+ }
+
+ /**
+ * The interface defining all the services for ImageEmbeddingsClient to be used by the proxy service to perform REST
+ * calls.
+ */
+ @Host("{endpoint}")
+ @ServiceInterface(name = "ImageEmbeddingsClien")
+ public interface ImageEmbeddingsClientService {
+ @Post("/images/embeddings")
+ @ExpectedResponses({ 200 })
+ @UnexpectedResponseExceptionType(value = ClientAuthenticationException.class, code = { 401 })
+ @UnexpectedResponseExceptionType(value = ResourceNotFoundException.class, code = { 404 })
+ @UnexpectedResponseExceptionType(value = ResourceModifiedException.class, code = { 409 })
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Mono<Response<BinaryData>> embed(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("Content-Type") String contentType,
+ @HeaderParam("Accept") String accept, @BodyParam("application/json") BinaryData embedRequest1,
+ RequestOptions requestOptions, Context context);
+
+ @Post("/images/embeddings")
+ @ExpectedResponses({ 200 })
+ @UnexpectedResponseExceptionType(value = ClientAuthenticationException.class, code = { 401 })
+ @UnexpectedResponseExceptionType(value = ResourceNotFoundException.class, code = { 404 })
+ @UnexpectedResponseExceptionType(value = ResourceModifiedException.class, code = { 409 })
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Response<BinaryData> embedSync(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("Content-Type") String contentType,
+ @HeaderParam("Accept") String accept, @BodyParam("application/json") BinaryData embedRequest1,
+ RequestOptions requestOptions, Context context);
+
+ @Get("/info")
+ @ExpectedResponses({ 200 })
+ @UnexpectedResponseExceptionType(value = ClientAuthenticationException.class, code = { 401 })
+ @UnexpectedResponseExceptionType(value = ResourceNotFoundException.class, code = { 404 })
+ @UnexpectedResponseExceptionType(value = ResourceModifiedException.class, code = { 409 })
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Mono<Response<BinaryData>> getModelInfo(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("Accept") String accept,
+ RequestOptions requestOptions, Context context);
+
+ @Get("/info")
+ @ExpectedResponses({ 200 })
+ @UnexpectedResponseExceptionType(value = ClientAuthenticationException.class, code = { 401 })
+ @UnexpectedResponseExceptionType(value = ResourceNotFoundException.class, code = { 404 })
+ @UnexpectedResponseExceptionType(value = ResourceModifiedException.class, code = { 409 })
+ @UnexpectedResponseExceptionType(HttpResponseException.class)
+ Response<BinaryData> getModelInfoSync(@HostParam("endpoint") String endpoint,
+ @QueryParam("api-version") String apiVersion, @HeaderParam("Accept") String accept,
+ RequestOptions requestOptions, Context context);
+ }
+
+ /**
+ * Return the embedding vectors for given images.
+ * The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+ * Header Parameters
+ *
+ * | Name | Type | Required | Description |
+ * | extra-parameters | String | No | Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header `extra-parameters`. Allowed values: "error", "drop", "pass-through". |
+ *
+ * You can add these to a request with {@link RequestOptions#addHeader}
+ * Request Body Schema
+ *
+ * {@code
+ * {
+ * input (Required): [
+ * (Required){
+ * image: String (Required)
+ * text: String (Optional)
+ * }
+ * ]
+ * dimensions: Integer (Optional)
+ * encoding_format: String(base64/binary/float/int8/ubinary/uint8) (Optional)
+ * input_type: String(text/query/document) (Optional)
+ * model: String (Optional)
+ * (Optional): {
+ * String: Object (Required)
+ * }
+ * }
+ * }
+ *
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * data (Required): [
+ * (Required){
+ * embedding: BinaryData (Required)
+ * index: int (Required)
+ * }
+ * ]
+ * usage (Required): {
+ * prompt_tokens: int (Required)
+ * total_tokens: int (Required)
+ * }
+ * model: String (Required)
+ * }
+ * }
+ *
+ * @param embedRequest1 The embedRequest1 parameter.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return representation of the response data from an embeddings request.
+ * Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
+ * recommendations, and other similar scenarios along with {@link Response} on successful completion of
+ * {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> embedWithResponseAsync(BinaryData embedRequest1, RequestOptions requestOptions) {
+ final String contentType = "application/json";
+ final String accept = "application/json";
+ return FluxUtil.withContext(context -> service.embed(this.getEndpoint(), this.getServiceVersion().getVersion(),
+ contentType, accept, embedRequest1, requestOptions, context));
+ }
+
+ /**
+ * Return the embedding vectors for given images.
+ * The method makes a REST API call to the `/images/embeddings` route on the given endpoint.
+ * Header Parameters
+ *
+ * | Name | Type | Required | Description |
+ * | extra-parameters | String | No | Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload. This sets the HTTP request header `extra-parameters`. Allowed values: "error", "drop", "pass-through". |
+ *
+ * You can add these to a request with {@link RequestOptions#addHeader}
+ * Request Body Schema
+ *
+ * {@code
+ * {
+ * input (Required): [
+ * (Required){
+ * image: String (Required)
+ * text: String (Optional)
+ * }
+ * ]
+ * dimensions: Integer (Optional)
+ * encoding_format: String(base64/binary/float/int8/ubinary/uint8) (Optional)
+ * input_type: String(text/query/document) (Optional)
+ * model: String (Optional)
+ * (Optional): {
+ * String: Object (Required)
+ * }
+ * }
+ * }
+ *
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * data (Required): [
+ * (Required){
+ * embedding: BinaryData (Required)
+ * index: int (Required)
+ * }
+ * ]
+ * usage (Required): {
+ * prompt_tokens: int (Required)
+ * total_tokens: int (Required)
+ * }
+ * model: String (Required)
+ * }
+ * }
+ *
+ * @param embedRequest1 The embedRequest1 parameter.
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return representation of the response data from an embeddings request.
+ * Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
+ * recommendations, and other similar scenarios along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> embedWithResponse(BinaryData embedRequest1, RequestOptions requestOptions) {
+ final String contentType = "application/json";
+ final String accept = "application/json";
+ return service.embedSync(this.getEndpoint(), this.getServiceVersion().getVersion(), contentType, accept,
+ embedRequest1, requestOptions, Context.NONE);
+ }
+
+ /**
+ * Returns information about the AI model.
+ * The method makes a REST API call to the `/info` route on the given endpoint.
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * model_name: String (Required)
+ * model_type: String(embeddings/image_generation/text_generation/image_embeddings/audio_generation/chat) (Required)
+ * model_provider_name: String (Required)
+ * }
+ * }
+ *
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return represents some basic information about the AI model along with {@link Response} on successful completion
+ * of {@link Mono}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Mono<Response<BinaryData>> getModelInfoWithResponseAsync(RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return FluxUtil.withContext(context -> service.getModelInfo(this.getEndpoint(),
+ this.getServiceVersion().getVersion(), accept, requestOptions, context));
+ }
+
+ /**
+ * Returns information about the AI model.
+ * The method makes a REST API call to the `/info` route on the given endpoint.
+ * Response Body Schema
+ *
+ * {@code
+ * {
+ * model_name: String (Required)
+ * model_type: String(embeddings/image_generation/text_generation/image_embeddings/audio_generation/chat) (Required)
+ * model_provider_name: String (Required)
+ * }
+ * }
+ *
+ * @param requestOptions The options to configure the HTTP request before HTTP client sends it.
+ * @throws HttpResponseException thrown if the request is rejected by server.
+ * @throws ClientAuthenticationException thrown if the request is rejected by server on status code 401.
+ * @throws ResourceNotFoundException thrown if the request is rejected by server on status code 404.
+ * @throws ResourceModifiedException thrown if the request is rejected by server on status code 409.
+ * @return represents some basic information about the AI model along with {@link Response}.
+ */
+ @ServiceMethod(returns = ReturnType.SINGLE)
+ public Response<BinaryData> getModelInfoWithResponse(RequestOptions requestOptions) {
+ final String accept = "application/json";
+ return service.getModelInfoSync(this.getEndpoint(), this.getServiceVersion().getVersion(), accept,
+ requestOptions, Context.NONE);
+ }
+}
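The `embedWithResponse` overloads above take the request body as raw `BinaryData`, so callers assemble the documented JSON payload themselves. A minimal sketch of building that payload with the standard library only (the field values and the helper name `ImageEmbedRequestDemo` are illustrative, not part of the SDK; a real caller would use a JSON library or the SDK's model types rather than string concatenation, which does not escape special characters):

```java
// Builds the minimal documented /images/embeddings request payload:
// an "input" array of {image, text} items plus the optional
// encoding_format field. The image value is expected to be a
// data-URL-encoded image string.
final class ImageEmbedRequestDemo {
    static String buildRequestJson(String imageDataUrl, String text) {
        return "{"
            + "\"input\":[{\"image\":\"" + imageDataUrl + "\",\"text\":\"" + text + "\"}],"
            + "\"encoding_format\":\"float\""
            + "}";
    }
}
```

The resulting string would then be wrapped with `BinaryData.fromString(...)` and passed as `embedRequest1`.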
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/InferenceServerSentEvents.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/InferenceServerSentEvents.java
new file mode 100644
index 000000000000..a1f64c0c1d6c
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/InferenceServerSentEvents.java
@@ -0,0 +1,96 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.inference.implementation;
+
+import com.azure.core.util.serializer.JsonSerializer;
+import com.azure.core.util.serializer.JsonSerializerProviders;
+import com.azure.core.util.serializer.TypeReference;
+import reactor.core.publisher.Flux;
+import reactor.core.scheduler.Schedulers;
+import java.io.ByteArrayOutputStream;
+import java.io.UncheckedIOException;
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public final class InferenceServerSentEvents<T> {
+
+ private static final List<String> STREAM_COMPLETION_EVENT = Arrays.asList("data: [DONE]", "data:[DONE]");
+ private final Flux<ByteBuffer> source;
+ private final Class<T> type;
+ private ByteArrayOutputStream outStream;
+
+ private static final JsonSerializer SERIALIZER = JsonSerializerProviders.createInstance(true);
+
+ public InferenceServerSentEvents(Flux<ByteBuffer> source, Class<T> type) {
+ this.source = source;
+ this.type = type;
+ this.outStream = new ByteArrayOutputStream();
+ }
+
+ public Flux<T> getEvents() {
+ return mapByteBuffersToEvents();
+ }
+
+ private Flux<T> mapByteBuffersToEvents() {
+ return source
+ .publishOn(Schedulers.boundedElastic())
+ .concatMap(byteBuffer -> {
+ List<T> values = new ArrayList<>();
+ byte[] byteArray = byteBuffer.array();
+ for (byte currentByte : byteArray) {
+ if (currentByte == 0xA || currentByte == 0xD) {
+ try {
+ handleCurrentLine(outStream.toString(StandardCharsets.UTF_8.name()), values);
+ } catch (UnsupportedEncodingException | UncheckedIOException e) {
+ return Flux.error(e);
+ }
+ outStream = new ByteArrayOutputStream();
+ } else {
+ outStream.write(currentByte);
+ }
+ }
+ try {
+ handleCurrentLine(outStream.toString(StandardCharsets.UTF_8.name()), values);
+ outStream = new ByteArrayOutputStream();
+ } catch (IllegalStateException | UncheckedIOException e) {
+ // return the values collected so far, as this could be because the server sent event is
+ // split across two byte buffers and the last line is incomplete and will be continued in
+ // the next byte buffer
+ return Flux.fromIterable(values);
+ } catch (UnsupportedEncodingException e) {
+ return Flux.error(e);
+ }
+ return Flux.fromIterable(values);
+ }).cache();
+ }
+
+ private void handleCurrentLine(String currentLine, List<T> values) throws UncheckedIOException {
+ if (currentLine.isEmpty() || STREAM_COMPLETION_EVENT.contains(currentLine)) {
+ return;
+ }
+
+ // The expected line format of the server sent event is data: {...}
+ String[] split = currentLine.split(":", 2);
+ if (split.length != 2) {
+ throw new IllegalStateException("Invalid data format " + currentLine);
+ }
+
+ String dataValue = split[1];
+ if (split[1].startsWith(" ")) {
+ dataValue = split[1].substring(1);
+ }
+
+ T value = SERIALIZER.deserializeFromBytes(dataValue.getBytes(StandardCharsets.UTF_8), TypeReference.createInstance(type));
+ if (value == null) {
+ throw new IllegalStateException("Failed to deserialize the data value " + dataValue);
+ }
+
+ values.add(value);
+ }
+}
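The core of `handleCurrentLine` is the server-sent-event framing: each line should look like `data: {...}`, a `data: [DONE]` (or `data:[DONE]`) line marks end of stream, and blank lines separate events. A self-contained sketch of just that line handling, with the Azure and Reactor types stripped out (the names `SseLineParser` and `extractData` are illustrative, not part of the SDK):

```java
import java.util.Arrays;
import java.util.List;

// Minimal sketch of SSE line handling: returns the JSON payload of one
// "data: {...}" line, or null for blank lines and the completion
// sentinel, and throws on lines that lack the "data:" field.
final class SseLineParser {
    private static final List<String> STREAM_COMPLETION_EVENT = Arrays.asList("data: [DONE]", "data:[DONE]");

    static String extractData(String currentLine) {
        if (currentLine.isEmpty() || STREAM_COMPLETION_EVENT.contains(currentLine)) {
            return null;
        }
        // Split only at the first colon so colons inside the JSON payload
        // are preserved.
        String[] split = currentLine.split(":", 2);
        if (split.length != 2) {
            throw new IllegalStateException("Invalid data format " + currentLine);
        }
        // A single leading space after the colon is part of the SSE
        // framing, not the payload.
        return split[1].startsWith(" ") ? split[1].substring(1) : split[1];
    }
}
```

The real class additionally buffers bytes across `ByteBuffer` boundaries, since one event may be split over two network chunks; that is why `mapByteBuffersToEvents` treats a parse failure on the final, possibly incomplete line as "wait for the next buffer" rather than an error.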
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/accesshelpers/ChatCompletionsOptionsAccessHelper.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/accesshelpers/ChatCompletionsOptionsAccessHelper.java
new file mode 100644
index 000000000000..b15bef638bee
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/accesshelpers/ChatCompletionsOptionsAccessHelper.java
@@ -0,0 +1,47 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+package com.azure.ai.inference.implementation.accesshelpers;
+
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+
+/**
+ * Class containing helper methods for accessing private members of {@link ChatCompletionsOptions}.
+ */
+public final class ChatCompletionsOptionsAccessHelper {
+ private static ChatCompletionsOptionsAccessor accessor;
+
+ /**
+ * Type defining the methods to set the non-public properties of a {@link ChatCompletionsOptions} instance.
+ */
+ public interface ChatCompletionsOptionsAccessor {
+ /**
+ * Sets the stream property of the {@link ChatCompletionsOptions}.
+ *
+ * @param chatCompletionsOptions The {@link ChatCompletionsOptions} instance
+ * @param stream The boolean value to set private stream property
+ */
+ void setStream(ChatCompletionsOptions chatCompletionsOptions, boolean stream);
+ }
+
+ /**
+ * The method called from {@link ChatCompletionsOptions} to set its accessor.
+ *
+ * @param chatCompletionsOptionsAccessor The accessor.
+ */
+ public static void setAccessor(final ChatCompletionsOptionsAccessor chatCompletionsOptionsAccessor) {
+ accessor = chatCompletionsOptionsAccessor;
+ }
+
+ /**
+ * Sets the stream property of the {@link ChatCompletionsOptions}.
+ *
+ * @param options The {@link ChatCompletionsOptions} instance
+ * @param stream The boolean value to set private stream property
+ */
+ public static void setStream(ChatCompletionsOptions options, boolean stream) {
+ accessor.setStream(options, stream);
+ }
+
+ private ChatCompletionsOptionsAccessHelper() {
+ }
+}
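The access-helper pattern above lets implementation code set a private field of a public model class without exposing a public setter: the model class registers a lambda with the helper in a static initializer, and trusted callers go through the helper. A self-contained sketch of the same mechanism (the names `AccessHelperDemo`, `Options`, and `OptionsAccessHelper` are illustrative):

```java
// Options has a private flag with no public setter; OptionsAccessHelper
// exposes it to trusted code via an accessor that Options registers in
// its static initializer (which runs on first use of the class).
final class AccessHelperDemo {
    static final class Options {
        private boolean stream;

        static {
            OptionsAccessHelper.setAccessor((options, value) -> options.stream = value);
        }

        boolean isStream() {
            return stream;
        }
    }

    static final class OptionsAccessHelper {
        interface OptionsAccessor {
            void setStream(Options options, boolean stream);
        }

        private static OptionsAccessor accessor;

        static void setAccessor(OptionsAccessor optionsAccessor) {
            accessor = optionsAccessor;
        }

        static void setStream(Options options, boolean stream) {
            accessor.setStream(options, stream);
        }

        private OptionsAccessHelper() {
        }
    }
}
```

One caveat of the pattern: the helper only works after the model class has been loaded, which is why callers typically touch the model class (here, by constructing an `Options`) before invoking the helper.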
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/CompleteRequest.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/CompleteRequest.java
new file mode 100644
index 000000000000..cd99a7ab8b7a
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/CompleteRequest.java
@@ -0,0 +1,616 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.implementation.models;
+
+import com.azure.ai.inference.models.ChatCompletionsResponseFormat;
+import com.azure.ai.inference.models.ChatCompletionsToolDefinition;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.BinaryData;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * The CompleteRequest model.
+ */
+@Fluent
+public final class CompleteRequest implements JsonSerializable<CompleteRequest> {
+
+ /*
+ * The collection of context messages associated with this chat completions request.
+ * Typical usage begins with a chat message for the System role that provides instructions for
+ * the behavior of the assistant, followed by alternating messages between the User and
+ * Assistant roles.
+ */
+ @Generated
+ private final List<ChatRequestMessage> messages;
+
+ /*
+ * A value that influences the probability of generated tokens appearing based on their cumulative
+ * frequency in generated text.
+ * Positive values will make tokens less likely to appear as their frequency increases and
+ * decrease the likelihood of the model repeating the same statements verbatim.
+ * Supported range is [-2, 2].
+ */
+ @Generated
+ private Double frequencyPenalty;
+
+ /*
+ * A value indicating whether chat completions should be streamed for this request.
+ */
+ @Generated
+ private Boolean stream;
+
+ /*
+ * A value that influences the probability of generated tokens appearing based on their existing
+ * presence in generated text.
+ * Positive values will make tokens less likely to appear when they already exist and increase the
+ * model's likelihood to output new topics.
+ * Supported range is [-2, 2].
+ */
+ @Generated
+ private Double presencePenalty;
+
+ /*
+ * The sampling temperature to use that controls the apparent creativity of generated completions.
+ * Higher values will make output more random while lower values will make results more focused
+ * and deterministic.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ */
+ @Generated
+ private Double temperature;
+
+ /*
+ * An alternative to sampling with temperature called nucleus sampling. This value causes the
+ * model to consider the results of tokens with the provided probability mass. As an example, a
+ * value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ * considered.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ */
+ @Generated
+ private Double topP;
+
+ /*
+ * The maximum number of tokens to generate.
+ */
+ @Generated
+ private Integer maxTokens;
+
+ /*
+ * The format that the model must output. Use this to enable JSON mode instead of the default text mode.
+ * Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
+ * via a system or user message.
+ */
+ @Generated
+ private ChatCompletionsResponseFormat responseFormat;
+
+ /*
+ * A collection of textual sequences that will end completions generation.
+ */
+ @Generated
+ private List<String> stop;
+
+ /*
+ * A list of tools the model may request to call. Currently, only functions are supported as a tool. The model
+ * may respond with a function call request and provide the input arguments in JSON format for that function.
+ */
+ @Generated
+ private List<ChatCompletionsToolDefinition> tools;
+
+ /*
+ * If specified, the model will configure which of the provided tools it can use for the chat completions response.
+ */
+ @Generated
+ private BinaryData toolChoice;
+
+ /*
+ * If specified, the system will make a best effort to sample deterministically such that repeated requests with the
+ * same seed and parameters should return the same result. Determinism is not guaranteed.
+ */
+ @Generated
+ private Long seed;
+
+ /*
+ * ID of the specific AI model to use, if more than one model is available on the endpoint.
+ */
+ @Generated
+ private String model;
+
+ /*
+ * Additional properties
+ */
+ @Generated
+ private Map<String, Object> additionalProperties;
+
+ /**
+ * Creates an instance of CompleteRequest class.
+ *
+ * @param messages the messages value to set.
+ */
+ @Generated
+ public CompleteRequest(List<ChatRequestMessage> messages) {
+ this.messages = messages;
+ }
+
+ /**
+ * Get the messages property: The collection of context messages associated with this chat completions request.
+ * Typical usage begins with a chat message for the System role that provides instructions for
+ * the behavior of the assistant, followed by alternating messages between the User and
+ * Assistant roles.
+ *
+ * @return the messages value.
+ */
+ @Generated
+ public List<ChatRequestMessage> getMessages() {
+ return this.messages;
+ }
+
+ /**
+ * Get the frequencyPenalty property: A value that influences the probability of generated tokens appearing based on
+ * their cumulative
+ * frequency in generated text.
+ * Positive values will make tokens less likely to appear as their frequency increases and
+ * decrease the likelihood of the model repeating the same statements verbatim.
+ * Supported range is [-2, 2].
+ *
+ * @return the frequencyPenalty value.
+ */
+ @Generated
+ public Double getFrequencyPenalty() {
+ return this.frequencyPenalty;
+ }
+
+ /**
+ * Set the frequencyPenalty property: A value that influences the probability of generated tokens appearing based on
+ * their cumulative
+ * frequency in generated text.
+ * Positive values will make tokens less likely to appear as their frequency increases and
+ * decrease the likelihood of the model repeating the same statements verbatim.
+ * Supported range is [-2, 2].
+ *
+ * @param frequencyPenalty the frequencyPenalty value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setFrequencyPenalty(Double frequencyPenalty) {
+ this.frequencyPenalty = frequencyPenalty;
+ return this;
+ }
+
+ /**
+ * Get the stream property: A value indicating whether chat completions should be streamed for this request.
+ *
+ * @return the stream value.
+ */
+ @Generated
+ public Boolean isStream() {
+ return this.stream;
+ }
+
+ /**
+ * Set the stream property: A value indicating whether chat completions should be streamed for this request.
+ *
+ * @param stream the stream value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setStream(Boolean stream) {
+ this.stream = stream;
+ return this;
+ }
+
+ /**
+ * Get the presencePenalty property: A value that influences the probability of generated tokens appearing based on
+ * their existing
+ * presence in generated text.
+ * Positive values will make tokens less likely to appear when they already exist and increase the
+ * model's likelihood to output new topics.
+ * Supported range is [-2, 2].
+ *
+ * @return the presencePenalty value.
+ */
+ @Generated
+ public Double getPresencePenalty() {
+ return this.presencePenalty;
+ }
+
+ /**
+ * Set the presencePenalty property: A value that influences the probability of generated tokens appearing based on
+ * their existing
+ * presence in generated text.
+ * Positive values will make tokens less likely to appear when they already exist and increase the
+ * model's likelihood to output new topics.
+ * Supported range is [-2, 2].
+ *
+ * @param presencePenalty the presencePenalty value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setPresencePenalty(Double presencePenalty) {
+ this.presencePenalty = presencePenalty;
+ return this;
+ }
+
+ /**
+ * Get the temperature property: The sampling temperature to use that controls the apparent creativity of generated
+ * completions.
+ * Higher values will make output more random while lower values will make results more focused
+ * and deterministic.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ *
+ * @return the temperature value.
+ */
+ @Generated
+ public Double getTemperature() {
+ return this.temperature;
+ }
+
+ /**
+ * Set the temperature property: The sampling temperature to use that controls the apparent creativity of generated
+ * completions.
+ * Higher values will make output more random while lower values will make results more focused
+ * and deterministic.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ *
+ * @param temperature the temperature value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setTemperature(Double temperature) {
+ this.temperature = temperature;
+ return this;
+ }
+
+ /**
+ * Get the topP property: An alternative to sampling with temperature called nucleus sampling. This value causes the
+ * model to consider the results of tokens with the provided probability mass. As an example, a
+ * value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ * considered.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ *
+ * @return the topP value.
+ */
+ @Generated
+ public Double getTopP() {
+ return this.topP;
+ }
+
+ /**
+ * Set the topP property: An alternative to sampling with temperature called nucleus sampling. This value causes the
+ * model to consider the results of tokens with the provided probability mass. As an example, a
+ * value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ * considered.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ *
+ * @param topP the topP value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setTopP(Double topP) {
+ this.topP = topP;
+ return this;
+ }
+
+ /**
+ * Get the maxTokens property: The maximum number of tokens to generate.
+ *
+ * @return the maxTokens value.
+ */
+ @Generated
+ public Integer getMaxTokens() {
+ return this.maxTokens;
+ }
+
+ /**
+ * Set the maxTokens property: The maximum number of tokens to generate.
+ *
+ * @param maxTokens the maxTokens value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setMaxTokens(Integer maxTokens) {
+ this.maxTokens = maxTokens;
+ return this;
+ }
+
+ /**
+ * Get the responseFormat property: The format that the model must output. Use this to enable JSON mode instead of
+ * the default text mode.
+ * Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
+ * via a system or user message.
+ *
+ * @return the responseFormat value.
+ */
+ @Generated
+ public ChatCompletionsResponseFormat getResponseFormat() {
+ return this.responseFormat;
+ }
+
+ /**
+ * Set the responseFormat property: The format that the model must output. Use this to enable JSON mode instead of
+ * the default text mode.
+ * Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
+ * via a system or user message.
+ *
+ * @param responseFormat the responseFormat value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setResponseFormat(ChatCompletionsResponseFormat responseFormat) {
+ this.responseFormat = responseFormat;
+ return this;
+ }
+
+ /**
+ * Get the stop property: A collection of textual sequences that will end completions generation.
+ *
+ * @return the stop value.
+ */
+ @Generated
+ public List<String> getStop() {
+ return this.stop;
+ }
+
+ /**
+ * Set the stop property: A collection of textual sequences that will end completions generation.
+ *
+ * @param stop the stop value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setStop(List<String> stop) {
+ this.stop = stop;
+ return this;
+ }
+
+ /**
+ * Get the tools property: A list of tools the model may request to call. Currently, only functions are supported as
+ * a tool. The model
+ * may respond with a function call request and provide the input arguments in JSON format for that function.
+ *
+ * @return the tools value.
+ */
+ @Generated
+ public List<ChatCompletionsToolDefinition> getTools() {
+ return this.tools;
+ }
+
+ /**
+ * Set the tools property: A list of tools the model may request to call. Currently, only functions are supported as
+ * a tool. The model
+ * may respond with a function call request and provide the input arguments in JSON format for that function.
+ *
+ * @param tools the tools value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setTools(List<ChatCompletionsToolDefinition> tools) {
+ this.tools = tools;
+ return this;
+ }
+
+ /**
+ * Get the toolChoice property: If specified, the model will configure which of the provided tools it can use for
+ * the chat completions response.
+ *
+ * @return the toolChoice value.
+ */
+ @Generated
+ public BinaryData getToolChoice() {
+ return this.toolChoice;
+ }
+
+ /**
+ * Set the toolChoice property: If specified, the model will configure which of the provided tools it can use for
+ * the chat completions response.
+ *
+ * @param toolChoice the toolChoice value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setToolChoice(BinaryData toolChoice) {
+ this.toolChoice = toolChoice;
+ return this;
+ }
+
+ /**
+ * Get the seed property: If specified, the system will make a best effort to sample deterministically such that
+ * repeated requests with the
+ * same seed and parameters should return the same result. Determinism is not guaranteed.
+ *
+ * @return the seed value.
+ */
+ @Generated
+ public Long getSeed() {
+ return this.seed;
+ }
+
+ /**
+ * Set the seed property: If specified, the system will make a best effort to sample deterministically such that
+ * repeated requests with the
+ * same seed and parameters should return the same result. Determinism is not guaranteed.
+ *
+ * @param seed the seed value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setSeed(Long seed) {
+ this.seed = seed;
+ return this;
+ }
+
+ /**
+ * Get the model property: ID of the specific AI model to use, if more than one model is available on the endpoint.
+ *
+ * @return the model value.
+ */
+ @Generated
+ public String getModel() {
+ return this.model;
+ }
+
+ /**
+ * Set the model property: ID of the specific AI model to use, if more than one model is available on the endpoint.
+ *
+ * @param model the model value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setModel(String model) {
+ this.model = model;
+ return this;
+ }
+
+ /**
+ * Get the additionalProperties property: Additional properties.
+ *
+ * @return the additionalProperties value.
+ */
+ @Generated
+ public Map<String, Object> getAdditionalProperties() {
+ return this.additionalProperties;
+ }
+
+ /**
+ * Set the additionalProperties property: Additional properties.
+ *
+ * @param additionalProperties the additionalProperties value to set.
+ * @return the CompleteRequest object itself.
+ */
+ @Generated
+ public CompleteRequest setAdditionalProperties(Map<String, Object> additionalProperties) {
+ this.additionalProperties = additionalProperties;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeArrayField("messages", this.messages, (writer, element) -> writer.writeJson(element));
+ jsonWriter.writeNumberField("frequency_penalty", this.frequencyPenalty);
+ jsonWriter.writeBooleanField("stream", this.stream);
+ jsonWriter.writeNumberField("presence_penalty", this.presencePenalty);
+ jsonWriter.writeNumberField("temperature", this.temperature);
+ jsonWriter.writeNumberField("top_p", this.topP);
+ jsonWriter.writeNumberField("max_tokens", this.maxTokens);
+ jsonWriter.writeJsonField("response_format", this.responseFormat);
+ jsonWriter.writeArrayField("stop", this.stop, (writer, element) -> writer.writeString(element));
+ jsonWriter.writeArrayField("tools", this.tools, (writer, element) -> writer.writeJson(element));
+ if (this.toolChoice != null) {
+ jsonWriter.writeUntypedField("tool_choice", this.toolChoice.toObject(Object.class));
+ }
+ jsonWriter.writeNumberField("seed", this.seed);
+ jsonWriter.writeStringField("model", this.model);
+ if (additionalProperties != null) {
+ for (Map.Entry<String, Object> additionalProperty : additionalProperties.entrySet()) {
+ jsonWriter.writeUntypedField(additionalProperty.getKey(), additionalProperty.getValue());
+ }
+ }
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of CompleteRequest from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of CompleteRequest if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the CompleteRequest.
+ */
+ @Generated
+ public static CompleteRequest fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ List<ChatRequestMessage> messages = null;
+ Double frequencyPenalty = null;
+ Boolean stream = null;
+ Double presencePenalty = null;
+ Double temperature = null;
+ Double topP = null;
+ Integer maxTokens = null;
+ ChatCompletionsResponseFormat responseFormat = null;
+ List<String> stop = null;
+ List<ChatCompletionsToolDefinition> tools = null;
+ BinaryData toolChoice = null;
+ Long seed = null;
+ String model = null;
+ Map<String, Object> additionalProperties = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("messages".equals(fieldName)) {
+ messages = reader.readArray(reader1 -> ChatRequestMessage.fromJson(reader1));
+ } else if ("frequency_penalty".equals(fieldName)) {
+ frequencyPenalty = reader.getNullable(JsonReader::getDouble);
+ } else if ("stream".equals(fieldName)) {
+ stream = reader.getNullable(JsonReader::getBoolean);
+ } else if ("presence_penalty".equals(fieldName)) {
+ presencePenalty = reader.getNullable(JsonReader::getDouble);
+ } else if ("temperature".equals(fieldName)) {
+ temperature = reader.getNullable(JsonReader::getDouble);
+ } else if ("top_p".equals(fieldName)) {
+ topP = reader.getNullable(JsonReader::getDouble);
+ } else if ("max_tokens".equals(fieldName)) {
+ maxTokens = reader.getNullable(JsonReader::getInt);
+ } else if ("response_format".equals(fieldName)) {
+ responseFormat = ChatCompletionsResponseFormat.fromJson(reader);
+ } else if ("stop".equals(fieldName)) {
+ stop = reader.readArray(reader1 -> reader1.getString());
+ } else if ("tools".equals(fieldName)) {
+ tools = reader.readArray(reader1 -> ChatCompletionsToolDefinition.fromJson(reader1));
+ } else if ("tool_choice".equals(fieldName)) {
+ toolChoice
+ = reader.getNullable(nonNullReader -> BinaryData.fromObject(nonNullReader.readUntyped()));
+ } else if ("seed".equals(fieldName)) {
+ seed = reader.getNullable(JsonReader::getLong);
+ } else if ("model".equals(fieldName)) {
+ model = reader.getString();
+ } else {
+ if (additionalProperties == null) {
+ additionalProperties = new LinkedHashMap<>();
+ }
+ additionalProperties.put(fieldName, reader.readUntyped());
+ }
+ }
+ CompleteRequest deserializedCompleteRequest = new CompleteRequest(messages);
+ deserializedCompleteRequest.frequencyPenalty = frequencyPenalty;
+ deserializedCompleteRequest.stream = stream;
+ deserializedCompleteRequest.presencePenalty = presencePenalty;
+ deserializedCompleteRequest.temperature = temperature;
+ deserializedCompleteRequest.topP = topP;
+ deserializedCompleteRequest.maxTokens = maxTokens;
+ deserializedCompleteRequest.responseFormat = responseFormat;
+ deserializedCompleteRequest.stop = stop;
+ deserializedCompleteRequest.tools = tools;
+ deserializedCompleteRequest.toolChoice = toolChoice;
+ deserializedCompleteRequest.seed = seed;
+ deserializedCompleteRequest.model = model;
+ deserializedCompleteRequest.additionalProperties = additionalProperties;
+ return deserializedCompleteRequest;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/EmbedRequest.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/EmbedRequest.java
new file mode 100644
index 000000000000..2b47c538787a
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/EmbedRequest.java
@@ -0,0 +1,272 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.implementation.models;
+
+import com.azure.ai.inference.models.EmbeddingEncodingFormat;
+import com.azure.ai.inference.models.EmbeddingInputType;
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * The EmbedRequest model.
+ */
+@Fluent
+ public final class EmbedRequest implements JsonSerializable<EmbedRequest> {
+
+ /*
+ * Input text to embed, encoded as a string or array of tokens.
+ * To embed multiple inputs in a single request, pass an array
+ * of strings or array of token arrays.
+ */
+ @Generated
+ private final List<String> input;
+
+ /*
+ * Optional. The number of dimensions the resulting output embeddings should have.
+ * Passing null causes the model to use its default value.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ */
+ @Generated
+ private Integer dimensions;
+
+ /*
+ * Optional. The desired format for the returned embeddings.
+ */
+ @Generated
+ private EmbeddingEncodingFormat encodingFormat;
+
+ /*
+ * Optional. The type of the input.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ */
+ @Generated
+ private EmbeddingInputType inputType;
+
+ /*
+ * ID of the specific AI model to use, if more than one model is available on the endpoint.
+ */
+ @Generated
+ private String model;
+
+ /*
+ * Additional properties
+ */
+ @Generated
+ private Map<String, Object> additionalProperties;
+
+ /**
+ * Creates an instance of EmbedRequest class.
+ *
+ * @param input the input value to set.
+ */
+ @Generated
+ public EmbedRequest(List<String> input) {
+ this.input = input;
+ }
+
+ /**
+ * Get the input property: Input text to embed, encoded as a string or array of tokens.
+ * To embed multiple inputs in a single request, pass an array
+ * of strings or array of token arrays.
+ *
+ * @return the input value.
+ */
+ @Generated
+ public List<String> getInput() {
+ return this.input;
+ }
+
+ /**
+ * Get the dimensions property: Optional. The number of dimensions the resulting output embeddings should have.
+ * Passing null causes the model to use its default value.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @return the dimensions value.
+ */
+ @Generated
+ public Integer getDimensions() {
+ return this.dimensions;
+ }
+
+ /**
+ * Set the dimensions property: Optional. The number of dimensions the resulting output embeddings should have.
+ * Passing null causes the model to use its default value.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @param dimensions the dimensions value to set.
+ * @return the EmbedRequest object itself.
+ */
+ @Generated
+ public EmbedRequest setDimensions(Integer dimensions) {
+ this.dimensions = dimensions;
+ return this;
+ }
+
+ /**
+ * Get the encodingFormat property: Optional. The desired format for the returned embeddings.
+ *
+ * @return the encodingFormat value.
+ */
+ @Generated
+ public EmbeddingEncodingFormat getEncodingFormat() {
+ return this.encodingFormat;
+ }
+
+ /**
+ * Set the encodingFormat property: Optional. The desired format for the returned embeddings.
+ *
+ * @param encodingFormat the encodingFormat value to set.
+ * @return the EmbedRequest object itself.
+ */
+ @Generated
+ public EmbedRequest setEncodingFormat(EmbeddingEncodingFormat encodingFormat) {
+ this.encodingFormat = encodingFormat;
+ return this;
+ }
+
+ /**
+ * Get the inputType property: Optional. The type of the input.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @return the inputType value.
+ */
+ @Generated
+ public EmbeddingInputType getInputType() {
+ return this.inputType;
+ }
+
+ /**
+ * Set the inputType property: Optional. The type of the input.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @param inputType the inputType value to set.
+ * @return the EmbedRequest object itself.
+ */
+ @Generated
+ public EmbedRequest setInputType(EmbeddingInputType inputType) {
+ this.inputType = inputType;
+ return this;
+ }
+
+ /**
+ * Get the model property: ID of the specific AI model to use, if more than one model is available on the endpoint.
+ *
+ * @return the model value.
+ */
+ @Generated
+ public String getModel() {
+ return this.model;
+ }
+
+ /**
+ * Set the model property: ID of the specific AI model to use, if more than one model is available on the endpoint.
+ *
+ * @param model the model value to set.
+ * @return the EmbedRequest object itself.
+ */
+ @Generated
+ public EmbedRequest setModel(String model) {
+ this.model = model;
+ return this;
+ }
+
+ /**
+ * Get the additionalProperties property: Additional properties.
+ *
+ * @return the additionalProperties value.
+ */
+ @Generated
+ public Map<String, Object> getAdditionalProperties() {
+ return this.additionalProperties;
+ }
+
+ /**
+ * Set the additionalProperties property: Additional properties.
+ *
+ * @param additionalProperties the additionalProperties value to set.
+ * @return the EmbedRequest object itself.
+ */
+ @Generated
+ public EmbedRequest setAdditionalProperties(Map<String, Object> additionalProperties) {
+ this.additionalProperties = additionalProperties;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeArrayField("input", this.input, (writer, element) -> writer.writeString(element));
+ jsonWriter.writeNumberField("dimensions", this.dimensions);
+ jsonWriter.writeStringField("encoding_format",
+ this.encodingFormat == null ? null : this.encodingFormat.toString());
+ jsonWriter.writeStringField("input_type", this.inputType == null ? null : this.inputType.toString());
+ jsonWriter.writeStringField("model", this.model);
+ if (additionalProperties != null) {
+ for (Map.Entry<String, Object> additionalProperty : additionalProperties.entrySet()) {
+ jsonWriter.writeUntypedField(additionalProperty.getKey(), additionalProperty.getValue());
+ }
+ }
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of EmbedRequest from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of EmbedRequest if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the EmbedRequest.
+ */
+ @Generated
+ public static EmbedRequest fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ List<String> input = null;
+ Integer dimensions = null;
+ EmbeddingEncodingFormat encodingFormat = null;
+ EmbeddingInputType inputType = null;
+ String model = null;
+ Map<String, Object> additionalProperties = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("input".equals(fieldName)) {
+ input = reader.readArray(reader1 -> reader1.getString());
+ } else if ("dimensions".equals(fieldName)) {
+ dimensions = reader.getNullable(JsonReader::getInt);
+ } else if ("encoding_format".equals(fieldName)) {
+ encodingFormat = EmbeddingEncodingFormat.fromString(reader.getString());
+ } else if ("input_type".equals(fieldName)) {
+ inputType = EmbeddingInputType.fromString(reader.getString());
+ } else if ("model".equals(fieldName)) {
+ model = reader.getString();
+ } else {
+ if (additionalProperties == null) {
+ additionalProperties = new LinkedHashMap<>();
+ }
+ additionalProperties.put(fieldName, reader.readUntyped());
+ }
+ }
+ EmbedRequest deserializedEmbedRequest = new EmbedRequest(input);
+ deserializedEmbedRequest.dimensions = dimensions;
+ deserializedEmbedRequest.encodingFormat = encodingFormat;
+ deserializedEmbedRequest.inputType = inputType;
+ deserializedEmbedRequest.model = model;
+ deserializedEmbedRequest.additionalProperties = additionalProperties;
+ return deserializedEmbedRequest;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/ImageEmbedRequest.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/ImageEmbedRequest.java
new file mode 100644
index 000000000000..931a49454098
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/ImageEmbedRequest.java
@@ -0,0 +1,277 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.implementation.models;
+
+import com.azure.ai.inference.models.EmbeddingEncodingFormat;
+import com.azure.ai.inference.models.EmbeddingInput;
+import com.azure.ai.inference.models.EmbeddingInputType;
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * The ImageEmbedRequest model.
+ */
+@Fluent
+ public final class ImageEmbedRequest implements JsonSerializable<ImageEmbedRequest> {
+
+ /*
+ * Input image to embed. To embed multiple inputs in a single request, pass an array.
+ * The input must not exceed the max input tokens for the model.
+ */
+ @Generated
+ private final List<EmbeddingInput> input;
+
+ /*
+ * Optional. The number of dimensions the resulting output embeddings should have.
+ * Passing null causes the model to use its default value.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ */
+ @Generated
+ private Integer dimensions;
+
+ /*
+ * Optional. The desired format for the returned embeddings.
+ */
+ @Generated
+ private EmbeddingEncodingFormat encodingFormat;
+
+ /*
+ * Optional. The type of the input.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ */
+ @Generated
+ private EmbeddingInputType inputType;
+
+ /*
+ * ID of the specific AI model to use, if more than one model is available on the endpoint.
+ */
+ @Generated
+ private String model;
+
+ /*
+ * Additional properties
+ */
+ @Generated
+ private Map<String, Object> additionalProperties;
+
+ /**
+ * Creates an instance of ImageEmbedRequest class.
+ *
+ * @param input the input value to set.
+ */
+ @Generated
+ public ImageEmbedRequest(List<EmbeddingInput> input) {
+ this.input = input;
+ }
+
+ /**
+ * Get the input property: Input image to embed. To embed multiple inputs in a single request, pass an array.
+ * The input must not exceed the max input tokens for the model.
+ *
+ * @return the input value.
+ */
+ @Generated
+ public List<EmbeddingInput> getInput() {
+ return this.input;
+ }
+
+ /**
+ * Get the dimensions property: Optional. The number of dimensions the resulting output embeddings should have.
+ * Passing null causes the model to use its default value.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @return the dimensions value.
+ */
+ @Generated
+ public Integer getDimensions() {
+ return this.dimensions;
+ }
+
+ /**
+ * Set the dimensions property: Optional. The number of dimensions the resulting output embeddings should have.
+ * Passing null causes the model to use its default value.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @param dimensions the dimensions value to set.
+ * @return the ImageEmbedRequest object itself.
+ */
+ @Generated
+ public ImageEmbedRequest setDimensions(Integer dimensions) {
+ this.dimensions = dimensions;
+ return this;
+ }
+
+ /**
+ * Get the encodingFormat property: Optional. The desired format for the returned embeddings.
+ *
+ * @return the encodingFormat value.
+ */
+ @Generated
+ public EmbeddingEncodingFormat getEncodingFormat() {
+ return this.encodingFormat;
+ }
+
+ /**
+ * Set the encodingFormat property: Optional. The desired format for the returned embeddings.
+ *
+ * @param encodingFormat the encodingFormat value to set.
+ * @return the ImageEmbedRequest object itself.
+ */
+ @Generated
+ public ImageEmbedRequest setEncodingFormat(EmbeddingEncodingFormat encodingFormat) {
+ this.encodingFormat = encodingFormat;
+ return this;
+ }
+
+ /**
+ * Get the inputType property: Optional. The type of the input.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @return the inputType value.
+ */
+ @Generated
+ public EmbeddingInputType getInputType() {
+ return this.inputType;
+ }
+
+ /**
+ * Set the inputType property: Optional. The type of the input.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @param inputType the inputType value to set.
+ * @return the ImageEmbedRequest object itself.
+ */
+ @Generated
+ public ImageEmbedRequest setInputType(EmbeddingInputType inputType) {
+ this.inputType = inputType;
+ return this;
+ }
+
+ /**
+ * Get the model property: ID of the specific AI model to use, if more than one model is available on the endpoint.
+ *
+ * @return the model value.
+ */
+ @Generated
+ public String getModel() {
+ return this.model;
+ }
+
+ /**
+ * Set the model property: ID of the specific AI model to use, if more than one model is available on the endpoint.
+ *
+ * @param model the model value to set.
+ * @return the ImageEmbedRequest object itself.
+ */
+ @Generated
+ public ImageEmbedRequest setModel(String model) {
+ this.model = model;
+ return this;
+ }
+
+ /**
+ * Get the additionalProperties property: Additional properties.
+ *
+ * @return the additionalProperties value.
+ */
+ @Generated
+ public Map<String, Object> getAdditionalProperties() {
+ return this.additionalProperties;
+ }
+
+ /**
+ * Set the additionalProperties property: Additional properties.
+ *
+ * @param additionalProperties the additionalProperties value to set.
+ * @return the ImageEmbedRequest object itself.
+ */
+ @Generated
+ public ImageEmbedRequest setAdditionalProperties(Map<String, Object> additionalProperties) {
+ this.additionalProperties = additionalProperties;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeArrayField("input", this.input, (writer, element) -> writer.writeJson(element));
+ jsonWriter.writeNumberField("dimensions", this.dimensions);
+ jsonWriter.writeStringField("encoding_format",
+ this.encodingFormat == null ? null : this.encodingFormat.toString());
+ jsonWriter.writeStringField("input_type", this.inputType == null ? null : this.inputType.toString());
+ jsonWriter.writeStringField("model", this.model);
+ if (additionalProperties != null) {
+ for (Map.Entry<String, Object> additionalProperty : additionalProperties.entrySet()) {
+ jsonWriter.writeUntypedField(additionalProperty.getKey(), additionalProperty.getValue());
+ }
+ }
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ImageEmbedRequest from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ImageEmbedRequest if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ImageEmbedRequest.
+ */
+ @Generated
+ public static ImageEmbedRequest fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ List<EmbeddingInput> input = null;
+ Integer dimensions = null;
+ EmbeddingEncodingFormat encodingFormat = null;
+ EmbeddingInputType inputType = null;
+ String model = null;
+ Map<String, Object> additionalProperties = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("input".equals(fieldName)) {
+ input = reader.readArray(reader1 -> EmbeddingInput.fromJson(reader1));
+ } else if ("dimensions".equals(fieldName)) {
+ dimensions = reader.getNullable(JsonReader::getInt);
+ } else if ("encoding_format".equals(fieldName)) {
+ encodingFormat = EmbeddingEncodingFormat.fromString(reader.getString());
+ } else if ("input_type".equals(fieldName)) {
+ inputType = EmbeddingInputType.fromString(reader.getString());
+ } else if ("model".equals(fieldName)) {
+ model = reader.getString();
+ } else {
+ if (additionalProperties == null) {
+ additionalProperties = new LinkedHashMap<>();
+ }
+ additionalProperties.put(fieldName, reader.readUntyped());
+ }
+ }
+ ImageEmbedRequest deserializedImageEmbedRequest = new ImageEmbedRequest(input);
+ deserializedImageEmbedRequest.dimensions = dimensions;
+ deserializedImageEmbedRequest.encodingFormat = encodingFormat;
+ deserializedImageEmbedRequest.inputType = inputType;
+ deserializedImageEmbedRequest.model = model;
+ deserializedImageEmbedRequest.additionalProperties = additionalProperties;
+ return deserializedImageEmbedRequest;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/package-info.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/package-info.java
new file mode 100644
index 000000000000..19b0b7fbff14
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/models/package-info.java
@@ -0,0 +1,7 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+/**
+ * Package containing the data models for Model.
+ */
+package com.azure.ai.inference.implementation.models;
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/package-info.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/package-info.java
new file mode 100644
index 000000000000..bff731a5427e
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/implementation/package-info.java
@@ -0,0 +1,7 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+/**
+ * Package containing the implementations for Model.
+ */
+package com.azure.ai.inference.implementation;
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatChoice.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatChoice.java
new file mode 100644
index 000000000000..64ced3188488
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatChoice.java
@@ -0,0 +1,128 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * The representation of a single prompt completion as part of an overall chat completions request.
+ * Generally, `n` choices are generated per provided prompt with a default value of 1.
+ * Token limits and other settings may limit the number of choices generated.
+ */
+@Immutable
+ public final class ChatChoice implements JsonSerializable<ChatChoice> {
+
+ /*
+ * The ordered index associated with this chat completions choice.
+ */
+ @Generated
+ private final int index;
+
+ /*
+ * The reason that this chat completions choice completed its generation.
+ */
+ @Generated
+ private final CompletionsFinishReason finishReason;
+
+ /*
+ * The chat message for a given chat completions prompt.
+ */
+ @Generated
+ private final ChatResponseMessage message;
+
+ /**
+ * Creates an instance of ChatChoice class.
+ *
+ * @param index the index value to set.
+ * @param finishReason the finishReason value to set.
+ * @param message the message value to set.
+ */
+ @Generated
+ private ChatChoice(int index, CompletionsFinishReason finishReason, ChatResponseMessage message) {
+ this.index = index;
+ this.finishReason = finishReason;
+ this.message = message;
+ }
+
+ /**
+ * Get the index property: The ordered index associated with this chat completions choice.
+ *
+ * @return the index value.
+ */
+ @Generated
+ public int getIndex() {
+ return this.index;
+ }
+
+ /**
+ * Get the finishReason property: The reason that this chat completions choice completed its generation.
+ *
+ * @return the finishReason value.
+ */
+ @Generated
+ public CompletionsFinishReason getFinishReason() {
+ return this.finishReason;
+ }
+
+ /**
+ * Get the message property: The chat message for a given chat completions prompt.
+ *
+ * @return the message value.
+ */
+ @Generated
+ public ChatResponseMessage getMessage() {
+ return this.message;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeIntField("index", this.index);
+ jsonWriter.writeStringField("finish_reason", this.finishReason == null ? null : this.finishReason.toString());
+ jsonWriter.writeJsonField("message", this.message);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatChoice from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatChoice if the JsonReader was pointing to an instance of it, or null if it was pointing
+ * to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatChoice.
+ */
+ @Generated
+ public static ChatChoice fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ int index = 0;
+ CompletionsFinishReason finishReason = null;
+ ChatResponseMessage message = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("index".equals(fieldName)) {
+ index = reader.getInt();
+ } else if ("finish_reason".equals(fieldName)) {
+ finishReason = CompletionsFinishReason.fromString(reader.getString());
+ } else if ("message".equals(fieldName)) {
+ message = ChatResponseMessage.fromJson(reader);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new ChatChoice(index, finishReason, message);
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletions.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletions.java
new file mode 100644
index 000000000000..d63d1df281c6
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletions.java
@@ -0,0 +1,188 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.time.Instant;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
+import java.util.List;
+
+/**
+ * Representation of the response data from a chat completions request.
+ * Completions support a wide variety of tasks and generate text that continues from or "completes"
+ * provided prompt data.
+ */
+@Immutable
+ public final class ChatCompletions implements JsonSerializable<ChatCompletions> {
+
+ /*
+ * A unique identifier associated with this chat completions response.
+ */
+ @Generated
+ private final String id;
+
+ /*
+ * The first timestamp associated with generation activity for this completions response,
+ * represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
+ */
+ @Generated
+ private final long created;
+
+ /*
+ * The model used for the chat completion.
+ */
+ @Generated
+ private final String model;
+
+ /*
+ * Usage information for tokens processed and generated as part of this completions operation.
+ */
+ @Generated
+ private final CompletionsUsage usage;
+
+ /*
+ * The collection of completions choices associated with this completions response.
+ * Generally, `n` choices are generated per provided prompt with a default value of 1.
+ * Token limits and other settings may limit the number of choices generated.
+ */
+ @Generated
+ private final List<ChatChoice> choices;
+
+ /**
+ * Creates an instance of ChatCompletions class.
+ *
+ * @param id the id value to set.
+ * @param created the created value to set.
+ * @param model the model value to set.
+ * @param usage the usage value to set.
+ * @param choices the choices value to set.
+ */
+ @Generated
+ private ChatCompletions(String id, OffsetDateTime created, String model, CompletionsUsage usage,
+ List<ChatChoice> choices) {
+ this.id = id;
+ if (created == null) {
+ this.created = 0L;
+ } else {
+ this.created = created.toEpochSecond();
+ }
+ this.model = model;
+ this.usage = usage;
+ this.choices = choices;
+ }
+
+ /**
+ * Get the id property: A unique identifier associated with this chat completions response.
+ *
+ * @return the id value.
+ */
+ @Generated
+ public String getId() {
+ return this.id;
+ }
+
+ /**
+ * Get the created property: The first timestamp associated with generation activity for this completions response,
+ * represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
+ *
+ * @return the created value.
+ */
+ @Generated
+ public OffsetDateTime getCreated() {
+ return OffsetDateTime.ofInstant(Instant.ofEpochSecond(this.created), ZoneOffset.UTC);
+ }
+
+ /**
+ * Get the model property: The model used for the chat completion.
+ *
+ * @return the model value.
+ */
+ @Generated
+ public String getModel() {
+ return this.model;
+ }
+
+ /**
+ * Get the usage property: Usage information for tokens processed and generated as part of this completions
+ * operation.
+ *
+ * @return the usage value.
+ */
+ @Generated
+ public CompletionsUsage getUsage() {
+ return this.usage;
+ }
+
+ /**
+ * Get the choices property: The collection of completions choices associated with this completions response.
+ * Generally, `n` choices are generated per provided prompt with a default value of 1.
+ * Token limits and other settings may limit the number of choices generated.
+ *
+ * @return the choices value.
+ */
+ @Generated
+ public List<ChatChoice> getChoices() {
+ return this.choices;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("id", this.id);
+ jsonWriter.writeLongField("created", this.created);
+ jsonWriter.writeStringField("model", this.model);
+ jsonWriter.writeJsonField("usage", this.usage);
+ jsonWriter.writeArrayField("choices", this.choices, (writer, element) -> writer.writeJson(element));
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletions from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletions if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatCompletions.
+ */
+ @Generated
+ public static ChatCompletions fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String id = null;
+ OffsetDateTime created = null;
+ String model = null;
+ CompletionsUsage usage = null;
+ List<ChatChoice> choices = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("id".equals(fieldName)) {
+ id = reader.getString();
+ } else if ("created".equals(fieldName)) {
+ created = OffsetDateTime.ofInstant(Instant.ofEpochSecond(reader.getLong()), ZoneOffset.UTC);
+ } else if ("model".equals(fieldName)) {
+ model = reader.getString();
+ } else if ("usage".equals(fieldName)) {
+ usage = CompletionsUsage.fromJson(reader);
+ } else if ("choices".equals(fieldName)) {
+ choices = reader.readArray(reader1 -> ChatChoice.fromJson(reader1));
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new ChatCompletions(id, created, model, usage, choices);
+ });
+ }
+}
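
The `created` property above is stored as epoch seconds on the wire and converted to an `OffsetDateTime` in UTC on access. A minimal standalone sketch of that round-trip, using only `java.time` (the class and method names here are illustrative, not part of the SDK):

```java
import java.time.Instant;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;

public class CreatedTimestampDemo {
    // Mirrors ChatCompletions: store epoch seconds, treat a null date as 0L.
    public static long toWire(OffsetDateTime created) {
        return created == null ? 0L : created.toEpochSecond();
    }

    // Mirrors getCreated(): rebuild an OffsetDateTime in UTC from the stored seconds.
    public static OffsetDateTime fromWire(long epochSeconds) {
        return OffsetDateTime.ofInstant(Instant.ofEpochSecond(epochSeconds), ZoneOffset.UTC);
    }

    public static void main(String[] args) {
        OffsetDateTime original = OffsetDateTime.parse("2024-05-01T12:00:00Z");
        long wire = toWire(original);
        System.out.println(wire);           // 1714564800
        System.out.println(fromWire(wire)); // 2024-05-01T12:00Z
    }
}
```

Note the conversion is lossy below one second and normalizes any non-UTC offset to `ZoneOffset.UTC`, which is why the getter always reports UTC regardless of the caller's original zone.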
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsFunctionToolCall.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsFunctionToolCall.java
new file mode 100644
index 000000000000..c92f98c54145
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsFunctionToolCall.java
@@ -0,0 +1,114 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A tool call to a function tool, issued by the model in evaluation of a configured function tool, that represents
+ * a function invocation needed for a subsequent chat completions request to resolve.
+ */
+@Immutable
+public final class ChatCompletionsFunctionToolCall extends ChatCompletionsToolCall {
+ /*
+ * The object type.
+ */
+ @Generated
+ private String type = "function";
+
+ /*
+ * The details of the function invocation requested by the tool call.
+ */
+ @Generated
+ private FunctionCall function;
+
+
+ /**
+ * Creates an instance of ChatCompletionsFunctionToolCall class.
+ *
+ * @param id the id value to set.
+ * @param function the function value to set.
+ */
+ public ChatCompletionsFunctionToolCall(String id, FunctionCall function) {
+ super(id, function);
+ this.function = function;
+ }
+
+ /**
+ * Get the type property: The object type.
+ *
+ * @return the type value.
+ */
+ @Generated
+ @Override
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * Get the function property: The details of the function invocation requested by the tool call.
+ *
+ * @return the function value.
+ */
+ @Generated
+ public FunctionCall getFunction() {
+ return this.function;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("id", getId());
+ jsonWriter.writeJsonField("function", this.function);
+ jsonWriter.writeStringField("type", this.type);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsFunctionToolCall from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsFunctionToolCall if the JsonReader was pointing to an instance of it, or
+ * null if it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatCompletionsFunctionToolCall.
+ */
+ @Generated
+ public static ChatCompletionsFunctionToolCall fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String id = null;
+ FunctionCall function = null;
+ String type = "function";
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+
+ if ("id".equals(fieldName)) {
+ id = reader.getString();
+ } else if ("function".equals(fieldName)) {
+ function = FunctionCall.fromJson(reader);
+ } else if ("type".equals(fieldName)) {
+ type = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatCompletionsFunctionToolCall deserializedChatCompletionsFunctionToolCall
+ = new ChatCompletionsFunctionToolCall(id, function);
+ deserializedChatCompletionsFunctionToolCall.type = type;
+
+ return deserializedChatCompletionsFunctionToolCall;
+ });
+ }
+}
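
`ChatCompletionsFunctionToolCall` pins the polymorphic `type` discriminator to `"function"` while the base type leaves it open. The shape of that pattern, reduced to plain Java (these class names are illustrative stand-ins, not SDK types):

```java
public class ToolCallDemo {
    // Stand-in for the open base type: id plus an overridable discriminator.
    public static abstract class ToolCall {
        private final String id;

        public ToolCall(String id) {
            this.id = id;
        }

        public String getId() {
            return id;
        }

        public abstract String getType();
    }

    // Stand-in for the subtype that fixes the discriminator to "function".
    public static final class FunctionToolCall extends ToolCall {
        private final String arguments;

        public FunctionToolCall(String id, String arguments) {
            super(id);
            this.arguments = arguments;
        }

        @Override
        public String getType() {
            return "function";
        }

        public String getArguments() {
            return arguments;
        }
    }

    public static void main(String[] args) {
        ToolCall call = new FunctionToolCall("call_1", "{\"city\":\"Paris\"}");
        System.out.println(call.getId() + ": " + call.getType()); // call_1: function
    }
}
```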
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsFunctionToolDefinition.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsFunctionToolDefinition.java
new file mode 100644
index 000000000000..32f2bfe0a2ff
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsFunctionToolDefinition.java
@@ -0,0 +1,108 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * The definition information for a chat completions function tool that can call a function in response to a tool call.
+ */
+@Immutable
+public final class ChatCompletionsFunctionToolDefinition extends ChatCompletionsToolDefinition {
+ /*
+ * The object type.
+ */
+ @Generated
+ private String type = "function";
+
+ /*
+ * The function definition details for the function tool.
+ */
+ @Generated
+ private FunctionDefinition function;
+
+ /**
+ * Creates an instance of ChatCompletionsFunctionToolDefinition class.
+ *
+ * @param function the function value to set.
+ */
+ @Generated
+ public ChatCompletionsFunctionToolDefinition(FunctionDefinition function) {
+ super(function);
+ this.function = function;
+ }
+
+ /**
+ * Get the type property: The object type.
+ *
+ * @return the type value.
+ */
+ @Generated
+ @Override
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * Get the function property: The function definition details for the function tool.
+ *
+ * @return the function value.
+ */
+ @Generated
+ public FunctionDefinition getFunction() {
+ return this.function;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeJsonField("function", this.function);
+ jsonWriter.writeStringField("type", this.type);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsFunctionToolDefinition from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsFunctionToolDefinition if the JsonReader was pointing to an instance of it,
+ * or null if it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatCompletionsFunctionToolDefinition.
+ */
+ @Generated
+ public static ChatCompletionsFunctionToolDefinition fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ FunctionDefinition function = null;
+ String type = "function";
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+
+ if ("function".equals(fieldName)) {
+ function = FunctionDefinition.fromJson(reader);
+ } else if ("type".equals(fieldName)) {
+ type = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatCompletionsFunctionToolDefinition deserializedChatCompletionsFunctionToolDefinition
+ = new ChatCompletionsFunctionToolDefinition(function);
+ deserializedChatCompletionsFunctionToolDefinition.type = type;
+
+ return deserializedChatCompletionsFunctionToolDefinition;
+ });
+ }
+}
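
Each `toJson` above emits an object by writing fields in a fixed order through a fluent writer and returning it for chaining. A stdlib-only sketch of that contract, with string fields standing in for the nested `function` object that the real code writes via `writeJsonField` (`MiniJsonWriter` is hypothetical; `azure-json` supplies the real `JsonWriter`):

```java
public class MiniJsonWriterDemo {
    // Minimal stand-in for the fluent JsonWriter contract used by toJson above.
    public static final class MiniJsonWriter {
        private final StringBuilder out = new StringBuilder();
        private boolean needComma;

        public MiniJsonWriter writeStartObject() {
            out.append('{');
            needComma = false;
            return this;
        }

        public MiniJsonWriter writeStringField(String name, String value) {
            if (needComma) {
                out.append(',');
            }
            out.append('"').append(name).append("\":\"").append(value).append('"');
            needComma = true;
            return this;
        }

        public MiniJsonWriter writeEndObject() {
            out.append('}');
            return this;
        }

        @Override
        public String toString() {
            return out.toString();
        }
    }

    // Mirrors ChatCompletionsFunctionToolDefinition.toJson field order: function, then type.
    public static String demoJson() {
        return new MiniJsonWriter().writeStartObject()
            .writeStringField("function", "get_weather")
            .writeStringField("type", "function")
            .writeEndObject()
            .toString();
    }

    public static void main(String[] args) {
        System.out.println(demoJson()); // {"function":"get_weather","type":"function"}
    }
}
```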
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsFunctionToolSelection.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsFunctionToolSelection.java
new file mode 100644
index 000000000000..9db5a33e0b92
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsFunctionToolSelection.java
@@ -0,0 +1,83 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A tool selection of a specific, named function tool that will limit chat completions to using the named function.
+ */
+@Immutable
+public final class ChatCompletionsFunctionToolSelection
+ implements JsonSerializable<ChatCompletionsFunctionToolSelection> {
+
+ /*
+ * The name of the function that should be called.
+ */
+ @Generated
+ private final String name;
+
+ /**
+ * Creates an instance of ChatCompletionsFunctionToolSelection class.
+ *
+ * @param name the name value to set.
+ */
+ @Generated
+ public ChatCompletionsFunctionToolSelection(String name) {
+ this.name = name;
+ }
+
+ /**
+ * Get the name property: The name of the function that should be called.
+ *
+ * @return the name value.
+ */
+ @Generated
+ public String getName() {
+ return this.name;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("name", this.name);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsFunctionToolSelection from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsFunctionToolSelection if the JsonReader was pointing to an instance of it,
+ * or null if it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatCompletionsFunctionToolSelection.
+ */
+ @Generated
+ public static ChatCompletionsFunctionToolSelection fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String name = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("name".equals(fieldName)) {
+ name = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new ChatCompletionsFunctionToolSelection(name);
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsNamedFunctionToolSelection.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsNamedFunctionToolSelection.java
new file mode 100644
index 000000000000..8bd85b26b30c
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsNamedFunctionToolSelection.java
@@ -0,0 +1,107 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A tool selection of a specific, named function tool that will limit chat completions to using the named function.
+ */
+@Immutable
+public final class ChatCompletionsNamedFunctionToolSelection extends ChatCompletionsNamedToolSelection {
+ /*
+ * The object type.
+ */
+ @Generated
+ private String type = "function";
+
+ /*
+ * The function that should be called.
+ */
+ @Generated
+ private ChatCompletionsFunctionToolSelection function;
+
+ /**
+ * Creates an instance of ChatCompletionsNamedFunctionToolSelection class.
+ *
+ * @param function the function value to set.
+ */
+ @Generated
+ public ChatCompletionsNamedFunctionToolSelection(ChatCompletionsFunctionToolSelection function) {
+ super(function); this.function = function;
+ }
+
+ /**
+ * Get the type property: The object type.
+ *
+ * @return the type value.
+ */
+ @Generated
+ @Override
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * Get the function property: The function that should be called.
+ *
+ * @return the function value.
+ */
+ @Generated
+ public ChatCompletionsFunctionToolSelection getFunction() {
+ return this.function;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeJsonField("function", this.function);
+ jsonWriter.writeStringField("type", this.type);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsNamedFunctionToolSelection from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsNamedFunctionToolSelection if the JsonReader was pointing to an instance of
+ * it, or null if it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatCompletionsNamedFunctionToolSelection.
+ */
+ @Generated
+ public static ChatCompletionsNamedFunctionToolSelection fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatCompletionsFunctionToolSelection function = null;
+ String type = "function";
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+
+ if ("function".equals(fieldName)) {
+ function = ChatCompletionsFunctionToolSelection.fromJson(reader);
+ } else if ("type".equals(fieldName)) {
+ type = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatCompletionsNamedFunctionToolSelection deserializedChatCompletionsNamedFunctionToolSelection
+ = new ChatCompletionsNamedFunctionToolSelection(function);
+ deserializedChatCompletionsNamedFunctionToolSelection.type = type;
+
+ return deserializedChatCompletionsNamedFunctionToolSelection;
+ });
+ }
+}
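
The `fromJson` methods all follow one loop: read a field name, advance the reader, dispatch on the name, default the discriminator when it is absent, and skip anything unrecognized so unknown wire fields are tolerated. The same dispatch shape over a plain key/value map, stdlib only (the real code drives `azure-json`'s token-based `JsonReader` instead):

```java
import java.util.LinkedHashMap;
import java.util.Map;

public class FieldDispatchDemo {
    // Mirrors the fromJson pattern: dispatch by field name, default "type",
    // and ignore unknown fields (the analogue of reader.skipChildren()).
    public static String[] readToolSelection(Map<String, String> fields) {
        String function = null;
        String type = "function"; // default used when the wire payload omits the field
        for (Map.Entry<String, String> entry : fields.entrySet()) {
            if ("function".equals(entry.getKey())) {
                function = entry.getValue();
            } else if ("type".equals(entry.getKey())) {
                type = entry.getValue();
            }
            // any other field is silently skipped
        }
        return new String[] { function, type };
    }

    public static void main(String[] args) {
        Map<String, String> fields = new LinkedHashMap<>();
        fields.put("function", "get_weather");
        fields.put("unknown_field", "ignored");
        String[] parsed = readToolSelection(fields);
        System.out.println(parsed[0] + " / " + parsed[1]); // get_weather / function
    }
}
```

The default-then-overwrite handling of `type` is what lets the deserializer accept both payloads that spell out the discriminator and older ones that omit it.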
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsNamedToolSelection.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsNamedToolSelection.java
new file mode 100644
index 000000000000..3eb8ccaf531a
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsNamedToolSelection.java
@@ -0,0 +1,99 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A tool selection of a specific, named function tool that will limit chat completions to using the named function.
+ */
+@Immutable
+public class ChatCompletionsNamedToolSelection implements JsonSerializable<ChatCompletionsNamedToolSelection> {
+
+ /*
+ * The type of the tool. Currently, only `function` is supported.
+ */
+ @Generated
+ private final String type = "function";
+
+ /*
+ * The function that should be called.
+ */
+ @Generated
+ private final ChatCompletionsFunctionToolSelection function;
+
+ /**
+ * Creates an instance of ChatCompletionsNamedToolSelection class.
+ *
+ * @param function the function value to set.
+ */
+ @Generated
+ public ChatCompletionsNamedToolSelection(ChatCompletionsFunctionToolSelection function) {
+ this.function = function;
+ }
+
+ /**
+ * Get the type property: The type of the tool. Currently, only `function` is supported.
+ *
+ * @return the type value.
+ */
+ @Generated
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * Get the function property: The function that should be called.
+ *
+ * @return the function value.
+ */
+ @Generated
+ public ChatCompletionsFunctionToolSelection getFunction() {
+ return this.function;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("type", this.type);
+ jsonWriter.writeJsonField("function", this.function);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsNamedToolSelection from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsNamedToolSelection if the JsonReader was pointing to an instance of it, or
+ * null if it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatCompletionsNamedToolSelection.
+ */
+ @Generated
+ public static ChatCompletionsNamedToolSelection fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatCompletionsFunctionToolSelection function = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("function".equals(fieldName)) {
+ function = ChatCompletionsFunctionToolSelection.fromJson(reader);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new ChatCompletionsNamedToolSelection(function);
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsOptions.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsOptions.java
new file mode 100644
index 000000000000..b408b4d53fcb
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsOptions.java
@@ -0,0 +1,612 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.models;
+
+import com.azure.ai.inference.implementation.accesshelpers.ChatCompletionsOptionsAccessHelper;
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.BinaryData;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Options for complete API.
+ */
+@Fluent
+public final class ChatCompletionsOptions implements JsonSerializable<ChatCompletionsOptions> {
+ static {
+ ChatCompletionsOptionsAccessHelper.setAccessor(new ChatCompletionsOptionsAccessHelper.ChatCompletionsOptionsAccessor() {
+ @Override
+ public void setStream(ChatCompletionsOptions options, boolean stream) {
+ options.setStream(stream);
+ }
+ });
+ }
+ /*
+ * The collection of context messages associated with this chat completions request.
+ * Typical usage begins with a chat message for the System role that provides instructions for
+ * the behavior of the assistant, followed by alternating messages between the User and
+ * Assistant roles.
+ */
+ @Generated
+ private final List<ChatRequestMessage> messages;
+
+ /*
+ * A value that influences the probability of generated tokens appearing based on their cumulative
+ * frequency in generated text.
+ * Positive values will make tokens less likely to appear as their frequency increases and
+ * decrease the likelihood of the model repeating the same statements verbatim.
+ * Supported range is [-2, 2].
+ */
+ @Generated
+ private Double frequencyPenalty;
+
+ /*
+ * A value indicating whether chat completions should be streamed for this request.
+ */
+ @Generated
+ private Boolean stream;
+
+ /*
+ * A value that influences the probability of generated tokens appearing based on their existing
+ * presence in generated text.
+ * Positive values will make tokens less likely to appear when they already exist and increase the
+ * model's likelihood to output new topics.
+ * Supported range is [-2, 2].
+ */
+ @Generated
+ private Double presencePenalty;
+
+ /*
+ * The sampling temperature to use that controls the apparent creativity of generated completions.
+ * Higher values will make output more random while lower values will make results more focused
+ * and deterministic.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ */
+ @Generated
+ private Double temperature;
+
+ /*
+ * An alternative to sampling with temperature called nucleus sampling. This value causes the
+ * model to consider the results of tokens with the provided probability mass. As an example, a
+ * value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ * considered.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ */
+ @Generated
+ private Double topP;
+
+ /*
+ * The maximum number of tokens to generate.
+ */
+ @Generated
+ private Integer maxTokens;
+
+ /*
+ * The format that the model must output. Use this to enable JSON mode instead of the default text mode.
+ * Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
+ * via a system or user message.
+ */
+ @Generated
+ private ChatCompletionsResponseFormat responseFormat;
+
+ /*
+ * A collection of textual sequences that will end completions generation.
+ */
+ @Generated
+ private List<String> stop;
+
+ /*
+ * The available tool definitions that the chat completions request can use, including caller-defined functions.
+ */
+ @Generated
+ private List<ChatCompletionsToolDefinition> tools;
+
+ /*
+ * If specified, the model will configure which of the provided tools it can use for the chat completions response.
+ */
+ @Generated
+ private BinaryData toolChoice;
+
+ /*
+ * If specified, the system will make a best effort to sample deterministically such that repeated requests with the
+ * same seed and parameters should return the same result. Determinism is not guaranteed.
+ */
+ @Generated
+ private Long seed;
+
+ /*
+ * ID of the specific AI model to use, if more than one model is available on the endpoint.
+ */
+ @Generated
+ private String model;
+
+ /*
+ * Controls what happens if extra parameters, undefined by the REST API,
+ * are passed in the JSON request payload.
+ * This sets the HTTP request header `extra-parameters`.
+ */
+ @Generated
+ private ExtraParameters extraParams;
+
+ /**
+ * Creates an instance of ChatCompletionsOptions class.
+ *
+ * @param messages the messages value to set.
+ */
+ @Generated
+ public ChatCompletionsOptions(List<ChatRequestMessage> messages) {
+ this.messages = messages;
+ }
+
+ /**
+ * Get the messages property: The collection of context messages associated with this chat completions request.
+ * Typical usage begins with a chat message for the System role that provides instructions for
+ * the behavior of the assistant, followed by alternating messages between the User and
+ * Assistant roles.
+ *
+ * @return the messages value.
+ */
+ @Generated
+ public List<ChatRequestMessage> getMessages() {
+ return this.messages;
+ }
+
+ /**
+ * Get the frequencyPenalty property: A value that influences the probability of generated tokens appearing based on
+ * their cumulative
+ * frequency in generated text.
+ * Positive values will make tokens less likely to appear as their frequency increases and
+ * decrease the likelihood of the model repeating the same statements verbatim.
+ * Supported range is [-2, 2].
+ *
+ * @return the frequencyPenalty value.
+ */
+ @Generated
+ public Double getFrequencyPenalty() {
+ return this.frequencyPenalty;
+ }
+
+ /**
+ * Set the frequencyPenalty property: A value that influences the probability of generated tokens appearing based on
+ * their cumulative
+ * frequency in generated text.
+ * Positive values will make tokens less likely to appear as their frequency increases and
+ * decrease the likelihood of the model repeating the same statements verbatim.
+ * Supported range is [-2, 2].
+ *
+ * @param frequencyPenalty the frequencyPenalty value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setFrequencyPenalty(Double frequencyPenalty) {
+ this.frequencyPenalty = frequencyPenalty;
+ return this;
+ }
+
+ /**
+ * Get the stream property: A value indicating whether chat completions should be streamed for this request.
+ *
+ * @return the stream value.
+ */
+ @Generated
+ public Boolean isStream() {
+ return this.stream;
+ }
+
+ /**
+ * Set the stream property: A value indicating whether chat completions should be streamed for this request.
+ *
+ * @param stream the stream value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ private ChatCompletionsOptions setStream(Boolean stream) {
+ this.stream = stream;
+ return this;
+ }
+
+ /**
+ * Get the presencePenalty property: A value that influences the probability of generated tokens appearing based on
+ * their existing
+ * presence in generated text.
+ * Positive values will make tokens less likely to appear when they already exist and increase the
+ * model's likelihood to output new topics.
+ * Supported range is [-2, 2].
+ *
+ * @return the presencePenalty value.
+ */
+ @Generated
+ public Double getPresencePenalty() {
+ return this.presencePenalty;
+ }
+
+ /**
+ * Set the presencePenalty property: A value that influences the probability of generated tokens appearing based on
+ * their existing
+ * presence in generated text.
+ * Positive values will make tokens less likely to appear when they already exist and increase the
+ * model's likelihood to output new topics.
+ * Supported range is [-2, 2].
+ *
+ * @param presencePenalty the presencePenalty value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setPresencePenalty(Double presencePenalty) {
+ this.presencePenalty = presencePenalty;
+ return this;
+ }
+
+ /**
+ * Get the temperature property: The sampling temperature to use that controls the apparent creativity of generated
+ * completions.
+ * Higher values will make output more random while lower values will make results more focused
+ * and deterministic.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ *
+ * @return the temperature value.
+ */
+ @Generated
+ public Double getTemperature() {
+ return this.temperature;
+ }
+
+ /**
+ * Set the temperature property: The sampling temperature to use that controls the apparent creativity of generated
+ * completions.
+ * Higher values will make output more random while lower values will make results more focused
+ * and deterministic.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ *
+ * @param temperature the temperature value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setTemperature(Double temperature) {
+ this.temperature = temperature;
+ return this;
+ }
+
+ /**
+ * Get the topP property: An alternative to sampling with temperature called nucleus sampling. This value causes the
+ * model to consider the results of tokens with the provided probability mass. As an example, a
+ * value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ * considered.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ *
+ * @return the topP value.
+ */
+ @Generated
+ public Double getTopP() {
+ return this.topP;
+ }
+
+ /**
+ * Set the topP property: An alternative to sampling with temperature called nucleus sampling. This value causes the
+ * model to consider the results of tokens with the provided probability mass. As an example, a
+ * value of 0.15 will cause only the tokens comprising the top 15% of probability mass to be
+ * considered.
+ * It is not recommended to modify temperature and top_p for the same completions request as the
+ * interaction of these two settings is difficult to predict.
+ * Supported range is [0, 1].
+ *
+ * @param topP the topP value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setTopP(Double topP) {
+ this.topP = topP;
+ return this;
+ }
+
+ /**
+ * Get the maxTokens property: The maximum number of tokens to generate.
+ *
+ * @return the maxTokens value.
+ */
+ @Generated
+ public Integer getMaxTokens() {
+ return this.maxTokens;
+ }
+
+ /**
+ * Set the maxTokens property: The maximum number of tokens to generate.
+ *
+ * @param maxTokens the maxTokens value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setMaxTokens(Integer maxTokens) {
+ this.maxTokens = maxTokens;
+ return this;
+ }
+
+ /**
+ * Get the responseFormat property: The format that the model must output. Use this to enable JSON mode instead of
+ * the default text mode.
+ * Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
+ * via a system or user message.
+ *
+ * @return the responseFormat value.
+ */
+ @Generated
+ public ChatCompletionsResponseFormat getResponseFormat() {
+ return this.responseFormat;
+ }
+
+ /**
+ * Set the responseFormat property: The format that the model must output. Use this to enable JSON mode instead of
+ * the default text mode.
+ * Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
+ * via a system or user message.
+ *
+ * @param responseFormat the responseFormat value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setResponseFormat(ChatCompletionsResponseFormat responseFormat) {
+ this.responseFormat = responseFormat;
+ return this;
+ }
+
+ /**
+ * Get the stop property: A collection of textual sequences that will end completions generation.
+ *
+ * @return the stop value.
+ */
+ @Generated
+ public List<String> getStop() {
+ return this.stop;
+ }
+
+ /**
+ * Set the stop property: A collection of textual sequences that will end completions generation.
+ *
+ * @param stop the stop value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setStop(List<String> stop) {
+ this.stop = stop;
+ return this;
+ }
+
+ /**
+ * Get the tools property: The available tool definitions that the chat completions request can use, including
+ * caller-defined functions.
+ *
+ * @return the tools value.
+ */
+ @Generated
+ public List<ChatCompletionsToolDefinition> getTools() {
+ return this.tools;
+ }
+
+ /**
+ * Set the tools property: The available tool definitions that the chat completions request can use, including
+ * caller-defined functions.
+ *
+ * @param tools the tools value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setTools(List<ChatCompletionsToolDefinition> tools) {
+ this.tools = tools;
+ return this;
+ }
+
+ /**
+ * Get the toolChoice property: If specified, the model will configure which of the provided tools it can use for
+ * the chat completions response.
+ *
+ * @return the toolChoice value.
+ */
+ @Generated
+ public BinaryData getToolChoice() {
+ return this.toolChoice;
+ }
+
+ /**
+ * Set the toolChoice property: If specified, the model will configure which of the provided tools it can use for
+ * the chat completions response.
+ *
+ * @param toolChoice the toolChoice value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setToolChoice(BinaryData toolChoice) {
+ this.toolChoice = toolChoice;
+ return this;
+ }
+
+ /**
+ * Get the seed property: If specified, the system will make a best effort to sample deterministically such that
+ * repeated requests with the
+ * same seed and parameters should return the same result. Determinism is not guaranteed.
+ *
+ * @return the seed value.
+ */
+ @Generated
+ public Long getSeed() {
+ return this.seed;
+ }
+
+ /**
+ * Set the seed property: If specified, the system will make a best effort to sample deterministically such that
+ * repeated requests with the
+ * same seed and parameters should return the same result. Determinism is not guaranteed.
+ *
+ * @param seed the seed value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setSeed(Long seed) {
+ this.seed = seed;
+ return this;
+ }
+
+ /**
+ * Get the model property: ID of the specific AI model to use, if more than one model is available on the endpoint.
+ *
+ * @return the model value.
+ */
+ @Generated
+ public String getModel() {
+ return this.model;
+ }
+
+ /**
+ * Set the model property: ID of the specific AI model to use, if more than one model is available on the endpoint.
+ *
+ * @param model the model value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setModel(String model) {
+ this.model = model;
+ return this;
+ }
+
+ /**
+ * Get the extraParams property: Controls what happens if extra parameters, undefined by the REST API,
+ * are passed in the JSON request payload.
+ * This sets the HTTP request header `extra-parameters`.
+ *
+ * @return the extraParams value.
+ */
+ @Generated
+ public ExtraParameters getExtraParams() {
+ return this.extraParams;
+ }
+
+ /**
+ * Set the extraParams property: Controls what happens if extra parameters, undefined by the REST API,
+ * are passed in the JSON request payload.
+ * This sets the HTTP request header `extra-parameters`.
+ *
+ * @param extraParams the extraParams value to set.
+ * @return the ChatCompletionsOptions object itself.
+ */
+ @Generated
+ public ChatCompletionsOptions setExtraParams(ExtraParameters extraParams) {
+ this.extraParams = extraParams;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ * @throws IOException If an error occurs while writing fields to the ChatCompletionsOptions instance.
+ */
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeArrayField("messages", this.messages, JsonWriter::writeJson);
+ jsonWriter.writeNumberField("max_tokens", this.maxTokens);
+ jsonWriter.writeNumberField("temperature", this.temperature);
+ jsonWriter.writeNumberField("top_p", this.topP);
+ jsonWriter.writeArrayField("stop", this.stop, JsonWriter::writeString);
+ jsonWriter.writeNumberField("presence_penalty", this.presencePenalty);
+ jsonWriter.writeNumberField("frequency_penalty", this.frequencyPenalty);
+ jsonWriter.writeBooleanField("stream", this.stream);
+ jsonWriter.writeStringField("model", this.model);
+ jsonWriter.writeNumberField("seed", this.seed);
+ jsonWriter.writeJsonField("response_format", this.responseFormat);
+ jsonWriter.writeArrayField("tools", this.tools, JsonWriter::writeJson);
+ if (this.toolChoice != null) {
+ jsonWriter.writeRawField("tool_choice", this.toolChoice.toString());
+ }
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsOptions from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsOptions if the JsonReader was pointing to an instance of it, or null if it
+ * was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatCompletionsOptions.
+ */
+ @Generated
+ public static ChatCompletionsOptions fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ List<ChatRequestMessage> messages = null;
+ Integer maxTokens = null;
+ Double temperature = null;
+ Double topP = null;
+ List<String> stop = null;
+ Double presencePenalty = null;
+ Double frequencyPenalty = null;
+ Boolean stream = null;
+ String model = null;
+ Long seed = null;
+ ChatCompletionsResponseFormat responseFormat = null;
+ List<ChatCompletionsToolDefinition> tools = null;
+ BinaryData toolChoice = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("messages".equals(fieldName)) {
+ messages = reader.readArray(ChatRequestMessage::fromJson);
+ } else if ("max_tokens".equals(fieldName)) {
+ maxTokens = reader.getNullable(JsonReader::getInt);
+ } else if ("temperature".equals(fieldName)) {
+ temperature = reader.getNullable(JsonReader::getDouble);
+ } else if ("top_p".equals(fieldName)) {
+ topP = reader.getNullable(JsonReader::getDouble);
+ } else if ("stop".equals(fieldName)) {
+ stop = reader.readArray(JsonReader::getString);
+ } else if ("presence_penalty".equals(fieldName)) {
+ presencePenalty = reader.getNullable(JsonReader::getDouble);
+ } else if ("frequency_penalty".equals(fieldName)) {
+ frequencyPenalty = reader.getNullable(JsonReader::getDouble);
+ } else if ("stream".equals(fieldName)) {
+ stream = reader.getNullable(JsonReader::getBoolean);
+ } else if ("model".equals(fieldName)) {
+ model = reader.getString();
+ } else if ("seed".equals(fieldName)) {
+ seed = reader.getNullable(JsonReader::getLong);
+ } else if ("response_format".equals(fieldName)) {
+ responseFormat = ChatCompletionsResponseFormat.fromJson(reader);
+ } else if ("tools".equals(fieldName)) {
+ tools = reader.readArray(ChatCompletionsToolDefinition::fromJson);
+ } else if ("tool_choice".equals(fieldName)) {
+ toolChoice
+ = reader.getNullable(nonNullReader -> BinaryData.fromObject(nonNullReader.readUntyped()));
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatCompletionsOptions deserializedChatCompletionsOptions = new ChatCompletionsOptions(messages);
+ deserializedChatCompletionsOptions.maxTokens = maxTokens;
+ deserializedChatCompletionsOptions.temperature = temperature;
+ deserializedChatCompletionsOptions.topP = topP;
+ deserializedChatCompletionsOptions.stop = stop;
+ deserializedChatCompletionsOptions.presencePenalty = presencePenalty;
+ deserializedChatCompletionsOptions.frequencyPenalty = frequencyPenalty;
+ deserializedChatCompletionsOptions.stream = stream;
+ deserializedChatCompletionsOptions.model = model;
+ deserializedChatCompletionsOptions.seed = seed;
+ deserializedChatCompletionsOptions.responseFormat = responseFormat;
+ deserializedChatCompletionsOptions.tools = tools;
+ deserializedChatCompletionsOptions.toolChoice = toolChoice;
+ return deserializedChatCompletionsOptions;
+ });
+ }
+}
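The setters above all mutate a field and return `this`, which is what makes call chains like `options.setTemperature(0.7).setMaxTokens(256)` possible. A minimal self-contained sketch of that fluent-setter pattern (illustrative class and field names, not the Azure SDK types):

```java
// Sketch of the fluent-setter pattern used by ChatCompletionsOptions:
// each setter assigns the field and returns `this` so calls can be chained.
public class FluentOptions {
    private Double temperature;
    private Integer maxTokens;

    public FluentOptions setTemperature(Double temperature) {
        this.temperature = temperature;
        return this; // returning `this` is what enables chaining
    }

    public FluentOptions setMaxTokens(Integer maxTokens) {
        this.maxTokens = maxTokens;
        return this;
    }

    public Double getTemperature() {
        return temperature;
    }

    public Integer getMaxTokens() {
        return maxTokens;
    }

    public static void main(String[] args) {
        // One expression configures both fields.
        FluentOptions options = new FluentOptions().setTemperature(0.7).setMaxTokens(256);
        if (options.getTemperature() != 0.7 || options.getMaxTokens() != 256) {
            throw new AssertionError("chained setters did not apply");
        }
    }
}
```

Boxed types (`Double`, `Integer`) rather than primitives let unset options stay `null`, which is why the generated `toJson` can simply skip fields the caller never set.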
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsResponseFormat.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsResponseFormat.java
new file mode 100644
index 000000000000..984ba391e541
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsResponseFormat.java
@@ -0,0 +1,110 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * Represents the format that the model must output. Use this to enable JSON mode instead of the default text mode.
+ * Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
+ * via a system or user message.
+ */
+@Immutable
+public class ChatCompletionsResponseFormat implements JsonSerializable<ChatCompletionsResponseFormat> {
+
+ /*
+ * The response format type to use for chat completions.
+ */
+ @Generated
+ private String type = "ChatCompletionsResponseFormat";
+
+ /**
+ * Creates an instance of ChatCompletionsResponseFormat class.
+ */
+ @Generated
+ public ChatCompletionsResponseFormat() {
+ }
+
+ /**
+ * Get the type property: The response format type to use for chat completions.
+ *
+ * @return the type value.
+ */
+ @Generated
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("type", this.type);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsResponseFormat from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsResponseFormat if the JsonReader was pointing to an instance of it, or null
+ * if it was pointing to JSON null.
+ * @throws IOException If an error occurs while reading the ChatCompletionsResponseFormat.
+ */
+ @Generated
+ public static ChatCompletionsResponseFormat fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String discriminatorValue = null;
+ try (JsonReader readerToUse = reader.bufferObject()) {
+ // Prepare for reading
+ readerToUse.nextToken();
+ while (readerToUse.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = readerToUse.getFieldName();
+ readerToUse.nextToken();
+ if ("type".equals(fieldName)) {
+ discriminatorValue = readerToUse.getString();
+ break;
+ } else {
+ readerToUse.skipChildren();
+ }
+ }
+ // Use the discriminator value to determine which subtype should be deserialized.
+ if ("text".equals(discriminatorValue)) {
+ return ChatCompletionsResponseFormatText.fromJson(readerToUse.reset());
+ } else if ("json_object".equals(discriminatorValue)) {
+ return ChatCompletionsResponseFormatJson.fromJson(readerToUse.reset());
+ } else {
+ return fromJsonKnownDiscriminator(readerToUse.reset());
+ }
+ }
+ });
+ }
+
+ @Generated
+ static ChatCompletionsResponseFormat fromJsonKnownDiscriminator(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatCompletionsResponseFormat deserializedChatCompletionsResponseFormat
+ = new ChatCompletionsResponseFormat();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("type".equals(fieldName)) {
+ deserializedChatCompletionsResponseFormat.type = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedChatCompletionsResponseFormat;
+ });
+ }
+}
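`ChatCompletionsResponseFormat.fromJson` above shows the generated polymorphic-deserialization idiom: buffer the object, peek at the `type` discriminator, then re-read the full payload with the matching subtype, falling back to the base class for unknown values. A self-contained sketch of that dispatch logic using a plain field map instead of azure-json's `JsonReader` (types here are illustrative only):

```java
import java.util.Map;

public class DiscriminatorDispatch {
    // Stand-ins for the response-format class hierarchy.
    interface ResponseFormat { String type(); }
    record TextFormat() implements ResponseFormat { public String type() { return "text"; } }
    record JsonFormat() implements ResponseFormat { public String type() { return "json_object"; } }
    // Base/fallback type that preserves an unrecognized discriminator,
    // mirroring fromJsonKnownDiscriminator in the generated code.
    record UnknownFormat(String raw) implements ResponseFormat { public String type() { return raw; } }

    // Dispatch on the discriminator value; assumes "type" is present.
    static ResponseFormat fromFields(Map<String, String> fields) {
        String discriminator = fields.get("type");
        return switch (discriminator) {
            case "text" -> new TextFormat();
            case "json_object" -> new JsonFormat();
            default -> new UnknownFormat(discriminator);
        };
    }

    public static void main(String[] args) {
        if (!(fromFields(Map.of("type", "text")) instanceof TextFormat)) {
            throw new AssertionError();
        }
        if (!(fromFields(Map.of("type", "future_mode")) instanceof UnknownFormat)) {
            throw new AssertionError();
        }
    }
}
```

The fallback branch is what keeps older client code tolerant of discriminator values the service adds later, instead of failing deserialization outright.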
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsResponseFormatJson.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsResponseFormatJson.java
new file mode 100644
index 000000000000..86db6bdbea4f
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsResponseFormatJson.java
@@ -0,0 +1,81 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A response format for Chat Completions that restricts responses to emitting valid JSON objects.
+ * Note that to enable JSON mode, some AI models may also require you to instruct the model to produce JSON
+ * via a system or user message.
+ */
+@Immutable
+public final class ChatCompletionsResponseFormatJson extends ChatCompletionsResponseFormat {
+
+ /*
+ * The response format type to use for chat completions.
+ */
+ @Generated
+ private String type = "json_object";
+
+ /**
+ * Creates an instance of ChatCompletionsResponseFormatJson class.
+ */
+ @Generated
+ public ChatCompletionsResponseFormatJson() {
+ }
+
+ /**
+ * Get the type property: The response format type to use for chat completions.
+ *
+ * @return the type value.
+ */
+ @Generated
+ @Override
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("type", this.type);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsResponseFormatJson from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsResponseFormatJson if the JsonReader was pointing to an instance of it, or
+ * null if it was pointing to JSON null.
+ * @throws IOException If an error occurs while reading the ChatCompletionsResponseFormatJson.
+ */
+ @Generated
+ public static ChatCompletionsResponseFormatJson fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatCompletionsResponseFormatJson deserializedChatCompletionsResponseFormatJson
+ = new ChatCompletionsResponseFormatJson();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("type".equals(fieldName)) {
+ deserializedChatCompletionsResponseFormatJson.type = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedChatCompletionsResponseFormatJson;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsResponseFormatText.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsResponseFormatText.java
new file mode 100644
index 000000000000..9c049b8e2aeb
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsResponseFormatText.java
@@ -0,0 +1,79 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A response format for Chat Completions that emits text responses. This is the default response format.
+ */
+@Immutable
+public final class ChatCompletionsResponseFormatText extends ChatCompletionsResponseFormat {
+
+ /*
+ * The response format type to use for chat completions.
+ */
+ @Generated
+ private String type = "text";
+
+ /**
+ * Creates an instance of ChatCompletionsResponseFormatText class.
+ */
+ @Generated
+ public ChatCompletionsResponseFormatText() {
+ }
+
+ /**
+ * Get the type property: The response format type to use for chat completions.
+ *
+ * @return the type value.
+ */
+ @Generated
+ @Override
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("type", this.type);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsResponseFormatText from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsResponseFormatText if the JsonReader was pointing to an instance of it, or
+ * null if it was pointing to JSON null.
+ * @throws IOException If an error occurs while reading the ChatCompletionsResponseFormatText.
+ */
+ @Generated
+ public static ChatCompletionsResponseFormatText fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatCompletionsResponseFormatText deserializedChatCompletionsResponseFormatText
+ = new ChatCompletionsResponseFormatText();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("type".equals(fieldName)) {
+ deserializedChatCompletionsResponseFormatText.type = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedChatCompletionsResponseFormatText;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsToolCall.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsToolCall.java
new file mode 100644
index 000000000000..a368a505fdff
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsToolCall.java
@@ -0,0 +1,121 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A function tool call requested by the AI model.
+ */
+@Immutable
+public class ChatCompletionsToolCall implements JsonSerializable<ChatCompletionsToolCall> {
+
+ /*
+ * The ID of the tool call.
+ */
+ @Generated
+ private final String id;
+
+ /*
+ * The type of tool call. Currently, only `function` is supported.
+ */
+ @Generated
+ private final String type = "function";
+
+ /*
+ * The details of the function call requested by the AI model.
+ */
+ @Generated
+ private final FunctionCall function;
+
+ /**
+ * Creates an instance of ChatCompletionsToolCall class.
+ *
+ * @param id the id value to set.
+ * @param function the function value to set.
+ */
+ @Generated
+ public ChatCompletionsToolCall(String id, FunctionCall function) {
+ this.id = id;
+ this.function = function;
+ }
+
+ /**
+ * Get the id property: The ID of the tool call.
+ *
+ * @return the id value.
+ */
+ @Generated
+ public String getId() {
+ return this.id;
+ }
+
+ /**
+ * Get the type property: The type of tool call. Currently, only `function` is supported.
+ *
+ * @return the type value.
+ */
+ @Generated
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * Get the function property: The details of the function call requested by the AI model.
+ *
+ * @return the function value.
+ */
+ @Generated
+ public FunctionCall getFunction() {
+ return this.function;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("id", this.id);
+ jsonWriter.writeStringField("type", this.type);
+ jsonWriter.writeJsonField("function", this.function);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsToolCall from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsToolCall if the JsonReader was pointing to an instance of it, or null if it
+ * was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatCompletionsToolCall.
+ */
+ @Generated
+ public static ChatCompletionsToolCall fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String id = null;
+ FunctionCall function = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("id".equals(fieldName)) {
+ id = reader.getString();
+ } else if ("function".equals(fieldName)) {
+ function = FunctionCall.fromJson(reader);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new ChatCompletionsToolCall(id, function);
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsToolDefinition.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsToolDefinition.java
new file mode 100644
index 000000000000..45ad50de4589
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsToolDefinition.java
@@ -0,0 +1,99 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * The definition of a chat completions tool that can call a function.
+ */
+@Immutable
+public class ChatCompletionsToolDefinition implements JsonSerializable<ChatCompletionsToolDefinition> {
+
+ /*
+ * The type of the tool. Currently, only `function` is supported.
+ */
+ @Generated
+ private final String type = "function";
+
+ /*
+ * The function definition details for the function tool.
+ */
+ @Generated
+ private final FunctionDefinition function;
+
+ /**
+ * Creates an instance of ChatCompletionsToolDefinition class.
+ *
+ * @param function the function value to set.
+ */
+ @Generated
+ public ChatCompletionsToolDefinition(FunctionDefinition function) {
+ this.function = function;
+ }
+
+ /**
+ * Get the type property: The type of the tool. Currently, only `function` is supported.
+ *
+ * @return the type value.
+ */
+ @Generated
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * Get the function property: The function definition details for the function tool.
+ *
+ * @return the function value.
+ */
+ @Generated
+ public FunctionDefinition getFunction() {
+ return this.function;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("type", this.type);
+ jsonWriter.writeJsonField("function", this.function);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatCompletionsToolDefinition from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatCompletionsToolDefinition if the JsonReader was pointing to an instance of it, or null
+ * if it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatCompletionsToolDefinition.
+ */
+ @Generated
+ public static ChatCompletionsToolDefinition fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ FunctionDefinition function = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("function".equals(fieldName)) {
+ function = FunctionDefinition.fromJson(reader);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new ChatCompletionsToolDefinition(function);
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsToolSelectionPreset.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsToolSelectionPreset.java
new file mode 100644
index 000000000000..c67d318b47e1
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatCompletionsToolSelectionPreset.java
@@ -0,0 +1,65 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import java.util.Collection;
+
+/**
+ * Represents a generic policy for how a chat completions tool may be selected.
+ */
+public final class ChatCompletionsToolSelectionPreset extends ExpandableStringEnum<ChatCompletionsToolSelectionPreset> {
+
+ /**
+ * Specifies that the model may either use any of the tools provided in this chat completions request or
+ * instead return a standard chat completions response as if no tools were provided.
+ */
+ @Generated
+ public static final ChatCompletionsToolSelectionPreset AUTO = fromString("auto");
+
+ /**
+ * Specifies that the model should not respond with a tool call and should instead provide a standard chat
+ * completions response. Response content may still be influenced by the provided tool definitions.
+ */
+ @Generated
+ public static final ChatCompletionsToolSelectionPreset NONE = fromString("none");
+
+ /**
+ * Specifies that the model should respond with a call to one or more tools.
+ */
+ @Generated
+ public static final ChatCompletionsToolSelectionPreset REQUIRED = fromString("required");
+
+ /**
+ * Creates a new instance of ChatCompletionsToolSelectionPreset value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public ChatCompletionsToolSelectionPreset() {
+ }
+
+ /**
+ * Creates or finds a ChatCompletionsToolSelectionPreset from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding ChatCompletionsToolSelectionPreset.
+ */
+ @Generated
+ public static ChatCompletionsToolSelectionPreset fromString(String name) {
+ return fromString(name, ChatCompletionsToolSelectionPreset.class);
+ }
+
+ /**
+ * Gets known ChatCompletionsToolSelectionPreset values.
+ *
+ * @return known ChatCompletionsToolSelectionPreset values.
+ */
+ @Generated
+ public static Collection<ChatCompletionsToolSelectionPreset> values() {
+ return values(ChatCompletionsToolSelectionPreset.class);
+ }
+}
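Unlike a Java `enum`, the expandable-string-enum pattern above lets `fromString` mint values for strings the SDK does not yet know about, so a newer service value never breaks an older client. A hedged, self-contained sketch of the idea (not the actual azure-core `ExpandableStringEnum` implementation):

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public final class ToolPreset {
    // Interning cache: one instance per distinct name.
    private static final Map<String, ToolPreset> VALUES = new ConcurrentHashMap<>();

    public static final ToolPreset AUTO = fromString("auto");
    public static final ToolPreset NONE = fromString("none");
    public static final ToolPreset REQUIRED = fromString("required");

    private final String name;

    private ToolPreset(String name) {
        this.name = name;
    }

    // Creates or finds a value; unknown names succeed instead of throwing,
    // which is the point of the "expandable" enum.
    public static ToolPreset fromString(String name) {
        return VALUES.computeIfAbsent(name, ToolPreset::new);
    }

    @Override
    public String toString() {
        return name;
    }

    public static void main(String[] args) {
        if (ToolPreset.fromString("auto") != ToolPreset.AUTO) {
            throw new AssertionError("same name should intern to one instance");
        }
        // A value the "SDK" has never heard of still round-trips.
        ToolPreset future = ToolPreset.fromString("some_future_value");
        if (!"some_future_value".equals(future.toString())) {
            throw new AssertionError();
        }
    }
}
```

Interning through the cache also means `==` comparison works for values obtained via `fromString`, matching ordinary enum ergonomics.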
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageContentItem.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageContentItem.java
new file mode 100644
index 000000000000..0acb2a8bdd15
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageContentItem.java
@@ -0,0 +1,107 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * An abstract representation of a structured content item within a chat message.
+ */
+@Immutable
+public class ChatMessageContentItem implements JsonSerializable<ChatMessageContentItem> {
+
+ /*
+ * The discriminated object type.
+ */
+ @Generated
+ private String type = "ChatMessageContentItem";
+
+ /**
+ * Creates an instance of ChatMessageContentItem class.
+ */
+ @Generated
+ public ChatMessageContentItem() {
+ }
+
+ /**
+ * Get the type property: The discriminated object type.
+ *
+ * @return the type value.
+ */
+ @Generated
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("type", this.type);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatMessageContentItem from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatMessageContentItem if the JsonReader was pointing to an instance of it, or null if it
+ * was pointing to JSON null.
+ * @throws IOException If an error occurs while reading the ChatMessageContentItem.
+ */
+ @Generated
+ public static ChatMessageContentItem fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String discriminatorValue = null;
+ try (JsonReader readerToUse = reader.bufferObject()) {
+ // Prepare for reading
+ readerToUse.nextToken();
+ while (readerToUse.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = readerToUse.getFieldName();
+ readerToUse.nextToken();
+ if ("type".equals(fieldName)) {
+ discriminatorValue = readerToUse.getString();
+ break;
+ } else {
+ readerToUse.skipChildren();
+ }
+ }
+ // Use the discriminator value to determine which subtype should be deserialized.
+ if ("text".equals(discriminatorValue)) {
+ return ChatMessageTextContentItem.fromJson(readerToUse.reset());
+ } else if ("image_url".equals(discriminatorValue)) {
+ return ChatMessageImageContentItem.fromJson(readerToUse.reset());
+ } else {
+ return fromJsonKnownDiscriminator(readerToUse.reset());
+ }
+ }
+ });
+ }
+
+ @Generated
+ static ChatMessageContentItem fromJsonKnownDiscriminator(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatMessageContentItem deserializedChatMessageContentItem = new ChatMessageContentItem();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("type".equals(fieldName)) {
+ deserializedChatMessageContentItem.type = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedChatMessageContentItem;
+ });
+ }
+}
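The `fromJson` method above buffers the object, peeks at the `type` discriminator, and then routes deserialization to the matching subtype, falling back to the base type for unknown values. A standalone sketch of that dispatch pattern (regex-based for brevity — the real code streams tokens through azure-json, and the returned class names here are just labels):

```java
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class DiscriminatorDispatch {
    // Locate the "type" discriminator without fully parsing the object.
    private static final Pattern TYPE = Pattern.compile("\"type\"\\s*:\\s*\"([^\"]*)\"");

    static String dispatch(String json) {
        Matcher m = TYPE.matcher(json);
        String discriminator = m.find() ? m.group(1) : null;
        if ("text".equals(discriminator)) {
            return "ChatMessageTextContentItem";
        } else if ("image_url".equals(discriminator)) {
            return "ChatMessageImageContentItem";
        } else {
            // Unknown discriminators fall back to the base type instead of failing,
            // mirroring fromJsonKnownDiscriminator above.
            return "ChatMessageContentItem";
        }
    }

    public static void main(String[] args) {
        System.out.println(dispatch("{\"type\":\"text\",\"text\":\"hi\"}"));
        System.out.println(dispatch("{\"type\":\"image_url\"}"));
        System.out.println(dispatch("{\"type\":\"audio\"}"));
    }
}
```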
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageImageContentItem.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageImageContentItem.java
new file mode 100644
index 000000000000..eab18cc2a0cf
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageImageContentItem.java
@@ -0,0 +1,134 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.Base64;
+import java.nio.charset.StandardCharsets;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.nio.file.Path;
+
+/**
+ * A structured chat content item containing an image reference.
+ */
+@Immutable
+public final class ChatMessageImageContentItem extends ChatMessageContentItem {
+
+ /*
+ * The discriminated object type.
+ */
+ @Generated
+ private String type = "image_url";
+
+ /*
+ * An internet location, which must be accessible to the model, from which the image may be retrieved.
+ */
+ @Generated
+ private final ChatMessageImageUrl imageUrl;
+
+ /**
+ * Creates an instance of ChatMessageImageContentItem class.
+ *
+ * @param imageUrl the imageUrl value to set.
+ */
+ @Generated
+ public ChatMessageImageContentItem(ChatMessageImageUrl imageUrl) {
+ this.imageUrl = imageUrl;
+ }
+
+ /**
+ * Creates an instance of ChatMessageImageContentItem class.
+ *
+ * @param filePath path to the imageFile.
+ * @param imageFormat format of the image
+ * @throws RuntimeException If an error occurs while reading the file or file not found.
+ */
+ public ChatMessageImageContentItem(Path filePath, String imageFormat) {
+ File imageFile = filePath.toFile();
+ try (FileInputStream fileInputStreamReader = new FileInputStream(imageFile)) {
+ byte[] bytes = new byte[(int) imageFile.length()];
+ new java.io.DataInputStream(fileInputStreamReader).readFully(bytes); // read(bytes) may return before filling the buffer; readFully reads it all
+ String encodedFile = new String(Base64.getEncoder().encode(bytes), StandardCharsets.UTF_8);
+ String urlTemplate = "data:image/%s;base64,%s";
+ this.imageUrl = new ChatMessageImageUrl(String.format(urlTemplate, imageFormat, encodedFile));
+ } catch (FileNotFoundException e) {
+ throw new RuntimeException("Local file not found.", e);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+ * Get the type property: The discriminated object type.
+ *
+ * @return the type value.
+ */
+ @Generated
+ @Override
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * Get the imageUrl property: An internet location, which must be accessible to the model, from which the image may
+ * be retrieved.
+ *
+ * @return the imageUrl value.
+ */
+ @Generated
+ public ChatMessageImageUrl getImageUrl() {
+ return this.imageUrl;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeJsonField("image_url", this.imageUrl);
+ jsonWriter.writeStringField("type", this.type);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatMessageImageContentItem from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatMessageImageContentItem if the JsonReader was pointing to an instance of it, or null
+ * if it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatMessageImageContentItem.
+ */
+ @Generated
+ public static ChatMessageImageContentItem fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatMessageImageUrl imageUrl = null;
+ String type = "image_url";
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("image_url".equals(fieldName)) {
+ imageUrl = ChatMessageImageUrl.fromJson(reader);
+ } else if ("type".equals(fieldName)) {
+ type = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatMessageImageContentItem deserializedChatMessageImageContentItem
+ = new ChatMessageImageContentItem(imageUrl);
+ deserializedChatMessageImageContentItem.type = type;
+ return deserializedChatMessageImageContentItem;
+ });
+ }
+}
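The `ChatMessageImageContentItem(Path, String)` constructor above embeds the image bytes directly in the URL as a base64 data URL. A standalone sketch of that encoding step, separated from the file I/O:

```java
import java.nio.charset.StandardCharsets;
import java.util.Base64;

public class ImageDataUrl {
    // Build a "data:image/<format>;base64,<data>" URL from raw image bytes,
    // matching the urlTemplate used in the constructor above.
    static String toDataUrl(byte[] imageBytes, String imageFormat) {
        String encoded = Base64.getEncoder().encodeToString(imageBytes);
        return String.format("data:image/%s;base64,%s", imageFormat, encoded);
    }

    public static void main(String[] args) {
        // Placeholder bytes stand in for real image content.
        byte[] bytes = "fake-image".getBytes(StandardCharsets.UTF_8);
        System.out.println(toDataUrl(bytes, "png"));
    }
}
```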
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageImageDetailLevel.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageImageDetailLevel.java
new file mode 100644
index 000000000000..f238f423bc65
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageImageDetailLevel.java
@@ -0,0 +1,65 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import java.util.Collection;
+
+/**
+ * A representation of the possible image detail levels for image-based chat completions message content.
+ */
+public final class ChatMessageImageDetailLevel extends ExpandableStringEnum<ChatMessageImageDetailLevel> {
+
+ /**
+ * Specifies that the model should determine which detail level to apply using heuristics like image size.
+ */
+ @Generated
+ public static final ChatMessageImageDetailLevel AUTO = fromString("auto");
+
+ /**
+ * Specifies that image evaluation should be constrained to the 'low-res' model that may be faster and consume fewer
+ * tokens but may also be less accurate for highly detailed images.
+ */
+ @Generated
+ public static final ChatMessageImageDetailLevel LOW = fromString("low");
+
+ /**
+ * Specifies that image evaluation should enable the 'high-res' model that may be more accurate for highly detailed
+ * images but may also be slower and consume more tokens.
+ */
+ @Generated
+ public static final ChatMessageImageDetailLevel HIGH = fromString("high");
+
+ /**
+ * Creates a new instance of ChatMessageImageDetailLevel value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public ChatMessageImageDetailLevel() {
+ }
+
+ /**
+ * Creates or finds a ChatMessageImageDetailLevel from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding ChatMessageImageDetailLevel.
+ */
+ @Generated
+ public static ChatMessageImageDetailLevel fromString(String name) {
+ return fromString(name, ChatMessageImageDetailLevel.class);
+ }
+
+ /**
+ * Gets known ChatMessageImageDetailLevel values.
+ *
+ * @return known ChatMessageImageDetailLevel values.
+ */
+ @Generated
+ public static Collection<ChatMessageImageDetailLevel> values() {
+ return values(ChatMessageImageDetailLevel.class);
+ }
+}
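`ChatMessageImageDetailLevel` follows azure-core's expandable string enum pattern: known values are pre-registered constants, but `fromString` also accepts values the service may add later. A simplified standalone sketch of the idea (not azure-core's actual implementation):

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

public final class DetailLevel {
    // Cache must be initialized before the constants below, since they register through fromString.
    private static final Map<String, DetailLevel> VALUES = new ConcurrentHashMap<>();

    public static final DetailLevel AUTO = fromString("auto");
    public static final DetailLevel LOW = fromString("low");
    public static final DetailLevel HIGH = fromString("high");

    private final String name;

    private DetailLevel(String name) {
        this.name = name;
    }

    public static DetailLevel fromString(String name) {
        // computeIfAbsent yields one shared instance per distinct name,
        // so known names return the cached constant and unknown names still work.
        return VALUES.computeIfAbsent(name, DetailLevel::new);
    }

    @Override
    public String toString() {
        return name;
    }

    public static void main(String[] args) {
        System.out.println(DetailLevel.fromString("auto") == DetailLevel.AUTO);
        System.out.println(DetailLevel.fromString("ultra"));
    }
}
```

This is why the no-arg constructor above is deprecated: instances are meant to come from the `fromString` cache, not direct construction.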
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageImageUrl.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageImageUrl.java
new file mode 100644
index 000000000000..bbf59cd6d8b2
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageImageUrl.java
@@ -0,0 +1,121 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * An internet location from which the model may retrieve an image.
+ */
+@Fluent
+public final class ChatMessageImageUrl implements JsonSerializable<ChatMessageImageUrl> {
+
+ /*
+ * The URL of the image.
+ */
+ @Generated
+ private final String url;
+
+ /*
+ * The evaluation quality setting to use, which controls relative prioritization of speed, token consumption, and
+ * accuracy.
+ */
+ @Generated
+ private ChatMessageImageDetailLevel detail;
+
+ /**
+ * Creates an instance of ChatMessageImageUrl class.
+ *
+ * @param url the url value to set.
+ */
+ @Generated
+ public ChatMessageImageUrl(String url) {
+ this.url = url;
+ }
+
+ /**
+ * Get the url property: The URL of the image.
+ *
+ * @return the url value.
+ */
+ @Generated
+ public String getUrl() {
+ return this.url;
+ }
+
+ /**
+ * Get the detail property: The evaluation quality setting to use, which controls relative prioritization of speed,
+ * token consumption, and
+ * accuracy.
+ *
+ * @return the detail value.
+ */
+ @Generated
+ public ChatMessageImageDetailLevel getDetail() {
+ return this.detail;
+ }
+
+ /**
+ * Set the detail property: The evaluation quality setting to use, which controls relative prioritization of speed,
+ * token consumption, and
+ * accuracy.
+ *
+ * @param detail the detail value to set.
+ * @return the ChatMessageImageUrl object itself.
+ */
+ @Generated
+ public ChatMessageImageUrl setDetail(ChatMessageImageDetailLevel detail) {
+ this.detail = detail;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("url", this.url);
+ jsonWriter.writeStringField("detail", this.detail == null ? null : this.detail.toString());
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatMessageImageUrl from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatMessageImageUrl if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatMessageImageUrl.
+ */
+ @Generated
+ public static ChatMessageImageUrl fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String url = null;
+ ChatMessageImageDetailLevel detail = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("url".equals(fieldName)) {
+ url = reader.getString();
+ } else if ("detail".equals(fieldName)) {
+ detail = ChatMessageImageDetailLevel.fromString(reader.getString());
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatMessageImageUrl deserializedChatMessageImageUrl = new ChatMessageImageUrl(url);
+ deserializedChatMessageImageUrl.detail = detail;
+ return deserializedChatMessageImageUrl;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageTextContentItem.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageTextContentItem.java
new file mode 100644
index 000000000000..8685cd662509
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatMessageTextContentItem.java
@@ -0,0 +1,104 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A structured chat content item containing plain text.
+ */
+@Immutable
+public final class ChatMessageTextContentItem extends ChatMessageContentItem {
+
+ /*
+ * The discriminated object type.
+ */
+ @Generated
+ private String type = "text";
+
+ /*
+ * The content of the message.
+ */
+ @Generated
+ private final String text;
+
+ /**
+ * Creates an instance of ChatMessageTextContentItem class.
+ *
+ * @param text the text value to set.
+ */
+ @Generated
+ public ChatMessageTextContentItem(String text) {
+ this.text = text;
+ }
+
+ /**
+ * Get the type property: The discriminated object type.
+ *
+ * @return the type value.
+ */
+ @Generated
+ @Override
+ public String getType() {
+ return this.type;
+ }
+
+ /**
+ * Get the text property: The content of the message.
+ *
+ * @return the text value.
+ */
+ @Generated
+ public String getText() {
+ return this.text;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("text", this.text);
+ jsonWriter.writeStringField("type", this.type);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatMessageTextContentItem from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatMessageTextContentItem if the JsonReader was pointing to an instance of it, or null if
+ * it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatMessageTextContentItem.
+ */
+ @Generated
+ public static ChatMessageTextContentItem fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String text = null;
+ String type = "text";
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("text".equals(fieldName)) {
+ text = reader.getString();
+ } else if ("type".equals(fieldName)) {
+ type = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatMessageTextContentItem deserializedChatMessageTextContentItem = new ChatMessageTextContentItem(text);
+ deserializedChatMessageTextContentItem.type = type;
+ return deserializedChatMessageTextContentItem;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestAssistantMessage.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestAssistantMessage.java
new file mode 100644
index 000000000000..107df3c73d92
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestAssistantMessage.java
@@ -0,0 +1,156 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * A request chat message representing a response or action from the assistant.
+ */
+@Fluent
+public final class ChatRequestAssistantMessage extends ChatRequestMessage {
+
+ /*
+ * The chat role associated with this message.
+ */
+ @Generated
+ private ChatRole role = ChatRole.ASSISTANT;
+
+ /*
+ * The content of the message.
+ */
+ @Generated
+ private String content;
+
+ /*
+ * The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
+ * completions request to resolve as configured.
+ */
+ @Generated
+ private List<ChatCompletionsToolCall> toolCalls;
+
+ /**
+ * Creates an instance of ChatRequestAssistantMessage class.
+ */
+ public ChatRequestAssistantMessage() {
+ }
+
+ /**
+ * Creates an instance of ChatRequestAssistantMessage class.
+ *
+ * @param content the content value to set.
+ */
+ public ChatRequestAssistantMessage(String content) {
+ this.content = content;
+ }
+
+ /**
+ * Get the role property: The chat role associated with this message.
+ *
+ * @return the role value.
+ */
+ @Generated
+ @Override
+ public ChatRole getRole() {
+ return this.role;
+ }
+
+ /**
+ * Get the content property: The content of the message.
+ *
+ * @return the content value.
+ */
+ @Generated
+ public String getContent() {
+ return this.content;
+ }
+
+ /**
+ * Set the content property: The content of the message.
+ *
+ * @param content the content value to set.
+ * @return the ChatRequestAssistantMessage object itself.
+ */
+ @Generated
+ public ChatRequestAssistantMessage setContent(String content) {
+ this.content = content;
+ return this;
+ }
+
+ /**
+ * Get the toolCalls property: The tool calls that must be resolved and have their outputs appended to subsequent
+ * input messages for the chat
+ * completions request to resolve as configured.
+ *
+ * @return the toolCalls value.
+ */
+ @Generated
+ public List<ChatCompletionsToolCall> getToolCalls() {
+ return this.toolCalls;
+ }
+
+ /**
+ * Set the toolCalls property: The tool calls that must be resolved and have their outputs appended to subsequent
+ * input messages for the chat
+ * completions request to resolve as configured.
+ *
+ * @param toolCalls the toolCalls value to set.
+ * @return the ChatRequestAssistantMessage object itself.
+ */
+ @Generated
+ public ChatRequestAssistantMessage setToolCalls(List<ChatCompletionsToolCall> toolCalls) {
+ this.toolCalls = toolCalls;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("role", this.role == null ? null : this.role.toString());
+ jsonWriter.writeStringField("content", this.content);
+ jsonWriter.writeArrayField("tool_calls", this.toolCalls, (writer, element) -> writer.writeJson(element));
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatRequestAssistantMessage from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatRequestAssistantMessage if the JsonReader was pointing to an instance of it, or null
+ * if it was pointing to JSON null.
+ * @throws IOException If an error occurs while reading the ChatRequestAssistantMessage.
+ */
+ @Generated
+ public static ChatRequestAssistantMessage fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatRequestAssistantMessage deserializedChatRequestAssistantMessage = new ChatRequestAssistantMessage();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("role".equals(fieldName)) {
+ deserializedChatRequestAssistantMessage.role = ChatRole.fromString(reader.getString());
+ } else if ("content".equals(fieldName)) {
+ deserializedChatRequestAssistantMessage.content = reader.getString();
+ } else if ("tool_calls".equals(fieldName)) {
+ List<ChatCompletionsToolCall> toolCalls
+ = reader.readArray(reader1 -> ChatCompletionsToolCall.fromJson(reader1));
+ deserializedChatRequestAssistantMessage.toolCalls = toolCalls;
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedChatRequestAssistantMessage;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestMessage.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestMessage.java
new file mode 100644
index 000000000000..e6a9e1868b74
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestMessage.java
@@ -0,0 +1,111 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * An abstract representation of a chat message as provided in a request.
+ */
+@Immutable
+public class ChatRequestMessage implements JsonSerializable<ChatRequestMessage> {
+
+ /*
+ * The chat role associated with this message.
+ */
+ @Generated
+ private ChatRole role = ChatRole.fromString("ChatRequestMessage");
+
+ /**
+ * Creates an instance of ChatRequestMessage class.
+ */
+ @Generated
+ public ChatRequestMessage() {
+ }
+
+ /**
+ * Get the role property: The chat role associated with this message.
+ *
+ * @return the role value.
+ */
+ @Generated
+ public ChatRole getRole() {
+ return this.role;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("role", this.role == null ? null : this.role.toString());
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatRequestMessage from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatRequestMessage if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IOException If an error occurs while reading the ChatRequestMessage.
+ */
+ @Generated
+ public static ChatRequestMessage fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String discriminatorValue = null;
+ try (JsonReader readerToUse = reader.bufferObject()) {
+ // Prepare for reading
+ readerToUse.nextToken();
+ while (readerToUse.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = readerToUse.getFieldName();
+ readerToUse.nextToken();
+ if ("role".equals(fieldName)) {
+ discriminatorValue = readerToUse.getString();
+ break;
+ } else {
+ readerToUse.skipChildren();
+ }
+ }
+ // Use the discriminator value to determine which subtype should be deserialized.
+ if ("system".equals(discriminatorValue)) {
+ return ChatRequestSystemMessage.fromJson(readerToUse.reset());
+ } else if ("user".equals(discriminatorValue)) {
+ return ChatRequestUserMessage.fromJson(readerToUse.reset());
+ } else if ("assistant".equals(discriminatorValue)) {
+ return ChatRequestAssistantMessage.fromJson(readerToUse.reset());
+ } else if ("tool".equals(discriminatorValue)) {
+ return ChatRequestToolMessage.fromJson(readerToUse.reset());
+ } else {
+ return fromJsonKnownDiscriminator(readerToUse.reset());
+ }
+ }
+ });
+ }
+
+ @Generated
+ static ChatRequestMessage fromJsonKnownDiscriminator(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatRequestMessage deserializedChatRequestMessage = new ChatRequestMessage();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("role".equals(fieldName)) {
+ deserializedChatRequestMessage.role = ChatRole.fromString(reader.getString());
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedChatRequestMessage;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestSystemMessage.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestSystemMessage.java
new file mode 100644
index 000000000000..9f7c52ab5324
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestSystemMessage.java
@@ -0,0 +1,105 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A request chat message containing system instructions that influence how the model will generate a chat completions
+ * response.
+ */
+@Immutable
+public final class ChatRequestSystemMessage extends ChatRequestMessage {
+
+ /*
+ * The chat role associated with this message.
+ */
+ @Generated
+ private ChatRole role = ChatRole.SYSTEM;
+
+ /*
+ * The contents of the system message.
+ */
+ @Generated
+ private final String content;
+
+ /**
+ * Creates an instance of ChatRequestSystemMessage class.
+ *
+ * @param content the content value to set.
+ */
+ @Generated
+ public ChatRequestSystemMessage(String content) {
+ this.content = content;
+ }
+
+ /**
+ * Get the role property: The chat role associated with this message.
+ *
+ * @return the role value.
+ */
+ @Generated
+ @Override
+ public ChatRole getRole() {
+ return this.role;
+ }
+
+ /**
+ * Get the content property: The contents of the system message.
+ *
+ * @return the content value.
+ */
+ @Generated
+ public String getContent() {
+ return this.content;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("content", this.content);
+ jsonWriter.writeStringField("role", this.role == null ? null : this.role.toString());
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatRequestSystemMessage from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatRequestSystemMessage if the JsonReader was pointing to an instance of it, or null if
+ * it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatRequestSystemMessage.
+ */
+ @Generated
+ public static ChatRequestSystemMessage fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String content = null;
+ ChatRole role = ChatRole.SYSTEM;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("content".equals(fieldName)) {
+ content = reader.getString();
+ } else if ("role".equals(fieldName)) {
+ role = ChatRole.fromString(reader.getString());
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatRequestSystemMessage deserializedChatRequestSystemMessage = new ChatRequestSystemMessage(content);
+ deserializedChatRequestSystemMessage.role = role;
+ return deserializedChatRequestSystemMessage;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestToolMessage.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestToolMessage.java
new file mode 100644
index 000000000000..16dca9ee729d
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestToolMessage.java
@@ -0,0 +1,126 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * A request chat message representing requested output from a configured tool.
+ */
+@Immutable
+public final class ChatRequestToolMessage extends ChatRequestMessage {
+
+ /*
+ * The chat role associated with this message.
+ */
+ @Generated
+ private ChatRole role = ChatRole.TOOL;
+
+ /*
+ * The content of the message.
+ */
+ @Generated
+ private final String content;
+
+ /*
+ * The ID of the tool call resolved by the provided content.
+ */
+ @Generated
+ private final String toolCallId;
+
+ /**
+ * Creates an instance of ChatRequestToolMessage class.
+ *
+ * @param content the content value to set.
+ * @param toolCallId the toolCallId value to set.
+ */
+ @Generated
+ public ChatRequestToolMessage(String content, String toolCallId) {
+ this.content = content;
+ this.toolCallId = toolCallId;
+ }
+
+ /**
+ * Get the role property: The chat role associated with this message.
+ *
+ * @return the role value.
+ */
+ @Generated
+ @Override
+ public ChatRole getRole() {
+ return this.role;
+ }
+
+ /**
+ * Get the content property: The content of the message.
+ *
+ * @return the content value.
+ */
+ @Generated
+ public String getContent() {
+ return this.content;
+ }
+
+ /**
+ * Get the toolCallId property: The ID of the tool call resolved by the provided content.
+ *
+ * @return the toolCallId value.
+ */
+ @Generated
+ public String getToolCallId() {
+ return this.toolCallId;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("content", this.content);
+ jsonWriter.writeStringField("tool_call_id", this.toolCallId);
+ jsonWriter.writeStringField("role", this.role == null ? null : this.role.toString());
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatRequestToolMessage from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatRequestToolMessage if the JsonReader was pointing to an instance of it, or null if it
+ * was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatRequestToolMessage.
+ */
+ @Generated
+ public static ChatRequestToolMessage fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String content = null;
+ String toolCallId = null;
+ ChatRole role = ChatRole.TOOL;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("content".equals(fieldName)) {
+ content = reader.getString();
+ } else if ("tool_call_id".equals(fieldName)) {
+ toolCallId = reader.getString();
+ } else if ("role".equals(fieldName)) {
+ role = ChatRole.fromString(reader.getString());
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatRequestToolMessage deserializedChatRequestToolMessage = new ChatRequestToolMessage(content, toolCallId);
+ deserializedChatRequestToolMessage.role = role;
+ return deserializedChatRequestToolMessage;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestUserMessage.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestUserMessage.java
new file mode 100644
index 000000000000..6700a91db85b
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRequestUserMessage.java
@@ -0,0 +1,170 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.core.util.BinaryData;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.List;
+import com.azure.json.JsonProviders;
+import java.io.StringReader;
+
+/**
+ * A request chat message representing user input to the assistant.
+ */
+@Immutable
+public final class ChatRequestUserMessage extends ChatRequestMessage {
+
+ /*
+ * The chat role associated with this message.
+ */
+ @Generated
+ private ChatRole role = ChatRole.USER;
+
+ /*
+ * The contents of the user message, with available input types varying by selected model.
+ */
+ @Generated
+ private final BinaryData content;
+
+ /**
+ * Creates an instance of ChatRequestUserMessage class.
+ *
+ * @param content the content value to set.
+ */
+ @Generated
+ public ChatRequestUserMessage(BinaryData content) {
+ this.content = content;
+ }
+
+ /**
+ * Creates an instance of ChatRequestUserMessage class.
+ *
+ * @param content the string content value to set.
+ */
+ public ChatRequestUserMessage(String content) {
+ String contentString = String.format("\"%s\"", content);
+ this.content = BinaryData.fromString(contentString);
+ }
+
+ /**
+ * Get the role property: The chat role associated with this message.
+ *
+ * @return the role value.
+ */
+ @Generated
+ @Override
+ public ChatRole getRole() {
+ return this.role;
+ }
+
+ /**
+ * Get the content property: The contents of the user message, with available input types varying by selected model.
+ *
+ * @return the content value.
+ */
+ @Generated
+ public BinaryData getContent() {
+ return this.content;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeUntypedField("content", this.content.toObject(Object.class));
+ jsonWriter.writeStringField("role", this.role == null ? null : this.role.toString());
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatRequestUserMessage from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatRequestUserMessage if the JsonReader was pointing to an instance of it, or null if it
+ * was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatRequestUserMessage.
+ */
+ @Generated
+ public static ChatRequestUserMessage fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ BinaryData content = null;
+ ChatRole role = ChatRole.USER;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("content".equals(fieldName)) {
+ content = reader.getNullable(nonNullReader -> BinaryData.fromObject(nonNullReader.readUntyped()));
+ } else if ("role".equals(fieldName)) {
+ role = ChatRole.fromString(reader.getString());
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatRequestUserMessage deserializedChatRequestUserMessage = new ChatRequestUserMessage(content);
+ deserializedChatRequestUserMessage.role = role;
+ return deserializedChatRequestUserMessage;
+ });
+ }
+
+ /**
+     * Creates an instance of the ChatRequestUserMessage class from plain string content.
+     *
+     * @param content the string content for the message.
+     * @return a new ChatRequestUserMessage wrapping the provided content.
+     * @throws RuntimeException If an error occurs while building the message from the content.
+ */
+ public static ChatRequestUserMessage fromString(String content) {
+ String jsonPrompt = "{" + "\"content\":\"%s\"" + "}";
+ String contentString = String.format(jsonPrompt, content);
+ try {
+ return ChatRequestUserMessage.fromJson(JsonProviders.createReader(new StringReader(contentString)));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ /**
+     * Creates an instance of the ChatRequestUserMessage class from a list of content items.
+     *
+     * @param contentItems The content items to include in the message.
+     * @return a new ChatRequestUserMessage containing the provided content items.
+     * @throws RuntimeException If the content items are null or empty, or if an error occurs while building the
+     * message.
+ */
+    public static ChatRequestUserMessage fromContentItems(List<ChatMessageContentItem> contentItems) {
+ if (contentItems == null || contentItems.isEmpty()) {
+ throw new RuntimeException("Content items cannot be null or empty.");
+ }
+ String jsonPrompt = "{\"content\":[";
+ for (ChatMessageContentItem item : contentItems) {
+ if (item instanceof ChatMessageTextContentItem) {
+ ChatMessageTextContentItem textItem = (ChatMessageTextContentItem) item;
+ String textPrompt = "{\"type\": \"text\", \"text\":\"%s\"" + "}";
+ jsonPrompt += String.format(textPrompt, textItem.getText());
+ } else if (item instanceof ChatMessageImageContentItem) {
+ ChatMessageImageContentItem imageItem = (ChatMessageImageContentItem) item;
+ String imageUrlPrompt = "{\"type\": \"image_url\", \"image_url\":{ \"url\": \"%s\"}" + "}";
+ jsonPrompt += String.format(imageUrlPrompt, imageItem.getImageUrl().getUrl());
+ }
+ jsonPrompt += ",";
+ }
+ jsonPrompt = jsonPrompt.substring(0, jsonPrompt.length() - 1);
+ jsonPrompt += "]}";
+ try {
+ return ChatRequestUserMessage.fromJson(JsonProviders.createReader(new StringReader(jsonPrompt)));
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
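The `fromContentItems` helper above assembles the multimodal `content` array by string concatenation, so it implicitly assumes the text and image URLs contain no characters that need JSON escaping. The standalone sketch below mirrors the same payload shape with an illustrative escaping helper; the class and method names here are hypothetical, not part of the SDK.

```java
import java.util.List;

public class ContentItemsSketch {
    // Illustrative helper: escape backslashes and quotes so arbitrary text is
    // safe inside a JSON string literal (the SDK helper above does not do this).
    static String escapeJson(String s) {
        return s.replace("\\", "\\\\").replace("\"", "\\\"");
    }

    // Builds the same {"content":[{...},{...}]} shape fromContentItems emits.
    // Each item is a two-element array: [0] = "text" or "image_url", [1] = payload.
    static String buildUserContent(List<String[]> items) {
        StringBuilder sb = new StringBuilder("{\"content\":[");
        for (int i = 0; i < items.size(); i++) {
            String[] item = items.get(i);
            if (i > 0) {
                sb.append(',');
            }
            if ("text".equals(item[0])) {
                sb.append("{\"type\":\"text\",\"text\":\"")
                    .append(escapeJson(item[1])).append("\"}");
            } else {
                sb.append("{\"type\":\"image_url\",\"image_url\":{\"url\":\"")
                    .append(escapeJson(item[1])).append("\"}}");
            }
        }
        return sb.append("]}").toString();
    }
}
```

Joining complete items with a separator avoids the append-then-truncate trailing-comma dance used in the generated code.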
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatResponseMessage.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatResponseMessage.java
new file mode 100644
index 000000000000..57cc94b718bb
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatResponseMessage.java
@@ -0,0 +1,130 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * A representation of a chat message as received in a response.
+ */
+@Immutable
+public final class ChatResponseMessage implements JsonSerializable<ChatResponseMessage> {
+
+ /*
+ * The chat role associated with the message.
+ */
+ @Generated
+ private final ChatRole role;
+
+ /*
+ * The content of the message.
+ */
+ @Generated
+ private final String content;
+
+ /*
+ * The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
+ * completions request to resolve as configured.
+ */
+ @Generated
+    private List<ChatCompletionsToolCall> toolCalls;
+
+ /**
+ * Creates an instance of ChatResponseMessage class.
+ *
+ * @param role the role value to set.
+ * @param content the content value to set.
+ */
+ @Generated
+ private ChatResponseMessage(ChatRole role, String content) {
+ this.role = role;
+ this.content = content;
+ }
+
+ /**
+ * Get the role property: The chat role associated with the message.
+ *
+ * @return the role value.
+ */
+ @Generated
+ public ChatRole getRole() {
+ return this.role;
+ }
+
+ /**
+ * Get the content property: The content of the message.
+ *
+ * @return the content value.
+ */
+ @Generated
+ public String getContent() {
+ return this.content;
+ }
+
+ /**
+ * Get the toolCalls property: The tool calls that must be resolved and have their outputs appended to subsequent
+ * input messages for the chat
+ * completions request to resolve as configured.
+ *
+ * @return the toolCalls value.
+ */
+ @Generated
+    public List<ChatCompletionsToolCall> getToolCalls() {
+ return this.toolCalls;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("role", this.role == null ? null : this.role.toString());
+ jsonWriter.writeStringField("content", this.content);
+ jsonWriter.writeArrayField("tool_calls", this.toolCalls, (writer, element) -> writer.writeJson(element));
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ChatResponseMessage from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ChatResponseMessage if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ChatResponseMessage.
+ */
+ @Generated
+ public static ChatResponseMessage fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ ChatRole role = null;
+ String content = null;
+            List<ChatCompletionsToolCall> toolCalls = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("role".equals(fieldName)) {
+ role = ChatRole.fromString(reader.getString());
+ } else if ("content".equals(fieldName)) {
+ content = reader.getString();
+ } else if ("tool_calls".equals(fieldName)) {
+ toolCalls = reader.readArray(reader1 -> ChatCompletionsToolCall.fromJson(reader1));
+ } else {
+ reader.skipChildren();
+ }
+ }
+ ChatResponseMessage deserializedChatResponseMessage = new ChatResponseMessage(role, content);
+ deserializedChatResponseMessage.toolCalls = toolCalls;
+ return deserializedChatResponseMessage;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRole.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRole.java
new file mode 100644
index 000000000000..10faab5bd750
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ChatRole.java
@@ -0,0 +1,69 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import java.util.Collection;
+
+/**
+ * A description of the intended purpose of a message within a chat completions interaction.
+ */
+public final class ChatRole extends ExpandableStringEnum<ChatRole> {
+
+ /**
+ * The role that instructs or sets the behavior of the assistant.
+ */
+ @Generated
+ public static final ChatRole SYSTEM = fromString("system");
+
+ /**
+ * The role that provides input for chat completions.
+ */
+ @Generated
+ public static final ChatRole USER = fromString("user");
+
+ /**
+ * The role that provides responses to system-instructed, user-prompted input.
+ */
+ @Generated
+ public static final ChatRole ASSISTANT = fromString("assistant");
+
+ /**
+ * The role that represents extension tool activity within a chat completions operation.
+ */
+ @Generated
+ public static final ChatRole TOOL = fromString("tool");
+
+ /**
+ * Creates a new instance of ChatRole value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public ChatRole() {
+ }
+
+ /**
+ * Creates or finds a ChatRole from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding ChatRole.
+ */
+ @Generated
+ public static ChatRole fromString(String name) {
+ return fromString(name, ChatRole.class);
+ }
+
+ /**
+ * Gets known ChatRole values.
+ *
+ * @return known ChatRole values.
+ */
+ @Generated
+    public static Collection<ChatRole> values() {
+ return values(ChatRole.class);
+ }
+}
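ChatRole is an expandable string enum rather than a Java `enum` so that role strings introduced by newer service versions still deserialize instead of failing. The sketch below is a minimal standalone re-implementation of that pattern (the real `ExpandableStringEnum` in azure-core has more machinery); the `Role` class is illustrative only.

```java
import java.util.Collection;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

// Minimal sketch of the expandable-string-enum pattern: known constants are
// pre-registered, and unknown names from the wire still produce usable values.
final class Role {
    private static final ConcurrentMap<String, Role> VALUES = new ConcurrentHashMap<>();

    public static final Role SYSTEM = fromString("system");
    public static final Role USER = fromString("user");

    private final String name;

    private Role(String name) {
        this.name = name;
    }

    public static Role fromString(String name) {
        // computeIfAbsent interns each name, so equal strings yield one instance.
        return VALUES.computeIfAbsent(name, Role::new);
    }

    public static Collection<Role> values() {
        return VALUES.values();
    }

    @Override
    public String toString() {
        return name;
    }
}
```

In this sketch, interning makes repeated `fromString` calls return the same instance; the azure-core implementation additionally defines `equals`/`hashCode`, so comparisons there should use `equals` rather than identity.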
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/CompletionsFinishReason.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/CompletionsFinishReason.java
new file mode 100644
index 000000000000..f98d925da9a3
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/CompletionsFinishReason.java
@@ -0,0 +1,70 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import java.util.Collection;
+
+/**
+ * Representation of the manner in which a completions response concluded.
+ */
+public final class CompletionsFinishReason extends ExpandableStringEnum<CompletionsFinishReason> {
+
+ /**
+     * Completions ended normally, reaching the end of token generation.
+ */
+ @Generated
+ public static final CompletionsFinishReason STOPPED = fromString("stop");
+
+ /**
+ * Completions exhausted available token limits before generation could complete.
+ */
+ @Generated
+ public static final CompletionsFinishReason TOKEN_LIMIT_REACHED = fromString("length");
+
+ /**
+ * Completions generated a response that was identified as potentially sensitive per content
+ * moderation policies.
+ */
+ @Generated
+ public static final CompletionsFinishReason CONTENT_FILTERED = fromString("content_filter");
+
+ /**
+ * Completion ended with the model calling a provided tool for output.
+ */
+ @Generated
+ public static final CompletionsFinishReason TOOL_CALLS = fromString("tool_calls");
+
+ /**
+ * Creates a new instance of CompletionsFinishReason value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public CompletionsFinishReason() {
+ }
+
+ /**
+ * Creates or finds a CompletionsFinishReason from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding CompletionsFinishReason.
+ */
+ @Generated
+ public static CompletionsFinishReason fromString(String name) {
+ return fromString(name, CompletionsFinishReason.class);
+ }
+
+ /**
+ * Gets known CompletionsFinishReason values.
+ *
+ * @return known CompletionsFinishReason values.
+ */
+ @Generated
+    public static Collection<CompletionsFinishReason> values() {
+ return values(CompletionsFinishReason.class);
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/CompletionsUsage.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/CompletionsUsage.java
new file mode 100644
index 000000000000..00a1f0041473
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/CompletionsUsage.java
@@ -0,0 +1,128 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * Representation of the token counts processed for a completions request.
+ * Counts consider all tokens across prompts, choices, choice alternates, best_of generations, and
+ * other consumers.
+ */
+@Immutable
+public final class CompletionsUsage implements JsonSerializable<CompletionsUsage> {
+
+ /*
+ * The number of tokens generated across all completions emissions.
+ */
+ @Generated
+ private final int completionTokens;
+
+ /*
+ * The number of tokens in the provided prompts for the completions request.
+ */
+ @Generated
+ private final int promptTokens;
+
+ /*
+ * The total number of tokens processed for the completions request and response.
+ */
+ @Generated
+ private final int totalTokens;
+
+ /**
+ * Creates an instance of CompletionsUsage class.
+ *
+ * @param completionTokens the completionTokens value to set.
+ * @param promptTokens the promptTokens value to set.
+ * @param totalTokens the totalTokens value to set.
+ */
+ @Generated
+ private CompletionsUsage(int completionTokens, int promptTokens, int totalTokens) {
+ this.completionTokens = completionTokens;
+ this.promptTokens = promptTokens;
+ this.totalTokens = totalTokens;
+ }
+
+ /**
+ * Get the completionTokens property: The number of tokens generated across all completions emissions.
+ *
+ * @return the completionTokens value.
+ */
+ @Generated
+ public int getCompletionTokens() {
+ return this.completionTokens;
+ }
+
+ /**
+ * Get the promptTokens property: The number of tokens in the provided prompts for the completions request.
+ *
+ * @return the promptTokens value.
+ */
+ @Generated
+ public int getPromptTokens() {
+ return this.promptTokens;
+ }
+
+ /**
+ * Get the totalTokens property: The total number of tokens processed for the completions request and response.
+ *
+ * @return the totalTokens value.
+ */
+ @Generated
+ public int getTotalTokens() {
+ return this.totalTokens;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeIntField("completion_tokens", this.completionTokens);
+ jsonWriter.writeIntField("prompt_tokens", this.promptTokens);
+ jsonWriter.writeIntField("total_tokens", this.totalTokens);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of CompletionsUsage from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of CompletionsUsage if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the CompletionsUsage.
+ */
+ @Generated
+ public static CompletionsUsage fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ int completionTokens = 0;
+ int promptTokens = 0;
+ int totalTokens = 0;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("completion_tokens".equals(fieldName)) {
+ completionTokens = reader.getInt();
+ } else if ("prompt_tokens".equals(fieldName)) {
+ promptTokens = reader.getInt();
+ } else if ("total_tokens".equals(fieldName)) {
+ totalTokens = reader.getInt();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new CompletionsUsage(completionTokens, promptTokens, totalTokens);
+ });
+ }
+}
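Since `total_tokens` is the sum of `prompt_tokens` and `completion_tokens`, per-request usage can be summed to track consumption across a multi-turn conversation. The tiny tally below is an illustrative stand-in for aggregating `CompletionsUsage` values, not SDK code.

```java
// Illustrative sketch: accumulate prompt/completion token counts across
// several chat-completions calls and derive the running total.
final class UsageTally {
    private int promptTokens;
    private int completionTokens;

    // Record the usage reported by one response.
    void add(int prompt, int completion) {
        this.promptTokens += prompt;
        this.completionTokens += completion;
    }

    int totalTokens() {
        // Mirrors the invariant total_tokens = prompt_tokens + completion_tokens.
        return promptTokens + completionTokens;
    }
}
```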
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingEncodingFormat.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingEncodingFormat.java
new file mode 100644
index 000000000000..d767d57b5527
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingEncodingFormat.java
@@ -0,0 +1,82 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import java.util.Collection;
+
+/**
+ * The format of the embeddings result.
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ */
+public final class EmbeddingEncodingFormat extends ExpandableStringEnum<EmbeddingEncodingFormat> {
+
+ /**
+ * Base64.
+ */
+ @Generated
+ public static final EmbeddingEncodingFormat BASE64 = fromString("base64");
+
+ /**
+ * Binary.
+ */
+ @Generated
+ public static final EmbeddingEncodingFormat BINARY = fromString("binary");
+
+ /**
+ * Floating point.
+ */
+ @Generated
+ public static final EmbeddingEncodingFormat FLOAT = fromString("float");
+
+ /**
+ * Signed 8-bit integer.
+ */
+ @Generated
+ public static final EmbeddingEncodingFormat INT8 = fromString("int8");
+
+ /**
+     * Packed unsigned binary (ubinary).
+ */
+ @Generated
+ public static final EmbeddingEncodingFormat UBINARY = fromString("ubinary");
+
+ /**
+ * Unsigned 8-bit integer.
+ */
+ @Generated
+ public static final EmbeddingEncodingFormat UINT8 = fromString("uint8");
+
+ /**
+ * Creates a new instance of EmbeddingEncodingFormat value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public EmbeddingEncodingFormat() {
+ }
+
+ /**
+ * Creates or finds a EmbeddingEncodingFormat from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding EmbeddingEncodingFormat.
+ */
+ @Generated
+ public static EmbeddingEncodingFormat fromString(String name) {
+ return fromString(name, EmbeddingEncodingFormat.class);
+ }
+
+ /**
+ * Gets known EmbeddingEncodingFormat values.
+ *
+ * @return known EmbeddingEncodingFormat values.
+ */
+ @Generated
+    public static Collection<EmbeddingEncodingFormat> values() {
+ return values(EmbeddingEncodingFormat.class);
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingInput.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingInput.java
new file mode 100644
index 000000000000..33e29df69dac
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingInput.java
@@ -0,0 +1,119 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * Represents an image with optional text.
+ */
+@Fluent
+public final class EmbeddingInput implements JsonSerializable<EmbeddingInput> {
+
+ /*
+ * The input image, in PNG format.
+ */
+ @Generated
+ private final String image;
+
+ /*
+ * Optional. The text input to feed into the model (like DINO, CLIP).
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ */
+ @Generated
+ private String text;
+
+ /**
+ * Creates an instance of EmbeddingInput class.
+ *
+ * @param image the image value to set.
+ */
+ @Generated
+ public EmbeddingInput(String image) {
+ this.image = image;
+ }
+
+ /**
+ * Get the image property: The input image, in PNG format.
+ *
+ * @return the image value.
+ */
+ @Generated
+ public String getImage() {
+ return this.image;
+ }
+
+ /**
+ * Get the text property: Optional. The text input to feed into the model (like DINO, CLIP).
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @return the text value.
+ */
+ @Generated
+ public String getText() {
+ return this.text;
+ }
+
+ /**
+ * Set the text property: Optional. The text input to feed into the model (like DINO, CLIP).
+ * Returns a 422 error if the model doesn't support the value or parameter.
+ *
+ * @param text the text value to set.
+ * @return the EmbeddingInput object itself.
+ */
+ @Generated
+ public EmbeddingInput setText(String text) {
+ this.text = text;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("image", this.image);
+ jsonWriter.writeStringField("text", this.text);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of EmbeddingInput from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of EmbeddingInput if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the EmbeddingInput.
+ */
+ @Generated
+ public static EmbeddingInput fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String image = null;
+ String text = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("image".equals(fieldName)) {
+ image = reader.getString();
+ } else if ("text".equals(fieldName)) {
+ text = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ EmbeddingInput deserializedEmbeddingInput = new EmbeddingInput(image);
+ deserializedEmbeddingInput.text = text;
+ return deserializedEmbeddingInput;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingInputType.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingInputType.java
new file mode 100644
index 000000000000..2fbe119da216
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingInputType.java
@@ -0,0 +1,63 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import java.util.Collection;
+
+/**
+ * Represents the input types used for embedding search.
+ */
+public final class EmbeddingInputType extends ExpandableStringEnum<EmbeddingInputType> {
+
+ /**
+     * Indicates the input is general text.
+ */
+ @Generated
+ public static final EmbeddingInputType TEXT = fromString("text");
+
+ /**
+     * Indicates the input is a search query.
+ */
+ @Generated
+ public static final EmbeddingInputType QUERY = fromString("query");
+
+ /**
+     * Indicates the input is a document to be searched or retrieved.
+ */
+ @Generated
+ public static final EmbeddingInputType DOCUMENT = fromString("document");
+
+ /**
+ * Creates a new instance of EmbeddingInputType value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public EmbeddingInputType() {
+ }
+
+ /**
+ * Creates or finds a EmbeddingInputType from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding EmbeddingInputType.
+ */
+ @Generated
+ public static EmbeddingInputType fromString(String name) {
+ return fromString(name, EmbeddingInputType.class);
+ }
+
+ /**
+ * Gets known EmbeddingInputType values.
+ *
+ * @return known EmbeddingInputType values.
+ */
+ @Generated
+    public static Collection<EmbeddingInputType> values() {
+ return values(EmbeddingInputType.class);
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingItem.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingItem.java
new file mode 100644
index 000000000000..ace975de636b
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingItem.java
@@ -0,0 +1,120 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.core.util.BinaryData;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.List;
+import com.azure.core.util.serializer.TypeReference;
+
+/**
+ * Representation of a single embeddings relatedness comparison.
+ */
+@Immutable
+public final class EmbeddingItem implements JsonSerializable<EmbeddingItem> {
+
+ /*
+ * List of embedding values for the input prompt. These represent a measurement of the
+ * vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
+ */
+ @Generated
+ private final BinaryData embedding;
+
+ /*
+ * Index of the prompt to which the EmbeddingItem corresponds.
+ */
+ @Generated
+ private final int index;
+
+ /**
+ * Creates an instance of EmbeddingItem class.
+ *
+ * @param embedding the embedding value to set.
+ * @param index the index value to set.
+ */
+ @Generated
+ private EmbeddingItem(BinaryData embedding, int index) {
+ this.embedding = embedding;
+ this.index = index;
+ }
+
+ /**
+ * Get the embedding property: List of embedding values for the input prompt. These represent a measurement of the
+ * vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
+ *
+ * @return the embedding value.
+ */
+ @Generated
+ public BinaryData getEmbedding() {
+ return this.embedding;
+ }
+
+ /**
+ * Get the embedding property: List of embedding values for the input prompt. These represent a measurement of the
+ * vector-based relatedness of the provided input. Or a base64 encoded string of the embedding vector.
+ *
+ * @return the embeddings as a list of floats.
+ */
+    public List<Float> getEmbeddingList() {
+        return this.embedding.toObject(new TypeReference<List<Float>>() {
+ });
+ }
+
+ /**
+ * Get the index property: Index of the prompt to which the EmbeddingItem corresponds.
+ *
+ * @return the index value.
+ */
+ @Generated
+ public int getIndex() {
+ return this.index;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeUntypedField("embedding", this.embedding.toObject(Object.class));
+ jsonWriter.writeIntField("index", this.index);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of EmbeddingItem from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of EmbeddingItem if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the EmbeddingItem.
+ */
+ @Generated
+ public static EmbeddingItem fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ BinaryData embedding = null;
+ int index = 0;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("embedding".equals(fieldName)) {
+ embedding = reader.getNullable(nonNullReader -> BinaryData.fromObject(nonNullReader.readUntyped()));
+ } else if ("index".equals(fieldName)) {
+ index = reader.getInt();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new EmbeddingItem(embedding, index);
+ });
+ }
+}
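When the base64 encoding format is requested, `getEmbedding` holds a base64 string rather than a JSON array, so `getEmbeddingList` is not applicable and the caller must decode the bytes. The standalone sketch below assumes the service packs little-endian IEEE-754 float32 values, which is the common convention but should be verified against the model provider's documentation; the class and method names are illustrative.

```java
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Base64;

public class EmbeddingDecode {
    // Decode a base64-encoded embedding into floats. Assumption: the payload
    // is a sequence of little-endian IEEE-754 float32 values.
    static float[] decodeBase64Embedding(String base64) {
        byte[] raw = Base64.getDecoder().decode(base64);
        ByteBuffer buf = ByteBuffer.wrap(raw).order(ByteOrder.LITTLE_ENDIAN);
        float[] values = new float[raw.length / Float.BYTES];
        for (int i = 0; i < values.length; i++) {
            values[i] = buf.getFloat();
        }
        return values;
    }

    // Inverse helper, used here only to exercise the round trip.
    static String encodeFloats(float[] values) {
        ByteBuffer buf = ByteBuffer.allocate(values.length * Float.BYTES)
            .order(ByteOrder.LITTLE_ENDIAN);
        for (float v : values) {
            buf.putFloat(v);
        }
        return Base64.getEncoder().encodeToString(buf.array());
    }
}
```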
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingsResult.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingsResult.java
new file mode 100644
index 000000000000..486fb8b128ab
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingsResult.java
@@ -0,0 +1,129 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Representation of the response data from an embeddings request.
+ * Embeddings measure the relatedness of text strings and are commonly used for search, clustering,
+ * recommendations, and other similar scenarios.
+ */
+@Immutable
+public final class EmbeddingsResult implements JsonSerializable<EmbeddingsResult> {
+
+ /*
+ * Embedding values for the prompts submitted in the request.
+ */
+ @Generated
+ private final List<EmbeddingItem> data;
+
+ /*
+ * Usage counts for tokens input using the embeddings API.
+ */
+ @Generated
+ private final EmbeddingsUsage usage;
+
+ /*
+ * The model ID used to generate this result.
+ */
+ @Generated
+ private final String model;
+
+ /**
+ * Creates an instance of EmbeddingsResult class.
+ *
+ * @param data the data value to set.
+ * @param usage the usage value to set.
+ * @param model the model value to set.
+ */
+ @Generated
+ private EmbeddingsResult(List<EmbeddingItem> data, EmbeddingsUsage usage, String model) {
+ this.data = data;
+ this.usage = usage;
+ this.model = model;
+ }
+
+ /**
+ * Get the data property: Embedding values for the prompts submitted in the request.
+ *
+ * @return the data value.
+ */
+ @Generated
+ public List<EmbeddingItem> getData() {
+ return this.data;
+ }
+
+ /**
+ * Get the usage property: Usage counts for tokens input using the embeddings API.
+ *
+ * @return the usage value.
+ */
+ @Generated
+ public EmbeddingsUsage getUsage() {
+ return this.usage;
+ }
+
+ /**
+ * Get the model property: The model ID used to generate this result.
+ *
+ * @return the model value.
+ */
+ @Generated
+ public String getModel() {
+ return this.model;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeArrayField("data", this.data, (writer, element) -> writer.writeJson(element));
+ jsonWriter.writeJsonField("usage", this.usage);
+ jsonWriter.writeStringField("model", this.model);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of EmbeddingsResult from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of EmbeddingsResult if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the EmbeddingsResult.
+ */
+ @Generated
+ public static EmbeddingsResult fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ List<EmbeddingItem> data = null;
+ EmbeddingsUsage usage = null;
+ String model = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("data".equals(fieldName)) {
+ data = reader.readArray(reader1 -> EmbeddingItem.fromJson(reader1));
+ } else if ("usage".equals(fieldName)) {
+ usage = EmbeddingsUsage.fromJson(reader);
+ } else if ("model".equals(fieldName)) {
+ model = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new EmbeddingsResult(data, usage, model);
+ });
+ }
+}
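As the class javadoc notes, embeddings measure the relatedness of text strings. A common way to compare two vectors returned in an `EmbeddingsResult` is cosine similarity; a minimal JDK-only sketch (the vector values in `main` are hypothetical, not produced by the service):

```java
/** Minimal cosine-similarity helper for comparing two embedding vectors. */
public final class CosineSimilarity {

    /** Returns cos(theta) between a and b; 1.0 means identical direction. */
    public static double cosine(double[] a, double[] b) {
        if (a.length != b.length) {
            throw new IllegalArgumentException("vectors must have the same dimension");
        }
        double dot = 0.0, normA = 0.0, normB = 0.0;
        for (int i = 0; i < a.length; i++) {
            dot += a[i] * b[i];
            normA += a[i] * a[i];
            normB += b[i] * b[i];
        }
        return dot / (Math.sqrt(normA) * Math.sqrt(normB));
    }

    public static void main(String[] args) {
        double[] v1 = {0.1, 0.2, 0.3}; // hypothetical embedding values
        double[] v2 = {0.1, 0.2, 0.3};
        System.out.println(cosine(v1, v2));
    }
}
```

In practice the vectors would come from `EmbeddingItem.getEmbedding()` after converting the `BinaryData` payload to a numeric list.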
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingsUsage.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingsUsage.java
new file mode 100644
index 000000000000..fad43a5295e7
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/EmbeddingsUsage.java
@@ -0,0 +1,106 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * Measurement of the amount of tokens used in this request and response.
+ */
+@Immutable
+public final class EmbeddingsUsage implements JsonSerializable<EmbeddingsUsage> {
+
+ /*
+ * Number of tokens in the request.
+ */
+ @Generated
+ private final int promptTokens;
+
+ /*
+ * Total number of tokens transacted in this request/response. Should equal the
+ * number of tokens in the request.
+ */
+ @Generated
+ private final int totalTokens;
+
+ /**
+ * Creates an instance of EmbeddingsUsage class.
+ *
+ * @param promptTokens the promptTokens value to set.
+ * @param totalTokens the totalTokens value to set.
+ */
+ @Generated
+ private EmbeddingsUsage(int promptTokens, int totalTokens) {
+ this.promptTokens = promptTokens;
+ this.totalTokens = totalTokens;
+ }
+
+ /**
+ * Get the promptTokens property: Number of tokens in the request.
+ *
+ * @return the promptTokens value.
+ */
+ @Generated
+ public int getPromptTokens() {
+ return this.promptTokens;
+ }
+
+ /**
+ * Get the totalTokens property: Total number of tokens transacted in this request/response. Should equal the
+ * number of tokens in the request.
+ *
+ * @return the totalTokens value.
+ */
+ @Generated
+ public int getTotalTokens() {
+ return this.totalTokens;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeIntField("prompt_tokens", this.promptTokens);
+ jsonWriter.writeIntField("total_tokens", this.totalTokens);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of EmbeddingsUsage from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of EmbeddingsUsage if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the EmbeddingsUsage.
+ */
+ @Generated
+ public static EmbeddingsUsage fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ int promptTokens = 0;
+ int totalTokens = 0;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("prompt_tokens".equals(fieldName)) {
+ promptTokens = reader.getInt();
+ } else if ("total_tokens".equals(fieldName)) {
+ totalTokens = reader.getInt();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new EmbeddingsUsage(promptTokens, totalTokens);
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ExtraParameters.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ExtraParameters.java
new file mode 100644
index 000000000000..721102625576
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ExtraParameters.java
@@ -0,0 +1,64 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import java.util.Collection;
+
+/**
+ * Controls what happens if extra parameters, undefined by the REST API, are passed in the JSON request payload.
+ */
+public final class ExtraParameters extends ExpandableStringEnum<ExtraParameters> {
+
+ /**
+ * The service will error if it detects extra parameters in the request payload. This is the service default.
+ */
+ @Generated
+ public static final ExtraParameters ERROR = fromString("error");
+
+ /**
+ * The service will ignore (drop) extra parameters in the request payload. It will only pass the known parameters to
+ * the back-end AI model.
+ */
+ @Generated
+ public static final ExtraParameters DROP = fromString("drop");
+
+ /**
+ * The service will pass extra parameters to the back-end AI model.
+ */
+ @Generated
+ public static final ExtraParameters PASS_THROUGH = fromString("pass-through");
+
+ /**
+ * Creates a new instance of ExtraParameters value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public ExtraParameters() {
+ }
+
+ /**
+ * Creates or finds an ExtraParameters from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding ExtraParameters.
+ */
+ @Generated
+ public static ExtraParameters fromString(String name) {
+ return fromString(name, ExtraParameters.class);
+ }
+
+ /**
+ * Gets known ExtraParameters values.
+ *
+ * @return known ExtraParameters values.
+ */
+ @Generated
+ public static Collection<ExtraParameters> values() {
+ return values(ExtraParameters.class);
+ }
+}
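`ExtraParameters` extends azure-core's `ExpandableStringEnum`, which caches instances by name so that values the SDK has never seen still round-trip instead of failing deserialization. The caching idea can be sketched in plain Java (an illustrative stand-in, not azure-core's actual implementation):

```java
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/** Sketch of an expandable string "enum": known constants plus open-ended values. */
public final class ExtraParamsSketch {

    // Cache must be initialized before the constants below, which populate it.
    private static final Map<String, ExtraParamsSketch> VALUES = new ConcurrentHashMap<>();

    public static final ExtraParamsSketch ERROR = fromString("error");
    public static final ExtraParamsSketch DROP = fromString("drop");
    public static final ExtraParamsSketch PASS_THROUGH = fromString("pass-through");

    private final String name;

    private ExtraParamsSketch(String name) {
        this.name = name;
    }

    /** Returns the cached instance for this name, creating one if unseen. */
    public static ExtraParamsSketch fromString(String name) {
        return VALUES.computeIfAbsent(name, ExtraParamsSketch::new);
    }

    @Override
    public String toString() {
        return name;
    }
}
```

Because lookups are cached, `fromString("drop") == DROP` holds, while a hypothetical future service value such as `"warn"` still produces a usable instance.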
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/FunctionCall.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/FunctionCall.java
new file mode 100644
index 000000000000..34b65cc42fb1
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/FunctionCall.java
@@ -0,0 +1,110 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * The name and arguments of a function that should be called, as generated by the model.
+ */
+@Immutable
+public final class FunctionCall implements JsonSerializable<FunctionCall> {
+
+ /*
+ * The name of the function to call.
+ */
+ @Generated
+ private final String name;
+
+ /*
+ * The arguments to call the function with, as generated by the model in JSON format.
+ * Note that the model does not always generate valid JSON, and may hallucinate parameters
+ * not defined by your function schema. Validate the arguments in your code before calling
+ * your function.
+ */
+ @Generated
+ private final String arguments;
+
+ /**
+ * Creates an instance of FunctionCall class.
+ *
+ * @param name the name value to set.
+ * @param arguments the arguments value to set.
+ */
+ @Generated
+ public FunctionCall(String name, String arguments) {
+ this.name = name;
+ this.arguments = arguments;
+ }
+
+ /**
+ * Get the name property: The name of the function to call.
+ *
+ * @return the name value.
+ */
+ @Generated
+ public String getName() {
+ return this.name;
+ }
+
+ /**
+ * Get the arguments property: The arguments to call the function with, as generated by the model in JSON format.
+ * Note that the model does not always generate valid JSON, and may hallucinate parameters
+ * not defined by your function schema. Validate the arguments in your code before calling
+ * your function.
+ *
+ * @return the arguments value.
+ */
+ @Generated
+ public String getArguments() {
+ return this.arguments;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("name", this.name);
+ jsonWriter.writeStringField("arguments", this.arguments);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of FunctionCall from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of FunctionCall if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the FunctionCall.
+ */
+ @Generated
+ public static FunctionCall fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String name = null;
+ String arguments = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("name".equals(fieldName)) {
+ name = reader.getString();
+ } else if ("arguments".equals(fieldName)) {
+ arguments = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new FunctionCall(name, arguments);
+ });
+ }
+}
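The `arguments` javadoc above warns that the model does not always generate valid JSON. A defensive caller should validate before dispatching the call; a naive JDK-only sketch (a real application would parse with a proper JSON library such as azure-json or Jackson — the shallow shape check here is only a placeholder):

```java
/** Naive pre-check of model-generated function-call arguments before dispatch. */
public final class ArgumentGuard {

    /**
     * Returns true only if the string superficially looks like a single JSON object.
     * This is a shallow check; full validation requires an actual JSON parser.
     */
    public static boolean looksLikeJsonObject(String arguments) {
        if (arguments == null) {
            return false;
        }
        String trimmed = arguments.trim();
        return trimmed.startsWith("{") && trimmed.endsWith("}");
    }
}
```

A caller would run this (and a real parse) on `FunctionCall.getArguments()` before invoking the named function, and return an error message to the model on failure rather than throwing.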
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/FunctionDefinition.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/FunctionDefinition.java
new file mode 100644
index 000000000000..20755d99d839
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/FunctionDefinition.java
@@ -0,0 +1,150 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Fluent;
+import com.azure.core.annotation.Generated;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import com.azure.core.util.BinaryData;
+
+/**
+ * The definition of a caller-specified function that chat completions may invoke in response to matching user input.
+ */
+@Fluent
+public final class FunctionDefinition implements JsonSerializable<FunctionDefinition> {
+
+ /*
+ * The name of the function to be called.
+ */
+ @Generated
+ private final String name;
+
+ /*
+ * A description of what the function does. The model will use this description when selecting the function and
+ * interpreting its parameters.
+ */
+ @Generated
+ private String description;
+
+ /*
+ * The parameters the function accepts, described as a JSON Schema object.
+ */
+ private BinaryData parameters;
+
+ /**
+ * Creates an instance of FunctionDefinition class.
+ *
+ * @param name the name value to set.
+ */
+ @Generated
+ public FunctionDefinition(String name) {
+ this.name = name;
+ }
+
+ /**
+ * Get the name property: The name of the function to be called.
+ *
+ * @return the name value.
+ */
+ @Generated
+ public String getName() {
+ return this.name;
+ }
+
+ /**
+ * Get the description property: A description of what the function does. The model will use this description when
+ * selecting the function and
+ * interpreting its parameters.
+ *
+ * @return the description value.
+ */
+ @Generated
+ public String getDescription() {
+ return this.description;
+ }
+
+ /**
+ * Set the description property: A description of what the function does. The model will use this description when
+ * selecting the function and
+ * interpreting its parameters.
+ *
+ * @param description the description value to set.
+ * @return the FunctionDefinition object itself.
+ */
+ @Generated
+ public FunctionDefinition setDescription(String description) {
+ this.description = description;
+ return this;
+ }
+
+ /**
+ * Get the parameters property: The parameters the function accepts, described as a JSON Schema object.
+ *
+ * @return the parameters value.
+ */
+ public BinaryData getParameters() {
+ return this.parameters;
+ }
+
+ /**
+ * Set the parameters property: The parameters the function accepts, described as a JSON Schema object.
+ *
+ * @param parameters the parameters value to set.
+ * @return the FunctionDefinition object itself.
+ */
+ public FunctionDefinition setParameters(BinaryData parameters) {
+ this.parameters = parameters;
+ return this;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("name", this.name);
+ jsonWriter.writeStringField("description", this.description);
+ if (this.parameters != null) {
+ jsonWriter.writeRawField("parameters", this.parameters.toString());
+ }
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of FunctionDefinition from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of FunctionDefinition if the JsonReader was pointing to an instance of it, or null if it was
+ * pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the FunctionDefinition.
+ */
+ public static FunctionDefinition fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String name = null;
+ String description = null;
+ BinaryData parameters = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("name".equals(fieldName)) {
+ name = reader.getString();
+ } else if ("description".equals(fieldName)) {
+ description = reader.getString();
+ } else if ("parameters".equals(fieldName)) {
+ parameters = BinaryData.fromObject(reader.readUntyped());
+ } else {
+ reader.skipChildren();
+ }
+ }
+ FunctionDefinition deserializedFunctionDefinition = new FunctionDefinition(name);
+ deserializedFunctionDefinition.description = description;
+ deserializedFunctionDefinition.parameters = parameters;
+ return deserializedFunctionDefinition;
+ });
+ }
+}
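`setParameters` expects a JSON Schema object describing the function's inputs. A sketch of what such a schema might look like for a hypothetical `get_weather` function (the function name and fields are invented for illustration; with the SDK you would wrap the string via `BinaryData.fromString` before passing it to `setParameters`):

```java
/** Builds a sample JSON Schema string for a hypothetical get_weather function. */
public final class WeatherSchema {

    public static String parametersSchema() {
        return "{"
            + "\"type\": \"object\","
            + "\"properties\": {"
            + "\"location\": {\"type\": \"string\", \"description\": \"City name\"},"
            + "\"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]}"
            + "},"
            + "\"required\": [\"location\"]"
            + "}";
    }

    public static void main(String[] args) {
        System.out.println(parametersSchema());
    }
}
```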
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ModelInfo.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ModelInfo.java
new file mode 100644
index 000000000000..235a2659960a
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ModelInfo.java
@@ -0,0 +1,126 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * Represents some basic information about the AI model.
+ */
+@Immutable
+public final class ModelInfo implements JsonSerializable<ModelInfo> {
+
+ /*
+ * The name of the AI model. For example: `Phi21`
+ */
+ @Generated
+ private final String modelName;
+
+ /*
+ * The type of the AI model. A unique identifier for the profile.
+ */
+ @Generated
+ private final ModelType modelType;
+
+ /*
+ * The model provider name. For example: `Microsoft Research`
+ */
+ @Generated
+ private final String modelProviderName;
+
+ /**
+ * Creates an instance of ModelInfo class.
+ *
+ * @param modelName the modelName value to set.
+ * @param modelType the modelType value to set.
+ * @param modelProviderName the modelProviderName value to set.
+ */
+ @Generated
+ private ModelInfo(String modelName, ModelType modelType, String modelProviderName) {
+ this.modelName = modelName;
+ this.modelType = modelType;
+ this.modelProviderName = modelProviderName;
+ }
+
+ /**
+ * Get the modelName property: The name of the AI model. For example: `Phi21`.
+ *
+ * @return the modelName value.
+ */
+ @Generated
+ public String getModelName() {
+ return this.modelName;
+ }
+
+ /**
+ * Get the modelType property: The type of the AI model. A unique identifier for the profile.
+ *
+ * @return the modelType value.
+ */
+ @Generated
+ public ModelType getModelType() {
+ return this.modelType;
+ }
+
+ /**
+ * Get the modelProviderName property: The model provider name. For example: `Microsoft Research`.
+ *
+ * @return the modelProviderName value.
+ */
+ @Generated
+ public String getModelProviderName() {
+ return this.modelProviderName;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("model_name", this.modelName);
+ jsonWriter.writeStringField("model_type", this.modelType == null ? null : this.modelType.toString());
+ jsonWriter.writeStringField("model_provider_name", this.modelProviderName);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of ModelInfo from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of ModelInfo if the JsonReader was pointing to an instance of it, or null if it was pointing
+ * to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the ModelInfo.
+ */
+ @Generated
+ public static ModelInfo fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String modelName = null;
+ ModelType modelType = null;
+ String modelProviderName = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("model_name".equals(fieldName)) {
+ modelName = reader.getString();
+ } else if ("model_type".equals(fieldName)) {
+ modelType = ModelType.fromString(reader.getString());
+ } else if ("model_provider_name".equals(fieldName)) {
+ modelProviderName = reader.getString();
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new ModelInfo(modelName, modelType, modelProviderName);
+ });
+ }
+}
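`ModelInfo`, like the other models in this diff, maps snake_case wire names such as `model_provider_name` onto camelCase Java accessors. The naming convention can be sketched as follows (illustrative only; the generated code hard-codes each field name rather than converting at runtime):

```java
/** Converts a camelCase Java property name to its snake_case wire name. */
public final class WireNames {

    public static String toSnakeCase(String camel) {
        StringBuilder out = new StringBuilder();
        for (char c : camel.toCharArray()) {
            if (Character.isUpperCase(c)) {
                out.append('_').append(Character.toLowerCase(c));
            } else {
                out.append(c);
            }
        }
        return out.toString();
    }
}
```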
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ModelType.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ModelType.java
new file mode 100644
index 000000000000..be9e52d19d3a
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/ModelType.java
@@ -0,0 +1,81 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.util.ExpandableStringEnum;
+import java.util.Collection;
+
+/**
+ * The type of AI model.
+ */
+public final class ModelType extends ExpandableStringEnum<ModelType> {
+
+ /**
+ * Embeddings.
+ */
+ @Generated
+ public static final ModelType EMBEDDINGS = fromString("embeddings");
+
+ /**
+ * Image generation.
+ */
+ @Generated
+ public static final ModelType IMAGE_GENERATION = fromString("image_generation");
+
+ /**
+ * Text generation.
+ */
+ @Generated
+ public static final ModelType TEXT_GENERATION = fromString("text_generation");
+
+ /**
+ * Image embeddings.
+ */
+ @Generated
+ public static final ModelType IMAGE_EMBEDDINGS = fromString("image_embeddings");
+
+ /**
+ * Audio generation.
+ */
+ @Generated
+ public static final ModelType AUDIO_GENERATION = fromString("audio_generation");
+
+ /**
+ * Chat completions.
+ */
+ @Generated
+ public static final ModelType CHAT = fromString("chat");
+
+ /**
+ * Creates a new instance of ModelType value.
+ *
+ * @deprecated Use the {@link #fromString(String)} factory method.
+ */
+ @Generated
+ @Deprecated
+ public ModelType() {
+ }
+
+ /**
+ * Creates or finds a ModelType from its string representation.
+ *
+ * @param name a name to look for.
+ * @return the corresponding ModelType.
+ */
+ @Generated
+ public static ModelType fromString(String name) {
+ return fromString(name, ModelType.class);
+ }
+
+ /**
+ * Gets known ModelType values.
+ *
+ * @return known ModelType values.
+ */
+ @Generated
+ public static Collection<ModelType> values() {
+ return values(ModelType.class);
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatChoiceUpdate.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatChoiceUpdate.java
new file mode 100644
index 000000000000..369541df75b6
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatChoiceUpdate.java
@@ -0,0 +1,130 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * Represents an update to a single prompt completion when the service is streaming updates
+ * using Server Sent Events (SSE).
+ * Generally, `n` choices are generated per provided prompt with a default value of 1.
+ * Token limits and other settings may limit the number of choices generated.
+ */
+@Immutable
+public final class StreamingChatChoiceUpdate implements JsonSerializable<StreamingChatChoiceUpdate> {
+
+ /*
+ * The ordered index associated with this chat completions choice.
+ */
+ @Generated
+ private final int index;
+
+ /*
+ * The reason that this chat completions choice completed its generation.
+ */
+ @Generated
+ private final CompletionsFinishReason finishReason;
+
+ /*
+ * An update to the chat message for a given chat completions prompt.
+ */
+ @Generated
+ private final StreamingChatResponseMessageUpdate delta;
+
+ /**
+ * Creates an instance of StreamingChatChoiceUpdate class.
+ *
+ * @param index the index value to set.
+ * @param finishReason the finishReason value to set.
+ * @param delta the delta value to set.
+ */
+ @Generated
+ private StreamingChatChoiceUpdate(int index, CompletionsFinishReason finishReason,
+ StreamingChatResponseMessageUpdate delta) {
+ this.index = index;
+ this.finishReason = finishReason;
+ this.delta = delta;
+ }
+
+ /**
+ * Get the index property: The ordered index associated with this chat completions choice.
+ *
+ * @return the index value.
+ */
+ @Generated
+ public int getIndex() {
+ return this.index;
+ }
+
+ /**
+ * Get the finishReason property: The reason that this chat completions choice completed its generation.
+ *
+ * @return the finishReason value.
+ */
+ @Generated
+ public CompletionsFinishReason getFinishReason() {
+ return this.finishReason;
+ }
+
+ /**
+ * Get the delta property: An update to the chat message for a given chat completions prompt.
+ *
+ * @return the delta value.
+ */
+ @Generated
+ public StreamingChatResponseMessageUpdate getDelta() {
+ return this.delta;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeIntField("index", this.index);
+ jsonWriter.writeStringField("finish_reason", this.finishReason == null ? null : this.finishReason.toString());
+ jsonWriter.writeJsonField("delta", this.delta);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of StreamingChatChoiceUpdate from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of StreamingChatChoiceUpdate if the JsonReader was pointing to an instance of it, or null if
+ * it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the StreamingChatChoiceUpdate.
+ */
+ @Generated
+ public static StreamingChatChoiceUpdate fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ int index = 0;
+ CompletionsFinishReason finishReason = null;
+ StreamingChatResponseMessageUpdate delta = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("index".equals(fieldName)) {
+ index = reader.getInt();
+ } else if ("finish_reason".equals(fieldName)) {
+ finishReason = CompletionsFinishReason.fromString(reader.getString());
+ } else if ("delta".equals(fieldName)) {
+ delta = StreamingChatResponseMessageUpdate.fromJson(reader);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new StreamingChatChoiceUpdate(index, finishReason, delta);
+ });
+ }
+}
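Each `StreamingChatChoiceUpdate` carries only a `delta` fragment; a client reconstructs the full message by concatenating the content of successive updates until a non-null `finish_reason` arrives. A JDK-only sketch of that accumulation (the delta strings stand in for what `getDelta()` would expose per update):

```java
import java.util.List;

/** Sketch of accumulating streamed chat content deltas into a full message. */
public final class DeltaAccumulator {

    /** Concatenates non-null content fragments in arrival order. */
    public static String accumulate(List<String> contentDeltas) {
        StringBuilder message = new StringBuilder();
        for (String delta : contentDeltas) {
            if (delta != null) { // role-only updates carry no content
                message.append(delta);
            }
        }
        return message.toString();
    }
}
```

With the real SSE stream, the same loop would run over `StreamingChatCompletionsUpdate.getChoices()` as updates arrive.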
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatCompletionsUpdate.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatCompletionsUpdate.java
new file mode 100644
index 000000000000..1fef5e55a656
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatCompletionsUpdate.java
@@ -0,0 +1,190 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.time.Instant;
+import java.time.OffsetDateTime;
+import java.time.ZoneOffset;
+import java.util.List;
+
+/**
+ * Represents a response update to a chat completions request, when the service is streaming updates
+ * using Server Sent Events (SSE).
+ * Completions support a wide variety of tasks and generate text that continues from or "completes"
+ * provided prompt data.
+ */
+@Immutable
+public final class StreamingChatCompletionsUpdate implements JsonSerializable<StreamingChatCompletionsUpdate> {
+
+ /*
+ * A unique identifier associated with this chat completions response.
+ */
+ @Generated
+ private final String id;
+
+ /*
+ * The first timestamp associated with generation activity for this completions response,
+ * represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
+ */
+ @Generated
+ private final long created;
+
+ /*
+ * The model used for the chat completion.
+ */
+ @Generated
+ private final String model;
+
+ /*
+ * Usage information for tokens processed and generated as part of this completions operation.
+ */
+ @Generated
+ private final CompletionsUsage usage;
+
+ /*
+ * An update to the collection of completion choices associated with this completions response.
+ * Generally, `n` choices are generated per provided prompt with a default value of 1.
+ * Token limits and other settings may limit the number of choices generated.
+ */
+ @Generated
+ private final List<StreamingChatChoiceUpdate> choices;
+
+ /**
+ * Creates an instance of StreamingChatCompletionsUpdate class.
+ *
+ * @param id the id value to set.
+ * @param created the created value to set.
+ * @param model the model value to set.
+ * @param usage the usage value to set.
+ * @param choices the choices value to set.
+ */
+ @Generated
+ private StreamingChatCompletionsUpdate(String id, OffsetDateTime created, String model, CompletionsUsage usage,
+ List<StreamingChatChoiceUpdate> choices) {
+ this.id = id;
+ if (created == null) {
+ this.created = 0L;
+ } else {
+ this.created = created.toEpochSecond();
+ }
+ this.model = model;
+ this.usage = usage;
+ this.choices = choices;
+ }
+
+ /**
+ * Get the id property: A unique identifier associated with this chat completions response.
+ *
+ * @return the id value.
+ */
+ @Generated
+ public String getId() {
+ return this.id;
+ }
+
+ /**
+ * Get the created property: The first timestamp associated with generation activity for this completions response,
+ * represented as seconds since the beginning of the Unix epoch of 00:00 on 1 Jan 1970.
+ *
+ * @return the created value.
+ */
+ @Generated
+ public OffsetDateTime getCreated() {
+ return OffsetDateTime.ofInstant(Instant.ofEpochSecond(this.created), ZoneOffset.UTC);
+ }
+
+ /**
+ * Get the model property: The model used for the chat completion.
+ *
+ * @return the model value.
+ */
+ @Generated
+ public String getModel() {
+ return this.model;
+ }
+
+ /**
+ * Get the usage property: Usage information for tokens processed and generated as part of this completions
+ * operation.
+ *
+ * @return the usage value.
+ */
+ @Generated
+ public CompletionsUsage getUsage() {
+ return this.usage;
+ }
+
+ /**
+ * Get the choices property: An update to the collection of completion choices associated with this completions
+ * response.
+ * Generally, `n` choices are generated per provided prompt with a default value of 1.
+ * Token limits and other settings may limit the number of choices generated.
+ *
+ * @return the choices value.
+ */
+ @Generated
+ public List<StreamingChatChoiceUpdate> getChoices() {
+ return this.choices;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("id", this.id);
+ jsonWriter.writeLongField("created", this.created);
+ jsonWriter.writeStringField("model", this.model);
+ jsonWriter.writeJsonField("usage", this.usage);
+ jsonWriter.writeArrayField("choices", this.choices, (writer, element) -> writer.writeJson(element));
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of StreamingChatCompletionsUpdate from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of StreamingChatCompletionsUpdate if the JsonReader was pointing to an instance of it, or
+ * null if it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the StreamingChatCompletionsUpdate.
+ */
+ @Generated
+ public static StreamingChatCompletionsUpdate fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String id = null;
+ OffsetDateTime created = null;
+ String model = null;
+ CompletionsUsage usage = null;
+ List<StreamingChatChoiceUpdate> choices = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("id".equals(fieldName)) {
+ id = reader.getString();
+ } else if ("created".equals(fieldName)) {
+ created = OffsetDateTime.ofInstant(Instant.ofEpochSecond(reader.getLong()), ZoneOffset.UTC);
+ } else if ("model".equals(fieldName)) {
+ model = reader.getString();
+ } else if ("usage".equals(fieldName)) {
+ usage = CompletionsUsage.fromJson(reader);
+ } else if ("choices".equals(fieldName)) {
+ choices = reader.readArray(reader1 -> StreamingChatChoiceUpdate.fromJson(reader1));
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new StreamingChatCompletionsUpdate(id, created, model, usage, choices);
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatResponseMessageUpdate.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatResponseMessageUpdate.java
new file mode 100644
index 000000000000..fd8be092b3f9
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatResponseMessageUpdate.java
@@ -0,0 +1,123 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * A representation of a chat message update as received in a streaming response.
+ */
+@Immutable
+public final class StreamingChatResponseMessageUpdate implements JsonSerializable<StreamingChatResponseMessageUpdate> {
+
+ /*
+ * The chat role associated with the message. If present, should always be 'assistant'.
+ */
+ @Generated
+ private ChatRole role;
+
+ /*
+ * The content of the message.
+ */
+ @Generated
+ private String content;
+
+ /*
+ * The tool calls that must be resolved and have their outputs appended to subsequent input messages for the chat
+ * completions request to resolve as configured.
+ */
+ @Generated
+ private List<StreamingChatResponseToolCallUpdate> toolCalls;
+
+ /**
+ * Creates an instance of StreamingChatResponseMessageUpdate class.
+ */
+ @Generated
+ private StreamingChatResponseMessageUpdate() {
+ }
+
+ /**
+ * Get the role property: The chat role associated with the message. If present, should always be 'assistant'.
+ *
+ * @return the role value.
+ */
+ @Generated
+ public ChatRole getRole() {
+ return this.role;
+ }
+
+ /**
+ * Get the content property: The content of the message.
+ *
+ * @return the content value.
+ */
+ @Generated
+ public String getContent() {
+ return this.content;
+ }
+
+ /**
+ * Get the toolCalls property: The tool calls that must be resolved and have their outputs appended to subsequent
+ * input messages for the chat
+ * completions request to resolve as configured.
+ *
+ * @return the toolCalls value.
+ */
+ @Generated
+ public List<StreamingChatResponseToolCallUpdate> getToolCalls() {
+ return this.toolCalls;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("role", this.role == null ? null : this.role.toString());
+ jsonWriter.writeStringField("content", this.content);
+ jsonWriter.writeArrayField("tool_calls", this.toolCalls, (writer, element) -> writer.writeJson(element));
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of StreamingChatResponseMessageUpdate from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of StreamingChatResponseMessageUpdate if the JsonReader was pointing to an instance of it, or
+ * null if it was pointing to JSON null.
+ * @throws IOException If an error occurs while reading the StreamingChatResponseMessageUpdate.
+ */
+ @Generated
+ public static StreamingChatResponseMessageUpdate fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ StreamingChatResponseMessageUpdate deserializedStreamingChatResponseMessageUpdate
+ = new StreamingChatResponseMessageUpdate();
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("role".equals(fieldName)) {
+ deserializedStreamingChatResponseMessageUpdate.role = ChatRole.fromString(reader.getString());
+ } else if ("content".equals(fieldName)) {
+ deserializedStreamingChatResponseMessageUpdate.content = reader.getString();
+ } else if ("tool_calls".equals(fieldName)) {
+ List<StreamingChatResponseToolCallUpdate> toolCalls
+ = reader.readArray(reader1 -> StreamingChatResponseToolCallUpdate.fromJson(reader1));
+ deserializedStreamingChatResponseMessageUpdate.toolCalls = toolCalls;
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return deserializedStreamingChatResponseMessageUpdate;
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatResponseToolCallUpdate.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatResponseToolCallUpdate.java
new file mode 100644
index 000000000000..64b3dbfd4c3d
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/StreamingChatResponseToolCallUpdate.java
@@ -0,0 +1,105 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+package com.azure.ai.inference.models;
+
+import com.azure.core.annotation.Generated;
+import com.azure.core.annotation.Immutable;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import java.io.IOException;
+
+/**
+ * An update to the function tool call information requested by the AI model.
+ */
+@Immutable
+public final class StreamingChatResponseToolCallUpdate
+ implements JsonSerializable<StreamingChatResponseToolCallUpdate> {
+
+ /*
+ * The ID of the tool call.
+ */
+ @Generated
+ private final String id;
+
+ /*
+ * Updates to the function call requested by the AI model.
+ */
+ @Generated
+ private final FunctionCall function;
+
+ /**
+ * Creates an instance of StreamingChatResponseToolCallUpdate class.
+ *
+ * @param id the id value to set.
+ * @param function the function value to set.
+ */
+ @Generated
+ private StreamingChatResponseToolCallUpdate(String id, FunctionCall function) {
+ this.id = id;
+ this.function = function;
+ }
+
+ /**
+ * Get the id property: The ID of the tool call.
+ *
+ * @return the id value.
+ */
+ @Generated
+ public String getId() {
+ return this.id;
+ }
+
+ /**
+ * Get the function property: Updates to the function call requested by the AI model.
+ *
+ * @return the function value.
+ */
+ @Generated
+ public FunctionCall getFunction() {
+ return this.function;
+ }
+
+ /**
+ * {@inheritDoc}
+ */
+ @Generated
+ @Override
+ public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+ jsonWriter.writeStartObject();
+ jsonWriter.writeStringField("id", this.id);
+ jsonWriter.writeJsonField("function", this.function);
+ return jsonWriter.writeEndObject();
+ }
+
+ /**
+ * Reads an instance of StreamingChatResponseToolCallUpdate from the JsonReader.
+ *
+ * @param jsonReader The JsonReader being read.
+ * @return An instance of StreamingChatResponseToolCallUpdate if the JsonReader was pointing to an instance of it,
+ * or null if it was pointing to JSON null.
+ * @throws IllegalStateException If the deserialized JSON object was missing any required properties.
+ * @throws IOException If an error occurs while reading the StreamingChatResponseToolCallUpdate.
+ */
+ @Generated
+ public static StreamingChatResponseToolCallUpdate fromJson(JsonReader jsonReader) throws IOException {
+ return jsonReader.readObject(reader -> {
+ String id = null;
+ FunctionCall function = null;
+ while (reader.nextToken() != JsonToken.END_OBJECT) {
+ String fieldName = reader.getFieldName();
+ reader.nextToken();
+ if ("id".equals(fieldName)) {
+ id = reader.getString();
+ } else if ("function".equals(fieldName)) {
+ function = FunctionCall.fromJson(reader);
+ } else {
+ reader.skipChildren();
+ }
+ }
+ return new StreamingChatResponseToolCallUpdate(id, function);
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/package-info.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/package-info.java
new file mode 100644
index 000000000000..ae296ef0748c
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/models/package-info.java
@@ -0,0 +1,7 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+/**
+ * Package containing the data models for Model.
+ */
+package com.azure.ai.inference.models;
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/package-info.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/package-info.java
new file mode 100644
index 000000000000..06b95a667d3e
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/package-info.java
@@ -0,0 +1,7 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+/**
+ * Package containing the classes for Model.
+ */
+package com.azure.ai.inference;
diff --git a/sdk/ai/azure-ai-inference/src/main/java/module-info.java b/sdk/ai/azure-ai-inference/src/main/java/module-info.java
new file mode 100644
index 000000000000..49816749e27a
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/java/module-info.java
@@ -0,0 +1,11 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+module com.azure.ai.inference {
+ requires transitive com.azure.core;
+ exports com.azure.ai.inference;
+ exports com.azure.ai.inference.models;
+ opens com.azure.ai.inference.models to com.azure.core;
+ opens com.azure.ai.inference.implementation.models to com.azure.core;
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/resources/META-INF/azure-ai-inference_apiview_properties.json b/sdk/ai/azure-ai-inference/src/main/resources/META-INF/azure-ai-inference_apiview_properties.json
new file mode 100644
index 000000000000..01ba4716738e
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/resources/META-INF/azure-ai-inference_apiview_properties.json
@@ -0,0 +1,81 @@
+{
+ "flavor": "azure",
+ "CrossLanguageDefinitionId": {
+ "com.azure.ai.inference.ChatCompletionsAsyncClient": "Customizations.Client1",
+ "com.azure.ai.inference.ChatCompletionsAsyncClient.complete": "Customizations.Client1.complete",
+ "com.azure.ai.inference.ChatCompletionsAsyncClient.completeWithResponse": "Customizations.Client1.complete",
+ "com.azure.ai.inference.ChatCompletionsAsyncClient.getModelInfo": "Customizations.Client1.getModelInfo",
+ "com.azure.ai.inference.ChatCompletionsAsyncClient.getModelInfoWithResponse": "Customizations.Client1.getModelInfo",
+ "com.azure.ai.inference.ChatCompletionsClient": "Customizations.Client1",
+ "com.azure.ai.inference.ChatCompletionsClient.complete": "Customizations.Client1.complete",
+ "com.azure.ai.inference.ChatCompletionsClient.completeWithResponse": "Customizations.Client1.complete",
+ "com.azure.ai.inference.ChatCompletionsClient.getModelInfo": "Customizations.Client1.getModelInfo",
+ "com.azure.ai.inference.ChatCompletionsClient.getModelInfoWithResponse": "Customizations.Client1.getModelInfo",
+ "com.azure.ai.inference.ChatCompletionsClientBuilder": "Customizations.Client1",
+ "com.azure.ai.inference.EmbeddingsAsyncClient": "Customizations.Client2",
+ "com.azure.ai.inference.EmbeddingsAsyncClient.embed": "Customizations.Client2.embed",
+ "com.azure.ai.inference.EmbeddingsAsyncClient.embedWithResponse": "Customizations.Client2.embed",
+ "com.azure.ai.inference.EmbeddingsAsyncClient.getModelInfo": "Customizations.Client2.getModelInfo",
+ "com.azure.ai.inference.EmbeddingsAsyncClient.getModelInfoWithResponse": "Customizations.Client2.getModelInfo",
+ "com.azure.ai.inference.EmbeddingsClient": "Customizations.Client2",
+ "com.azure.ai.inference.EmbeddingsClient.embed": "Customizations.Client2.embed",
+ "com.azure.ai.inference.EmbeddingsClient.embedWithResponse": "Customizations.Client2.embed",
+ "com.azure.ai.inference.EmbeddingsClient.getModelInfo": "Customizations.Client2.getModelInfo",
+ "com.azure.ai.inference.EmbeddingsClient.getModelInfoWithResponse": "Customizations.Client2.getModelInfo",
+ "com.azure.ai.inference.EmbeddingsClientBuilder": "Customizations.Client2",
+ "com.azure.ai.inference.ImageEmbeddingsAsyncClient": "Customizations.Client3",
+ "com.azure.ai.inference.ImageEmbeddingsAsyncClient.embed": "Customizations.Client3.embed",
+ "com.azure.ai.inference.ImageEmbeddingsAsyncClient.embedWithResponse": "Customizations.Client3.embed",
+ "com.azure.ai.inference.ImageEmbeddingsAsyncClient.getModelInfo": "Customizations.Client3.getModelInfo",
+ "com.azure.ai.inference.ImageEmbeddingsAsyncClient.getModelInfoWithResponse": "Customizations.Client3.getModelInfo",
+ "com.azure.ai.inference.ImageEmbeddingsClient": "Customizations.Client3",
+ "com.azure.ai.inference.ImageEmbeddingsClient.embed": "Customizations.Client3.embed",
+ "com.azure.ai.inference.ImageEmbeddingsClient.embedWithResponse": "Customizations.Client3.embed",
+ "com.azure.ai.inference.ImageEmbeddingsClient.getModelInfo": "Customizations.Client3.getModelInfo",
+ "com.azure.ai.inference.ImageEmbeddingsClient.getModelInfoWithResponse": "Customizations.Client3.getModelInfo",
+ "com.azure.ai.inference.ImageEmbeddingsClientBuilder": "Customizations.Client3",
+ "com.azure.ai.inference.implementation.models.CompleteOptions": "null",
+ "com.azure.ai.inference.implementation.models.CompleteRequest": "Customizations.complete.Request.anonymous",
+ "com.azure.ai.inference.implementation.models.EmbedRequest": "Customizations.embed.Request.anonymous",
+ "com.azure.ai.inference.implementation.models.EmbedRequest1": "Customizations.embed.Request.anonymous",
+ "com.azure.ai.inference.models.ChatChoice": "AI.Model.ChatChoice",
+ "com.azure.ai.inference.models.ChatCompletions": "AI.Model.ChatCompletions",
+ "com.azure.ai.inference.models.ChatCompletionsFunctionToolSelection": "AI.Model.ChatCompletionsFunctionToolSelection",
+ "com.azure.ai.inference.models.ChatCompletionsNamedToolSelection": "AI.Model.ChatCompletionsNamedToolSelection",
+ "com.azure.ai.inference.models.ChatCompletionsResponseFormat": "AI.Model.ChatCompletionsResponseFormat",
+ "com.azure.ai.inference.models.ChatCompletionsResponseFormatJson": "AI.Model.ChatCompletionsResponseFormatJSON",
+ "com.azure.ai.inference.models.ChatCompletionsResponseFormatText": "AI.Model.ChatCompletionsResponseFormatText",
+ "com.azure.ai.inference.models.ChatCompletionsToolCall": "AI.Model.ChatCompletionsToolCall",
+ "com.azure.ai.inference.models.ChatCompletionsToolDefinition": "AI.Model.ChatCompletionsToolDefinition",
+ "com.azure.ai.inference.models.ChatCompletionsToolSelectionPreset": "AI.Model.ChatCompletionsToolSelectionPreset",
+ "com.azure.ai.inference.models.ChatMessageContentItem": "AI.Model.ChatMessageContentItem",
+ "com.azure.ai.inference.models.ChatMessageImageContentItem": "AI.Model.ChatMessageImageContentItem",
+ "com.azure.ai.inference.models.ChatMessageImageDetailLevel": "AI.Model.ChatMessageImageDetailLevel",
+ "com.azure.ai.inference.models.ChatMessageImageUrl": "AI.Model.ChatMessageImageUrl",
+ "com.azure.ai.inference.models.ChatMessageTextContentItem": "AI.Model.ChatMessageTextContentItem",
+ "com.azure.ai.inference.models.ChatRequestAssistantMessage": "AI.Model.ChatRequestAssistantMessage",
+ "com.azure.ai.inference.models.ChatRequestMessage": "AI.Model.ChatRequestMessage",
+ "com.azure.ai.inference.models.ChatRequestSystemMessage": "AI.Model.ChatRequestSystemMessage",
+ "com.azure.ai.inference.models.ChatRequestToolMessage": "AI.Model.ChatRequestToolMessage",
+ "com.azure.ai.inference.models.ChatRequestUserMessage": "AI.Model.ChatRequestUserMessage",
+ "com.azure.ai.inference.models.ChatResponseMessage": "AI.Model.ChatResponseMessage",
+ "com.azure.ai.inference.models.ChatRole": "AI.Model.ChatRole",
+ "com.azure.ai.inference.models.CompletionsFinishReason": "AI.Model.CompletionsFinishReason",
+ "com.azure.ai.inference.models.CompletionsUsage": "AI.Model.CompletionsUsage",
+ "com.azure.ai.inference.models.EmbeddingEncodingFormat": "AI.Model.EmbeddingEncodingFormat",
+ "com.azure.ai.inference.models.EmbeddingInput": "AI.Model.EmbeddingInput",
+ "com.azure.ai.inference.models.EmbeddingInputType": "AI.Model.EmbeddingInputType",
+ "com.azure.ai.inference.models.EmbeddingItem": "AI.Model.EmbeddingItem",
+ "com.azure.ai.inference.models.EmbeddingsResult": "AI.Model.EmbeddingsResult",
+ "com.azure.ai.inference.models.EmbeddingsUsage": "AI.Model.EmbeddingsUsage",
+ "com.azure.ai.inference.models.ExtraParameters": "AI.Model.ExtraParameters",
+ "com.azure.ai.inference.models.FunctionCall": "AI.Model.FunctionCall",
+ "com.azure.ai.inference.models.FunctionDefinition": "AI.Model.FunctionDefinition",
+ "com.azure.ai.inference.models.ModelInfo": "AI.Model.ModelInfo",
+ "com.azure.ai.inference.models.ModelType": "AI.Model.ModelType",
+ "com.azure.ai.inference.models.StreamingChatChoiceUpdate": "AI.Model.StreamingChatChoiceUpdate",
+ "com.azure.ai.inference.models.StreamingChatCompletionsUpdate": "AI.Model.StreamingChatCompletionsUpdate",
+ "com.azure.ai.inference.models.StreamingChatResponseMessageUpdate": "AI.Model.StreamingChatResponseMessageUpdate",
+ "com.azure.ai.inference.models.StreamingChatResponseToolCallUpdate": "AI.Model.StreamingChatResponseToolCallUpdate"
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/main/resources/azure-ai-inference.properties b/sdk/ai/azure-ai-inference/src/main/resources/azure-ai-inference.properties
new file mode 100644
index 000000000000..ca812989b4f2
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/main/resources/azure-ai-inference.properties
@@ -0,0 +1,2 @@
+name=${project.artifactId}
+version=${project.version}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/ReadmeSamples.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/ReadmeSamples.java
new file mode 100644
index 000000000000..8d7c2d808fff
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/ReadmeSamples.java
@@ -0,0 +1,165 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference;
+
+import com.azure.ai.inference.models.ChatChoice;
+import com.azure.ai.inference.models.ChatCompletions;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestAssistantMessage;
+import com.azure.ai.inference.models.ChatRequestSystemMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.ai.inference.models.ChatResponseMessage;
+import com.azure.ai.inference.models.StreamingChatResponseMessageUpdate;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.credential.TokenCredential;
+import com.azure.core.exception.HttpResponseException;
+import com.azure.core.exception.ResourceNotFoundException;
+import com.azure.core.http.policy.HttpLogDetailLevel;
+import com.azure.core.http.policy.HttpLogOptions;
+import com.azure.core.util.CoreUtils;
+import com.azure.identity.DefaultAzureCredential;
+import com.azure.identity.DefaultAzureCredentialBuilder;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public final class ReadmeSamples {
+
+ private ChatCompletionsClient client = new ChatCompletionsClientBuilder().buildClient();
+ public void createSyncClientKeyCredential() {
+ // BEGIN: readme-sample-createSyncClientKeyCredential
+ ChatCompletionsClient client = new ChatCompletionsClientBuilder()
+ .credential(new AzureKeyCredential("{key}"))
+ .endpoint("{endpoint}")
+ .buildClient();
+ // END: readme-sample-createSyncClientKeyCredential
+ }
+
+ public void createAsyncClientKeyCredential() {
+ // BEGIN: readme-sample-createAsyncClientKeyCredential
+ ChatCompletionsAsyncClient client = new ChatCompletionsClientBuilder()
+ .credential(new AzureKeyCredential("{key}"))
+ .endpoint("{endpoint}")
+ .buildAsyncClient();
+ // END: readme-sample-createAsyncClientKeyCredential
+ }
+
+ public void createChatCompletionsClientWithAAD() {
+ // BEGIN: readme-sample-createChatCompletionsClientWithAAD
+ TokenCredential defaultCredential = new DefaultAzureCredentialBuilder().build();
+ ChatCompletionsClient client = new ChatCompletionsClientBuilder()
+ .credential(defaultCredential)
+ .endpoint("{endpoint}")
+ .buildClient();
+ // END: readme-sample-createChatCompletionsClientWithAAD
+ }
+
+ public void getChatCompletions() {
+ // BEGIN: readme-sample-getChatCompletions
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant. You will talk like a pirate."));
+ chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
+ chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
+ chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));
+
+ ChatCompletions chatCompletions = client.complete(new ChatCompletionsOptions(chatMessages));
+
+ System.out.printf("Model ID=%s is created at %s.%n", chatCompletions.getId(), chatCompletions.getCreated());
+ for (ChatChoice choice : chatCompletions.getChoices()) {
+ ChatResponseMessage message = choice.getMessage();
+ System.out.printf("Index: %d, Chat Role: %s.%n", choice.getIndex(), message.getRole());
+ System.out.println("Message:");
+ System.out.println(message.getContent());
+ }
+ // END: readme-sample-getChatCompletions
+ }
+
+ public void getChatCompletionsStream() {
+ // BEGIN: readme-sample-getChatCompletionsStream
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant. You will talk like a pirate."));
+ chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
+ chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
+ chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));
+
+ client.completeStream(new ChatCompletionsOptions(chatMessages))
+ .forEach(chatCompletions -> {
+ if (CoreUtils.isNullOrEmpty(chatCompletions.getChoices())) {
+ return;
+ }
+ StreamingChatResponseMessageUpdate delta = chatCompletions.getChoices().get(0).getDelta();
+ if (delta.getRole() != null) {
+ System.out.println("Role = " + delta.getRole());
+ }
+ if (delta.getContent() != null) {
+ String content = delta.getContent();
+ System.out.print(content);
+ }
+ });
+ // END: readme-sample-getChatCompletionsStream
+ }
+
+ public void getEmbedding() {
+ // BEGIN: readme-sample-getEmbedding
+ // END: readme-sample-getEmbedding
+ }
+
+ public void enableHttpLogging() {
+ // BEGIN: readme-sample-enablehttplogging
+ ChatCompletionsClient chatCompletionsClient = new ChatCompletionsClientBuilder()
+ .endpoint("{endpoint}")
+ .credential(new AzureKeyCredential("{key}"))
+ .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS))
+ .buildClient();
+ // or
+ DefaultAzureCredential credential = new DefaultAzureCredentialBuilder().build();
+ ChatCompletionsClient configurationClientAad = new ChatCompletionsClientBuilder()
+ .credential(credential)
+ .endpoint("{endpoint}")
+ .httpLogOptions(new HttpLogOptions().setLogLevel(HttpLogDetailLevel.BODY_AND_HEADERS))
+ .buildClient();
+ // END: readme-sample-enablehttplogging
+ }
+
+ public void troubleshootingExceptions() {
+ // BEGIN: readme-sample-troubleshootingExceptions
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant. You will talk like a pirate."));
+ chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
+ chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
+ chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));
+
+ try {
+ ChatCompletions chatCompletions = client.complete(new ChatCompletionsOptions(chatMessages));
+ } catch (HttpResponseException e) {
+ System.out.println(e.getMessage());
+ // Do something with the exception
+ }
+ // END: readme-sample-troubleshootingExceptions
+ }
+
+ public void troubleshootingExceptionsAsync() {
+ ChatCompletionsAsyncClient asyncClient = new ChatCompletionsClientBuilder()
+ .credential(new AzureKeyCredential("{key}"))
+ .endpoint("{endpoint}")
+ .buildAsyncClient();
+
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant. You will talk like a pirate."));
+ chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
+ chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
+ chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));
+
+ // BEGIN: readme-sample-troubleshootingExceptions-async
+ asyncClient.complete(new ChatCompletionsOptions(chatMessages))
+ .doOnSuccess(ignored -> System.out.println("Success!"))
+ .doOnError(
+ error -> error instanceof ResourceNotFoundException,
+ error -> System.out.println("Exception: 'getChatCompletions' could not be performed."));
+ // END: readme-sample-troubleshootingExceptions-async
+ }
+
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/BasicChatAADSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/BasicChatAADSample.java
new file mode 100644
index 000000000000..5c66a37cb196
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/BasicChatAADSample.java
@@ -0,0 +1,39 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatChoice;
+import com.azure.ai.inference.models.ChatCompletions;
+import com.azure.core.credential.TokenCredential;
+import com.azure.core.util.Configuration;
+import com.azure.identity.DefaultAzureCredentialBuilder;
+
+public final class BasicChatAADSample {
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) {
+ TokenCredential defaultCredential = new DefaultAzureCredentialBuilder().build();
+ // Currently the auth scope needs to be set as below for Azure OpenAI resources using EntraID.
+ // For non-Azure OpenAI models (such as Cohere, Mistral, Llama, or Phi), comment out the line below.
+ String[] scopes = new String[] { "https://cognitiveservices.azure.com/.default" };
+ String endpoint = Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT");
+ ChatCompletionsClient client = new ChatCompletionsClientBuilder()
+ .scopes(scopes) // remove for non-Azure OpenAI models
+ .credential(defaultCredential)
+ .endpoint(endpoint)
+ .buildClient();
+
+ String prompt = "Tell me 3 jokes about trains";
+
+ ChatCompletions completions = client.complete(prompt);
+
+ for (ChatChoice choice : completions.getChoices()) {
+ System.out.printf("%s.%n", choice.getMessage().getContent());
+ }
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/BasicChatSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/BasicChatSample.java
new file mode 100644
index 000000000000..744e75a24888
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/BasicChatSample.java
@@ -0,0 +1,34 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatChoice;
+import com.azure.ai.inference.models.ChatCompletions;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.Configuration;
+
+public final class BasicChatSample {
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) {
+ String key = Configuration.getGlobalConfiguration().get("AZURE_API_KEY");
+ String endpoint = Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT");
+ ChatCompletionsClient client = new ChatCompletionsClientBuilder()
+ .credential(new AzureKeyCredential(key))
+ .endpoint(endpoint)
+ .buildClient();
+
+ String prompt = "Tell me 3 jokes about trains";
+
+ ChatCompletions completions = client.complete(prompt);
+
+ for (ChatChoice choice : completions.getChoices()) {
+ System.out.printf("%s.%n", choice.getMessage().getContent());
+ }
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/BasicChatSampleAsync.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/BasicChatSampleAsync.java
new file mode 100644
index 000000000000..5bc7112e8cba
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/BasicChatSampleAsync.java
@@ -0,0 +1,52 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsAsyncClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatChoice;
+import com.azure.ai.inference.models.ChatResponseMessage;
+import com.azure.ai.inference.models.CompletionsUsage;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.Configuration;
+import java.util.concurrent.TimeUnit;
+
+public final class BasicChatSampleAsync {
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) throws InterruptedException {
+ String key = Configuration.getGlobalConfiguration().get("AZURE_API_KEY");
+ String endpoint = Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT");
+ ChatCompletionsAsyncClient client = new ChatCompletionsClientBuilder()
+ .credential(new AzureKeyCredential(key))
+ .endpoint(endpoint)
+ .buildAsyncClient();
+
+ client.complete("Tell me about Euler's Identity").subscribe(
+ chatCompletions -> {
+ System.out.printf("Completion ID=%s.%n", chatCompletions.getId());
+ for (ChatChoice choice : chatCompletions.getChoices()) {
+ ChatResponseMessage message = choice.getMessage();
+ System.out.printf("Index: %d, Chat Role: %s.%n", choice.getIndex(), message.getRole());
+ System.out.println("Message:");
+ System.out.println(message.getContent());
+ }
+
+ System.out.println();
+ CompletionsUsage usage = chatCompletions.getUsage();
+ System.out.printf("Usage: number of prompt tokens is %d, "
+ + "number of completion tokens is %d, and number of total tokens in request and response is %d.%n",
+ usage.getPromptTokens(), usage.getCompletionTokens(), usage.getTotalTokens());
+ },
+ error -> System.err.println("There was an error getting chat completions: " + error),
+ () -> System.out.println("\nCompleted calling complete."));
+
+ // The .subscribe() call is not blocking. For the purpose of this example, we sleep
+ // the thread so the program does not end before the operation is complete. Using .block() instead of
+ // .subscribe() would turn this into a synchronous call.
+ TimeUnit.SECONDS.sleep(10);
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/ImageFileChatSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/ImageFileChatSample.java
new file mode 100644
index 000000000000..d97c65e1b8c4
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/ImageFileChatSample.java
@@ -0,0 +1,56 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatChoice;
+import com.azure.ai.inference.models.ChatCompletions;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatMessageContentItem;
+import com.azure.ai.inference.models.ChatMessageImageContentItem;
+import com.azure.ai.inference.models.ChatMessageTextContentItem;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.ai.inference.models.ChatRequestSystemMessage;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.Configuration;
+
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.List;
+
+public final class ImageFileChatSample {
+
+ private static final String TEST_IMAGE_PATH = "./src/samples/resources/sample-images/sample.png";
+ private static final String TEST_IMAGE_FORMAT = "png";
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) {
+ String key = Configuration.getGlobalConfiguration().get("AZURE_API_KEY");
+ String endpoint = Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT");
+ ChatCompletionsClient client = new ChatCompletionsClientBuilder()
+ .credential(new AzureKeyCredential(key))
+ .endpoint(endpoint)
+ .buildClient();
+
+ Path testFilePath = Paths.get(TEST_IMAGE_PATH);
+ List<ChatMessageContentItem> contentItems = new ArrayList<>();
+ contentItems.add(new ChatMessageTextContentItem("Describe the image."));
+ contentItems.add(new ChatMessageImageContentItem(testFilePath, TEST_IMAGE_FORMAT));
+
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant."));
+ chatMessages.add(ChatRequestUserMessage.fromContentItems(contentItems));
+
+ ChatCompletions completions = client.complete(new ChatCompletionsOptions(chatMessages));
+
+ for (ChatChoice choice : completions.getChoices()) {
+ System.out.printf("%s.%n", choice.getMessage().getContent());
+ }
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/ImageUrlChatSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/ImageUrlChatSample.java
new file mode 100644
index 000000000000..e0309c4fffc1
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/ImageUrlChatSample.java
@@ -0,0 +1,56 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatChoice;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatCompletions;
+import com.azure.ai.inference.models.ChatMessageContentItem;
+import com.azure.ai.inference.models.ChatMessageTextContentItem;
+import com.azure.ai.inference.models.ChatMessageImageContentItem;
+import com.azure.ai.inference.models.ChatMessageImageUrl;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestSystemMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.Configuration;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public final class ImageUrlChatSample {
+
+ private static final String TEST_URL =
+ "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg";
+
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) {
+ String key = Configuration.getGlobalConfiguration().get("AZURE_API_KEY");
+ String endpoint = Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT");
+ ChatCompletionsClient client = new ChatCompletionsClientBuilder()
+ .credential(new AzureKeyCredential(key))
+ .endpoint(endpoint)
+ .buildClient();
+
+ List<ChatMessageContentItem> contentItems = new ArrayList<>();
+ contentItems.add(new ChatMessageTextContentItem("Describe the image."));
+ contentItems.add(new ChatMessageImageContentItem(
+ new ChatMessageImageUrl(TEST_URL)));
+
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant."));
+ chatMessages.add(ChatRequestUserMessage.fromContentItems(contentItems));
+
+ ChatCompletions completions = client.complete(new ChatCompletionsOptions(chatMessages));
+
+ for (ChatChoice choice : completions.getChoices()) {
+ System.out.printf("%s.%n", choice.getMessage().getContent());
+ }
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/StreamingChatSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/StreamingChatSample.java
new file mode 100644
index 000000000000..aeaec4414866
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/StreamingChatSample.java
@@ -0,0 +1,79 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestAssistantMessage;
+import com.azure.ai.inference.models.ChatRequestSystemMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.ai.inference.models.StreamingChatCompletionsUpdate;
+import com.azure.ai.inference.models.StreamingChatResponseMessageUpdate;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.Configuration;
+import com.azure.core.util.CoreUtils;
+import com.azure.core.util.IterableStream;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public final class StreamingChatSample {
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) {
+ String key = Configuration.getGlobalConfiguration().get("AZURE_API_KEY");
+ String endpoint = Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT");
+ ChatCompletionsClient client = new ChatCompletionsClientBuilder()
+ .credential(new AzureKeyCredential(key))
+ .endpoint(endpoint)
+ .buildClient();
+
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant. You will talk like a pirate."));
+ chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
+ chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
+ chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));
+
+ IterableStream<StreamingChatCompletionsUpdate> chatCompletionsStream = client.completeStream(
+ new ChatCompletionsOptions(chatMessages));
+
+ // The delta carries the incremental message content of a streaming response.
+ // The sequence of streaming deltas will look like:
+ // "delta": {
+ // "role": "assistant"
+ // },
+ // "delta": {
+ // "content": "Why"
+ // },
+ // "delta": {
+ // "content": " don"
+ // },
+ // "delta": {
+ // "content": "'t"
+ // }
+ chatCompletionsStream
+ .stream()
+ .forEach(chatCompletions -> {
+ if (CoreUtils.isNullOrEmpty(chatCompletions.getChoices())) {
+ return;
+ }
+
+ StreamingChatResponseMessageUpdate delta = chatCompletions.getChoices().get(0).getDelta();
+
+ if (delta.getRole() != null) {
+ System.out.println("Role = " + delta.getRole());
+ }
+
+ if (delta.getContent() != null) {
+ String content = delta.getContent();
+ System.out.print(content);
+ }
+ });
+
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/StreamingChatSampleAsync.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/StreamingChatSampleAsync.java
new file mode 100644
index 000000000000..9b92ef03621d
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/StreamingChatSampleAsync.java
@@ -0,0 +1,82 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsAsyncClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.ai.inference.models.ChatRequestSystemMessage;
+import com.azure.ai.inference.models.ChatRequestAssistantMessage;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.StreamingChatResponseMessageUpdate;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.Configuration;
+import com.azure.core.util.CoreUtils;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+public final class StreamingChatSampleAsync {
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) throws InterruptedException {
+ String key = Configuration.getGlobalConfiguration().get("AZURE_API_KEY");
+ String endpoint = Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT");
+ ChatCompletionsAsyncClient client = new ChatCompletionsClientBuilder()
+ .credential(new AzureKeyCredential(key))
+ .endpoint(endpoint)
+ .buildAsyncClient();
+
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant. You will talk like a pirate."));
+ chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
+ chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
+ chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));
+
+
+ client.completeStream(new ChatCompletionsOptions(chatMessages))
+ .map(chatCompletions -> {
+ /* The delta carries the incremental message content of a streaming response.
+ * The sequence of streaming deltas will look like:
+ * "delta": {
+ * "role": "assistant"
+ * },
+ * "delta": {
+ * "content": "Why"
+ * },
+ * "delta": {
+ * "content": " don"
+ * },
+ * "delta": {
+ * "content": "'t"
+ * }
+ */
+
+ if (CoreUtils.isNullOrEmpty(chatCompletions.getChoices())) {
+ return "";
+ }
+
+ StreamingChatResponseMessageUpdate delta = chatCompletions.getChoices().get(0).getDelta();
+
+ if (delta.getRole() != null) {
+ System.out.println("Role = " + delta.getRole());
+ }
+ return delta.getContent() == null ? "" : delta.getContent();
+ })
+ .subscribe(
+ System.out::print,
+ error -> System.err.println("There was an error getting chat completions: " + error),
+ () -> System.out.println("\nCompleted calling completeStream."));
+
+
+ // The .subscribe() call is not blocking. For the purpose of this example, we sleep
+ // the thread so the program does not end before the operation is complete. Using .block() instead of
+ // .subscribe() would turn this into a synchronous call.
+ TimeUnit.SECONDS.sleep(10);
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TextEmbeddingsAsyncSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TextEmbeddingsAsyncSample.java
new file mode 100644
index 000000000000..5b27daab137d
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TextEmbeddingsAsyncSample.java
@@ -0,0 +1,58 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.EmbeddingsAsyncClient;
+import com.azure.ai.inference.EmbeddingsClientBuilder;
+import com.azure.ai.inference.models.EmbeddingItem;
+import com.azure.ai.inference.models.EmbeddingsUsage;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.Configuration;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+
+public final class TextEmbeddingsAsyncSample {
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) throws InterruptedException {
+ String key = Configuration.getGlobalConfiguration().get("AZURE_EMBEDDINGS_KEY");
+ String endpoint = Configuration.getGlobalConfiguration().get("EMBEDDINGS_MODEL_ENDPOINT");
+ EmbeddingsAsyncClient client = new EmbeddingsClientBuilder()
+ .credential(new AzureKeyCredential(key))
+ .endpoint(endpoint)
+ .buildAsyncClient();
+
+ List<String> promptList = new ArrayList<>();
+ String prompt = "Tell me 3 jokes about trains";
+ promptList.add(prompt);
+
+ client.embed(promptList).subscribe(
+ embeddings -> {
+ for (EmbeddingItem item : embeddings.getData()) {
+ System.out.printf("Index: %d.%n", item.getIndex());
+ System.out.println("Embedding as list of floats: ");
+ for (Float embedding : item.getEmbeddingList()) {
+ System.out.printf("%f;", embedding);
+ }
+ }
+ EmbeddingsUsage usage = embeddings.getUsage();
+ System.out.println();
+ System.out.printf(
+ "Usage: number of prompt tokens is %d and number of total tokens in request and response is %d.%n",
+ usage.getPromptTokens(), usage.getTotalTokens());
+ },
+ error -> System.err.println("There was an error getting embeddings: " + error),
+ () -> System.out.println("Completed calling embed."));
+
+ // The .subscribe() call is not blocking. For the purpose of this example, we sleep
+ // the thread so the program does not end before the operation is complete. Using .block() instead of
+ // .subscribe() would turn this into a synchronous call.
+ TimeUnit.SECONDS.sleep(10);
+
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TextEmbeddingsSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TextEmbeddingsSample.java
new file mode 100644
index 000000000000..1e9282117755
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TextEmbeddingsSample.java
@@ -0,0 +1,42 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.EmbeddingsClient;
+import com.azure.ai.inference.EmbeddingsClientBuilder;
+import com.azure.ai.inference.models.EmbeddingsResult;
+import com.azure.ai.inference.models.EmbeddingItem;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.Configuration;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public final class TextEmbeddingsSample {
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) {
+ String key = Configuration.getGlobalConfiguration().get("AZURE_EMBEDDINGS_KEY");
+ String endpoint = Configuration.getGlobalConfiguration().get("EMBEDDINGS_MODEL_ENDPOINT");
+ EmbeddingsClient client = new EmbeddingsClientBuilder()
+ .credential(new AzureKeyCredential(key))
+ .endpoint(endpoint)
+ .buildClient();
+
+ List<String> promptList = new ArrayList<>();
+ String prompt = "Tell me 3 jokes about trains";
+ promptList.add(prompt);
+
+ EmbeddingsResult embeddings = client.embed(promptList);
+
+ for (EmbeddingItem item : embeddings.getData()) {
+ System.out.printf("Index: %d.%n", item.getIndex());
+ for (Float embedding : item.getEmbeddingList()) {
+ System.out.printf("%f;", embedding);
+ }
+ }
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/ToolCallSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/ToolCallSample.java
new file mode 100644
index 000000000000..e59a2e9ee3eb
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/ToolCallSample.java
@@ -0,0 +1,203 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatCompletionsFunctionToolCall;
+import com.azure.ai.inference.models.ChatCompletionsFunctionToolDefinition;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestAssistantMessage;
+import com.azure.ai.inference.models.ChatRequestSystemMessage;
+import com.azure.ai.inference.models.ChatRequestToolMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.ai.inference.models.CompletionsFinishReason;
+import com.azure.ai.inference.models.FunctionCall;
+import com.azure.ai.inference.models.FunctionDefinition;
+import com.azure.ai.inference.models.StreamingChatChoiceUpdate;
+import com.azure.ai.inference.models.StreamingChatCompletionsUpdate;
+import com.azure.ai.inference.models.StreamingChatResponseToolCallUpdate;
+import com.azure.core.credential.TokenCredential;
+import com.azure.core.util.BinaryData;
+import com.azure.core.util.Configuration;
+import com.azure.core.util.IterableStream;
+import com.azure.identity.DefaultAzureCredentialBuilder;
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+import java.util.Arrays;
+import java.util.List;
+
+public final class ToolCallSample {
+ /**
+ * @param args Unused. Arguments to the program.
+ */
+ public static void main(String[] args) {
+ TokenCredential defaultCredential = new DefaultAzureCredentialBuilder().build();
+ // Currently the auth scope needs to be set as below for Azure OpenAI resources using EntraID.
+ // For non-Azure OpenAI models (such as Cohere, Mistral, Llama, or Phi), comment out the line below.
+ String[] scopes = new String[] { "https://cognitiveservices.azure.com/.default" };
+ String endpoint = Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT");
+ ChatCompletionsClient client = new ChatCompletionsClientBuilder()
+ .scopes(scopes) // remove for non-Azure OpenAI models
+ .credential(defaultCredential)
+ .endpoint(endpoint)
+ .buildClient();
+
+ List<ChatRequestMessage> chatMessages = Arrays.asList(
+ new ChatRequestSystemMessage("You are a helpful assistant."),
+ new ChatRequestUserMessage("What sort of clothing should I wear today in Berlin?")
+ );
+
+ ChatCompletionsFunctionToolDefinition toolDefinition = new ChatCompletionsFunctionToolDefinition(
+ getFutureTemperatureFunctionDefinition());
+
+ ChatCompletionsOptions chatCompletionsOptions = new ChatCompletionsOptions(chatMessages);
+ chatCompletionsOptions.setTools(Arrays.asList(toolDefinition));
+
+ IterableStream<StreamingChatCompletionsUpdate> chatCompletionsStream = client.completeStream(chatCompletionsOptions);
+
+ String toolCallId = null;
+ String functionName = null;
+ StringBuilder functionArguments = new StringBuilder();
+ CompletionsFinishReason finishReason = null;
+ for (StreamingChatCompletionsUpdate chatCompletions : chatCompletionsStream) {
+ // With Azure OpenAI, the first update can contain content-filter information but no choices
+ if (chatCompletions.getChoices().isEmpty()) {
+ continue;
+ }
+ StreamingChatChoiceUpdate choice = chatCompletions.getChoices().get(0);
+ if (choice.getFinishReason() != null) {
+ finishReason = choice.getFinishReason();
+ }
+ List<StreamingChatResponseToolCallUpdate> toolCalls = choice.getDelta().getToolCalls();
+ // We take the function name when it's available, and we aggregate the arguments.
+ // We also monitor the finish reason for TOOL_CALLS. That's the LLM signaling that we
+ // should call our function
+ if (toolCalls != null) {
+ StreamingChatResponseToolCallUpdate toolCall = toolCalls.get(0);
+ if (toolCall != null) {
+ functionArguments.append(toolCall.getFunction().getArguments());
+ if (toolCall.getId() != null) {
+ toolCallId = toolCall.getId();
+ }
+
+ if (toolCall.getFunction().getName() != null) {
+ functionName = toolCall.getFunction().getName();
+ }
+ }
+ }
+ }
+
+ System.out.println("Tool Call Id: " + toolCallId);
+ System.out.println("Function Name: " + functionName);
+ System.out.println("Function Arguments: " + functionArguments);
+ System.out.println("Finish Reason: " + finishReason);
+
+ // We verify that the LLM wants us to call the function we advertised in the original request
+ // In preparation for the follow-up call to the service, we add:
+ // - All the messages we sent
+ // - The ChatCompletionsFunctionToolCall from the service as part of a ChatRequestAssistantMessage
+ // - The result of the function tool as part of a ChatRequestToolMessage
+ if (finishReason == CompletionsFinishReason.TOOL_CALLS) {
+ // For non-Azure OpenAI models, the "content" here can be null
+ // We prepare the assistant message reminding the LLM of the context of this request. We provide:
+ // - The tool call id
+ // - The function description
+ FunctionCall functionCall = new FunctionCall(functionName, functionArguments.toString());
+ ChatCompletionsFunctionToolCall functionToolCall = new ChatCompletionsFunctionToolCall(toolCallId, functionCall);
+ ChatRequestAssistantMessage assistantRequestMessage = new ChatRequestAssistantMessage("");
+ assistantRequestMessage.setToolCalls(Arrays.asList(functionToolCall));
+
+ // As an additional step, you may want to deserialize the parameters, so you can call your function
+ FunctionArguments parameters = BinaryData.fromString(functionArguments.toString()).toObject(FunctionArguments.class);
+ System.out.println("Location Name: " + parameters.locationName);
+ System.out.println("Date: " + parameters.date);
+ String functionCallResult = futureTemperature(parameters.locationName, parameters.date);
+
+ // This message contains the information that will allow the LLM to resume the text generation
+ ChatRequestToolMessage toolRequestMessage = new ChatRequestToolMessage(functionCallResult, toolCallId);
+ List<ChatRequestMessage> followUpMessages = Arrays.asList(
+ // We add the original messages from the request
+ chatMessages.get(0),
+ chatMessages.get(1),
+ assistantRequestMessage,
+ toolRequestMessage
+ );
+
+ IterableStream<StreamingChatCompletionsUpdate> followUpChatCompletionsStream = client.completeStream(
+ new ChatCompletionsOptions(followUpMessages));
+
+ StringBuilder finalResult = new StringBuilder();
+ CompletionsFinishReason finalFinishReason = null;
+ for (StreamingChatCompletionsUpdate chatCompletions : followUpChatCompletionsStream) {
+ if (chatCompletions.getChoices().isEmpty()) {
+ continue;
+ }
+ StreamingChatChoiceUpdate choice = chatCompletions.getChoices().get(0);
+ if (choice.getFinishReason() != null) {
+ finalFinishReason = choice.getFinishReason();
+ }
+ if (choice.getDelta().getContent() != null) {
+ finalResult.append(choice.getDelta().getContent());
+ }
+ }
+
+ // We verify that the LLM has STOPPED as a finishing reason
+ if (finalFinishReason == CompletionsFinishReason.STOPPED) {
+ System.out.println("Final Result: " + finalResult);
+ }
+ }
+ }
+
+ // In this example we ignore the parameters to our tool function and return a fixed value
+ private static String futureTemperature(String locationName, String date) {
+ return "-7 C";
+ }
+
+ private static FunctionDefinition getFutureTemperatureFunctionDefinition() {
+ FunctionDefinition functionDefinition = new FunctionDefinition("FutureTemperature");
+ functionDefinition.setDescription("Get the future temperature for a given location and date.");
+ FutureTemperatureParameters parameters = new FutureTemperatureParameters();
+ functionDefinition.setParameters(BinaryData.fromObject(parameters));
+ return functionDefinition;
+ }
+
+ private static class FunctionArguments {
+ @JsonProperty(value = "location_name")
+ private String locationName;
+
+ @JsonProperty(value = "date")
+ private String date;
+ }
+
+ private static class FutureTemperatureParameters {
+ @JsonProperty(value = "type")
+ private String type = "object";
+
+ @JsonProperty(value = "properties")
+ private FutureTemperatureProperties properties = new FutureTemperatureProperties();
+ }
+
+ private static class FutureTemperatureProperties {
+ @JsonProperty(value = "unit") StringField unit = new StringField("Temperature unit. Can be either Celsius or Fahrenheit. Defaults to Celsius.");
+ @JsonProperty(value = "location_name") StringField locationName = new StringField("The name of the location to get the future temperature for.");
+ @JsonProperty(value = "date") StringField date = new StringField("The date to get the future temperature for. The format is YYYY-MM-DD.");
+ }
+
+ private static class StringField {
+ @JsonProperty(value = "type")
+ private final String type = "string";
+
+ @JsonProperty(value = "description")
+ private String description;
+
+ @JsonCreator
+ StringField(@JsonProperty(value = "description") String description) {
+ this.description = description;
+ }
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/resources/sample-images/sample.png b/sdk/ai/azure-ai-inference/src/samples/resources/sample-images/sample.png
new file mode 100644
index 000000000000..55dafd287ef7
Binary files /dev/null and b/sdk/ai/azure-ai-inference/src/samples/resources/sample-images/sample.png differ
diff --git a/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsAsyncClientTest.java b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsAsyncClientTest.java
new file mode 100644
index 000000000000..c7f205933c4c
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsAsyncClientTest.java
@@ -0,0 +1,69 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+package com.azure.ai.inference;
+
+import com.azure.ai.inference.models.*;
+import com.azure.core.http.HttpClient;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import reactor.test.StepVerifier;
+
+import java.util.ArrayList;
+
+import static com.azure.ai.inference.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+public class ChatCompletionsAsyncClientTest extends ChatCompletionsClientTestBase {
+ private ChatCompletionsAsyncClient client;
+
+ private ChatCompletionsAsyncClient getChatCompletionsAsyncClient(HttpClient httpClient) {
+ return getChatCompletionsClientBuilder(
+ interceptorManager.isPlaybackMode() ? interceptorManager.getPlaybackClient() : httpClient)
+ .buildAsyncClient();
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetChatCompletions(HttpClient httpClient) {
+ client = getChatCompletionsAsyncClient(httpClient);
+ getChatCompletionsRunner((prompt) -> {
+ StepVerifier.create(client.complete(prompt))
+ .assertNext(resultCompletions -> {
+ assertNotNull(resultCompletions.getUsage());
+ assertCompletions(1, resultCompletions);
+ })
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetChatCompletionsFromOptions(HttpClient httpClient) {
+ client = getChatCompletionsAsyncClient(httpClient);
+ getChatCompletionsFromOptionsRunner((options) -> {
+ StepVerifier.create(client.complete(options))
+ .assertNext(resultCompletions -> {
+ assertNotNull(resultCompletions.getUsage());
+ assertCompletions(1, resultCompletions);
+ })
+ .verifyComplete();
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetCompletionsStream(HttpClient httpClient) {
+ client = getChatCompletionsAsyncClient(httpClient);
+ getChatCompletionsFromMessagesRunner((chatMessages) -> {
+ StepVerifier.create(client.completeStream(new ChatCompletionsOptions(chatMessages)))
+ .recordWith(ArrayList::new)
+ .thenConsumeWhile(chatCompletions -> {
+ assertCompletionsStream(chatCompletions);
+ return true;
+ })
+ .consumeRecordedWith(messageList -> assertTrue(messageList.size() > 1))
+ .verifyComplete();
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsClientTestBase.java b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsClientTestBase.java
new file mode 100644
index 000000000000..88aa73b7047a
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsClientTestBase.java
@@ -0,0 +1,160 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference;
+
+// The Java test files under 'generated' package are generated for your reference.
+// If you wish to modify these files, please copy them out of the 'generated' package, and modify there.
+// See https://aka.ms/azsdk/dpg/java/tests for guide on adding a test.
+
+import com.azure.ai.inference.models.*;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.http.HttpClient;
+import com.azure.core.http.HttpRequest;
+import com.azure.core.test.TestMode;
+import com.azure.core.test.TestProxyTestBase;
+import com.azure.core.test.models.CustomMatcher;
+import com.azure.core.test.models.TestProxySanitizer;
+import com.azure.core.test.models.TestProxySanitizerType;
+import com.azure.core.util.Configuration;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.function.Consumer;
+
+import org.junit.jupiter.api.Test;
+import static org.junit.jupiter.api.Assertions.assertInstanceOf;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static com.azure.ai.inference.TestUtils.FAKE_API_KEY;
+
+public abstract class ChatCompletionsClientTestBase extends TestProxyTestBase {
+ protected ChatCompletionsClient chatCompletionsClient;
+ protected EmbeddingsClient embeddingsClient;
+ protected ImageEmbeddingsClient imageEmbeddingsClient;
+ private boolean sanitizersRemoved = false;
+
+ ChatCompletionsClientBuilder getChatCompletionsClientBuilder(HttpClient httpClient) {
+ ChatCompletionsClientBuilder builder = new ChatCompletionsClientBuilder()
+ .httpClient(httpClient);
+ TestMode testMode = getTestMode();
+ if (testMode != TestMode.LIVE) {
+ addTestRecordCustomSanitizers();
+ addCustomMatchers();
+ // Disable "$..id"=AZSDK3430, "Set-Cookie"=AZSDK2015 for both azure and non-azure clients from the list of common sanitizers.
+ if (!sanitizersRemoved) {
+ interceptorManager.removeSanitizers("AZSDK3430", "AZSDK3493");
+ sanitizersRemoved = true;
+ }
+ }
+
+ if (testMode == TestMode.PLAYBACK) {
+ builder
+ .endpoint("https://localhost:8080")
+ .credential(new AzureKeyCredential(FAKE_API_KEY));
+ } else if (testMode == TestMode.RECORD) {
+ builder
+ .addPolicy(interceptorManager.getRecordPolicy())
+ .endpoint(Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT"))
+ .credential(new AzureKeyCredential(Configuration.getGlobalConfiguration().get("AZURE_API_KEY")));
+ } else {
+ builder
+ .endpoint(Configuration.getGlobalConfiguration().get("MODEL_ENDPOINT"))
+ .credential(new AzureKeyCredential(Configuration.getGlobalConfiguration().get("AZURE_API_KEY")));
+ }
+ return builder;
+ }
+
+ private void addTestRecordCustomSanitizers() {
+ String sanitizedRequestUri = "https://REDACTED/";
+ String requestUriRegex = "https://.*/openai/deployments/.*?/";
+ interceptorManager.addSanitizers(Arrays.asList(
+ new TestProxySanitizer("$..key", null, "REDACTED", TestProxySanitizerType.BODY_KEY),
+ new TestProxySanitizer("$..endpoint", requestUriRegex, sanitizedRequestUri, TestProxySanitizerType.URL),
+ new TestProxySanitizer("Content-Type", "(^multipart\\/form-data; boundary=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{2})",
+ "multipart\\/form-data; boundary=BOUNDARY", TestProxySanitizerType.HEADER)
+ ));
+ }
+
+ private void addCustomMatchers() {
+ interceptorManager.addMatchers(new CustomMatcher().setExcludedHeaders(Arrays.asList("Cookie", "Set-Cookie")));
+ }
+
+ @Test
+ public abstract void testGetChatCompletions(HttpClient httpClient);
+
+ void getChatCompletionsRunner(Consumer<String> testRunner) {
+ testRunner.accept("Say this is a test");
+ }
+
+ void getChatCompletionsFromOptionsRunner(Consumer<ChatCompletionsOptions> testRunner) {
+ List<ChatRequestMessage> chatMessages = Arrays.asList(
+ new ChatRequestSystemMessage("You are a helpful assistant."),
+ new ChatRequestUserMessage("What sort of clothing should I wear today in Berlin?")
+ );
+ ChatCompletionsOptions options = new ChatCompletionsOptions(chatMessages);
+ testRunner.accept(options);
+ }
+
+ void getChatCompletionsFromMessagesRunner(Consumer<List<ChatRequestMessage>> testRunner) {
+ testRunner.accept(getChatMessages());
+ }
+
+ static void assertCompletionsStream(StreamingChatCompletionsUpdate chatCompletions) {
+ if (chatCompletions.getId() != null && !chatCompletions.getId().isEmpty()) {
+ assertNotNull(chatCompletions.getId());
+ assertNotNull(chatCompletions.getChoices());
+ assertFalse(chatCompletions.getChoices().isEmpty());
+ assertNotNull(chatCompletions.getChoices().get(0).getDelta());
+ }
+ }
+
+ static void assertCompletions(int choicesPerPrompt, ChatCompletions actual) {
+ assertNotNull(actual);
+ assertInstanceOf(ChatCompletions.class, actual);
+ assertChoices(choicesPerPrompt, actual.getChoices());
+ assertNotNull(actual.getUsage());
+ }
+
+ static void assertChoices(int choicesPerPrompt, List<ChatChoice> actual) {
+ assertEquals(choicesPerPrompt, actual.size());
+ for (int i = 0; i < actual.size(); i++) {
+ assertChoice(i, actual.get(i));
+ }
+ }
+
+ static void assertChoice(int index, ChatChoice actual) {
+ assertNotNull(actual.getMessage().getContent());
+ assertEquals(index, actual.getIndex());
+ assertNotNull(actual.getFinishReason());
+ }
+
+ static void assertResponseRequestHeader(HttpRequest request) {
+ request.getHeaders().stream().filter(header -> {
+ String name = header.getName();
+ return "my-header1".equals(name) || "my-header2".equals(name) || "my-header3".equals(name);
+ }).forEach(header ->
+ // For each of the three known test headers, the expected value is "<name>-value".
+ assertEquals(header.getName() + "-value", header.getValue()));
+ }
+
+ private List<ChatRequestMessage> getChatMessages() {
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestSystemMessage("You are a helpful assistant. You will talk like a pirate."));
+ chatMessages.add(new ChatRequestUserMessage("Can you help me?"));
+ chatMessages.add(new ChatRequestAssistantMessage("Of course, me hearty! What can I do for ye?"));
+ chatMessages.add(new ChatRequestUserMessage("What's the best way to train a parrot?"));
+ return chatMessages;
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsSyncClientTest.java b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsSyncClientTest.java
new file mode 100644
index 000000000000..8d4d8f5ac893
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsSyncClientTest.java
@@ -0,0 +1,108 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+package com.azure.ai.inference;
+
+import com.azure.ai.inference.models.ChatCompletions;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.ai.inference.models.CompletionsUsage;
+import com.azure.ai.inference.models.StreamingChatCompletionsUpdate;
+import com.azure.core.http.HttpClient;
+
+import com.azure.core.http.rest.RequestOptions;
+import com.azure.core.http.rest.Response;
+import com.azure.core.util.BinaryData;
+import com.azure.core.util.IterableStream;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import static com.azure.ai.inference.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS;
+import static org.junit.jupiter.api.Assertions.*;
+
+public class ChatCompletionsSyncClientTest extends ChatCompletionsClientTestBase {
+ private ChatCompletionsClient client;
+
+ private ChatCompletionsClient getChatCompletionsClient(HttpClient httpClient) {
+ return getChatCompletionsClientBuilder(
+ interceptorManager.isPlaybackMode() ? interceptorManager.getPlaybackClient() : httpClient)
+ .buildClient();
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetChatCompletions(HttpClient httpClient) {
+ client = getChatCompletionsClient(httpClient);
+ getChatCompletionsRunner((prompt) -> {
+ ChatCompletions resultCompletions = client.complete(prompt);
+ assertCompletions(1, resultCompletions);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetCompletionsStream(HttpClient httpClient) {
+ client = getChatCompletionsClient(httpClient);
+ getChatCompletionsRunner((prompt) -> {
+ List<ChatRequestMessage> chatMessages = new ArrayList<>();
+ chatMessages.add(new ChatRequestUserMessage(prompt));
+ IterableStream<StreamingChatCompletionsUpdate> resultCompletions
+ = client.completeStream(new ChatCompletionsOptions(chatMessages));
+ // Materialize the stream once; an IterableStream backed by a live response can only be consumed a single time.
+ StreamingChatCompletionsUpdate[] updates
+ = resultCompletions.stream().toArray(StreamingChatCompletionsUpdate[]::new);
+ assertTrue(updates.length > 1);
+ for (StreamingChatCompletionsUpdate update : updates) {
+ assertCompletionsStream(update);
+ }
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetCompletionsFromOptions(HttpClient httpClient) {
+ client = getChatCompletionsClient(httpClient);
+ getChatCompletionsFromOptionsRunner((options) -> {
+ ChatCompletions completions = client.complete(options);
+ assertCompletions(1, completions);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetCompletionsWithResponse(HttpClient httpClient) {
+ client = getChatCompletionsClient(httpClient);
+ getChatCompletionsFromOptionsRunner((options) -> {
+ Response<BinaryData> binaryDataResponse = client.completeWithResponse(
+ BinaryData.fromObject(options), new RequestOptions());
+ ChatCompletions response = binaryDataResponse.getValue().toObject(ChatCompletions.class);
+ assertCompletions(1, response);
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetCompletionsUsageField(HttpClient httpClient) {
+ client = getChatCompletionsClient(httpClient);
+ getChatCompletionsFromOptionsRunner((options) -> {
+ options.setMaxTokens(1024);
+
+ ChatCompletions resultCompletions = client.complete(options);
+
+ CompletionsUsage usage = resultCompletions.getUsage();
+ assertCompletions(1, resultCompletions);
+ assertNotNull(usage);
+ assertTrue(usage.getTotalTokens() > 0);
+ assertEquals(usage.getCompletionTokens() + usage.getPromptTokens(), usage.getTotalTokens());
+ });
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetCompletionsTokenCutoff(HttpClient httpClient) {
+ client = getChatCompletionsClient(httpClient);
+ getChatCompletionsFromOptionsRunner((options) -> {
+ options.setMaxTokens(3);
+ ChatCompletions resultCompletions = client.complete(options);
+ assertCompletions(1, resultCompletions);
+ });
+ }
+}
+
diff --git a/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/EmbeddingsAsyncClientTest.java b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/EmbeddingsAsyncClientTest.java
new file mode 100644
index 000000000000..7809f49f4f42
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/EmbeddingsAsyncClientTest.java
@@ -0,0 +1,35 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+package com.azure.ai.inference;
+
+import com.azure.core.http.HttpClient;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.MethodSource;
+import reactor.test.StepVerifier;
+
+import static com.azure.ai.inference.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+public class EmbeddingsAsyncClientTest extends EmbeddingsClientTestBase {
+ private EmbeddingsAsyncClient client;
+
+ private EmbeddingsAsyncClient getEmbeddingsAsyncClient(HttpClient httpClient) {
+ return getEmbeddingsClientBuilder(
+ interceptorManager.isPlaybackMode() ? interceptorManager.getPlaybackClient() : httpClient)
+ .buildAsyncClient();
+ }
+
+ @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+ @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+ public void testGetEmbeddings(HttpClient httpClient) {
+ client = getEmbeddingsAsyncClient(httpClient);
+ getEmbeddingsRunner((promptList) -> {
+ StepVerifier.create(client.embed(promptList))
+ .assertNext(result -> {
+ assertNotNull(result.getUsage());
+ assertEmbeddings(result);
+ })
+ .verifyComplete();
+ });
+ }
+}
diff --git a/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/EmbeddingsClientTestBase.java b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/EmbeddingsClientTestBase.java
new file mode 100644
index 000000000000..23fc9c51d96a
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/EmbeddingsClientTestBase.java
@@ -0,0 +1,138 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+// Code generated by Microsoft (R) TypeSpec Code Generator.
+
+package com.azure.ai.inference;
+
+// The Java test files under 'generated' package are generated for your reference.
+// If you wish to modify these files, please copy them out of the 'generated' package, and modify there.
+// See https://aka.ms/azsdk/dpg/java/tests for guide on adding a test.
+
+import com.azure.ai.inference.models.*;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.http.HttpClient;
+import com.azure.core.test.TestMode;
+import com.azure.core.test.TestProxyTestBase;
+import com.azure.core.test.models.CustomMatcher;
+import com.azure.core.test.models.TestProxySanitizer;
+import com.azure.core.test.models.TestProxySanitizerType;
+import com.azure.core.util.Configuration;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.function.Consumer;
+
+import static com.azure.ai.inference.TestUtils.FAKE_API_KEY;
+import static org.junit.jupiter.api.Assertions.*;
+
+public abstract class EmbeddingsClientTestBase extends TestProxyTestBase {
+ protected EmbeddingsClient embeddingsClient;
+ protected ImageEmbeddingsClient imageEmbeddingsClient;
+ private boolean sanitizersRemoved = false;
+
+ EmbeddingsClientBuilder getEmbeddingsClientBuilder(HttpClient httpClient) {
+ EmbeddingsClientBuilder builder = new EmbeddingsClientBuilder()
+ .httpClient(httpClient);
+ TestMode testMode = getTestMode();
+ if (testMode != TestMode.LIVE) {
+ addTestRecordCustomSanitizers();
+ addCustomMatchers();
+ // Disable "$..id"=AZSDK3430, "Set-Cookie"=AZSDK2015 for both azure and non-azure clients from the list of common sanitizers.
+ if (!sanitizersRemoved) {
+ interceptorManager.removeSanitizers("AZSDK3430", "AZSDK3493");
+ sanitizersRemoved = true;
+ }
+ }
+
+ if (testMode == TestMode.PLAYBACK) {
+ builder
+ .endpoint("https://localhost:8080")
+ .credential(new AzureKeyCredential(FAKE_API_KEY));
+ } else if (testMode == TestMode.RECORD) {
+ builder
+ .addPolicy(interceptorManager.getRecordPolicy())
+ .endpoint(Configuration.getGlobalConfiguration().get("EMBEDDINGS_MODEL_ENDPOINT"))
+ .credential(new AzureKeyCredential(Configuration.getGlobalConfiguration().get("AZURE_EMBEDDINGS_KEY")));
+ } else {
+ builder
+ .endpoint(Configuration.getGlobalConfiguration().get("EMBEDDINGS_MODEL_ENDPOINT"))
+ .credential(new AzureKeyCredential(Configuration.getGlobalConfiguration().get("AZURE_EMBEDDINGS_KEY")));
+ }
+ return builder;
+ }
+
+ ImageEmbeddingsClientBuilder getImageEmbeddingsClientBuilder(HttpClient httpClient) {
+ ImageEmbeddingsClientBuilder builder = new ImageEmbeddingsClientBuilder()
+ .httpClient(httpClient);
+ TestMode testMode = getTestMode();
+ if (testMode != TestMode.LIVE) {
+ addTestRecordCustomSanitizers();
+ addCustomMatchers();
+ // Disable "$..id"=AZSDK3430, "Set-Cookie"=AZSDK2015 for both azure and non-azure clients from the list of common sanitizers.
+ if (!sanitizersRemoved) {
+ interceptorManager.removeSanitizers("AZSDK3430", "AZSDK3493");
+ sanitizersRemoved = true;
+ }
+ }
+
+ if (testMode == TestMode.PLAYBACK) {
+ builder
+ .endpoint("https://localhost:8080")
+ .credential(new AzureKeyCredential(FAKE_API_KEY));
+ } else if (testMode == TestMode.RECORD) {
+ builder
+ .addPolicy(interceptorManager.getRecordPolicy())
+ .endpoint(Configuration.getGlobalConfiguration().get("EMBEDDINGS_MODEL_ENDPOINT"))
+ .credential(new AzureKeyCredential(Configuration.getGlobalConfiguration().get("AZURE_EMBEDDINGS_KEY")));
+ } else {
+ builder
+ .endpoint(Configuration.getGlobalConfiguration().get("EMBEDDINGS_MODEL_ENDPOINT"))
+ .credential(new AzureKeyCredential(Configuration.getGlobalConfiguration().get("AZURE_EMBEDDINGS_KEY")));
+ }
+ return builder;
+ }
+
+ private void addTestRecordCustomSanitizers() {
+ String sanitizedRequestUri = "https://REDACTED/";
+ String requestUriRegex = "https://.*/openai/deployments/.*?/";
+ interceptorManager.addSanitizers(Arrays.asList(
+ new TestProxySanitizer("$..key", null, "REDACTED", TestProxySanitizerType.BODY_KEY),
+ new TestProxySanitizer("$..endpoint", requestUriRegex, sanitizedRequestUri, TestProxySanitizerType.URL),
+ new TestProxySanitizer("Content-Type", "(^multipart\\/form-data; boundary=[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{2})",
+ "multipart\\/form-data; boundary=BOUNDARY", TestProxySanitizerType.HEADER)
+ ));
+ }
+
+ private void addCustomMatchers() {
+ interceptorManager.addMatchers(new CustomMatcher().setExcludedHeaders(Arrays.asList("Cookie", "Set-Cookie")));
+ }
+
+ @Test
+ public abstract void testGetEmbeddings(HttpClient httpClient);
+
+ void getEmbeddingsRunner(Consumer<List<String>> testRunner) {
+ testRunner.accept(getPrompts());
+ }
+
+ static void assertEmbeddings(EmbeddingsResult actual) {
+ List<EmbeddingItem> data = actual.getData();
+ assertNotNull(data);
+ assertFalse(data.isEmpty());
+
+ for (EmbeddingItem item : data) {
+ List