diff --git a/cmd/cloud.go b/cmd/cloud.go index 91e119e6..4588a2c7 100644 --- a/cmd/cloud.go +++ b/cmd/cloud.go @@ -18,6 +18,7 @@ import ( "syscall" "time" + "github.com/agentuity/cli/internal/bundler/prompts" "github.com/agentuity/cli/internal/deployer" "github.com/agentuity/cli/internal/envutil" "github.com/agentuity/cli/internal/errsystem" @@ -73,6 +74,21 @@ type startAgent struct { Remove bool `json:"remove,omitempty"` } +type PromptVariable struct { + Name string `json:"name"` + Required bool `json:"required,omitempty"` + Default string `json:"default,omitempty"` +} + +type DeployPrompt struct { + Slug string `json:"slug"` + Name string `json:"name"` + System *string `json:"system,omitempty"` + Prompt *string `json:"prompt,omitempty"` + Variables []PromptVariable `json:"variables,omitempty"` + Description *string `json:"description,omitempty"` +} + type startRequest struct { Agents []startAgent `json:"agents"` Resources *Resources `json:"resources,omitempty"` @@ -81,6 +97,7 @@ type startRequest struct { TagDescription string `json:"description,omitempty"` TagMessage string `json:"message,omitempty"` UsePrivateKey bool `json:"usePrivateKey,omitempty"` + Prompts []DeployPrompt `json:"prompts,omitempty"` } func ShowNewProjectImport(ctx context.Context, logger logger.Logger, cmd *cobra.Command, apiUrl string, apikey string, projectId string, project *project.Project, dir string, isImport bool) { @@ -425,6 +442,18 @@ Examples: startRequest.TagMessage = message startRequest.UsePrivateKey = true + // Collect prompts data if prompts feature flag is enabled + promptsEvalsFF := CheckFeatureFlag(cmd, FeaturePromptsEvals, "enable-prompts-evals") + if promptsEvalsFF { + prompts, err := collectPromptsData(logger, dir) + if err != nil { + logger.Debug("Failed to collect prompts data: %v", err) + } else { + startRequest.Prompts = prompts + logger.Debug("Collected %d prompts for deployment", len(prompts)) + } + } + // Start deployment if err := client.Do("PUT", 
fmt.Sprintf("/cli/deploy/start/%s%s", theproject.ProjectId, deploymentId), startRequest, &startResponse); err != nil { errsystem.New(errsystem.ErrDeployProject, err, @@ -1028,6 +1057,98 @@ Examples: }, } +// collectPromptsData collects prompts data from the project directory +func collectPromptsData(logger logger.Logger, dir string) ([]DeployPrompt, error) { + // Find all prompt files + promptFiles := prompts.FindAllPromptFiles(dir) + if len(promptFiles) == 0 { + logger.Debug("No prompt files found") + return nil, nil + } + + logger.Debug("Found %d prompt files: %v", len(promptFiles), promptFiles) + + var allPrompts []DeployPrompt + + // Parse all prompt files and combine prompts + for _, promptFile := range promptFiles { + data, err := os.ReadFile(promptFile) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %w", promptFile, err) + } + + promptsList, err := prompts.ParsePromptsYAML(data) + if err != nil { + return nil, fmt.Errorf("failed to parse %s: %w", promptFile, err) + } + + // Convert to DeployPrompt format + for _, prompt := range promptsList { + deployPrompt := DeployPrompt{ + Slug: prompt.Slug, + Name: prompt.Name, + Description: &prompt.Description, + } + + // Convert system prompt + if prompt.System != "" { + deployPrompt.System = &prompt.System + } + + // Convert user prompt + if prompt.Prompt != "" { + deployPrompt.Prompt = &prompt.Prompt + } + + // Convert variables from templates + var variables []PromptVariable + if prompt.SystemTemplate.Variables != nil { + for _, v := range prompt.SystemTemplate.Variables { + variables = append(variables, PromptVariable{ + Name: v.Name, + Required: v.IsRequired, + Default: v.DefaultValue, + }) + } + } + if prompt.PromptTemplate.Variables != nil { + for _, v := range prompt.PromptTemplate.Variables { + // Check if variable already exists + found := false + for i, existing := range variables { + if existing.Name == v.Name { + // Update existing variable if it's more restrictive + if v.IsRequired && 
!existing.Required { + variables[i].Required = true + } + if v.DefaultValue != "" && existing.Default == "" { + variables[i].Default = v.DefaultValue + } + found = true + break + } + } + if !found { + variables = append(variables, PromptVariable{ + Name: v.Name, + Required: v.IsRequired, + Default: v.DefaultValue, + }) + } + } + } + + deployPrompt.Variables = variables + allPrompts = append(allPrompts, deployPrompt) + } + + logger.Debug("Parsed %d prompts from %s", len(promptsList), promptFile) + } + + logger.Debug("Total prompts collected: %d", len(allPrompts)) + return allPrompts, nil +} + func init() { rootCmd.AddCommand(cloudCmd) rootCmd.AddCommand(cloudDeployCmd) diff --git a/cmd/dev.go b/cmd/dev.go index 37d171cb..3db71bfd 100644 --- a/cmd/dev.go +++ b/cmd/dev.go @@ -49,7 +49,6 @@ Examples: apiUrl := urls.API appUrl := urls.App gravityUrl := urls.Gravity - noBuild, _ := cmd.Flags().GetBool("no-build") promptsEvalsFF := CheckFeatureFlag(cmd, FeaturePromptsEvals, "enable-prompts-evals") @@ -159,7 +158,7 @@ Examples: if errors.Is(err, context.Canceled) { return } - log.Fatal("failed to start devmode connection: %s", err) + log.Error("failed to start live dev connection: %s", err) return } } diff --git a/cmd/eval.go b/cmd/eval.go new file mode 100644 index 00000000..cff16dc1 --- /dev/null +++ b/cmd/eval.go @@ -0,0 +1,186 @@ +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/signal" + "path/filepath" + "strings" + "syscall" + + "github.com/agentuity/cli/internal/errsystem" + "github.com/agentuity/cli/internal/eval" + "github.com/agentuity/cli/internal/project" + "github.com/agentuity/cli/internal/util" + "github.com/agentuity/go-common/env" + "github.com/agentuity/go-common/logger" + "github.com/agentuity/go-common/tui" + "github.com/spf13/cobra" +) + +var evalCmd = &cobra.Command{ + Use: "eval", + Short: "Evaluation related commands", + Run: func(cmd *cobra.Command, args []string) { + cmd.Help() + }, +} + +func getEvalInfoFlow(logger 
logger.Logger, name string, description string) (string, string) { + if name == "" { + if !tui.HasTTY { + logger.Fatal("No TTY detected, please specify an eval name from the command line") + } + name = tui.InputWithValidation(logger, "What should we name the evaluation?", "The name of the eval helps identify its purpose", 255, func(name string) error { + if name == "" { + return fmt.Errorf("Eval name cannot be empty") + } + return nil + }) + } + + if description == "" { + description = tui.Input(logger, "How should we describe what the "+name+" eval does?", "The description of the eval is optional but helpful for understanding its purpose") + } + + return name, description +} + +func generateEvalFile(logger logger.Logger, projectDir string, evalID string, slug string, name string, description string) error { + // Always generate TypeScript files for evals + ext := ".ts" + + // Create evals directory if it doesn't exist + evalsDir := filepath.Join(projectDir, "src", "evals") + if err := os.MkdirAll(evalsDir, 0755); err != nil { + return fmt.Errorf("failed to create evals directory: %w", err) + } + + // Generate file path + filename := filepath.Join(evalsDir, slug+ext) + + // Check if file already exists + if util.Exists(filename) { + return fmt.Errorf("eval file already exists: %s", filename) + } + + // Generate TypeScript content with metadata + content := fmt.Sprintf(`import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk'; + +export const metadata = { + id: '%s', + slug: '%s', + name: '%s', + description: '%s' +}; + +/** + * %s + * %s + */ +export default async function evaluate( + _ctx: EvalContext, + req: EvalRequest, + res: EvalResponse +) { + const { input, output } = req; + + // TODO: Implement your evaluation logic here + // Example: Score the output based on some criteria + + const score = 0.8; // Replace with your actual scoring logic + const metadata = { + reasoning: 'Replace with your evaluation reasoning' + }; + + res.score(score, 
metadata); +} +`, evalID, slug, name, description, name, description) + + // Write file + if err := os.WriteFile(filename, []byte(content), 0644); err != nil { + return fmt.Errorf("failed to write eval file: %w", err) + } + + logger.Debug("Created eval file: %s", filename) + return nil +} + +var evalCreateCmd = &cobra.Command{ + Use: "create [name] [description]", + Short: "Create a new evaluation function", + Aliases: []string{"new"}, + Args: cobra.MaximumNArgs(2), + Run: func(cmd *cobra.Command, args []string) { + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM) + defer cancel() + logger := env.NewLogger(cmd) + theproject := project.EnsureProject(ctx, cmd) + apikey := theproject.Token + urls := util.GetURLs(logger) + apiUrl := urls.API + + var name string + var description string + + if len(args) > 0 { + name = args[0] + } + + if len(args) > 1 { + description = args[1] + } + + name, description = getEvalInfoFlow(logger, name, description) + + // Generate slug from name + isPython := theproject.Project.Bundler.Language == "python" + slug := util.SafeProjectFilename(strings.ToLower(name), isPython) + + var evalID string + var evalErr error + + action := func() { + // Create eval via API + evalID, evalErr = eval.CreateEval(ctx, logger, apiUrl, apikey, theproject.Project.ProjectId, slug, name, description) + if evalErr != nil { + errsystem.New(errsystem.ErrApiRequest, evalErr, errsystem.WithContextMessage("Failed to create eval")).ShowErrorAndExit() + } + + logger.Debug("Created eval with ID: %s", evalID) + + // Generate eval file (always TypeScript) with the real ID from API + if err := generateEvalFile(logger, theproject.Dir, evalID, slug, name, description); err != nil { + errsystem.New(errsystem.ErrOpenFile, err, errsystem.WithContextMessage("Failed to create eval file")).ShowErrorAndExit() + } + } + + tui.ShowSpinner("Creating evaluation ...", action) + + format, _ := cmd.Flags().GetString("format") + if 
format == "json" { + result := map[string]string{ + "id": evalID, + "slug": slug, + "name": name, + "description": description, + } + json.NewEncoder(os.Stdout).Encode(result) + } else { + tui.ShowSuccess("Evaluation created successfully") + fmt.Printf("\nFile created: %s\n", tui.Muted(fmt.Sprintf("src/evals/%s.ts", slug))) + } + }, +} + +func init() { + rootCmd.AddCommand(evalCmd) + evalCmd.AddCommand(evalCreateCmd) + + for _, cmd := range []*cobra.Command{evalCreateCmd} { + cmd.Flags().StringP("dir", "d", "", "The project directory") + cmd.Flags().String("format", "text", "The format to use for the output. Can be either 'text' or 'json'") + } +} diff --git a/error_codes.yaml b/error_codes.yaml index a3cef919..2965d8fc 100644 --- a/error_codes.yaml +++ b/error_codes.yaml @@ -91,3 +91,9 @@ errors: - code: CLI-0029 message: Failed to retrieve devmode endpoint + + - code: CLI-0030 + message: Breaking change migration required + + - code: CLI-0031 + message: SDK update required diff --git a/internal/bundler/bundler.go b/internal/bundler/bundler.go index 32185208..e996ba81 100644 --- a/internal/bundler/bundler.go +++ b/internal/bundler/bundler.go @@ -467,8 +467,8 @@ func bundleJavascript(ctx BundleContext, dir string, outdir string, theproject * shimSourceMap = true } - if err := checkForBreakingChanges(ctx, "javascript", theproject.Bundler.Runtime); err != nil { - return err + if CheckForBreakingChangesWithBanner(ctx, "javascript", theproject.Bundler.Runtime) { + return nil // Breaking change was handled gracefully } if err := possiblyCreateDeclarationFile(ctx.Logger, dir); err != nil { @@ -481,6 +481,8 @@ func bundleJavascript(ctx BundleContext, dir string, outdir string, theproject * var entryPoints []string entryPoints = append(entryPoints, filepath.Join(dir, "index.js")) + + // Add agent entry points files, err := util.ListDir(filepath.Join(dir, theproject.Bundler.AgentConfig.Dir)) if err != nil { errsystem.New(errsystem.ErrListFilesAndDirectories, 
err).ShowErrorAndExit() @@ -493,6 +495,18 @@ func bundleJavascript(ctx BundleContext, dir string, outdir string, theproject * if len(entryPoints) == 0 { return fmt.Errorf("no index.ts files found in %s", theproject.Bundler.AgentConfig.Dir) } + + // Add eval entry points if evals directory exists + if ctx.PromptsEvalsFF { + evalsDir := filepath.Join(dir, "src", "evals") + if util.Exists(evalsDir) { + evalFiles, err := filepath.Glob(filepath.Join(evalsDir, "*.ts")) + if err == nil && len(evalFiles) > 0 { + ctx.Logger.Debug("found %d eval files to bundle", len(evalFiles)) + entryPoints = append(entryPoints, evalFiles...) + } + } + } pkgjson := filepath.Join(dir, "package.json") pkg, err := util.NewOrderedMapFromFile(util.PackageJsonKeysOrder, pkgjson) if err != nil { @@ -710,8 +724,8 @@ func bundlePython(ctx BundleContext, dir string, outdir string, theproject *proj ctx.Logger.Debug("installed dependencies: %s", strings.TrimSpace(string(out))) } - if err := checkForBreakingChanges(ctx, "python", theproject.Bundler.Runtime); err != nil { - return err + if CheckForBreakingChangesWithBanner(ctx, "python", theproject.Bundler.Runtime) { + return nil // Breaking change was handled gracefully } config := map[string]any{ diff --git a/internal/bundler/opentelemetry.go b/internal/bundler/opentelemetry.go new file mode 100644 index 00000000..4803d1ee --- /dev/null +++ b/internal/bundler/opentelemetry.go @@ -0,0 +1,53 @@ +package bundler + +func init() { + // Patch OpenTelemetry SDK Span class to intercept setAttribute calls + // This allows us to capture ai.response.text when it's set on spans + + openTelemetryPatches := patchModule{ + Module: "@opentelemetry/sdk-trace-base", + Classes: map[string]patchClass{ + "Span": { + Methods: map[string]patchAction{ + "setAttribute": { + Before: ` + const key = args[0]; + const value = args[1]; + + + if (key === 'ai.response.text') { + const spanId = this.spanContext().spanId; + const traceId = this.spanContext().traceId; + const sessionId 
= 'sess_' + traceId; + const promptMetadataRaw = this.attributes['@agentuity/prompts']; + + // Create eval job with output if promptMetadata exists + if (globalThis.__evalJobSchedulerInstance && promptMetadataRaw) { + try { + // Parse the JSON string to get the actual prompt metadata array + const promptMetadata = JSON.parse(promptMetadataRaw); + + // Count total evals across all prompt metadata + const totalEvals = promptMetadata.reduce((count, meta) => count + (meta.evals?.length || 0), 0); + + // Create job with output included + const jobWithOutput = { + spanId, + sessionId, + promptMetadata, + output: value, + createdAt: new Date().toISOString() + }; + globalThis.__evalJobSchedulerInstance.pendingJobs.set(spanId, jobWithOutput); + } catch (error) { + } + } + } + `, + }, + }, + }, + }, + } + patches["@opentelemetry/sdk-trace-base"] = openTelemetryPatches +} diff --git a/internal/bundler/patch.go b/internal/bundler/patch.go index 10f6a7e6..7ec8ef83 100644 --- a/internal/bundler/patch.go +++ b/internal/bundler/patch.go @@ -14,9 +14,14 @@ type patchModule struct { Module string Filename string Functions map[string]patchAction + Classes map[string]patchClass Body *patchAction } +type patchClass struct { + Methods map[string]patchAction +} + type patchAction struct { Before string After string @@ -169,6 +174,80 @@ func createPlugin(logger logger.Logger, dir string, shimSourceMap bool) api.Plug suffix.WriteString("}\n") logger.Debug("patched %s -> %s", name, fn) } + + // Handle class method patching + for className, class := range mod.Classes { + for methodName, method := range class.Methods { + logger.Debug("attempting to patch class %s method %s", className, methodName) + + // Look for class definition + classPattern := "class " + className + classIndex := strings.Index(contents, classPattern) + if classIndex == -1 { + logger.Debug("class %s not found", className) + continue + } + logger.Debug("found class %s at index %d", className, classIndex) + + // Look for 
method definition within the class + methodPattern := methodName + "(" + methodIndex := strings.Index(contents[classIndex:], methodPattern) + if methodIndex == -1 { + logger.Debug("method %s not found in class %s", methodName, className) + continue + } + methodIndex += classIndex + logger.Debug("found method %s at index %d", methodName, methodIndex) + + // Find the start of the method + braceIndex := strings.LastIndex(contents[:methodIndex], "{") + if braceIndex == -1 { + logger.Debug("opening brace not found for method %s", methodName) + continue + } + + // Find the end of the method + braceCount := 0 + endIndex := braceIndex + for i := braceIndex; i < len(contents); i++ { + if contents[i] == '{' { + braceCount++ + } else if contents[i] == '}' { + braceCount-- + if braceCount == 0 { + endIndex = i + break + } + } + } + + // Extract method content + methodContent := contents[braceIndex+1 : endIndex] + + // Create patched method + patchedMethod := fmt.Sprintf(`{ + // Store original method + if (!%s.prototype.__agentuity_%s) { + %s.prototype.__agentuity_%s = %s.prototype.%s; + } + + // Create wrapper + %s.prototype.%s = function(...args) { + %s + return this.__agentuity_%s.apply(this, args); + }; + + // Original method implementation + %s + }`, className, methodName, className, methodName, className, methodName, className, methodName, method.Before, methodName, methodContent) + + // Replace the method in the content + contents = contents[:braceIndex] + patchedMethod + contents[endIndex+1:] + + logger.Debug("patched class method %s.%s", className, methodName) + } + } + contents = contents + "\n" + suffix.String() if mod.Body != nil { if mod.Body.Before != "" { diff --git a/internal/bundler/prompts/code_generator.go b/internal/bundler/prompts/code_generator.go index 07590a7a..dfeaa5e8 100644 --- a/internal/bundler/prompts/code_generator.go +++ b/internal/bundler/prompts/code_generator.go @@ -129,10 +129,11 @@ func (cg *CodeGenerator) generateSystemField(prompt Prompt) 
string { slug: %q, compiled, template: %q, - variables: { %s } + variables: { %s }, + evals: %s }); return compiled; - }`, jsdoc, paramStr, prompt.System, paramStr, prompt.Slug, prompt.System, variablesStr) + }`, jsdoc, paramStr, prompt.System, paramStr, prompt.Slug, prompt.System, variablesStr, cg.formatEvalsArray(prompt.Evals)) } else { // Parameters are required return fmt.Sprintf(`system: %s({ %s }) => { @@ -141,10 +142,11 @@ func (cg *CodeGenerator) generateSystemField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: { %s } + variables: { %s }, + evals: %s }); return compiled; - }`, jsdoc, paramStr, prompt.System, paramStr, prompt.Slug, prompt.System, variablesStr) + }`, jsdoc, paramStr, prompt.System, paramStr, prompt.Slug, prompt.System, variablesStr, cg.formatEvalsArray(prompt.Evals)) } } return fmt.Sprintf(`system: %s() => { @@ -153,10 +155,11 @@ func (cg *CodeGenerator) generateSystemField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: {} + variables: {}, + evals: %s }); return compiled; - }`, jsdoc, prompt.System, prompt.Slug, prompt.System) + }`, jsdoc, prompt.System, prompt.Slug, prompt.System, cg.formatEvalsArray(prompt.Evals)) } // generatePromptField generates the prompt field for a prompt @@ -191,10 +194,11 @@ func (cg *CodeGenerator) generatePromptField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: { %s } + variables: { %s }, + evals: %s }); return compiled; - }`, jsdoc, paramStr, prompt.Prompt, paramStr, prompt.Slug, prompt.Prompt, variablesStr) + }`, jsdoc, paramStr, prompt.Prompt, paramStr, prompt.Slug, prompt.Prompt, variablesStr, cg.formatEvalsArray(prompt.Evals)) } else { // Parameters are required return fmt.Sprintf(`prompt: %s({ %s }) => { @@ -203,10 +207,11 @@ func (cg *CodeGenerator) generatePromptField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: { %s } + variables: { %s }, + evals: %s }); return compiled; - }`, jsdoc, paramStr, 
prompt.Prompt, paramStr, prompt.Slug, prompt.Prompt, variablesStr) + }`, jsdoc, paramStr, prompt.Prompt, paramStr, prompt.Slug, prompt.Prompt, variablesStr, cg.formatEvalsArray(prompt.Evals)) } } return fmt.Sprintf(`prompt: %s() => { @@ -215,10 +220,11 @@ func (cg *CodeGenerator) generatePromptField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: {} + variables: {}, + evals: %s }); return compiled; - }`, jsdoc, prompt.Prompt, prompt.Slug, prompt.Prompt) + }`, jsdoc, prompt.Prompt, prompt.Slug, prompt.Prompt, cg.formatEvalsArray(prompt.Evals)) } // generateVariablesField generates the variables field for a prompt @@ -818,6 +824,19 @@ func (cg *CodeGenerator) generatePromptJSDocForType(prompt Prompt) string { return jsdoc.String() } +// formatEvalsArray formats a Go string slice as a JavaScript array +func (cg *CodeGenerator) formatEvalsArray(evals []string) string { + if len(evals) == 0 { + return "[]" + } + + var quoted []string + for _, eval := range evals { + quoted = append(quoted, fmt.Sprintf("%q", eval)) + } + return fmt.Sprintf("[%s]", strings.Join(quoted, ", ")) +} + // generateTypedefJSDoc generates JSDoc typedef for the prompt type func (cg *CodeGenerator) generateTypedefJSDoc(prompt Prompt) string { var jsdoc strings.Builder diff --git a/internal/bundler/upgrade.go b/internal/bundler/upgrade.go index b4f21082..b6c1246d 100644 --- a/internal/bundler/upgrade.go +++ b/internal/bundler/upgrade.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/Masterminds/semver" + "github.com/agentuity/cli/internal/errsystem" "github.com/agentuity/cli/internal/util" "github.com/agentuity/go-common/tui" "github.com/pelletier/go-toml/v2" @@ -22,6 +23,18 @@ type breakingChange struct { } var breakingChanges = []breakingChange{ + { + Runtime: "bunjs", + Version: "<0.0.157", + Title: "đŸšĢ JS SDK Update Required đŸšĢ", + Message: "Please run `bun update @agentuity/sdk --latest` and then re-run this command again. 
There are no code changes required on your end.", + }, + { + Runtime: "nodejs", + Version: "<0.0.157", + Title: "đŸšĢ JS SDK Update Required đŸšĢ", + Message: "Please run `npm upgrade @agentuity/sdk` and then re-run this command again. There are no code changes required on your end.", + }, { Runtime: "bunjs", Version: "<0.0.154", @@ -240,32 +253,48 @@ func checkForBreakingChanges(ctx BundleContext, language string, runtime string) return nil } if c.Check(currentVersion) { + // Always show banner if we have a TTY + if tui.HasTTY { + tui.ShowBanner(change.Title, change.Message, true) + } + if change.Callback != nil { - var proceed bool - if tui.HasTTY && !ctx.DevMode { - tui.ShowBanner(change.Title, change.Message, true) - } else { - return fmt.Errorf("migration required: %s. %s", change.Title, change.Message) - } - proceed = tui.AskForConfirm("Would you like to migrate your project now?", 'y') == 'y' - if proceed { - if err := change.Callback(ctx); err != nil { - return err + if !ctx.DevMode { + proceed := tui.AskForConfirm("Would you like to migrate your project now?", 'y') == 'y' + if proceed { + if err := change.Callback(ctx); err != nil { + return err + } + return errsystem.New(errsystem.ErrBreakingChangeMigrationRequired, fmt.Errorf("migration performed, please re-run the command")) + } else { + return errsystem.New(errsystem.ErrBreakingChangeMigrationRequired, fmt.Errorf("migration required")) } - return fmt.Errorf("migration performed, please re-run the command") } else { - return fmt.Errorf("migration required") + // In dev mode, return specific error type + return errsystem.New(errsystem.ErrSdkUpdateRequired, fmt.Errorf("%s", change.Message)) } } else { - if tui.HasTTY && !ctx.DevMode { - tui.ShowBanner(change.Title, change.Message, true) - return fmt.Errorf("breaking change migration required") - } else { - return fmt.Errorf("%s", change.Message) - } + // For breaking changes without callbacks, return specific error type + return 
errsystem.New(errsystem.ErrSdkUpdateRequired, fmt.Errorf("breaking change migration required")) } } } return nil } + +// CheckForBreakingChangesWithBanner is a wrapper that handles breaking changes gracefully +// Returns true if a breaking change was detected and handled, false otherwise +func CheckForBreakingChangesWithBanner(ctx BundleContext, language string, runtime string) bool { + err := checkForBreakingChanges(ctx, language, runtime) + if err != nil { + // Check if this is a breaking change error that we should handle gracefully + if errsystem.IsBreakingChangeError(err) { + // Don't show the error code plane, just exit cleanly + os.Exit(1) + } + // For other errors, let them propagate + return false + } + return false +} diff --git a/internal/bundler/vercel_ai.go b/internal/bundler/vercel_ai.go index 97d19f32..4f3b206b 100644 --- a/internal/bundler/vercel_ai.go +++ b/internal/bundler/vercel_ai.go @@ -31,97 +31,117 @@ func createVercelAIProviderPatch(module string, createFn string, envkey string, } func init() { - // Generate PatchPortal integration patch with hashing and telemetry - var patchPortalPatch = ` - const { PatchPortal } = await import('@agentuity/sdk'); - const { internal } = await import('@agentuity/sdk'); - const crypto = await import('node:crypto'); - internal.debug('🔧 generateText patch executing...'); - const patchPortal = await PatchPortal.getInstance(); - internal.debug('✅ PatchPortal instance created'); - let compiledSystemHash = ''; - let compiledPromptHash = ''; - const agentuityPromptMetadata = []; - patchPortal.printState(); - - if (_args[0]?.system) { - // Extract prompt from arguments - const systemString = _args[0]?.system; - internal.debug('📝 Extracted system:', systemString.substring(0, 100) + '...'); - compiledSystemHash = crypto.createHash('sha256').update(systemString).digest('hex'); - internal.debug('🔑 SYSTEM Generated compiled hash:', compiledSystemHash); + var vercelTelemetryPatch = generateJSArgsPatch(0, ` `+"") - // Get 
patch data using the same key format as processPromptMetadata - const key = 'prompt:' + compiledPromptHash; - internal.debug('🔍 Looking for key:', key); - const patchData = await patchPortal.get(key); - if (patchData) { - internal.debug('🔍 Retrieved patch data:', patchData); - agentuityPromptMetadata.push(...patchData); - } else { - internal.debug('â„šī¸ No patch data found for compiled hash:', compiledSystemHash); - } - } - - - if (_args[0]?.prompt) { - const prompt = _args[0]?.prompt || _args[0]?.messages || ''; - const promptString = typeof prompt === 'string' ? prompt : JSON.stringify(prompt); - internal.debug('📝 Extracted prompt:', promptString.substring(0, 100) + '...'); - // Generate hash for the compiled prompt (same as processPromptMetadata uses) - compiledPromptHash = crypto.createHash('sha256').update(promptString).digest('hex'); - internal.debug('🔑 PROMPT Generated compiled hash:', compiledPromptHash); - - // Get patch data using the same key format as processPromptMetadata - const key = 'prompt:' + compiledPromptHash; - internal.debug('🔍 Looking for key:', key); - const patchData = await patchPortal.get(key); - if (patchData) { - internal.debug('🔍 Retrieved patch data:', patchData); - agentuityPromptMetadata.push(...patchData); - } else { - internal.debug('â„šī¸ No patch data found for compiled hash:', compiledPromptHash); - } - } - - if (agentuityPromptMetadata.length > 0) { - // Prepare telemetry metadata with PatchPortal data - const opts = {...(_args[0] ?? 
{}) }; - const userMetadata = opts?.experimental_telemetry?.metadata || {}; - opts.experimental_telemetry = { isEnabled: true, metadata: { ...userMetadata, 'agentuity.prompts': JSON.stringify(agentuityPromptMetadata) } }; - _args[0] = opts; - internal.debug('✅ Patch metadata attached:', agentuityPromptMetadata); - } else { - internal.debug('â„šī¸ No patch data found for this invocation'); - } - ` - - var vercelTelemetryPatch = generateJSArgsPatch(0, ``) + var enableTelemetryPatch = ` + // Enable experimental telemetry to capture response text + const opts = {...(_args[0] ?? {}) }; + opts.experimental_telemetry = { isEnabled: true }; + _args[0] = opts; + ` vercelAIPatches := patchModule{ Module: "ai", Functions: map[string]patchAction{ "generateText": { - Before: vercelTelemetryPatch + "\n" + patchPortalPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "streamText": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "generateObject": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "streamObject": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "embed": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "embedMany": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, + }, + "recordSpan": { + Before: ` + if (_args[0]?.name && ['ai.generateText', 'ai.generateObject', 'ai.streamText', 'ai.streamObject'].includes(_args[0].name)) { + // Add our custom attributes to the span configuration + const originalAttributes = _args[0].attributes || {}; + + // Extract system and prompt from the span attributes + let systemString = ''; + let promptString = ''; + + if (_args[0]?.attributes) { + // Try to extract from span attributes + systemString = _args[0].attributes['ai.system'] || _args[0].attributes['system'] || ''; + promptString = 
_args[0].attributes['ai.prompt'] || _args[0].attributes['prompt'] || ''; + + // If prompt is a JSON object, extract the individual fields + if (typeof promptString === 'string' && promptString.startsWith('{')) { + try { + const promptObj = JSON.parse(promptString); + systemString = promptObj.system || systemString; + promptString = promptObj.prompt || promptString; + } catch (e) { + // If parsing fails, keep the original string + } + } + } + + // Generate hashes using SDK utility + const { hashSync } = require('@agentuity/sdk'); + let compiledSystemHash = ''; + let compiledPromptHash = ''; + + if (systemString) { + compiledSystemHash = hashSync(systemString); + + } + + if (promptString) { + compiledPromptHash = hashSync(promptString); + + } + + // Access PatchPortal state synchronously + const agentuityPromptMetadata = []; + + if (globalThis.__patchPortalInstance) { + if (systemString) { + const key = 'prompt:' + compiledSystemHash; + const patchData = globalThis.__patchPortalInstance.state[key]; + if (patchData) { + agentuityPromptMetadata.push(...patchData); + } + } + + if (promptString) { + const key = 'prompt:' + compiledPromptHash; + const patchData = globalThis.__patchPortalInstance.state[key]; + if (patchData) { + agentuityPromptMetadata.push(...patchData); + } + } + } + + + + + // Add attributes to span configuration + if (agentuityPromptMetadata.length > 0) { + _args[0].attributes = { + ...originalAttributes, + '@agentuity/prompts': JSON.stringify(agentuityPromptMetadata) + }; + } + + } + `, }, }, } + patches["@vercel/ai"] = vercelAIPatches // register all the providers that we support in our Agentuity AI Gateway diff --git a/internal/errsystem/errorcodes.go b/internal/errsystem/errorcodes.go index bbc95e95..be7352ed 100644 --- a/internal/errsystem/errorcodes.go +++ b/internal/errsystem/errorcodes.go @@ -118,4 +118,12 @@ var ( Code: "CLI-0029", Message: "Failed to retrieve devmode endpoint", } + ErrBreakingChangeMigrationRequired = errorType{ + Code: 
"CLI-0030", + Message: "Breaking change migration required", + } + ErrSdkUpdateRequired = errorType{ + Code: "CLI-0031", + Message: "SDK update required", + } ) diff --git a/internal/errsystem/errsystem.go b/internal/errsystem/errsystem.go index 9693d860..9620704f 100644 --- a/internal/errsystem/errsystem.go +++ b/internal/errsystem/errsystem.go @@ -89,6 +89,26 @@ func WithUserId(userId string) option { } } +// IsBreakingChangeError checks if an error is a breaking change error that should be handled gracefully +func IsBreakingChangeError(err error) bool { + if err == nil { + return false + } + + // Check if it's an errSystem error with breaking change codes + if es, ok := err.(*errSystem); ok { + return es.code.Code == "CLI-0030" || es.code.Code == "CLI-0031" + } + + // Check if it's wrapped in an errSystem + var es *errSystem + if errors.As(err, &es) && es != nil { + return es.code.Code == "CLI-0030" || es.code.Code == "CLI-0031" + } + + return false +} + // WithProjectId adds the project ID to the error attributes. 
func WithProjectId(projectId string) option { return func(e *errSystem) { diff --git a/internal/eval/eval.go b/internal/eval/eval.go new file mode 100644 index 00000000..722caf79 --- /dev/null +++ b/internal/eval/eval.go @@ -0,0 +1,103 @@ +package eval + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/agentuity/cli/internal/util" + "github.com/agentuity/go-common/logger" +) + +type Response[T any] struct { + Success bool `json:"success"` + Message string `json:"message"` + Data T `json:"data"` +} + +type EvalData struct { + ID string `json:"id"` + Slug string `json:"slug"` + Name string `json:"name"` +} + +type EvalMetadata struct { + ID string `json:"id"` + Slug string `json:"slug"` + Name string `json:"name"` + Description string `json:"description"` +} + +// LoadEvalMetadataMap scans the evals directory and builds a map of slug -> eval ID +func LoadEvalMetadataMap(logger logger.Logger, projectDir string) (map[string]string, error) { + evalsDir := filepath.Join(projectDir, "src", "evals") + + // Check if evals directory exists + if !util.Exists(evalsDir) { + logger.Debug("evals directory not found: %s", evalsDir) + return make(map[string]string), nil + } + + files, err := os.ReadDir(evalsDir) + if err != nil { + return nil, fmt.Errorf("failed to read evals directory: %w", err) + } + + slugToIDMap := make(map[string]string) + + for _, file := range files { + ext := filepath.Ext(file.Name()) + if file.IsDir() || (ext != ".ts" && ext != ".js") { + continue + } + + // Skip index files + if file.Name() == "index.ts" || file.Name() == "index.js" { + continue + } + + filePath := filepath.Join(evalsDir, file.Name()) + content, err := os.ReadFile(filePath) + if err != nil { + logger.Warn("failed to read eval file %s: %v", file.Name(), err) + continue + } + + // Parse metadata from file content + metadata, err := ParseEvalMetadata(string(content)) + if err != nil { + logger.Warn("failed to parse metadata from %s: %v", file.Name(), err) + continue + } 
+ + if metadata.Slug != "" && metadata.ID != "" { + slugToIDMap[metadata.Slug] = metadata.ID + logger.Debug("mapped eval slug '%s' to ID '%s'", metadata.Slug, metadata.ID) + } + } + + logger.Debug("loaded %d eval mappings", len(slugToIDMap)) + return slugToIDMap, nil +} + +// CreateEval creates a new evaluation function in the project +func CreateEval(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, slug string, name string, description string) (string, error) { + client := util.NewAPIClient(ctx, logger, baseUrl, token) + + payload := map[string]any{ + "projectId": projectId, + "slug": slug, + "name": name, + "description": description, + } + + var resp Response[EvalData] + if err := client.Do("POST", "/cli/evals", payload, &resp); err != nil { + return "", fmt.Errorf("error creating eval: %s", err) + } + if !resp.Success { + return "", fmt.Errorf("error creating eval: %s", resp.Message) + } + return resp.Data.ID, nil +} diff --git a/internal/eval/metadata_parser.go b/internal/eval/metadata_parser.go new file mode 100644 index 00000000..b290d5f5 --- /dev/null +++ b/internal/eval/metadata_parser.go @@ -0,0 +1,190 @@ +package eval + +import ( + "encoding/json" + "fmt" + "regexp" +) + +// ParseEvalMetadata extracts metadata from TypeScript/JavaScript eval file content +func ParseEvalMetadata(content string) (*EvalMetadata, error) { + // Find the metadata export pattern with optional type annotations + metadataRegex := regexp.MustCompile(`export\s+const\s+metadata(?:\s*:[^=]+)?\s*=\s*\{`) + metadataStart := metadataRegex.FindStringIndex(content) + if metadataStart == nil { + return nil, fmt.Errorf("no metadata export found") + } + + // Find the opening brace position + braceStart := metadataStart[1] - 1 // Position of the opening brace + if braceStart >= len(content) || content[braceStart] != '{' { + return nil, fmt.Errorf("invalid metadata format: opening brace not found") + } + + // Find the matching closing brace using 
string-aware parsing + braceEnd, err := findMatchingBrace(content, braceStart) + if err != nil { + return nil, fmt.Errorf("no matching closing brace found: %w", err) + } + + // Extract the object content + objectContent := content[braceStart : braceEnd+1] + + // Convert single quotes to double quotes for JSON compatibility + jsonStr := normalizeToJSON(objectContent) + + // Parse the JSON + var metadata EvalMetadata + if err := json.Unmarshal([]byte(jsonStr), &metadata); err != nil { + return nil, fmt.Errorf("expect valid JSON object after `export const metadata =`: %w", err) + } + + return &metadata, nil +} + +// findMatchingBrace finds the matching closing brace using string-aware parsing +func findMatchingBrace(content string, start int) (int, error) { + braceCount := 0 + inString := false + escapeNext := false + + for i := start; i < len(content); i++ { + char := content[i] + + if escapeNext { + escapeNext = false + continue + } + + if char == '\\' { + escapeNext = true + continue + } + + if char == '"' || char == '\'' { + inString = !inString + continue + } + + if !inString { + if char == '{' { + braceCount++ + } else if char == '}' { + braceCount-- + if braceCount == 0 { + return i, nil + } + } + } + } + + return -1, fmt.Errorf("no matching closing brace found") +} + +// normalizeToJSON converts JavaScript object syntax to valid JSON +func normalizeToJSON(content string) string { + result := make([]rune, 0, len(content)) + inString := false + escapeNext := false + + for _, char := range content { + if escapeNext { + escapeNext = false + result = append(result, char) + continue + } + + if char == '\\' { + escapeNext = true + result = append(result, char) + continue + } + + if char == '"' || char == '\'' { + if !inString { + // Opening quote - always use double quote + result = append(result, '"') + inString = true + } else { + // Closing quote - always use double quote + result = append(result, '"') + inString = false + } + continue + } + + if !inString && char 
== '\'' { + // Single quote outside string - convert to double quote + result = append(result, '"') + inString = true + continue + } + + result = append(result, char) + } + + // Now quote unquoted keys, but only outside of strings + jsonStr := string(result) + jsonStr = quoteUnquotedKeys(jsonStr) + + return jsonStr +} + +// quoteUnquotedKeys quotes unquoted keys in JSON, but only outside of strings +func quoteUnquotedKeys(content string) string { + result := make([]rune, 0, len(content)) + inString := false + escapeNext := false + + for i, char := range content { + if escapeNext { + escapeNext = false + result = append(result, char) + continue + } + + if char == '\\' { + escapeNext = true + result = append(result, char) + continue + } + + if char == '"' { + inString = !inString + result = append(result, char) + continue + } + + if !inString && char == ':' { + // Look backwards to find the start of the key + keyStart := i + for j := i - 1; j >= 0; j-- { + if content[j] == ' ' || content[j] == '\t' || content[j] == '\n' { + keyStart = j + 1 + break + } + if content[j] == ',' || content[j] == '{' { + keyStart = j + 1 + break + } + } + + // Check if the key is already quoted + if keyStart < i && content[keyStart] != '"' { + // Key is not quoted, add quotes around the key we already added + // Remove the key from result and add it quoted + keyLength := i - keyStart + result = result[:len(result)-keyLength] + result = append(result, '"') + result = append(result, []rune(content[keyStart:i])...) 
package eval

import (
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestParseEvalMetadata exercises ParseEvalMetadata against realistic eval
// source files: happy paths, TypeScript type annotations, tricky string
// contents (colons, URLs, braces, escaped quotes) and malformed input.
// Each case feeds a full TS/JS file as a raw string and checks the four
// metadata fields (or that an error is returned).
func TestParseEvalMetadata(t *testing.T) {
	tests := []struct {
		name     string        // subtest name
		content  string        // full eval source handed to the parser
		expected *EvalMetadata // expected parse result (nil when wantErr)
		wantErr  bool          // true when parsing must fail
	}{
		{
			// Typical generated eval file: plain metadata object, plus a
			// same-named local `metadata` inside the function body that must
			// NOT be picked up (only `export const metadata` matches).
			name: "coherence-check.ts",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_3b74dc768cbdec883d857c011bb85925",
  slug: "coherence-check",
  name: "Coherence Check",
  description: "Evaluates if the response is coherent and logically consistent"
};

/**
 * Coherence Check
 * Evaluates if the response is coherent and logically consistent
 */
export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  const { input, output } = req;

  // TODO: Implement your evaluation logic here
  // Example: Score the output based on some criteria

  const score = 0.8; // Replace with your actual scoring logic
  const metadata = {
    reasoning: 'Replace with your evaluation reasoning'
  };

  res.score(score, metadata);
}`,
			expected: &EvalMetadata{
				ID:          "eval_3b74dc768cbdec883d857c011bb85925",
				Slug:        "coherence-check",
				Name:        "Coherence Check",
				Description: "Evaluates if the response is coherent and logically consistent",
			},
			wantErr: false,
		},
		{
			// Realistic file with a long prompt string (containing escaped
			// quotes and colons) AFTER the metadata object.
			name: "politeness.ts",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';
import { groq } from '@ai-sdk/groq';
import { generateObject } from 'ai';
import { z } from 'zod';

export const metadata = {
  id: "eval_politeness_123456789",
  slug: "politeness",
  name: "Politeness Evaluation",
  description: "Evaluates the politeness of AI responses on a scale of 0 to 1"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  const { input, output } = req;
  const prompt = "You are a human judge evaluating the politeness of an AI response. Please rate the politeness of the following response on a scale of 0 to 1, where: 0.0-0.3: Not polite (rude, dismissive, or inappropriate), 0.4-0.6: Moderately polite (neutral tone, some courtesy), 0.7-0.9: Polite (courteous, respectful, helpful tone), 1.0: Very polite (exceptionally courteous, warm, and respectful). Consider these factors: Greeting and closing politeness, Use of please, thank you, and other courtesy words, Overall tone and respectfulness, Helpful and considerate language, Professional yet friendly demeanor. User Input: \"" + input + "\" AI Response: \"" + output + "\" Please provide a JSON response with a score (0-1) and reasoning.";

  const result = await generateObject({
    model: groq('llama-3.1-8b-instant'),
    prompt,
    schema: z.object({
      score: z.number().min(0).max(1),
      reasoning: z.string(),
    }),
  });

  const { score, reasoning } = result.object;
  res.score(score, { reasoning: reasoning || 'No reasoning provided' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_politeness_123456789",
				Slug:        "politeness",
				Name:        "Politeness Evaluation",
				Description: "Evaluates the politeness of AI responses on a scale of 0 to 1",
			},
			wantErr: false,
		},
		{
			// No `export const metadata` at all -> parser must error.
			name: "missing metadata",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: nil,
			wantErr:  true,
		},
		{
			// A JS comment inside the object makes it invalid JSON after
			// normalization -> parser must error.
			name: "malformed metadata",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "test",
  slug: "test",
  name: "Test",
  description: "Test description"
  // Missing closing brace
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: nil,
			wantErr:  true,
		},
		{
			// Extra nested objects are valid JSON but ignored by the
			// EvalMetadata struct (unknown fields are dropped).
			name: "nested objects in metadata",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_nested_123",
  slug: "nested-test",
  name: "Nested Test",
  description: "Test with nested objects",
  config: {
    threshold: 0.5,
    enabled: true
  }
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_nested_123",
				Slug:        "nested-test",
				Name:        "Nested Test",
				Description: "Test with nested objects",
			},
			wantErr: false,
		},
		{
			// Simple TS type annotation between `metadata` and `=`.
			name: "with TypeScript type annotation",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata: { id: string; slug: string; name: string; description: string } = {
  id: "eval_typed_123",
  slug: "typed-test",
  name: "Typed Test",
  description: "Test with TypeScript type annotation"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_typed_123",
				Slug:        "typed-test",
				Name:        "Typed Test",
				Description: "Test with TypeScript type annotation",
			},
			wantErr: false,
		},
		{
			// Colons inside URLs must not be mistaken for key separators.
			name: "with URLs in description",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_url_123",
  slug: "url-test",
  name: "URL Test",
  description: "Test with URLs: https://example.com/api and http://test.org:8080/path"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_url_123",
				Slug:        "url-test",
				Name:        "URL Test",
				Description: "Test with URLs: https://example.com/api and http://test.org:8080/path",
			},
			wantErr: false,
		},
		{
			// General colons inside string values (times, ratios, protocols).
			name: "with colons in string values",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_colon_123",
  slug: "colon-test",
  name: "Colon Test",
  description: "Test with colons: time is 12:30:45, ratio is 3:1, and protocol is https:"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_colon_123",
				Slug:        "colon-test",
				Name:        "Colon Test",
				Description: "Test with colons: time is 12:30:45, ratio is 3:1, and protocol is https:",
			},
			wantErr: false,
		},
		{
			// Backslash-escaped quotes inside string values must survive
			// brace matching and normalization.
			name: "with escaped quotes in strings",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_escape_123",
  slug: "escape-test",
  name: "Escape Test",
  description: "Test with escaped quotes: \\\"Hello world\\\" and \\\"single quotes\\\""
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_escape_123",
				Slug:        "escape-test",
				Name:        "Escape Test",
				Description: "Test with escaped quotes: \\\"Hello world\\\" and \\\"single quotes\\\"",
			},
			wantErr: false,
		},
		{
			// Nested object values containing colons inside their strings.
			name: "with nested objects containing strings with colons",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_nested_colon_123",
  slug: "nested-colon-test",
  name: "Nested Colon Test",
  description: "Test with nested objects containing colons",
  config: {
    url: "https://api.example.com:8080/v1",
    time: "12:30:45",
    ratio: "3:1"
  }
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_nested_colon_123",
				Slug:        "nested-colon-test",
				Name:        "Nested Colon Test",
				Description: "Test with nested objects containing colons",
			},
			wantErr: false,
		},
		{
			// Braces inside string values must not confuse brace matching.
			name: "with braces in strings",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_brace_123",
  slug: "brace-test",
  name: "Brace Test",
  description: "Test with braces in strings: {nested} and {another}"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_brace_123",
				Slug:        "brace-test",
				Name:        "Brace Test",
				Description: "Test with braces in strings: {nested} and {another}",
			},
			wantErr: false,
		},
		{
			// A bare (unquoted) value is not valid JSON -> parser must error.
			name: "invalid JSON format",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_invalid_123",
  slug: "invalid-test",
  name: "Invalid Test",
  description: "Test with invalid JSON format",
  invalid: unquoted_value
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: nil,
			wantErr:  true,
		},
		{
			// TS annotation that itself contains braces; the regex's
			// annotation group must skip past it to the real object.
			name: "complex TypeScript type annotation",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata: Record & { id: string; slug: string } = {
  id: "eval_complex_123",
  slug: "complex-test",
  name: "Complex Test",
  description: "Test with complex TypeScript type annotation"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_complex_123",
				Slug:        "complex-test",
				Name:        "Complex Test",
				Description: "Test with complex TypeScript type annotation",
			},
			wantErr: false,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			result, err := ParseEvalMetadata(test.content)

			if test.wantErr {
				require.Error(t, err)
				assert.Nil(t, result)
			} else {
				require.NoError(t, err)
				require.NotNil(t, result)
				// Compare field-by-field for clearer failure messages.
				assert.Equal(t, test.expected.ID, result.ID)
				assert.Equal(t, test.expected.Slug, result.Slug)
				assert.Equal(t, test.expected.Name, result.Name)
				assert.Equal(t, test.expected.Description, result.Description)
			}
		})
	}
}