diff --git a/cmd/cloud.go b/cmd/cloud.go index 91e119e6..4588a2c7 100644 --- a/cmd/cloud.go +++ b/cmd/cloud.go @@ -18,6 +18,7 @@ import ( "syscall" "time" + "github.com/agentuity/cli/internal/bundler/prompts" "github.com/agentuity/cli/internal/deployer" "github.com/agentuity/cli/internal/envutil" "github.com/agentuity/cli/internal/errsystem" @@ -73,6 +74,21 @@ type startAgent struct { Remove bool `json:"remove,omitempty"` } +type PromptVariable struct { + Name string `json:"name"` + Required bool `json:"required,omitempty"` + Default string `json:"default,omitempty"` +} + +type DeployPrompt struct { + Slug string `json:"slug"` + Name string `json:"name"` + System *string `json:"system,omitempty"` + Prompt *string `json:"prompt,omitempty"` + Variables []PromptVariable `json:"variables,omitempty"` + Description *string `json:"description,omitempty"` +} + type startRequest struct { Agents []startAgent `json:"agents"` Resources *Resources `json:"resources,omitempty"` @@ -81,6 +97,7 @@ type startRequest struct { TagDescription string `json:"description,omitempty"` TagMessage string `json:"message,omitempty"` UsePrivateKey bool `json:"usePrivateKey,omitempty"` + Prompts []DeployPrompt `json:"prompts,omitempty"` } func ShowNewProjectImport(ctx context.Context, logger logger.Logger, cmd *cobra.Command, apiUrl string, apikey string, projectId string, project *project.Project, dir string, isImport bool) { @@ -425,6 +442,18 @@ Examples: startRequest.TagMessage = message startRequest.UsePrivateKey = true + // Collect prompts data if prompts feature flag is enabled + promptsEvalsFF := CheckFeatureFlag(cmd, FeaturePromptsEvals, "enable-prompts-evals") + if promptsEvalsFF { + prompts, err := collectPromptsData(logger, dir) + if err != nil { + logger.Debug("Failed to collect prompts data: %v", err) + } else { + startRequest.Prompts = prompts + logger.Debug("Collected %d prompts for deployment", len(prompts)) + } + } + // Start deployment if err := client.Do("PUT", 
fmt.Sprintf("/cli/deploy/start/%s%s", theproject.ProjectId, deploymentId), startRequest, &startResponse); err != nil { errsystem.New(errsystem.ErrDeployProject, err, @@ -1028,6 +1057,98 @@ Examples: }, } +// collectPromptsData collects prompts data from the project directory +func collectPromptsData(logger logger.Logger, dir string) ([]DeployPrompt, error) { + // Find all prompt files + promptFiles := prompts.FindAllPromptFiles(dir) + if len(promptFiles) == 0 { + logger.Debug("No prompt files found") + return nil, nil + } + + logger.Debug("Found %d prompt files: %v", len(promptFiles), promptFiles) + + var allPrompts []DeployPrompt + + // Parse all prompt files and combine prompts + for _, promptFile := range promptFiles { + data, err := os.ReadFile(promptFile) + if err != nil { + return nil, fmt.Errorf("failed to read %s: %w", promptFile, err) + } + + promptsList, err := prompts.ParsePromptsYAML(data) + if err != nil { + return nil, fmt.Errorf("failed to parse %s: %w", promptFile, err) + } + + // Convert to DeployPrompt format + for _, prompt := range promptsList { + deployPrompt := DeployPrompt{ + Slug: prompt.Slug, + Name: prompt.Name, + Description: &prompt.Description, + } + + // Convert system prompt + if prompt.System != "" { + deployPrompt.System = &prompt.System + } + + // Convert user prompt + if prompt.Prompt != "" { + deployPrompt.Prompt = &prompt.Prompt + } + + // Convert variables from templates + var variables []PromptVariable + if prompt.SystemTemplate.Variables != nil { + for _, v := range prompt.SystemTemplate.Variables { + variables = append(variables, PromptVariable{ + Name: v.Name, + Required: v.IsRequired, + Default: v.DefaultValue, + }) + } + } + if prompt.PromptTemplate.Variables != nil { + for _, v := range prompt.PromptTemplate.Variables { + // Check if variable already exists + found := false + for i, existing := range variables { + if existing.Name == v.Name { + // Update existing variable if it's more restrictive + if v.IsRequired && 
!existing.Required { + variables[i].Required = true + } + if v.DefaultValue != "" && existing.Default == "" { + variables[i].Default = v.DefaultValue + } + found = true + break + } + } + if !found { + variables = append(variables, PromptVariable{ + Name: v.Name, + Required: v.IsRequired, + Default: v.DefaultValue, + }) + } + } + } + + deployPrompt.Variables = variables + allPrompts = append(allPrompts, deployPrompt) + } + + logger.Debug("Parsed %d prompts from %s", len(promptsList), promptFile) + } + + logger.Debug("Total prompts collected: %d", len(allPrompts)) + return allPrompts, nil +} + func init() { rootCmd.AddCommand(cloudCmd) rootCmd.AddCommand(cloudDeployCmd) diff --git a/cmd/dev.go b/cmd/dev.go index 37d171cb..3db71bfd 100644 --- a/cmd/dev.go +++ b/cmd/dev.go @@ -49,7 +49,6 @@ Examples: apiUrl := urls.API appUrl := urls.App gravityUrl := urls.Gravity - noBuild, _ := cmd.Flags().GetBool("no-build") promptsEvalsFF := CheckFeatureFlag(cmd, FeaturePromptsEvals, "enable-prompts-evals") @@ -159,7 +158,7 @@ Examples: if errors.Is(err, context.Canceled) { return } - log.Fatal("failed to start devmode connection: %s", err) + log.Error("failed to start live dev connection: %s", err) return } } diff --git a/cmd/eval.go b/cmd/eval.go new file mode 100644 index 00000000..cff16dc1 --- /dev/null +++ b/cmd/eval.go @@ -0,0 +1,186 @@ +package cmd + +import ( + "context" + "encoding/json" + "fmt" + "os" + "os/signal" + "path/filepath" + "strings" + "syscall" + + "github.com/agentuity/cli/internal/errsystem" + "github.com/agentuity/cli/internal/eval" + "github.com/agentuity/cli/internal/project" + "github.com/agentuity/cli/internal/util" + "github.com/agentuity/go-common/env" + "github.com/agentuity/go-common/logger" + "github.com/agentuity/go-common/tui" + "github.com/spf13/cobra" +) + +var evalCmd = &cobra.Command{ + Use: "eval", + Short: "Evaluation related commands", + Run: func(cmd *cobra.Command, args []string) { + cmd.Help() + }, +} + +func getEvalInfoFlow(logger 
logger.Logger, name string, description string) (string, string) { + if name == "" { + if !tui.HasTTY { + logger.Fatal("No TTY detected, please specify an eval name from the command line") + } + name = tui.InputWithValidation(logger, "What should we name the evaluation?", "The name of the eval helps identify its purpose", 255, func(name string) error { + if name == "" { + return fmt.Errorf("Eval name cannot be empty") + } + return nil + }) + } + + if description == "" { + description = tui.Input(logger, "How should we describe what the "+name+" eval does?", "The description of the eval is optional but helpful for understanding its purpose") + } + + return name, description +} + +func generateEvalFile(logger logger.Logger, projectDir string, evalID string, slug string, name string, description string) error { + // Always generate TypeScript files for evals + ext := ".ts" + + // Create evals directory if it doesn't exist + evalsDir := filepath.Join(projectDir, "src", "evals") + if err := os.MkdirAll(evalsDir, 0755); err != nil { + return fmt.Errorf("failed to create evals directory: %w", err) + } + + // Generate file path + filename := filepath.Join(evalsDir, slug+ext) + + // Check if file already exists + if util.Exists(filename) { + return fmt.Errorf("eval file already exists: %s", filename) + } + + // Generate TypeScript content with metadata + content := fmt.Sprintf(`import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk'; + +export const metadata = { + id: '%s', + slug: '%s', + name: '%s', + description: '%s' +}; + +/** + * %s + * %s + */ +export default async function evaluate( + _ctx: EvalContext, + req: EvalRequest, + res: EvalResponse +) { + const { input, output } = req; + + // TODO: Implement your evaluation logic here + // Example: Score the output based on some criteria + + const score = 0.8; // Replace with your actual scoring logic + const metadata = { + reasoning: 'Replace with your evaluation reasoning' + }; + + res.score(score, 
metadata); +} +`, evalID, slug, name, description, name, description) + + // Write file + if err := os.WriteFile(filename, []byte(content), 0644); err != nil { + return fmt.Errorf("failed to write eval file: %w", err) + } + + logger.Debug("Created eval file: %s", filename) + return nil +} + +var evalCreateCmd = &cobra.Command{ + Use: "create [name] [description]", + Short: "Create a new evaluation function", + Aliases: []string{"new"}, + Args: cobra.MaximumNArgs(2), + Run: func(cmd *cobra.Command, args []string) { + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGINT, syscall.SIGTERM) + defer cancel() + logger := env.NewLogger(cmd) + theproject := project.EnsureProject(ctx, cmd) + apikey := theproject.Token + urls := util.GetURLs(logger) + apiUrl := urls.API + + var name string + var description string + + if len(args) > 0 { + name = args[0] + } + + if len(args) > 1 { + description = args[1] + } + + name, description = getEvalInfoFlow(logger, name, description) + + // Generate slug from name + isPython := theproject.Project.Bundler.Language == "python" + slug := util.SafeProjectFilename(strings.ToLower(name), isPython) + + var evalID string + var evalErr error + + action := func() { + // Create eval via API + evalID, evalErr = eval.CreateEval(ctx, logger, apiUrl, apikey, theproject.Project.ProjectId, slug, name, description) + if evalErr != nil { + errsystem.New(errsystem.ErrApiRequest, evalErr, errsystem.WithContextMessage("Failed to create eval")).ShowErrorAndExit() + } + + logger.Debug("Created eval with ID: %s", evalID) + + // Generate eval file (always TypeScript) with the real ID from API + if err := generateEvalFile(logger, theproject.Dir, evalID, slug, name, description); err != nil { + errsystem.New(errsystem.ErrOpenFile, err, errsystem.WithContextMessage("Failed to create eval file")).ShowErrorAndExit() + } + } + + tui.ShowSpinner("Creating evaluation ...", action) + + format, _ := cmd.Flags().GetString("format") + if 
format == "json" { + result := map[string]string{ + "id": evalID, + "slug": slug, + "name": name, + "description": description, + } + json.NewEncoder(os.Stdout).Encode(result) + } else { + tui.ShowSuccess("Evaluation created successfully") + fmt.Printf("\nFile created: %s\n", tui.Muted(fmt.Sprintf("src/evals/%s.ts", slug))) + } + }, +} + +func init() { + rootCmd.AddCommand(evalCmd) + evalCmd.AddCommand(evalCreateCmd) + + for _, cmd := range []*cobra.Command{evalCreateCmd} { + cmd.Flags().StringP("dir", "d", "", "The project directory") + cmd.Flags().String("format", "text", "The format to use for the output. Can be either 'text' or 'json'") + } +} diff --git a/error_codes.yaml b/error_codes.yaml index a3cef919..2965d8fc 100644 --- a/error_codes.yaml +++ b/error_codes.yaml @@ -91,3 +91,9 @@ errors: - code: CLI-0029 message: Failed to retrieve devmode endpoint + + - code: CLI-0030 + message: Breaking change migration required + + - code: CLI-0031 + message: SDK update required diff --git a/internal/bundler/bundler.go b/internal/bundler/bundler.go index 32185208..e996ba81 100644 --- a/internal/bundler/bundler.go +++ b/internal/bundler/bundler.go @@ -467,8 +467,8 @@ func bundleJavascript(ctx BundleContext, dir string, outdir string, theproject * shimSourceMap = true } - if err := checkForBreakingChanges(ctx, "javascript", theproject.Bundler.Runtime); err != nil { - return err + if CheckForBreakingChangesWithBanner(ctx, "javascript", theproject.Bundler.Runtime) { + return nil // Breaking change was handled gracefully } if err := possiblyCreateDeclarationFile(ctx.Logger, dir); err != nil { @@ -481,6 +481,8 @@ func bundleJavascript(ctx BundleContext, dir string, outdir string, theproject * var entryPoints []string entryPoints = append(entryPoints, filepath.Join(dir, "index.js")) + + // Add agent entry points files, err := util.ListDir(filepath.Join(dir, theproject.Bundler.AgentConfig.Dir)) if err != nil { errsystem.New(errsystem.ErrListFilesAndDirectories, 
err).ShowErrorAndExit() @@ -493,6 +495,18 @@ func bundleJavascript(ctx BundleContext, dir string, outdir string, theproject * if len(entryPoints) == 0 { return fmt.Errorf("no index.ts files found in %s", theproject.Bundler.AgentConfig.Dir) } + + // Add eval entry points if evals directory exists + if ctx.PromptsEvalsFF { + evalsDir := filepath.Join(dir, "src", "evals") + if util.Exists(evalsDir) { + evalFiles, err := filepath.Glob(filepath.Join(evalsDir, "*.ts")) + if err == nil && len(evalFiles) > 0 { + ctx.Logger.Debug("found %d eval files to bundle", len(evalFiles)) + entryPoints = append(entryPoints, evalFiles...) + } + } + } pkgjson := filepath.Join(dir, "package.json") pkg, err := util.NewOrderedMapFromFile(util.PackageJsonKeysOrder, pkgjson) if err != nil { @@ -710,8 +724,8 @@ func bundlePython(ctx BundleContext, dir string, outdir string, theproject *proj ctx.Logger.Debug("installed dependencies: %s", strings.TrimSpace(string(out))) } - if err := checkForBreakingChanges(ctx, "python", theproject.Bundler.Runtime); err != nil { - return err + if CheckForBreakingChangesWithBanner(ctx, "python", theproject.Bundler.Runtime) { + return nil // Breaking change was handled gracefully } config := map[string]any{ diff --git a/internal/bundler/opentelemetry.go b/internal/bundler/opentelemetry.go new file mode 100644 index 00000000..4803d1ee --- /dev/null +++ b/internal/bundler/opentelemetry.go @@ -0,0 +1,53 @@ +package bundler + +func init() { + // Patch OpenTelemetry SDK Span class to intercept setAttribute calls + // This allows us to capture ai.response.text when it's set on spans + + openTelemetryPatches := patchModule{ + Module: "@opentelemetry/sdk-trace-base", + Classes: map[string]patchClass{ + "Span": { + Methods: map[string]patchAction{ + "setAttribute": { + Before: ` + const key = args[0]; + const value = args[1]; + + + if (key === 'ai.response.text') { + const spanId = this.spanContext().spanId; + const traceId = this.spanContext().traceId; + const sessionId 
= 'sess_' + traceId; + const promptMetadataRaw = this.attributes['@agentuity/prompts']; + + // Create eval job with output if promptMetadata exists + if (globalThis.__evalJobSchedulerInstance && promptMetadataRaw) { + try { + // Parse the JSON string to get the actual prompt metadata array + const promptMetadata = JSON.parse(promptMetadataRaw); + + // Count total evals across all prompt metadata + const totalEvals = promptMetadata.reduce((count, meta) => count + (meta.evals?.length || 0), 0); + + // Create job with output included + const jobWithOutput = { + spanId, + sessionId, + promptMetadata, + output: value, + createdAt: new Date().toISOString() + }; + globalThis.__evalJobSchedulerInstance.pendingJobs.set(spanId, jobWithOutput); + } catch (error) { + } + } + } + `, + }, + }, + }, + }, + } + patches["@opentelemetry/sdk-trace-base"] = openTelemetryPatches +} diff --git a/internal/bundler/patch.go b/internal/bundler/patch.go index 10f6a7e6..7ec8ef83 100644 --- a/internal/bundler/patch.go +++ b/internal/bundler/patch.go @@ -14,9 +14,14 @@ type patchModule struct { Module string Filename string Functions map[string]patchAction + Classes map[string]patchClass Body *patchAction } +type patchClass struct { + Methods map[string]patchAction +} + type patchAction struct { Before string After string @@ -169,6 +174,80 @@ func createPlugin(logger logger.Logger, dir string, shimSourceMap bool) api.Plug suffix.WriteString("}\n") logger.Debug("patched %s -> %s", name, fn) } + + // Handle class method patching + for className, class := range mod.Classes { + for methodName, method := range class.Methods { + logger.Debug("attempting to patch class %s method %s", className, methodName) + + // Look for class definition + classPattern := "class " + className + classIndex := strings.Index(contents, classPattern) + if classIndex == -1 { + logger.Debug("class %s not found", className) + continue + } + logger.Debug("found class %s at index %d", className, classIndex) + + // Look for 
method definition within the class + methodPattern := methodName + "(" + methodIndex := strings.Index(contents[classIndex:], methodPattern) + if methodIndex == -1 { + logger.Debug("method %s not found in class %s", methodName, className) + continue + } + methodIndex += classIndex + logger.Debug("found method %s at index %d", methodName, methodIndex) + + // Find the start of the method + braceIndex := strings.LastIndex(contents[:methodIndex], "{") + if braceIndex == -1 { + logger.Debug("opening brace not found for method %s", methodName) + continue + } + + // Find the end of the method + braceCount := 0 + endIndex := braceIndex + for i := braceIndex; i < len(contents); i++ { + if contents[i] == '{' { + braceCount++ + } else if contents[i] == '}' { + braceCount-- + if braceCount == 0 { + endIndex = i + break + } + } + } + + // Extract method content + methodContent := contents[braceIndex+1 : endIndex] + + // Create patched method + patchedMethod := fmt.Sprintf(`{ + // Store original method + if (!%s.prototype.__agentuity_%s) { + %s.prototype.__agentuity_%s = %s.prototype.%s; + } + + // Create wrapper + %s.prototype.%s = function(...args) { + %s + return this.__agentuity_%s.apply(this, args); + }; + + // Original method implementation + %s + }`, className, methodName, className, methodName, className, methodName, className, methodName, method.Before, methodName, methodContent) + + // Replace the method in the content + contents = contents[:braceIndex] + patchedMethod + contents[endIndex+1:] + + logger.Debug("patched class method %s.%s", className, methodName) + } + } + contents = contents + "\n" + suffix.String() if mod.Body != nil { if mod.Body.Before != "" { diff --git a/internal/bundler/prompts/code_generator.go b/internal/bundler/prompts/code_generator.go index 07590a7a..dfeaa5e8 100644 --- a/internal/bundler/prompts/code_generator.go +++ b/internal/bundler/prompts/code_generator.go @@ -129,10 +129,11 @@ func (cg *CodeGenerator) generateSystemField(prompt Prompt) 
string { slug: %q, compiled, template: %q, - variables: { %s } + variables: { %s }, + evals: %s }); return compiled; - }`, jsdoc, paramStr, prompt.System, paramStr, prompt.Slug, prompt.System, variablesStr) + }`, jsdoc, paramStr, prompt.System, paramStr, prompt.Slug, prompt.System, variablesStr, cg.formatEvalsArray(prompt.Evals)) } else { // Parameters are required return fmt.Sprintf(`system: %s({ %s }) => { @@ -141,10 +142,11 @@ func (cg *CodeGenerator) generateSystemField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: { %s } + variables: { %s }, + evals: %s }); return compiled; - }`, jsdoc, paramStr, prompt.System, paramStr, prompt.Slug, prompt.System, variablesStr) + }`, jsdoc, paramStr, prompt.System, paramStr, prompt.Slug, prompt.System, variablesStr, cg.formatEvalsArray(prompt.Evals)) } } return fmt.Sprintf(`system: %s() => { @@ -153,10 +155,11 @@ func (cg *CodeGenerator) generateSystemField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: {} + variables: {}, + evals: %s }); return compiled; - }`, jsdoc, prompt.System, prompt.Slug, prompt.System) + }`, jsdoc, prompt.System, prompt.Slug, prompt.System, cg.formatEvalsArray(prompt.Evals)) } // generatePromptField generates the prompt field for a prompt @@ -191,10 +194,11 @@ func (cg *CodeGenerator) generatePromptField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: { %s } + variables: { %s }, + evals: %s }); return compiled; - }`, jsdoc, paramStr, prompt.Prompt, paramStr, prompt.Slug, prompt.Prompt, variablesStr) + }`, jsdoc, paramStr, prompt.Prompt, paramStr, prompt.Slug, prompt.Prompt, variablesStr, cg.formatEvalsArray(prompt.Evals)) } else { // Parameters are required return fmt.Sprintf(`prompt: %s({ %s }) => { @@ -203,10 +207,11 @@ func (cg *CodeGenerator) generatePromptField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: { %s } + variables: { %s }, + evals: %s }); return compiled; - }`, jsdoc, paramStr, 
prompt.Prompt, paramStr, prompt.Slug, prompt.Prompt, variablesStr) + }`, jsdoc, paramStr, prompt.Prompt, paramStr, prompt.Slug, prompt.Prompt, variablesStr, cg.formatEvalsArray(prompt.Evals)) } } return fmt.Sprintf(`prompt: %s() => { @@ -215,10 +220,11 @@ func (cg *CodeGenerator) generatePromptField(prompt Prompt) string { slug: %q, compiled, template: %q, - variables: {} + variables: {}, + evals: %s }); return compiled; - }`, jsdoc, prompt.Prompt, prompt.Slug, prompt.Prompt) + }`, jsdoc, prompt.Prompt, prompt.Slug, prompt.Prompt, cg.formatEvalsArray(prompt.Evals)) } // generateVariablesField generates the variables field for a prompt @@ -818,6 +824,19 @@ func (cg *CodeGenerator) generatePromptJSDocForType(prompt Prompt) string { return jsdoc.String() } +// formatEvalsArray formats a Go string slice as a JavaScript array +func (cg *CodeGenerator) formatEvalsArray(evals []string) string { + if len(evals) == 0 { + return "[]" + } + + var quoted []string + for _, eval := range evals { + quoted = append(quoted, fmt.Sprintf("%q", eval)) + } + return fmt.Sprintf("[%s]", strings.Join(quoted, ", ")) +} + // generateTypedefJSDoc generates JSDoc typedef for the prompt type func (cg *CodeGenerator) generateTypedefJSDoc(prompt Prompt) string { var jsdoc strings.Builder diff --git a/internal/bundler/upgrade.go b/internal/bundler/upgrade.go index b4f21082..b6c1246d 100644 --- a/internal/bundler/upgrade.go +++ b/internal/bundler/upgrade.go @@ -8,6 +8,7 @@ import ( "strings" "github.com/Masterminds/semver" + "github.com/agentuity/cli/internal/errsystem" "github.com/agentuity/cli/internal/util" "github.com/agentuity/go-common/tui" "github.com/pelletier/go-toml/v2" @@ -22,6 +23,18 @@ type breakingChange struct { } var breakingChanges = []breakingChange{ + { + Runtime: "bunjs", + Version: "<0.0.157", + Title: "đŸšĢ JS SDK Update Required đŸšĢ", + Message: "Please run `bun update @agentuity/sdk --latest` and then re-run this command again. 
There are no code changes required on your end.", + }, + { + Runtime: "nodejs", + Version: "<0.0.157", + Title: "đŸšĢ JS SDK Update Required đŸšĢ", + Message: "Please run `npm upgrade @agentuity/sdk` and then re-run this command again. There are no code changes required on your end.", + }, { Runtime: "bunjs", Version: "<0.0.154", @@ -240,32 +253,48 @@ func checkForBreakingChanges(ctx BundleContext, language string, runtime string) return nil } if c.Check(currentVersion) { + // Always show banner if we have a TTY + if tui.HasTTY { + tui.ShowBanner(change.Title, change.Message, true) + } + if change.Callback != nil { - var proceed bool - if tui.HasTTY && !ctx.DevMode { - tui.ShowBanner(change.Title, change.Message, true) - } else { - return fmt.Errorf("migration required: %s. %s", change.Title, change.Message) - } - proceed = tui.AskForConfirm("Would you like to migrate your project now?", 'y') == 'y' - if proceed { - if err := change.Callback(ctx); err != nil { - return err + if !ctx.DevMode { + proceed := tui.AskForConfirm("Would you like to migrate your project now?", 'y') == 'y' + if proceed { + if err := change.Callback(ctx); err != nil { + return err + } + return errsystem.New(errsystem.ErrBreakingChangeMigrationRequired, fmt.Errorf("migration performed, please re-run the command")) + } else { + return errsystem.New(errsystem.ErrBreakingChangeMigrationRequired, fmt.Errorf("migration required")) } - return fmt.Errorf("migration performed, please re-run the command") } else { - return fmt.Errorf("migration required") + // In dev mode, return specific error type + return errsystem.New(errsystem.ErrSdkUpdateRequired, fmt.Errorf("%s", change.Message)) } } else { - if tui.HasTTY && !ctx.DevMode { - tui.ShowBanner(change.Title, change.Message, true) - return fmt.Errorf("breaking change migration required") - } else { - return fmt.Errorf("%s", change.Message) - } + // For breaking changes without callbacks, return specific error type + return 
errsystem.New(errsystem.ErrSdkUpdateRequired, fmt.Errorf("breaking change migration required")) } } } return nil } + +// CheckForBreakingChangesWithBanner is a wrapper that handles breaking changes gracefully +// Returns true if a breaking change was detected and handled, false otherwise +func CheckForBreakingChangesWithBanner(ctx BundleContext, language string, runtime string) bool { + err := checkForBreakingChanges(ctx, language, runtime) + if err != nil { + // Check if this is a breaking change error that we should handle gracefully + if errsystem.IsBreakingChangeError(err) { + // Don't show the error code plane, just exit cleanly + os.Exit(1) + } + // For other errors, let them propagate + return false + } + return false +} diff --git a/internal/bundler/vercel_ai.go b/internal/bundler/vercel_ai.go index 97d19f32..4f3b206b 100644 --- a/internal/bundler/vercel_ai.go +++ b/internal/bundler/vercel_ai.go @@ -31,97 +31,117 @@ func createVercelAIProviderPatch(module string, createFn string, envkey string, } func init() { - // Generate PatchPortal integration patch with hashing and telemetry - var patchPortalPatch = ` - const { PatchPortal } = await import('@agentuity/sdk'); - const { internal } = await import('@agentuity/sdk'); - const crypto = await import('node:crypto'); - internal.debug('🔧 generateText patch executing...'); - const patchPortal = await PatchPortal.getInstance(); - internal.debug('✅ PatchPortal instance created'); - let compiledSystemHash = ''; - let compiledPromptHash = ''; - const agentuityPromptMetadata = []; - patchPortal.printState(); - - if (_args[0]?.system) { - // Extract prompt from arguments - const systemString = _args[0]?.system; - internal.debug('📝 Extracted system:', systemString.substring(0, 100) + '...'); - compiledSystemHash = crypto.createHash('sha256').update(systemString).digest('hex'); - internal.debug('🔑 SYSTEM Generated compiled hash:', compiledSystemHash); + var vercelTelemetryPatch = generateJSArgsPatch(0, ` `+"") - // Get 
patch data using the same key format as processPromptMetadata - const key = 'prompt:' + compiledPromptHash; - internal.debug('🔍 Looking for key:', key); - const patchData = await patchPortal.get(key); - if (patchData) { - internal.debug('🔍 Retrieved patch data:', patchData); - agentuityPromptMetadata.push(...patchData); - } else { - internal.debug('â„šī¸ No patch data found for compiled hash:', compiledSystemHash); - } - } - - - if (_args[0]?.prompt) { - const prompt = _args[0]?.prompt || _args[0]?.messages || ''; - const promptString = typeof prompt === 'string' ? prompt : JSON.stringify(prompt); - internal.debug('📝 Extracted prompt:', promptString.substring(0, 100) + '...'); - // Generate hash for the compiled prompt (same as processPromptMetadata uses) - compiledPromptHash = crypto.createHash('sha256').update(promptString).digest('hex'); - internal.debug('🔑 PROMPT Generated compiled hash:', compiledPromptHash); - - // Get patch data using the same key format as processPromptMetadata - const key = 'prompt:' + compiledPromptHash; - internal.debug('🔍 Looking for key:', key); - const patchData = await patchPortal.get(key); - if (patchData) { - internal.debug('🔍 Retrieved patch data:', patchData); - agentuityPromptMetadata.push(...patchData); - } else { - internal.debug('â„šī¸ No patch data found for compiled hash:', compiledPromptHash); - } - } - - if (agentuityPromptMetadata.length > 0) { - // Prepare telemetry metadata with PatchPortal data - const opts = {...(_args[0] ?? 
{}) }; - const userMetadata = opts?.experimental_telemetry?.metadata || {}; - opts.experimental_telemetry = { isEnabled: true, metadata: { ...userMetadata, 'agentuity.prompts': JSON.stringify(agentuityPromptMetadata) } }; - _args[0] = opts; - internal.debug('✅ Patch metadata attached:', agentuityPromptMetadata); - } else { - internal.debug('â„šī¸ No patch data found for this invocation'); - } - ` - - var vercelTelemetryPatch = generateJSArgsPatch(0, ``) + var enableTelemetryPatch = ` + // Enable experimental telemetry to capture response text + const opts = {...(_args[0] ?? {}) }; + opts.experimental_telemetry = { isEnabled: true }; + _args[0] = opts; + ` vercelAIPatches := patchModule{ Module: "ai", Functions: map[string]patchAction{ "generateText": { - Before: vercelTelemetryPatch + "\n" + patchPortalPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "streamText": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "generateObject": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "streamObject": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "embed": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, }, "embedMany": { - Before: vercelTelemetryPatch, + Before: vercelTelemetryPatch + enableTelemetryPatch, + }, + "recordSpan": { + Before: ` + if (_args[0]?.name && ['ai.generateText', 'ai.generateObject', 'ai.streamText', 'ai.streamObject'].includes(_args[0].name)) { + // Add our custom attributes to the span configuration + const originalAttributes = _args[0].attributes || {}; + + // Extract system and prompt from the span attributes + let systemString = ''; + let promptString = ''; + + if (_args[0]?.attributes) { + // Try to extract from span attributes + systemString = _args[0].attributes['ai.system'] || _args[0].attributes['system'] || ''; + promptString = 
_args[0].attributes['ai.prompt'] || _args[0].attributes['prompt'] || ''; + + // If prompt is a JSON object, extract the individual fields + if (typeof promptString === 'string' && promptString.startsWith('{')) { + try { + const promptObj = JSON.parse(promptString); + systemString = promptObj.system || systemString; + promptString = promptObj.prompt || promptString; + } catch (e) { + // If parsing fails, keep the original string + } + } + } + + // Generate hashes using SDK utility + const { hashSync } = require('@agentuity/sdk'); + let compiledSystemHash = ''; + let compiledPromptHash = ''; + + if (systemString) { + compiledSystemHash = hashSync(systemString); + + } + + if (promptString) { + compiledPromptHash = hashSync(promptString); + + } + + // Access PatchPortal state synchronously + const agentuityPromptMetadata = []; + + if (globalThis.__patchPortalInstance) { + if (systemString) { + const key = 'prompt:' + compiledSystemHash; + const patchData = globalThis.__patchPortalInstance.state[key]; + if (patchData) { + agentuityPromptMetadata.push(...patchData); + } + } + + if (promptString) { + const key = 'prompt:' + compiledPromptHash; + const patchData = globalThis.__patchPortalInstance.state[key]; + if (patchData) { + agentuityPromptMetadata.push(...patchData); + } + } + } + + + + + // Add attributes to span configuration + if (agentuityPromptMetadata.length > 0) { + _args[0].attributes = { + ...originalAttributes, + '@agentuity/prompts': JSON.stringify(agentuityPromptMetadata) + }; + } + + } + `, }, }, } + patches["@vercel/ai"] = vercelAIPatches // register all the providers that we support in our Agentuity AI Gateway diff --git a/internal/errsystem/errorcodes.go b/internal/errsystem/errorcodes.go index bbc95e95..be7352ed 100644 --- a/internal/errsystem/errorcodes.go +++ b/internal/errsystem/errorcodes.go @@ -118,4 +118,12 @@ var ( Code: "CLI-0029", Message: "Failed to retrieve devmode endpoint", } + ErrBreakingChangeMigrationRequired = errorType{ + Code: 
"CLI-0030", + Message: "Breaking change migration required", + } + ErrSdkUpdateRequired = errorType{ + Code: "CLI-0031", + Message: "SDK update required", + } ) diff --git a/internal/errsystem/errsystem.go b/internal/errsystem/errsystem.go index 9693d860..9620704f 100644 --- a/internal/errsystem/errsystem.go +++ b/internal/errsystem/errsystem.go @@ -89,6 +89,26 @@ func WithUserId(userId string) option { } } +// IsBreakingChangeError checks if an error is a breaking change error that should be handled gracefully +func IsBreakingChangeError(err error) bool { + if err == nil { + return false + } + + // Check if it's an errSystem error with breaking change codes + if es, ok := err.(*errSystem); ok { + return es.code.Code == "CLI-0030" || es.code.Code == "CLI-0031" + } + + // Check if it's wrapped in an errSystem + var es *errSystem + if errors.As(err, &es) && es != nil { + return es.code.Code == "CLI-0030" || es.code.Code == "CLI-0031" + } + + return false +} + // WithProjectId adds the project ID to the error attributes. 
func WithProjectId(projectId string) option { return func(e *errSystem) { diff --git a/internal/eval/eval.go b/internal/eval/eval.go new file mode 100644 index 00000000..722caf79 --- /dev/null +++ b/internal/eval/eval.go @@ -0,0 +1,103 @@ +package eval + +import ( + "context" + "fmt" + "os" + "path/filepath" + + "github.com/agentuity/cli/internal/util" + "github.com/agentuity/go-common/logger" +) + +type Response[T any] struct { + Success bool `json:"success"` + Message string `json:"message"` + Data T `json:"data"` +} + +type EvalData struct { + ID string `json:"id"` + Slug string `json:"slug"` + Name string `json:"name"` +} + +type EvalMetadata struct { + ID string `json:"id"` + Slug string `json:"slug"` + Name string `json:"name"` + Description string `json:"description"` +} + +// LoadEvalMetadataMap scans the evals directory and builds a map of slug -> eval ID +func LoadEvalMetadataMap(logger logger.Logger, projectDir string) (map[string]string, error) { + evalsDir := filepath.Join(projectDir, "src", "evals") + + // Check if evals directory exists + if !util.Exists(evalsDir) { + logger.Debug("evals directory not found: %s", evalsDir) + return make(map[string]string), nil + } + + files, err := os.ReadDir(evalsDir) + if err != nil { + return nil, fmt.Errorf("failed to read evals directory: %w", err) + } + + slugToIDMap := make(map[string]string) + + for _, file := range files { + ext := filepath.Ext(file.Name()) + if file.IsDir() || (ext != ".ts" && ext != ".js") { + continue + } + + // Skip index files + if file.Name() == "index.ts" || file.Name() == "index.js" { + continue + } + + filePath := filepath.Join(evalsDir, file.Name()) + content, err := os.ReadFile(filePath) + if err != nil { + logger.Warn("failed to read eval file %s: %v", file.Name(), err) + continue + } + + // Parse metadata from file content + metadata, err := ParseEvalMetadata(string(content)) + if err != nil { + logger.Warn("failed to parse metadata from %s: %v", file.Name(), err) + continue + } 
+ + if metadata.Slug != "" && metadata.ID != "" { + slugToIDMap[metadata.Slug] = metadata.ID + logger.Debug("mapped eval slug '%s' to ID '%s'", metadata.Slug, metadata.ID) + } + } + + logger.Debug("loaded %d eval mappings", len(slugToIDMap)) + return slugToIDMap, nil +} + +// CreateEval creates a new evaluation function in the project +func CreateEval(ctx context.Context, logger logger.Logger, baseUrl string, token string, projectId string, slug string, name string, description string) (string, error) { + client := util.NewAPIClient(ctx, logger, baseUrl, token) + + payload := map[string]any{ + "projectId": projectId, + "slug": slug, + "name": name, + "description": description, + } + + var resp Response[EvalData] + if err := client.Do("POST", "/cli/evals", payload, &resp); err != nil { + return "", fmt.Errorf("error creating eval: %s", err) + } + if !resp.Success { + return "", fmt.Errorf("error creating eval: %s", resp.Message) + } + return resp.Data.ID, nil +} diff --git a/internal/eval/metadata_parser.go b/internal/eval/metadata_parser.go new file mode 100644 index 00000000..b290d5f5 --- /dev/null +++ b/internal/eval/metadata_parser.go @@ -0,0 +1,190 @@ +package eval + +import ( + "encoding/json" + "fmt" + "regexp" +) + +// ParseEvalMetadata extracts metadata from TypeScript/JavaScript eval file content +func ParseEvalMetadata(content string) (*EvalMetadata, error) { + // Find the metadata export pattern with optional type annotations + metadataRegex := regexp.MustCompile(`export\s+const\s+metadata(?:\s*:[^=]+)?\s*=\s*\{`) + metadataStart := metadataRegex.FindStringIndex(content) + if metadataStart == nil { + return nil, fmt.Errorf("no metadata export found") + } + + // Find the opening brace position + braceStart := metadataStart[1] - 1 // Position of the opening brace + if braceStart >= len(content) || content[braceStart] != '{' { + return nil, fmt.Errorf("invalid metadata format: opening brace not found") + } + + // Find the matching closing brace using 
string-aware parsing + braceEnd, err := findMatchingBrace(content, braceStart) + if err != nil { + return nil, fmt.Errorf("no matching closing brace found: %w", err) + } + + // Extract the object content + objectContent := content[braceStart : braceEnd+1] + + // Convert single quotes to double quotes for JSON compatibility + jsonStr := normalizeToJSON(objectContent) + + // Parse the JSON + var metadata EvalMetadata + if err := json.Unmarshal([]byte(jsonStr), &metadata); err != nil { + return nil, fmt.Errorf("expect valid JSON object after `export const metadata =`: %w", err) + } + + return &metadata, nil +} + +// findMatchingBrace finds the matching closing brace using string-aware parsing +func findMatchingBrace(content string, start int) (int, error) { + braceCount := 0 + inString := false + escapeNext := false + + for i := start; i < len(content); i++ { + char := content[i] + + if escapeNext { + escapeNext = false + continue + } + + if char == '\\' { + escapeNext = true + continue + } + + if char == '"' || char == '\'' { + inString = !inString + continue + } + + if !inString { + if char == '{' { + braceCount++ + } else if char == '}' { + braceCount-- + if braceCount == 0 { + return i, nil + } + } + } + } + + return -1, fmt.Errorf("no matching closing brace found") +} + +// normalizeToJSON converts JavaScript object syntax to valid JSON +func normalizeToJSON(content string) string { + result := make([]rune, 0, len(content)) + inString := false + escapeNext := false + + for _, char := range content { + if escapeNext { + escapeNext = false + result = append(result, char) + continue + } + + if char == '\\' { + escapeNext = true + result = append(result, char) + continue + } + + if char == '"' || char == '\'' { + if !inString { + // Opening quote - always use double quote + result = append(result, '"') + inString = true + } else { + // Closing quote - always use double quote + result = append(result, '"') + inString = false + } + continue + } + + if !inString && char 
== '\'' { + // Single quote outside string - convert to double quote + result = append(result, '"') + inString = true + continue + } + + result = append(result, char) + } + + // Now quote unquoted keys, but only outside of strings + jsonStr := string(result) + jsonStr = quoteUnquotedKeys(jsonStr) + + return jsonStr +} + +// quoteUnquotedKeys quotes unquoted keys in JSON, but only outside of strings +func quoteUnquotedKeys(content string) string { + result := make([]rune, 0, len(content)) + inString := false + escapeNext := false + + for i, char := range content { + if escapeNext { + escapeNext = false + result = append(result, char) + continue + } + + if char == '\\' { + escapeNext = true + result = append(result, char) + continue + } + + if char == '"' { + inString = !inString + result = append(result, char) + continue + } + + if !inString && char == ':' { + // Look backwards to find the start of the key + keyStart := i + for j := i - 1; j >= 0; j-- { + if content[j] == ' ' || content[j] == '\t' || content[j] == '\n' { + keyStart = j + 1 + break + } + if content[j] == ',' || content[j] == '{' { + keyStart = j + 1 + break + } + } + + // Check if the key is already quoted + if keyStart < i && content[keyStart] != '"' { + // Key is not quoted, add quotes around the key we already added + // Remove the key from result and add it quoted + keyLength := i - keyStart + result = result[:len(result)-keyLength] + result = append(result, '"') + result = append(result, []rune(content[keyStart:i])...) 
package eval

import (
	"testing"

	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

// TestParseEvalMetadata exercises ParseEvalMetadata against realistic eval
// source files: happy paths, TypeScript type annotations, tricky string
// contents (colons, URLs, braces, escaped quotes) and malformed input.
// Each case feeds a full TS/JS file as a raw string and checks the four
// metadata fields (or that an error is returned).
func TestParseEvalMetadata(t *testing.T) {
	tests := []struct {
		name     string        // subtest name
		content  string        // full eval source handed to the parser
		expected *EvalMetadata // expected parse result (nil when wantErr)
		wantErr  bool          // true when parsing must fail
	}{
		{
			// Typical generated eval file: plain metadata object, plus a
			// same-named local `metadata` inside the function body that must
			// NOT be picked up (only `export const metadata` matches).
			name: "coherence-check.ts",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_3b74dc768cbdec883d857c011bb85925",
  slug: "coherence-check",
  name: "Coherence Check",
  description: "Evaluates if the response is coherent and logically consistent"
};

/**
 * Coherence Check
 * Evaluates if the response is coherent and logically consistent
 */
export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  const { input, output } = req;

  // TODO: Implement your evaluation logic here
  // Example: Score the output based on some criteria

  const score = 0.8; // Replace with your actual scoring logic
  const metadata = {
    reasoning: 'Replace with your evaluation reasoning'
  };

  res.score(score, metadata);
}`,
			expected: &EvalMetadata{
				ID:          "eval_3b74dc768cbdec883d857c011bb85925",
				Slug:        "coherence-check",
				Name:        "Coherence Check",
				Description: "Evaluates if the response is coherent and logically consistent",
			},
			wantErr: false,
		},
		{
			// Realistic file with a long prompt string (containing escaped
			// quotes and colons) AFTER the metadata object.
			name: "politeness.ts",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';
import { groq } from '@ai-sdk/groq';
import { generateObject } from 'ai';
import { z } from 'zod';

export const metadata = {
  id: "eval_politeness_123456789",
  slug: "politeness",
  name: "Politeness Evaluation",
  description: "Evaluates the politeness of AI responses on a scale of 0 to 1"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  const { input, output } = req;
  const prompt = "You are a human judge evaluating the politeness of an AI response. Please rate the politeness of the following response on a scale of 0 to 1, where: 0.0-0.3: Not polite (rude, dismissive, or inappropriate), 0.4-0.6: Moderately polite (neutral tone, some courtesy), 0.7-0.9: Polite (courteous, respectful, helpful tone), 1.0: Very polite (exceptionally courteous, warm, and respectful). Consider these factors: Greeting and closing politeness, Use of please, thank you, and other courtesy words, Overall tone and respectfulness, Helpful and considerate language, Professional yet friendly demeanor. User Input: \"" + input + "\" AI Response: \"" + output + "\" Please provide a JSON response with a score (0-1) and reasoning.";

  const result = await generateObject({
    model: groq('llama-3.1-8b-instant'),
    prompt,
    schema: z.object({
      score: z.number().min(0).max(1),
      reasoning: z.string(),
    }),
  });

  const { score, reasoning } = result.object;
  res.score(score, { reasoning: reasoning || 'No reasoning provided' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_politeness_123456789",
				Slug:        "politeness",
				Name:        "Politeness Evaluation",
				Description: "Evaluates the politeness of AI responses on a scale of 0 to 1",
			},
			wantErr: false,
		},
		{
			// No `export const metadata` at all -> parser must error.
			name: "missing metadata",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: nil,
			wantErr:  true,
		},
		{
			// A JS comment inside the object makes it invalid JSON after
			// normalization -> parser must error.
			name: "malformed metadata",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "test",
  slug: "test",
  name: "Test",
  description: "Test description"
  // Missing closing brace
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: nil,
			wantErr:  true,
		},
		{
			// Extra nested objects are valid JSON but ignored by the
			// EvalMetadata struct (unknown fields are dropped).
			name: "nested objects in metadata",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_nested_123",
  slug: "nested-test",
  name: "Nested Test",
  description: "Test with nested objects",
  config: {
    threshold: 0.5,
    enabled: true
  }
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_nested_123",
				Slug:        "nested-test",
				Name:        "Nested Test",
				Description: "Test with nested objects",
			},
			wantErr: false,
		},
		{
			// Simple TS type annotation between `metadata` and `=`.
			name: "with TypeScript type annotation",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata: { id: string; slug: string; name: string; description: string } = {
  id: "eval_typed_123",
  slug: "typed-test",
  name: "Typed Test",
  description: "Test with TypeScript type annotation"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_typed_123",
				Slug:        "typed-test",
				Name:        "Typed Test",
				Description: "Test with TypeScript type annotation",
			},
			wantErr: false,
		},
		{
			// Colons inside URLs must not be mistaken for key separators.
			name: "with URLs in description",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_url_123",
  slug: "url-test",
  name: "URL Test",
  description: "Test with URLs: https://example.com/api and http://test.org:8080/path"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_url_123",
				Slug:        "url-test",
				Name:        "URL Test",
				Description: "Test with URLs: https://example.com/api and http://test.org:8080/path",
			},
			wantErr: false,
		},
		{
			// General colons inside string values (times, ratios, protocols).
			name: "with colons in string values",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_colon_123",
  slug: "colon-test",
  name: "Colon Test",
  description: "Test with colons: time is 12:30:45, ratio is 3:1, and protocol is https:"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_colon_123",
				Slug:        "colon-test",
				Name:        "Colon Test",
				Description: "Test with colons: time is 12:30:45, ratio is 3:1, and protocol is https:",
			},
			wantErr: false,
		},
		{
			// Backslash-escaped quotes inside string values must survive
			// brace matching and normalization.
			name: "with escaped quotes in strings",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_escape_123",
  slug: "escape-test",
  name: "Escape Test",
  description: "Test with escaped quotes: \\\"Hello world\\\" and \\\"single quotes\\\""
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_escape_123",
				Slug:        "escape-test",
				Name:        "Escape Test",
				Description: "Test with escaped quotes: \\\"Hello world\\\" and \\\"single quotes\\\"",
			},
			wantErr: false,
		},
		{
			// Nested object values containing colons inside their strings.
			name: "with nested objects containing strings with colons",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_nested_colon_123",
  slug: "nested-colon-test",
  name: "Nested Colon Test",
  description: "Test with nested objects containing colons",
  config: {
    url: "https://api.example.com:8080/v1",
    time: "12:30:45",
    ratio: "3:1"
  }
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_nested_colon_123",
				Slug:        "nested-colon-test",
				Name:        "Nested Colon Test",
				Description: "Test with nested objects containing colons",
			},
			wantErr: false,
		},
		{
			// Braces inside string values must not confuse brace matching.
			name: "with braces in strings",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_brace_123",
  slug: "brace-test",
  name: "Brace Test",
  description: "Test with braces in strings: {nested} and {another}"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_brace_123",
				Slug:        "brace-test",
				Name:        "Brace Test",
				Description: "Test with braces in strings: {nested} and {another}",
			},
			wantErr: false,
		},
		{
			// A bare (unquoted) value is not valid JSON -> parser must error.
			name: "invalid JSON format",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata = {
  id: "eval_invalid_123",
  slug: "invalid-test",
  name: "Invalid Test",
  description: "Test with invalid JSON format",
  invalid: unquoted_value
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: nil,
			wantErr:  true,
		},
		{
			// TS annotation that itself contains braces; the regex's
			// annotation group must skip past it to the real object.
			name: "complex TypeScript type annotation",
			content: `import type { EvalContext, EvalRequest, EvalResponse } from '@agentuity/sdk';

export const metadata: Record & { id: string; slug: string } = {
  id: "eval_complex_123",
  slug: "complex-test",
  name: "Complex Test",
  description: "Test with complex TypeScript type annotation"
};

export default async function evaluate(
  _ctx: EvalContext,
  req: EvalRequest,
  res: EvalResponse
) {
  res.score(0.8, { reasoning: 'test' });
}`,
			expected: &EvalMetadata{
				ID:          "eval_complex_123",
				Slug:        "complex-test",
				Name:        "Complex Test",
				Description: "Test with complex TypeScript type annotation",
			},
			wantErr: false,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			result, err := ParseEvalMetadata(test.content)

			if test.wantErr {
				require.Error(t, err)
				assert.Nil(t, result)
			} else {
				require.NoError(t, err)
				require.NotNil(t, result)
				// Compare field-by-field for clearer failure messages.
				assert.Equal(t, test.expected.ID, result.ID)
				assert.Equal(t, test.expected.Slug, result.Slug)
				assert.Equal(t, test.expected.Name, result.Name)
				assert.Equal(t, test.expected.Description, result.Description)
			}
		})
	}
}