Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

132 changes: 132 additions & 0 deletions src/api/providers/__tests__/vscode-lm.spec.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import type { Mock } from "vitest"
import { checkModelSupportsImages, IMAGE_CAPABLE_MODEL_PATTERNS, IMAGE_INCAPABLE_MODEL_PATTERNS } from "../vscode-lm"

// Mocks must come first, before imports
vi.mock("vscode", () => {
Expand Down Expand Up @@ -537,3 +538,134 @@ describe("VsCodeLmHandler", () => {
})
})
})

describe("checkModelSupportsImages", () => {
	/** One probe: the (family, id) pair to look up and the flag it must produce. */
	type Probe = readonly [family: string, id: string, expected: boolean]

	/** Runs every probe, in order, through checkModelSupportsImages and asserts its flag. */
	const verify = (probes: readonly Probe[]): void => {
		for (const [family, id, expected] of probes) {
			expect(checkModelSupportsImages(family, id)).toBe(expected)
		}
	}

	/** Builds probes that pass the same string as both family and id. */
	const keyed = (expected: boolean, ...keys: string[]): Probe[] =>
		keys.map((key): Probe => [key, key, expected])

	/** Builds probes that pair the placeholder family "custom" with each id. */
	const custom = (expected: boolean, ...ids: string[]): Probe[] =>
		ids.map((id): Probe => ["custom", id, expected])

	describe("static vscodeLlmModels lookup", () => {
		it("should return supportsImages from static definitions when model family matches", () => {
			// Each key is expected to resolve to its static supportsImages value in vscodeLlmModels
			verify([
				...keyed(false, "gpt-3.5-turbo", "gpt-4", "gpt-4o-mini"),
				...keyed(true, "gpt-4o", "gpt-4.1", "gpt-5", "gpt-5-mini"),
				...keyed(false, "o1", "o3-mini", "o4-mini"),
			])
		})

		it("should return supportsImages from static definitions for claude models", () => {
			verify(keyed(true, "claude-3.5-sonnet", "claude-4-sonnet"))
		})

		it("should return supportsImages from static definitions for gemini models", () => {
			verify(keyed(true, "gemini-2.0-flash-001", "gemini-2.5-pro"))
		})
	})

	describe("pattern matching for unknown models", () => {
		it("should return true for gpt-4o (but not gpt-4o-mini)", () => {
			verify([...custom(true, "gpt-4o"), ...custom(false, "gpt-4o-mini")])
		})

		it("should return true for gpt-4.x and higher versions", () => {
			verify(custom(true, "gpt-4.1-preview", "gpt-4.2"))
		})

		it("should return true for gpt-5 and higher (unknown variants)", () => {
			verify(custom(true, "gpt-5-turbo", "gpt-6"))
		})

		it("should return true for all claude-* models", () => {
			verify(custom(true, "claude-haiku-4.5", "claude-opus-4.5", "claude-sonnet-4"))
		})

		it("should return true for all gemini-* models", () => {
			verify(custom(true, "gemini-2.5-pro", "gemini-3-flash-preview"))
		})
	})

	describe("non-vision models", () => {
		it("should return false for gpt-3.5 models", () => {
			verify(custom(false, "gpt-3.5-turbo", "gpt-3.5-turbo-16k"))
		})

		it("should return false for base gpt-4 and gpt-4-* variants", () => {
			verify(custom(false, "gpt-4", "gpt-4-0125-preview", "gpt-4-turbo"))
		})

		it("should return false for reasoning models (o1, o3-mini, o4-mini)", () => {
			verify(custom(false, "o1", "o1-preview", "o1-mini", "o3-mini", "o4-mini"))
		})

		it("should return false for grok models", () => {
			verify(custom(false, "grok-code-fast-1", "grok-2"))
		})

		it("should return false for unknown model families", () => {
			verify([
				["mistral", "mistral-large", false],
				["llama", "llama-3-70b", false],
				["unknown", "some-random-model", false],
			])
		})
	})

	describe("case insensitivity", () => {
		it("should match regardless of case for pattern matching", () => {
			verify(custom(true, "GPT-4O", "CLAUDE-SONNET-4", "GEMINI-2.5-PRO"))
		})
	})

	describe("pattern matching edge cases", () => {
		it("should only match IDs that start with known patterns", () => {
			verify(
				custom(
					false,
					"my-gpt-4o-model", // gpt not at start
					"not-claude-model", // claude not at start
				),
			)
		})
	})
})

describe("IMAGE_CAPABLE_MODEL_PATTERNS", () => {
	it("should export the model patterns array", () => {
		// The export has to be a genuine array with at least one entry
		const exportedAsArray = Array.isArray(IMAGE_CAPABLE_MODEL_PATTERNS)
		expect(exportedAsArray).toBe(true)
		expect(IMAGE_CAPABLE_MODEL_PATTERNS.length).toBeGreaterThan(0)
	})

	it("should contain RegExp patterns for vision-capable models", () => {
		// Every entry must be a RegExp instance
		for (const entry of IMAGE_CAPABLE_MODEL_PATTERNS) {
			expect(entry).toBeInstanceOf(RegExp)
		}
	})
})

describe("IMAGE_INCAPABLE_MODEL_PATTERNS", () => {
	it("should export the incapable model patterns array", () => {
		// The export has to be a genuine array with at least one entry
		const exportedAsArray = Array.isArray(IMAGE_INCAPABLE_MODEL_PATTERNS)
		expect(exportedAsArray).toBe(true)
		expect(IMAGE_INCAPABLE_MODEL_PATTERNS.length).toBeGreaterThan(0)
	})

	it("should contain RegExp patterns for non-vision models", () => {
		// Every entry must be a RegExp instance
		for (const entry of IMAGE_INCAPABLE_MODEL_PATTERNS) {
			expect(entry).toBeInstanceOf(RegExp)
		}
	})
})
77 changes: 73 additions & 4 deletions src/api/providers/vscode-lm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
import * as vscode from "vscode"
import OpenAI from "openai"

import { type ModelInfo, openAiModelInfoSaneDefaults } from "@roo-code/types"
import { type ModelInfo, openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types"

import type { ApiHandlerOptions } from "../../shared/api"
import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
Expand Down Expand Up @@ -529,14 +529,18 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan

const modelId = this.client.id || modelParts.join(SELECTOR_SEPARATOR)

// Check if the model supports images based on known model families
// VS Code Language Model API 1.106+ supports image inputs via LanguageModelDataPart
const supportsImages = checkModelSupportsImages(this.client.family, this.client.id)

// Build model info with conservative defaults for missing values
const modelInfo: ModelInfo = {
maxTokens: -1, // Unlimited tokens by default
contextWindow:
typeof this.client.maxInputTokens === "number"
? Math.max(0, this.client.maxInputTokens)
: openAiModelInfoSaneDefaults.contextWindow,
supportsImages: false, // VSCode Language Model API currently doesn't support image inputs
supportsImages,
supportsPromptCache: true,
inputPrice: 0,
outputPrice: 0,
Expand Down Expand Up @@ -586,8 +590,73 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
}
}

// VS Code Language Model IDs that should be excluded from the model list,
// e.g. because they will never work.
const VSCODE_LM_STATIC_BLACKLIST: Array<string> = [
	"claude-3.7-sonnet",
	"claude-3.7-sonnet-thought",
]
/**
 * Allow-list of model ID patterns treated as accepting image inputs through the
 * VS Code Language Model API (LanguageModelDataPart.image(), introduced in VS Code 1.106+).
 *
 * Pattern matching is the fallback for models that have no entry in the static
 * vscodeLlmModels definitions. Only newer model versions support images.
 * Every pattern is anchored at the start of the ID and is case-insensitive.
 *
 * Source: https://models.dev/api.json (github-copilot provider models)
 */
export const IMAGE_CAPABLE_MODEL_PATTERNS: RegExp[] = [
	// GPT-4o (omni) supports images; the `$` anchor keeps gpt-4o-mini out
	/^gpt-4o$/i,
	// GPT-4.1 and higher versions
	/^gpt-4\.[1-9]/i,
	// GPT-5 and higher (gpt-5, gpt-5-mini, gpt-5.1-codex, etc.)
	/^gpt-[5-9]/i,
	// All Claude models support images
	/^claude-/i,
	// All Gemini models support images
	/^gemini-/i,
]

/**
 * Deny-list of model ID patterns that explicitly do NOT support images.
 * These patterns are checked before IMAGE_CAPABLE_MODEL_PATTERNS, so a match
 * here wins. Every pattern is anchored at the start of the ID and is
 * case-insensitive.
 */
export const IMAGE_INCAPABLE_MODEL_PATTERNS: RegExp[] = [
	// GPT-3.5 models don't support images
	/^gpt-3\.5/i,
	// Base GPT-4 doesn't support images
	/^gpt-4$/i,
	// GPT-4 variants like gpt-4-0125-preview don't support images
	/^gpt-4-/i,
	// GPT-4o-mini doesn't support images
	/^gpt-4o-mini/i,
	// Reasoning models (o1, o3-mini, o4-mini) don't support images.
	// The trailing `-?` adds no constraint: any ID starting with o1..o4 matches.
	/^o[1-4]-?/i,
	// Grok models don't support images
	/^grok-/i,
]

/**
 * Checks if a model supports image inputs.
 *
 * Resolution order:
 *   1. the static vscodeLlmModels definitions, keyed by `family`;
 *   2. the static vscodeLlmModels definitions, keyed by `id`;
 *   3. IMAGE_INCAPABLE_MODEL_PATTERNS tested against `id` (a match means no image support);
 *   4. IMAGE_CAPABLE_MODEL_PATTERNS tested against `id`.
 *
 * Only keys defined directly on vscodeLlmModels count as static definitions.
 * Names inherited from Object.prototype (e.g. "constructor", "toString",
 * "__proto__") are therefore not mistaken for known models and still reach
 * pattern matching.
 *
 * @param family The model family (used for lookup in static definitions)
 * @param id The model ID (used for static lookup and for pattern matching)
 * @returns true if the model supports image inputs
 */
export function checkModelSupportsImages(family: string, id: string): boolean {
	// Static definitions win over patterns; family is consulted before id.
	for (const key of [family, id]) {
		// Own-property check: a plain index lookup would also find inherited members.
		if (Object.prototype.hasOwnProperty.call(vscodeLlmModels, key)) {
			return vscodeLlmModels[key as keyof typeof vscodeLlmModels].supportsImages ?? false
		}
	}

	// Unknown model: the deny patterns take precedence over the allow patterns.
	if (IMAGE_INCAPABLE_MODEL_PATTERNS.some((pattern) => pattern.test(id))) {
		return false
	}

	return IMAGE_CAPABLE_MODEL_PATTERNS.some((pattern) => pattern.test(id))
}

// VS Code Language Model IDs that should be excluded from the model list,
// e.g. because they don't support native tool calling or will never work.
const VSCODE_LM_STATIC_BLACKLIST: Array<string> = [
	"claude-3.7-sonnet",
	"claude-3.7-sonnet-thought",
	// Does not support native tool calling
	"claude-opus-41",
]

export async function getVsCodeLmModels() {
try {
Expand Down
Loading
Loading