From 851df79a82c0846069f45409340e6a8b81cf3a43 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sat, 11 Apr 2026 05:48:42 +0800 Subject: [PATCH 01/14] fix: defer native LLM client construction when credentials are missing All native LLM providers built their SDK clients inside `@model_validator(mode="after")`, which required the API key at `LLM(...)` construction time. Instantiating an LLM at module scope (e.g. `chat_llm=LLM(model="openai/gpt-4o-mini")` on a `@crew` method) crashed during downstream crew-metadata extraction with a confusing `ImportError: Error importing native provider: 1 validation error...` before the process env vars were ever consulted. Wrap eager client construction in a try/except in each provider and add `_get_sync_client` / `_get_async_client` methods that build on first use. OpenAI call sites are routed through the lazy getters so calls made without eager construction still work. The descriptive "X_API_KEY is required" errors are re-raised from the lazy path at first real call. Update two Azure tests that asserted the old eager-error contract to assert the new lazy-error contract. 
--- .../llms/providers/anthropic/completion.py | 55 ++++++++---- .../crewai/llms/providers/azure/completion.py | 87 ++++++++++++------- .../llms/providers/bedrock/completion.py | 32 +++++-- .../llms/providers/gemini/completion.py | 32 +++++-- .../llms/providers/openai/completion.py | 78 +++++++++++------ lib/crewai/tests/llms/azure/test_azure.py | 16 ++-- 6 files changed, 209 insertions(+), 91 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 0d666a63c8..eeedf85235 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -189,16 +189,37 @@ def _normalize_anthropic_fields(cls, data: Any) -> Any: @model_validator(mode="after") def _init_clients(self) -> AnthropicCompletion: - self._client = Anthropic(**self._get_client_params()) + """Eagerly build clients when the API key is available, otherwise + defer so ``LLM(model="anthropic/...")`` can be constructed at module + import time even before deployment env vars are set. 
+ """ + try: + self._client = self._build_sync_client() + self._async_client = self._build_async_client() + except ValueError: + pass + return self + def _build_sync_client(self) -> Any: + return Anthropic(**self._get_client_params()) + + def _build_async_client(self) -> Any: async_client_params = self._get_client_params() if self.interceptor: async_transport = AsyncHTTPTransport(interceptor=self.interceptor) async_http_client = httpx.AsyncClient(transport=async_transport) async_client_params["http_client"] = async_http_client + return AsyncAnthropic(**async_client_params) - self._async_client = AsyncAnthropic(**async_client_params) - return self + def _get_sync_client(self) -> Any: + if self._client is None: + self._client = self._build_sync_client() + return self._client + + def _get_async_client(self) -> Any: + if self._async_client is None: + self._async_client = self._build_async_client() + return self._async_client def to_config_dict(self) -> dict[str, Any]: """Extend base config with Anthropic-specific fields.""" @@ -790,11 +811,11 @@ def _handle_completion( try: if betas: params["betas"] = betas - response = self._client.beta.messages.create( + response = self._get_sync_client().beta.messages.create( **params, extra_body=extra_body ) else: - response = self._client.messages.create(**params) + response = self._get_sync_client().messages.create(**params) except Exception as e: if is_context_length_exceeded(e): @@ -942,9 +963,11 @@ def _handle_streaming_completion( current_tool_calls: dict[int, dict[str, Any]] = {} stream_context = ( - self._client.beta.messages.stream(**stream_params, extra_body=extra_body) + self._get_sync_client().beta.messages.stream( + **stream_params, extra_body=extra_body + ) if betas - else self._client.messages.stream(**stream_params) + else self._get_sync_client().messages.stream(**stream_params) ) with stream_context as stream: response_id = None @@ -1223,7 +1246,9 @@ def _handle_tool_use_conversation( try: # Send tool results back 
to Claude for final response - final_response: Message = self._client.messages.create(**follow_up_params) + final_response: Message = self._get_sync_client().messages.create( + **follow_up_params + ) # Track token usage for follow-up call follow_up_usage = self._extract_anthropic_token_usage(final_response) @@ -1319,11 +1344,11 @@ async def _ahandle_completion( try: if betas: params["betas"] = betas - response = await self._async_client.beta.messages.create( + response = await self._get_async_client().beta.messages.create( **params, extra_body=extra_body ) else: - response = await self._async_client.messages.create(**params) + response = await self._get_async_client().messages.create(**params) except Exception as e: if is_context_length_exceeded(e): @@ -1457,11 +1482,11 @@ async def _ahandle_streaming_completion( current_tool_calls: dict[int, dict[str, Any]] = {} stream_context = ( - self._async_client.beta.messages.stream( + self._get_async_client().beta.messages.stream( **stream_params, extra_body=extra_body ) if betas - else self._async_client.messages.stream(**stream_params) + else self._get_async_client().messages.stream(**stream_params) ) async with stream_context as stream: response_id = None @@ -1626,7 +1651,7 @@ async def _ahandle_tool_use_conversation( ] try: - final_response: Message = await self._async_client.messages.create( + final_response: Message = await self._get_async_client().messages.create( **follow_up_params ) @@ -1754,8 +1779,8 @@ def get_file_uploader(self) -> Any: from crewai_files.uploaders.anthropic import AnthropicFileUploader return AnthropicFileUploader( - client=self._client, - async_client=self._async_client, + client=self._get_sync_client(), + async_client=self._get_async_client(), ) except ImportError: return None diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index cac811bc7e..249c22550b 100644 --- 
a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -116,43 +116,72 @@ def _normalize_azure_fields(cls, data: Any) -> Any: data.get("api_version") or os.getenv("AZURE_API_VERSION") or "2024-06-01" ) - if not data["api_key"]: - raise ValueError( - "Azure API key is required. Set AZURE_API_KEY environment variable or pass api_key parameter." - ) - if not data["endpoint"]: - raise ValueError( - "Azure endpoint is required. Set AZURE_ENDPOINT environment variable or pass endpoint parameter." - ) - + # Credentials and endpoint are validated lazily in `_init_clients` + # so the LLM can be constructed before deployment env vars are set. model = data.get("model", "") - data["endpoint"] = AzureCompletion._validate_and_fix_endpoint( - data["endpoint"], model - ) + if data["endpoint"]: + data["endpoint"] = AzureCompletion._validate_and_fix_endpoint( + data["endpoint"], model + ) + parsed = urlparse(data["endpoint"]) + hostname = parsed.hostname or "" + data["is_azure_openai_endpoint"] = ( + hostname == "openai.azure.com" or hostname.endswith(".openai.azure.com") + ) and "/openai/deployments/" in data["endpoint"] + else: + data["is_azure_openai_endpoint"] = False data["is_openai_model"] = any( prefix in model.lower() for prefix in ["gpt-", "o1-", "text-"] ) - parsed = urlparse(data["endpoint"]) - hostname = parsed.hostname or "" - data["is_azure_openai_endpoint"] = ( - hostname == "openai.azure.com" or hostname.endswith(".openai.azure.com") - ) and "/openai/deployments/" in data["endpoint"] return data @model_validator(mode="after") def _init_clients(self) -> AzureCompletion: + """Eagerly build clients when credentials are available, otherwise + defer so ``LLM(model="azure/...")`` can be constructed at module + import time even before deployment env vars are set. 
+ """ + try: + self._client = self._build_sync_client() + self._async_client = self._build_async_client() + except ValueError: + pass + return self + + def _build_sync_client(self) -> Any: + return ChatCompletionsClient(**self._make_client_kwargs()) + + def _build_async_client(self) -> Any: + return AsyncChatCompletionsClient(**self._make_client_kwargs()) + + def _make_client_kwargs(self) -> dict[str, Any]: if not self.api_key: - raise ValueError("Azure API key is required.") + raise ValueError( + "Azure API key is required. Set AZURE_API_KEY environment " + "variable or pass api_key parameter." + ) + if not self.endpoint: + raise ValueError( + "Azure endpoint is required. Set AZURE_ENDPOINT environment " + "variable or pass endpoint parameter." + ) client_kwargs: dict[str, Any] = { "endpoint": self.endpoint, "credential": AzureKeyCredential(self.api_key), } if self.api_version: client_kwargs["api_version"] = self.api_version + return client_kwargs - self._client = ChatCompletionsClient(**client_kwargs) - self._async_client = AsyncChatCompletionsClient(**client_kwargs) - return self + def _get_sync_client(self) -> Any: + if self._client is None: + self._client = self._build_sync_client() + return self._client + + def _get_async_client(self) -> Any: + if self._async_client is None: + self._async_client = self._build_async_client() + return self._async_client def to_config_dict(self) -> dict[str, Any]: """Extend base config with Azure-specific fields.""" @@ -713,8 +742,7 @@ def _handle_completion( ) -> str | Any: """Handle non-streaming chat completion.""" try: - # Cast params to Any to avoid type checking issues with TypedDict unpacking - response: ChatCompletions = self._client.complete(**params) + response: ChatCompletions = self._get_sync_client().complete(**params) return self._process_completion_response( response=response, params=params, @@ -913,7 +941,7 @@ def _handle_streaming_completion( tool_calls: dict[int, dict[str, Any]] = {} usage_data: dict[str, Any] 
| None = None - for update in self._client.complete(**params): + for update in self._get_sync_client().complete(**params): if isinstance(update, StreamingChatCompletionsUpdate): if update.usage: usage = update.usage @@ -953,8 +981,9 @@ async def _ahandle_completion( ) -> str | Any: """Handle non-streaming chat completion asynchronously.""" try: - # Cast params to Any to avoid type checking issues with TypedDict unpacking - response: ChatCompletions = await self._async_client.complete(**params) + response: ChatCompletions = await self._get_async_client().complete( + **params + ) return self._process_completion_response( response=response, params=params, @@ -980,7 +1009,7 @@ async def _ahandle_streaming_completion( usage_data: dict[str, Any] | None = None - stream = await self._async_client.complete(**params) + stream = await self._get_async_client().complete(**params) async for update in stream: if isinstance(update, StreamingChatCompletionsUpdate): if hasattr(update, "usage") and update.usage: @@ -1105,8 +1134,8 @@ async def aclose(self) -> None: This ensures proper cleanup of the underlying aiohttp session to avoid unclosed connector warnings. 
""" - if hasattr(self._async_client, "close"): - await self._async_client.close() + if hasattr(self._get_async_client(), "close"): + await self._get_async_client().close() async def __aenter__(self) -> Self: """Async context manager entry.""" diff --git a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py index 0747b70064..61800cbac3 100644 --- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py +++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py @@ -303,6 +303,18 @@ def _normalize_bedrock_fields(cls, data: Any) -> Any: @model_validator(mode="after") def _init_clients(self) -> BedrockCompletion: + """Eagerly build the sync client when AWS credentials resolve, + otherwise defer so ``LLM(model="bedrock/...")`` can be constructed + at module import time even before deployment env vars are set. + """ + try: + self._client = self._build_sync_client() + except Exception as e: + logging.debug("Deferring Bedrock client construction: %s", e) + self._async_exit_stack = AsyncExitStack() if AIOBOTOCORE_AVAILABLE else None + return self + + def _build_sync_client(self) -> Any: config = Config( read_timeout=300, retries={"max_attempts": 3, "mode": "adaptive"}, @@ -314,9 +326,17 @@ def _init_clients(self) -> BedrockCompletion: aws_session_token=self.aws_session_token, region_name=self.region_name, ) - self._client = session.client("bedrock-runtime", config=config) - self._async_exit_stack = AsyncExitStack() if AIOBOTOCORE_AVAILABLE else None - return self + return session.client("bedrock-runtime", config=config) + + def _get_sync_client(self) -> Any: + if self._client is None: + self._client = self._build_sync_client() + return self._client + + def _get_async_client(self) -> Any: + """Async client is set up separately by ``_ensure_async_client`` + using ``aiobotocore`` inside an exit stack.""" + return self._async_client def to_config_dict(self) -> dict[str, Any]: """Extend base config with 
Bedrock-specific fields.""" @@ -656,7 +676,7 @@ def _handle_converse( raise ValueError(f"Invalid message format at index {i}") # Call Bedrock Converse API with proper error handling - response = self._client.converse( + response = self._get_sync_client().converse( modelId=self.model_id, messages=cast( "Sequence[MessageTypeDef | MessageOutputTypeDef]", @@ -945,7 +965,7 @@ def _handle_streaming_converse( usage_data: dict[str, Any] | None = None try: - response = self._client.converse_stream( + response = self._get_sync_client().converse_stream( modelId=self.model_id, messages=cast( "Sequence[MessageTypeDef | MessageOutputTypeDef]", @@ -1174,7 +1194,7 @@ async def _ensure_async_client(self) -> Any: ) self._async_client = client self._async_client_initialized = True - return self._async_client + return self._get_async_client() async def _ahandle_converse( self, diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py index 3959bc6596..d9b8da7c49 100644 --- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py @@ -118,9 +118,25 @@ def _normalize_gemini_fields(cls, data: Any) -> Any: @model_validator(mode="after") def _init_client(self) -> GeminiCompletion: - self._client = self._initialize_client(self.use_vertexai) + """Eagerly build the client when credentials resolve, otherwise defer + so ``LLM(model="gemini/...")`` can be constructed at module import time + even before deployment env vars are set. 
+ """ + try: + self._client = self._initialize_client(self.use_vertexai) + except ValueError: + pass return self + def _get_sync_client(self) -> Any: + if self._client is None: + self._client = self._initialize_client(self.use_vertexai) + return self._client + + def _get_async_client(self) -> Any: + """Gemini uses a single client for both sync and async calls.""" + return self._get_sync_client() + def to_config_dict(self) -> dict[str, Any]: """Extend base config with Gemini/Vertex-specific fields.""" config = super().to_config_dict() @@ -228,8 +244,8 @@ def _get_client_params(self) -> dict[str, Any]: if ( hasattr(self, "client") - and hasattr(self._client, "vertexai") - and self._client.vertexai + and hasattr(self._get_sync_client(), "vertexai") + and self._get_sync_client().vertexai ): # Vertex AI configuration params.update( @@ -1112,7 +1128,7 @@ def _handle_completion( try: # The API accepts list[Content] but mypy is overly strict about variance contents_for_api: Any = contents - response = self._client.models.generate_content( + response = self._get_sync_client().models.generate_content( model=self.model, contents=contents_for_api, config=config, @@ -1153,7 +1169,7 @@ def _handle_streaming_completion( # The API accepts list[Content] but mypy is overly strict about variance contents_for_api: Any = contents - for chunk in self._client.models.generate_content_stream( + for chunk in self._get_sync_client().models.generate_content_stream( model=self.model, contents=contents_for_api, config=config, @@ -1191,7 +1207,7 @@ async def _ahandle_completion( try: # The API accepts list[Content] but mypy is overly strict about variance contents_for_api: Any = contents - response = await self._client.aio.models.generate_content( + response = await self._get_sync_client().aio.models.generate_content( model=self.model, contents=contents_for_api, config=config, @@ -1232,7 +1248,7 @@ async def _ahandle_streaming_completion( # The API accepts list[Content] but mypy is overly strict 
about variance contents_for_api: Any = contents - stream = await self._client.aio.models.generate_content_stream( + stream = await self._get_sync_client().aio.models.generate_content_stream( model=self.model, contents=contents_for_api, config=config, @@ -1439,6 +1455,6 @@ def get_file_uploader(self) -> Any: try: from crewai_files.uploaders.gemini import GeminiFileUploader - return GeminiFileUploader(client=self._client) + return GeminiFileUploader(client=self._get_sync_client()) except ImportError: return None diff --git a/lib/crewai/src/crewai/llms/providers/openai/completion.py b/lib/crewai/src/crewai/llms/providers/openai/completion.py index 668794e83b..d0f9381724 100644 --- a/lib/crewai/src/crewai/llms/providers/openai/completion.py +++ b/lib/crewai/src/crewai/llms/providers/openai/completion.py @@ -253,22 +253,40 @@ def _normalize_openai_fields(cls, data: Any) -> Any: @model_validator(mode="after") def _init_clients(self) -> OpenAICompletion: + """Eagerly build clients when the API key is available, otherwise + defer so ``LLM(model="openai/...")`` can be constructed at module + import time even before deployment env vars are set. 
+ """ + try: + self._client = self._build_sync_client() + self._async_client = self._build_async_client() + except ValueError: + pass + return self + + def _build_sync_client(self) -> Any: client_config = self._get_client_params() if self.interceptor: transport = HTTPTransport(interceptor=self.interceptor) - http_client = httpx.Client(transport=transport) - client_config["http_client"] = http_client + client_config["http_client"] = httpx.Client(transport=transport) + return OpenAI(**client_config) - self._client = OpenAI(**client_config) - - async_client_config = self._get_client_params() + def _build_async_client(self) -> Any: + client_config = self._get_client_params() if self.interceptor: - async_transport = AsyncHTTPTransport(interceptor=self.interceptor) - async_http_client = httpx.AsyncClient(transport=async_transport) - async_client_config["http_client"] = async_http_client + transport = AsyncHTTPTransport(interceptor=self.interceptor) + client_config["http_client"] = httpx.AsyncClient(transport=transport) + return AsyncOpenAI(**client_config) - self._async_client = AsyncOpenAI(**async_client_config) - return self + def _get_sync_client(self) -> Any: + if self._client is None: + self._client = self._build_sync_client() + return self._client + + def _get_async_client(self) -> Any: + if self._async_client is None: + self._async_client = self._build_async_client() + return self._async_client @property def last_response_id(self) -> str | None: @@ -797,7 +815,7 @@ def _handle_responses( ) -> str | ResponsesAPIResult | Any: """Handle non-streaming Responses API call.""" try: - response: Response = self._client.responses.create(**params) + response: Response = self._get_sync_client().responses.create(**params) # Track response ID for auto-chaining if self.auto_chain and response.id: @@ -933,7 +951,9 @@ async def _ahandle_responses( ) -> str | ResponsesAPIResult | Any: """Handle async non-streaming Responses API call.""" try: - response: Response = await 
self._async_client.responses.create(**params) + response: Response = await self._get_async_client().responses.create( + **params + ) # Track response ID for auto-chaining if self.auto_chain and response.id: @@ -1069,7 +1089,7 @@ def _handle_streaming_responses( final_response: Response | None = None usage: dict[str, Any] | None = None - stream = self._client.responses.create(**params) + stream = self._get_sync_client().responses.create(**params) response_id_stream = None for event in stream: @@ -1197,7 +1217,7 @@ async def _ahandle_streaming_responses( final_response: Response | None = None usage: dict[str, Any] | None = None - stream = await self._async_client.responses.create(**params) + stream = await self._get_async_client().responses.create(**params) response_id_stream = None async for event in stream: @@ -1591,7 +1611,7 @@ def _handle_completion( parse_params = { k: v for k, v in params.items() if k != "response_format" } - parsed_response = self._client.beta.chat.completions.parse( + parsed_response = self._get_sync_client().beta.chat.completions.parse( **parse_params, response_format=response_model, ) @@ -1615,7 +1635,9 @@ def _handle_completion( ) return parsed_object - response: ChatCompletion = self._client.chat.completions.create(**params) + response: ChatCompletion = self._get_sync_client().chat.completions.create( + **params + ) usage = self._extract_openai_token_usage(response) @@ -1842,7 +1864,7 @@ def _handle_streaming_completion( } stream: ChatCompletionStream[BaseModel] - with self._client.beta.chat.completions.stream( + with self._get_sync_client().beta.chat.completions.stream( **parse_params, response_format=response_model ) as stream: for chunk in stream: @@ -1879,7 +1901,7 @@ def _handle_streaming_completion( return "" completion_stream: Stream[ChatCompletionChunk] = ( - self._client.chat.completions.create(**params) + self._get_sync_client().chat.completions.create(**params) ) usage_data: dict[str, Any] | None = None @@ -1976,9 +1998,11 @@ 
async def _ahandle_completion( parse_params = { k: v for k, v in params.items() if k != "response_format" } - parsed_response = await self._async_client.beta.chat.completions.parse( - **parse_params, - response_format=response_model, + parsed_response = ( + await self._get_async_client().beta.chat.completions.parse( + **parse_params, + response_format=response_model, + ) ) math_reasoning = parsed_response.choices[0].message @@ -2000,8 +2024,8 @@ async def _ahandle_completion( ) return parsed_object - response: ChatCompletion = await self._async_client.chat.completions.create( - **params + response: ChatCompletion = ( + await self._get_async_client().chat.completions.create(**params) ) usage = self._extract_openai_token_usage(response) @@ -2127,7 +2151,7 @@ async def _ahandle_streaming_completion( if response_model: completion_stream: AsyncIterator[ ChatCompletionChunk - ] = await self._async_client.chat.completions.create(**params) + ] = await self._get_async_client().chat.completions.create(**params) accumulated_content = "" usage_data: dict[str, Any] | None = None @@ -2183,7 +2207,7 @@ async def _ahandle_streaming_completion( stream: AsyncIterator[ ChatCompletionChunk - ] = await self._async_client.chat.completions.create(**params) + ] = await self._get_async_client().chat.completions.create(**params) usage_data = None @@ -2379,8 +2403,8 @@ def get_file_uploader(self) -> Any: from crewai_files.uploaders.openai import OpenAIFileUploader return OpenAIFileUploader( - client=self._client, - async_client=self._async_client, + client=self._get_sync_client(), + async_client=self._get_async_client(), ) except ImportError: return None diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py index 38b31a2e21..a9dbfb3ee2 100644 --- a/lib/crewai/tests/llms/azure/test_azure.py +++ b/lib/crewai/tests/llms/azure/test_azure.py @@ -378,23 +378,27 @@ def sample_tool(query: str) -> str: def test_azure_raises_error_when_endpoint_missing(): - 
"""Test that AzureCompletion raises ValueError when endpoint is missing""" + """Credentials are validated lazily: construction succeeds, first + client build raises the descriptive error.""" from crewai.llms.providers.azure.completion import AzureCompletion - # Clear environment variables with patch.dict(os.environ, {}, clear=True): + llm = AzureCompletion(model="gpt-4", api_key="test-key") with pytest.raises(ValueError, match="Azure endpoint is required"): - AzureCompletion(model="gpt-4", api_key="test-key") + llm._get_sync_client() def test_azure_raises_error_when_api_key_missing(): - """Test that AzureCompletion raises ValueError when API key is missing""" + """Credentials are validated lazily: construction succeeds, first + client build raises the descriptive error.""" from crewai.llms.providers.azure.completion import AzureCompletion - # Clear environment variables with patch.dict(os.environ, {}, clear=True): + llm = AzureCompletion( + model="gpt-4", endpoint="https://test.openai.azure.com" + ) with pytest.raises(ValueError, match="Azure API key is required"): - AzureCompletion(model="gpt-4", endpoint="https://test.openai.azure.com") + llm._get_sync_client() def test_azure_endpoint_configuration(): From 9f61deb0728a19611422f1e1a60c46d84411732e Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sat, 11 Apr 2026 05:50:15 +0800 Subject: [PATCH 02/14] feat: add `crewai deploy validate` pre-deploy validation Adds a new `crewai deploy validate` command that checks a project locally against the most common categories of deploy-time failures, so users don't burn attempts on fixable project-structure problems. `crewai deploy create` and `crewai deploy push` now run the same checks automatically and abort on errors; `--skip-validate` opts out. Checks (errors block, warnings print only): 1. pyproject.toml present with `[project].name` 2. lockfile (uv.lock or poetry.lock) present and not stale 3. src// resolves, rejecting empty names and .egg-info dirs 4. 
crew.py, config/agents.yaml, config/tasks.yaml for standard crews 5. main.py for flow projects 6. hatchling wheel target resolves 7. crew/flow module imports cleanly in a `uv run` subprocess, with classification of common failures (missing provider extras, missing API keys at import, stale crewai pins, pydantic errors) 8. env vars referenced in source vs .env (warning only) 9. crewai lockfile pin vs a known-bad cutoff (warning only) Each finding has a stable code and a structured title/detail/hint so downstream tooling and tests can pin behavior. 33 tests cover the checks 1:1 against the failure patterns observed in practice. --- lib/crewai/src/crewai/cli/cli.py | 31 +- lib/crewai/src/crewai/cli/deploy/main.py | 36 +- lib/crewai/src/crewai/cli/deploy/validate.py | 842 ++++++++++++++++++ .../tests/cli/deploy/test_deploy_main.py | 6 +- lib/crewai/tests/cli/deploy/test_validate.py | 400 +++++++++ 5 files changed, 1306 insertions(+), 9 deletions(-) create mode 100644 lib/crewai/src/crewai/cli/deploy/validate.py create mode 100644 lib/crewai/tests/cli/deploy/test_validate.py diff --git a/lib/crewai/src/crewai/cli/cli.py b/lib/crewai/src/crewai/cli/cli.py index 03bdcd5024..2e10d51625 100644 --- a/lib/crewai/src/crewai/cli/cli.py +++ b/lib/crewai/src/crewai/cli/cli.py @@ -392,10 +392,15 @@ def deploy() -> None: @deploy.command(name="create") @click.option("-y", "--yes", is_flag=True, help="Skip the confirmation prompt") -def deploy_create(yes: bool) -> None: +@click.option( + "--skip-validate", + is_flag=True, + help="Skip the pre-deploy validation checks.", +) +def deploy_create(yes: bool, skip_validate: bool) -> None: """Create a Crew deployment.""" deploy_cmd = DeployCommand() - deploy_cmd.create_crew(yes) + deploy_cmd.create_crew(yes, skip_validate=skip_validate) @deploy.command(name="list") @@ -407,10 +412,28 @@ def deploy_list() -> None: @deploy.command(name="push") @click.option("-u", "--uuid", type=str, help="Crew UUID parameter") -def deploy_push(uuid: str | 
None) -> None: +@click.option( + "--skip-validate", + is_flag=True, + help="Skip the pre-deploy validation checks.", +) +def deploy_push(uuid: str | None, skip_validate: bool) -> None: """Deploy the Crew.""" deploy_cmd = DeployCommand() - deploy_cmd.deploy(uuid=uuid) + deploy_cmd.deploy(uuid=uuid, skip_validate=skip_validate) + + +@deploy.command(name="validate") +def deploy_validate() -> None: + """Validate the current project against common deployment failures. + + Runs the same pre-deploy checks that `crewai deploy create` and + `crewai deploy push` run automatically, without contacting the platform. + Exits non-zero if any blocking issues are found. + """ + from crewai.cli.deploy.validate import run_validate_command + + run_validate_command() @deploy.command(name="status") diff --git a/lib/crewai/src/crewai/cli/deploy/main.py b/lib/crewai/src/crewai/cli/deploy/main.py index f5a32eb8ed..5a677ba5d4 100644 --- a/lib/crewai/src/crewai/cli/deploy/main.py +++ b/lib/crewai/src/crewai/cli/deploy/main.py @@ -4,12 +4,35 @@ from crewai.cli import git from crewai.cli.command import BaseCommand, PlusAPIMixin +from crewai.cli.deploy.validate import validate_project from crewai.cli.utils import fetch_and_json_env_file, get_project_name console = Console() +def _run_predeploy_validation(skip_validate: bool) -> bool: + """Run pre-deploy validation unless skipped. + + Returns True if deployment should proceed, False if it should abort. + """ + if skip_validate: + console.print( + "[yellow]Skipping pre-deploy validation (--skip-validate).[/yellow]" + ) + return True + + console.print("Running pre-deploy validation...", style="bold blue") + validator = validate_project() + if not validator.ok: + console.print( + "\n[bold red]Pre-deploy validation failed. " + "Fix the issues above or re-run with --skip-validate.[/bold red]" + ) + return False + return True + + class DeployCommand(BaseCommand, PlusAPIMixin): """ A class to handle deployment-related operations for CrewAI projects. 
@@ -60,13 +83,16 @@ def _display_logs(self, log_messages: list[dict[str, Any]]) -> None: f"{log_message['timestamp']} - {log_message['level']}: {log_message['message']}" ) - def deploy(self, uuid: str | None = None) -> None: + def deploy(self, uuid: str | None = None, skip_validate: bool = False) -> None: """ Deploy a crew using either UUID or project name. Args: uuid (Optional[str]): The UUID of the crew to deploy. + skip_validate (bool): Skip pre-deploy validation checks. """ + if not _run_predeploy_validation(skip_validate): + return self._telemetry.start_deployment_span(uuid) console.print("Starting deployment...", style="bold blue") if uuid: @@ -80,10 +106,16 @@ def deploy(self, uuid: str | None = None) -> None: self._validate_response(response) self._display_deployment_info(response.json()) - def create_crew(self, confirm: bool = False) -> None: + def create_crew(self, confirm: bool = False, skip_validate: bool = False) -> None: """ Create a new crew deployment. + + Args: + confirm (bool): Whether to skip the interactive confirmation prompt. + skip_validate (bool): Skip pre-deploy validation checks. """ + if not _run_predeploy_validation(skip_validate): + return self._telemetry.create_crew_deployment_span() console.print("Creating deployment...", style="bold blue") env_vars = fetch_and_json_env_file() diff --git a/lib/crewai/src/crewai/cli/deploy/validate.py b/lib/crewai/src/crewai/cli/deploy/validate.py new file mode 100644 index 0000000000..0c9036c207 --- /dev/null +++ b/lib/crewai/src/crewai/cli/deploy/validate.py @@ -0,0 +1,842 @@ +"""Pre-deploy validation for CrewAI projects. + +Catches locally what a deploy would reject at build or runtime so users +don't burn deployment attempts on fixable project-structure problems. + +Each check is grouped into one of: +- ERROR: will block a deployment; validator exits non-zero. +- WARNING: may still deploy but is almost always a deployment bug; printed + but does not block. 
+ +The individual checks mirror the categories observed in production +deployment-failure logs: + +1. pyproject.toml present with ``[project].name`` +2. lockfile (``uv.lock`` or ``poetry.lock``) present and not stale +3. package directory at ``src/<package_name>/`` exists (no empty name, no egg-info) +4. standard crew files: ``crew.py``, ``config/agents.yaml``, ``config/tasks.yaml`` +5. flow entrypoint: ``main.py`` with a Flow subclass +6. hatch wheel target resolves (packages = [...] or default dir matches name) +7. crew/flow module imports cleanly (catches ``@CrewBase not found``, + ``No Flow subclass found``, provider import errors) +8. environment variables referenced in code vs ``.env`` / deployment env +9. installed crewai vs lockfile pin (catches missing-attribute failures from + stale pins) +""" + +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum +import json +import logging +import os +from pathlib import Path +import re +import shutil +import subprocess +import sys +from typing import Any + +from rich.console import Console + +from crewai.cli.utils import parse_toml + + +console = Console() +logger = logging.getLogger(__name__) + + +class Severity(str, Enum): + """Severity of a validation finding.""" + + ERROR = "error" + WARNING = "warning" + + +@dataclass +class ValidationResult: + """A single finding from a validation check. + + Attributes: + severity: whether this blocks deploy or is advisory. + code: stable short identifier, used in tests and docs + (e.g. ``missing_pyproject``, ``stale_lockfile``). + title: one-line summary shown to the user. + detail: optional multi-line explanation. + hint: optional remediation suggestion. + """ + + severity: Severity + code: str + title: str + detail: str = "" + hint: str = "" + + +# Maps known provider env var names → label used in hint messages.
+_KNOWN_API_KEY_HINTS: dict[str, str] = { + "OPENAI_API_KEY": "OpenAI", + "ANTHROPIC_API_KEY": "Anthropic", + "GOOGLE_API_KEY": "Google", + "GEMINI_API_KEY": "Gemini", + "AZURE_OPENAI_API_KEY": "Azure OpenAI", + "AZURE_API_KEY": "Azure", + "AWS_ACCESS_KEY_ID": "AWS", + "AWS_SECRET_ACCESS_KEY": "AWS", + "COHERE_API_KEY": "Cohere", + "GROQ_API_KEY": "Groq", + "MISTRAL_API_KEY": "Mistral", + "TAVILY_API_KEY": "Tavily", + "SERPER_API_KEY": "Serper", + "SERPLY_API_KEY": "Serply", + "PERPLEXITY_API_KEY": "Perplexity", + "DEEPSEEK_API_KEY": "DeepSeek", + "OPENROUTER_API_KEY": "OpenRouter", + "FIRECRAWL_API_KEY": "Firecrawl", + "EXA_API_KEY": "Exa", + "BROWSERBASE_API_KEY": "Browserbase", +} + + +def normalize_package_name(project_name: str) -> str: + """Normalize a pyproject project.name into a Python package directory name. + + Mirrors the rules in ``crewai.cli.create_crew.create_crew`` so the + validator agrees with the scaffolder about where ``src//`` should + live. + """ + folder = project_name.replace(" ", "_").replace("-", "_").lower() + return re.sub(r"[^a-zA-Z0-9_]", "", folder) + + +class DeployValidator: + """Runs the full pre-deploy validation suite against a project directory.""" + + def __init__(self, project_root: Path | None = None) -> None: + self.project_root: Path = (project_root or Path.cwd()).resolve() + self.results: list[ValidationResult] = [] + self._pyproject: dict[str, Any] | None = None + self._project_name: str | None = None + self._package_name: str | None = None + self._package_dir: Path | None = None + self._is_flow: bool = False + + def _add( + self, + severity: Severity, + code: str, + title: str, + detail: str = "", + hint: str = "", + ) -> None: + self.results.append( + ValidationResult( + severity=severity, + code=code, + title=title, + detail=detail, + hint=hint, + ) + ) + + @property + def errors(self) -> list[ValidationResult]: + return [r for r in self.results if r.severity is Severity.ERROR] + + @property + def warnings(self) -> 
list[ValidationResult]: + return [r for r in self.results if r.severity is Severity.WARNING] + + @property + def ok(self) -> bool: + return not self.errors + + def run(self) -> list[ValidationResult]: + """Run all checks. Later checks are skipped when earlier ones make + them impossible (e.g. no pyproject.toml → no lockfile check).""" + if not self._check_pyproject(): + return self.results + + self._check_lockfile() + + if not self._check_package_dir(): + self._check_hatch_wheel_target() + return self.results + + if self._is_flow: + self._check_flow_entrypoint() + else: + self._check_crew_entrypoint() + self._check_config_yamls() + + self._check_hatch_wheel_target() + self._check_module_imports() + self._check_env_vars() + self._check_version_vs_lockfile() + + return self.results + + def _check_pyproject(self) -> bool: + pyproject_path = self.project_root / "pyproject.toml" + if not pyproject_path.exists(): + self._add( + Severity.ERROR, + "missing_pyproject", + "Cannot find pyproject.toml", + detail=( + f"Expected pyproject.toml at {pyproject_path}. " + "CrewAI projects must be installable Python packages." + ), + hint="Run `crewai create crew <project_name>` to scaffold a valid project layout.", + ) + return False + + try: + self._pyproject = parse_toml(pyproject_path.read_text()) + except Exception as e: + self._add( + Severity.ERROR, + "invalid_pyproject", + "pyproject.toml is not valid TOML", + detail=str(e), + ) + return False + + project = self._pyproject.get("project") or {} + name = project.get("name") + if not isinstance(name, str) or not name.strip(): + self._add( + Severity.ERROR, + "missing_project_name", + "pyproject.toml is missing [project].name", + detail=( + "Without a project name the platform cannot resolve your " + "package directory (this produces errors like " + "'Cannot find src/<package_name>/crew.py')."
+ ), + hint='Set a `name = "..."` field under `[project]` in pyproject.toml.', + ) + return False + + self._project_name = name + self._package_name = normalize_package_name(name) + self._is_flow = (self._pyproject.get("tool") or {}).get("crewai", {}).get( + "type" + ) == "flow" + return True + + def _check_lockfile(self) -> None: + uv_lock = self.project_root / "uv.lock" + poetry_lock = self.project_root / "poetry.lock" + pyproject = self.project_root / "pyproject.toml" + + if not uv_lock.exists() and not poetry_lock.exists(): + self._add( + Severity.ERROR, + "missing_lockfile", + "Expected to find at least one of these files: uv.lock or poetry.lock", + hint=( + "Run `uv lock` (recommended) or `poetry lock` in your project " + "directory, commit the lockfile, then redeploy." + ), + ) + return + + lockfile = uv_lock if uv_lock.exists() else poetry_lock + try: + if lockfile.stat().st_mtime < pyproject.stat().st_mtime: + self._add( + Severity.WARNING, + "stale_lockfile", + f"{lockfile.name} is older than pyproject.toml", + detail=( + "Your lockfile may not reflect recent dependency changes. " + "The platform resolves from the lockfile, so deployed " + "dependencies may differ from local." + ), + hint="Run `uv lock` (or `poetry lock`) and commit the result.", + ) + except OSError: + pass + + def _check_package_dir(self) -> bool: + if self._package_name is None: + return False + + src_dir = self.project_root / "src" + if not src_dir.is_dir(): + self._add( + Severity.ERROR, + "missing_src_dir", + "Missing src/ directory", + detail=( + "CrewAI deployments expect a src-layout project: " + f"src/{self._package_name}/crew.py (or main.py for flows)." 
+ ), + hint="Run `crewai create crew <project_name>` to see the expected layout.", + ) + return False + + package_dir = src_dir / self._package_name + if not package_dir.is_dir(): + siblings = [ + p.name + for p in src_dir.iterdir() + if p.is_dir() and not p.name.endswith(".egg-info") + ] + egg_info = [ + p.name for p in src_dir.iterdir() if p.name.endswith(".egg-info") + ] + + hint_parts = [ + f'Create src/{self._package_name}/ to match [project].name = "{self._project_name}".' + ] + if siblings: + hint_parts.append( + f"Found other package directories: {', '.join(siblings)}. " + f"Either rename one to '{self._package_name}' or update [project].name." + ) + if egg_info: + hint_parts.append( + f"Delete stale build artifacts: {', '.join(egg_info)} " + "(these confuse the platform's package discovery)." + ) + + self._add( + Severity.ERROR, + "missing_package_dir", + f"Cannot find src/{self._package_name}/", + detail=( + "The platform looks for your crew source under " + "src/<package_name>/, derived from [project].name." + ), + hint=" ".join(hint_parts), + ) + return False + + for p in src_dir.iterdir(): + if p.name.endswith(".egg-info"): + self._add( + Severity.WARNING, + "stale_egg_info", + f"Stale build artifact in src/: {p.name}", + detail=( + ".egg-info directories can be mistaken for your package " + "and cause 'Cannot find src/<package_name>.egg-info/crew.py' errors." + ), + hint=f"Delete {p} and add `*.egg-info/` to .gitignore.", + ) + + self._package_dir = package_dir + return True + + def _check_crew_entrypoint(self) -> None: + if self._package_dir is None: + return + crew_py = self._package_dir / "crew.py" + if not crew_py.is_file(): + self._add( + Severity.ERROR, + "missing_crew_py", + f"Cannot find {crew_py.relative_to(self.project_root)}", + detail=( + "Standard crew projects must define a Crew class decorated " + "with @CrewBase inside crew.py." + ), + hint=( + "Create crew.py with an @CrewBase-annotated class, or set " + '`[tool.crewai] type = "flow"` in pyproject.toml if this is a flow.'
+ ), + ) + + def _check_config_yamls(self) -> None: + if self._package_dir is None: + return + config_dir = self._package_dir / "config" + if not config_dir.is_dir(): + self._add( + Severity.ERROR, + "missing_config_dir", + f"Cannot find {config_dir.relative_to(self.project_root)}", + hint="Create a config/ directory with agents.yaml and tasks.yaml.", + ) + return + + for yaml_name in ("agents.yaml", "tasks.yaml"): + yaml_path = config_dir / yaml_name + if not yaml_path.is_file(): + self._add( + Severity.ERROR, + f"missing_{yaml_name.replace('.', '_')}", + f"Cannot find {yaml_path.relative_to(self.project_root)}", + detail=( + "CrewAI loads agent and task config from these files; " + "missing them causes empty-config warnings and runtime crashes." + ), + ) + + def _check_flow_entrypoint(self) -> None: + if self._package_dir is None: + return + main_py = self._package_dir / "main.py" + if not main_py.is_file(): + self._add( + Severity.ERROR, + "missing_flow_main", + f"Cannot find {main_py.relative_to(self.project_root)}", + detail=( + "Flow projects must define a Flow subclass in main.py. " + 'This project has `[tool.crewai] type = "flow"` set.' 
+ ), + hint="Create main.py with a `class MyFlow(Flow[...])`.", + ) + + def _check_hatch_wheel_target(self) -> None: + if not self._pyproject: + return + + build_system = self._pyproject.get("build-system") or {} + backend = build_system.get("build-backend", "") + if "hatchling" not in backend: + return + + hatch_wheel = ( + (self._pyproject.get("tool") or {}) + .get("hatch", {}) + .get("build", {}) + .get("targets", {}) + .get("wheel", {}) + ) + if hatch_wheel.get("packages") or hatch_wheel.get("only-include"): + return + + if self._package_dir and self._package_dir.is_dir(): + return + + self._add( + Severity.ERROR, + "hatch_wheel_target_missing", + "Hatchling cannot determine which files to ship", + detail=( + "Your pyproject uses hatchling but has no " + "[tool.hatch.build.targets.wheel] configuration and no " + "directory matching your project name." + ), + hint=( + "Add:\n" + " [tool.hatch.build.targets.wheel]\n" + f' packages = ["src/{self._package_name}"]' + ), + ) + + def _check_module_imports(self) -> None: + """Import the user's crew/flow via `uv run` so the check sees the same + package versions as `crewai run` would. 
Result is reported as JSON on + the subprocess's stdout.""" + script = ( + "import json, sys, traceback, os\n" + "os.chdir(sys.argv[1])\n" + "try:\n" + " from crewai.cli.utils import get_crews, get_flows\n" + " is_flow = sys.argv[2] == 'flow'\n" + " if is_flow:\n" + " instances = get_flows()\n" + " kind = 'flow'\n" + " else:\n" + " instances = get_crews()\n" + " kind = 'crew'\n" + " print(json.dumps({'ok': True, 'kind': kind, 'count': len(instances)}))\n" + "except BaseException as e:\n" + " print(json.dumps({\n" + " 'ok': False,\n" + " 'error_type': type(e).__name__,\n" + " 'error': str(e),\n" + " 'traceback': traceback.format_exc(),\n" + " }))\n" + ) + + uv_path = shutil.which("uv") + if uv_path is None: + self._add( + Severity.WARNING, + "uv_not_found", + "Skipping import check: `uv` not installed", + hint="Install uv: https://docs.astral.sh/uv/", + ) + return + + try: + proc = subprocess.run( # noqa: S603 - args constructed from trusted inputs + [ + uv_path, + "run", + "python", + "-c", + script, + str(self.project_root), + "flow" if self._is_flow else "crew", + ], + cwd=self.project_root, + capture_output=True, + text=True, + timeout=120, + check=False, + ) + except subprocess.TimeoutExpired: + self._add( + Severity.ERROR, + "import_timeout", + "Importing your crew/flow module timed out after 120s", + detail=( + "User code may be making network calls or doing heavy work " + "at import time. Move that work into agent methods." + ), + ) + return + + # The payload is the last JSON object on stdout; user code may print + # other lines before it. 
+ payload: dict[str, Any] | None = None + for line in reversed(proc.stdout.splitlines()): + line = line.strip() + if line.startswith("{") and line.endswith("}"): + try: + payload = json.loads(line) + break + except json.JSONDecodeError: + continue + + if payload is None: + self._add( + Severity.ERROR, + "import_failed", + "Could not import your crew/flow module", + detail=(proc.stderr or proc.stdout or "").strip()[:1500], + hint="Run `crewai run` locally first to reproduce the error.", + ) + return + + if payload.get("ok"): + if payload.get("count", 0) == 0: + kind = payload.get("kind", "crew") + if kind == "flow": + self._add( + Severity.ERROR, + "no_flow_subclass", + "No Flow subclass found in the module", + hint=( + "main.py must define a class extending " + "`crewai.flow.Flow`, instantiable with no arguments." + ), + ) + else: + self._add( + Severity.ERROR, + "no_crewbase_class", + "Crew class annotated with @CrewBase not found", + hint=( + "Decorate your crew class with @CrewBase from " + "crewai.project (see `crewai create crew` template)." + ), + ) + return + + err_msg = str(payload.get("error", "")) + err_type = str(payload.get("error_type", "Exception")) + tb = str(payload.get("traceback", "")) + self._classify_import_error(err_type, err_msg, tb) + + def _classify_import_error(self, err_type: str, err_msg: str, tb: str) -> None: + """Turn a raw import-time exception into a user-actionable finding.""" + # Must be checked before the generic "native provider" branch below: + # the extras-missing message contains the same phrase. + m = re.search( + r"(?P[A-Za-z0-9_ -]+?)\s+native provider not available.*?`([^`]+)`", + err_msg, + ) + if m: + self._add( + Severity.ERROR, + "missing_provider_extra", + f"{m.group('pkg').strip()} provider extra not installed", + hint=f"Run: {m.group(2)}", + ) + return + + # crewai.llm.LLM.__new__ wraps provider init errors as + # ImportError("Error importing native provider: ..."). 
+ if "Error importing native provider" in err_msg or "native provider" in err_msg: + missing_key = self._extract_missing_api_key(err_msg) + if missing_key: + provider = _KNOWN_API_KEY_HINTS.get(missing_key, missing_key) + self._add( + Severity.WARNING, + "llm_init_missing_key", + f"LLM is constructed at import time but {missing_key} is not set", + detail=( + f"Your crew instantiates a {provider} LLM during module " + "load (e.g. in a class field default or @crew method). " + f"The {provider} provider currently requires {missing_key} " + "at construction time, so this will fail on the platform " + "unless the key is set in your deployment environment." + ), + hint=( + f"Add {missing_key} to your deployment's Environment " + "Variables before deploying, or move LLM construction " + "inside agent methods so it runs lazily." + ), + ) + return + self._add( + Severity.ERROR, + "llm_provider_init_failed", + "LLM native provider failed to initialize", + detail=err_msg, + hint=( + "Check your LLM(model=...) configuration and provider-specific " + "extras (e.g. `uv add 'crewai[azure-ai-inference]'` for Azure)." + ), + ) + return + + if err_type == "KeyError": + key = err_msg.strip("'\"") + if key in _KNOWN_API_KEY_HINTS or key.endswith("_API_KEY"): + self._add( + Severity.WARNING, + "env_var_read_at_import", + f"{key} is read at import time via os.environ[...]", + detail=( + "Using os.environ[...] (rather than os.getenv(...)) " + "at module scope crashes the build if the key isn't set." + ), + hint=( + f"Either add {key} as a deployment env var, or switch " + "to os.getenv() and move the access inside agent methods." 
+ ), + ) + return + + if "Crew class annotated with @CrewBase not found" in err_msg: + self._add( + Severity.ERROR, + "no_crewbase_class", + "Crew class annotated with @CrewBase not found", + detail=err_msg, + ) + return + if "No Flow subclass found" in err_msg: + self._add( + Severity.ERROR, + "no_flow_subclass", + "No Flow subclass found in the module", + detail=err_msg, + ) + return + + if ( + err_type == "AttributeError" + and "has no attribute '_load_response_format'" in err_msg + ): + self._add( + Severity.ERROR, + "stale_crewai_pin", + "Your lockfile pins a crewai version missing `_load_response_format`", + detail=err_msg, + hint=( + "Run `uv lock --upgrade-package crewai` (or `poetry update crewai`) " + "to pin a newer release." + ), + ) + return + + if "pydantic" in tb.lower() or "validation error" in err_msg.lower(): + self._add( + Severity.ERROR, + "pydantic_validation_error", + "Pydantic validation failed while loading your crew", + detail=err_msg[:800], + hint=( + "Check agent/task configuration fields. `crewai run` locally " + "will show the full traceback." + ), + ) + return + + self._add( + Severity.ERROR, + "import_failed", + f"Importing your crew failed: {err_type}", + detail=err_msg[:800], + hint="Run `crewai run` locally to see the full traceback.", + ) + + @staticmethod + def _extract_missing_api_key(err_msg: str) -> str | None: + """Pull 'FOO_API_KEY' out of '... FOO_API_KEY is required ...'.""" + m = re.search(r"([A-Z][A-Z0-9_]*_API_KEY)\s+is required", err_msg) + if m: + return m.group(1) + m = re.search(r"['\"]([A-Z][A-Z0-9_]*_API_KEY)['\"]", err_msg) + if m: + return m.group(1) + return None + + def _check_env_vars(self) -> None: + """Warn about env vars referenced in user code but missing locally. + Best-effort only — the platform sets vars server-side, so we never error. 
+ """ + if not self._package_dir: + return + + referenced: set[str] = set() + pattern = re.compile( + r"""(?x) + (?:os\.environ\s*(?:\[\s*|\.get\s*\(\s*) + |os\.getenv\s*\(\s* + |getenv\s*\(\s*) + ['"]([A-Z][A-Z0-9_]*)['"] + """ + ) + + for path in self._package_dir.rglob("*.py"): + try: + text = path.read_text(encoding="utf-8", errors="ignore") + except OSError: + continue + referenced.update(pattern.findall(text)) + + for path in self._package_dir.rglob("*.yaml"): + try: + text = path.read_text(encoding="utf-8", errors="ignore") + except OSError: + continue + referenced.update(re.findall(r"\$\{?([A-Z][A-Z0-9_]+)\}?", text)) + + env_file = self.project_root / ".env" + env_keys: set[str] = set() + if env_file.exists(): + for line in env_file.read_text(errors="ignore").splitlines(): + line = line.strip() + if not line or line.startswith("#") or "=" not in line: + continue + env_keys.add(line.split("=", 1)[0].strip()) + + missing_known: list[str] = sorted( + var + for var in referenced + if var in _KNOWN_API_KEY_HINTS + and var not in env_keys + and var not in os.environ + ) + if missing_known: + self._add( + Severity.WARNING, + "env_vars_not_in_dotenv", + f"{len(missing_known)} referenced API key(s) not in .env", + detail=( + "These env vars are referenced in your source but not set " + f"locally: {', '.join(missing_known)}. Deploys will fail " + "unless they are added to the deployment's Environment " + "Variables in the CrewAI dashboard." + ), + ) + + def _check_version_vs_lockfile(self) -> None: + """Warn when the lockfile pins a crewai release older than 1.13.0, + which is where ``_load_response_format`` was introduced. 
+ """ + uv_lock = self.project_root / "uv.lock" + poetry_lock = self.project_root / "poetry.lock" + lockfile = ( + uv_lock + if uv_lock.exists() + else poetry_lock + if poetry_lock.exists() + else None + ) + if lockfile is None: + return + + try: + text = lockfile.read_text(errors="ignore") + except OSError: + return + + m = re.search( + r'name\s*=\s*"crewai"\s*\nversion\s*=\s*"([^"]+)"', + text, + ) + if not m: + return + locked = m.group(1) + + try: + from packaging.version import Version + + if Version(locked) < Version("1.13.0"): + self._add( + Severity.WARNING, + "old_crewai_pin", + f"Lockfile pins crewai=={locked} (older than 1.13.0)", + detail=( + "Older pinned versions are missing API surface the " + "platform builder expects (e.g. `_load_response_format`)." + ), + hint="Run `uv lock --upgrade-package crewai` and redeploy.", + ) + except Exception as e: + logger.debug("Could not parse crewai pin from lockfile: %s", e) + + +def render_report(results: list[ValidationResult]) -> None: + """Pretty-print results to the shared rich console.""" + if not results: + console.print("[bold green]Pre-deploy validation passed.[/bold green]") + return + + errors = [r for r in results if r.severity is Severity.ERROR] + warnings = [r for r in results if r.severity is Severity.WARNING] + + for result in errors: + console.print(f"[bold red]ERROR[/bold red] [{result.code}] {result.title}") + if result.detail: + console.print(f" {result.detail}") + if result.hint: + console.print(f" [dim]hint:[/dim] {result.hint}") + + for result in warnings: + console.print( + f"[bold yellow]WARNING[/bold yellow] [{result.code}] {result.title}" + ) + if result.detail: + console.print(f" {result.detail}") + if result.hint: + console.print(f" [dim]hint:[/dim] {result.hint}") + + summary_parts: list[str] = [] + if errors: + summary_parts.append(f"[bold red]{len(errors)} error(s)[/bold red]") + if warnings: + summary_parts.append(f"[bold yellow]{len(warnings)} warning(s)[/bold yellow]") + 
console.print(f"\n{' / '.join(summary_parts)}") + + +def validate_project(project_root: Path | None = None) -> DeployValidator: + """Entrypoint: run validation, render results, return the validator. + + The caller inspects ``validator.ok`` to decide whether to proceed with a + deploy. + """ + validator = DeployValidator(project_root=project_root) + validator.run() + render_report(validator.results) + return validator + + +def run_validate_command() -> None: + """Implementation of `crewai deploy validate`.""" + validator = validate_project() + if not validator.ok: + sys.exit(1) diff --git a/lib/crewai/tests/cli/deploy/test_deploy_main.py b/lib/crewai/tests/cli/deploy/test_deploy_main.py index 4b818cc58f..9b6e49e1ac 100644 --- a/lib/crewai/tests/cli/deploy/test_deploy_main.py +++ b/lib/crewai/tests/cli/deploy/test_deploy_main.py @@ -125,7 +125,7 @@ def test_deploy_with_uuid(self, mock_display): mock_response.json.return_value = {"uuid": "test-uuid"} self.mock_client.deploy_by_uuid.return_value = mock_response - self.deploy_command.deploy(uuid="test-uuid") + self.deploy_command.deploy(uuid="test-uuid", skip_validate=True) self.mock_client.deploy_by_uuid.assert_called_once_with("test-uuid") mock_display.assert_called_once_with({"uuid": "test-uuid"}) @@ -137,7 +137,7 @@ def test_deploy_with_project_name(self, mock_display): mock_response.json.return_value = {"uuid": "test-uuid"} self.mock_client.deploy_by_name.return_value = mock_response - self.deploy_command.deploy() + self.deploy_command.deploy(skip_validate=True) self.mock_client.deploy_by_name.assert_called_once_with("test_project") mock_display.assert_called_once_with({"uuid": "test-uuid"}) @@ -156,7 +156,7 @@ def test_create_crew(self, mock_input, mock_git_origin_url, mock_fetch_env): self.mock_client.create_crew.return_value = mock_response with patch("sys.stdout", new=StringIO()) as fake_out: - self.deploy_command.create_crew() + self.deploy_command.create_crew(skip_validate=True) self.assertIn("Deployment 
created successfully!", fake_out.getvalue()) self.assertIn("new-uuid", fake_out.getvalue()) diff --git a/lib/crewai/tests/cli/deploy/test_validate.py b/lib/crewai/tests/cli/deploy/test_validate.py new file mode 100644 index 0000000000..10812e3f81 --- /dev/null +++ b/lib/crewai/tests/cli/deploy/test_validate.py @@ -0,0 +1,400 @@ +"""Tests for `crewai.cli.deploy.validate`. + +The fixtures here correspond 1:1 to the deployment-failure patterns observed +in the #crewai-deployment-failures Slack channel that motivated this work. +""" + +from __future__ import annotations + +from pathlib import Path +from textwrap import dedent +from typing import Iterable +from unittest.mock import patch + +import pytest + +from crewai.cli.deploy.validate import ( + DeployValidator, + Severity, + normalize_package_name, +) + + +def _make_pyproject( + name: str = "my_crew", + dependencies: Iterable[str] = ("crewai>=1.14.0",), + *, + hatchling: bool = False, + flow: bool = False, + extra: str = "", +) -> str: + deps = ", ".join(f'"{d}"' for d in dependencies) + lines = [ + "[project]", + f'name = "{name}"', + 'version = "0.1.0"', + f"dependencies = [{deps}]", + ] + if hatchling: + lines += [ + "", + "[build-system]", + 'requires = ["hatchling"]', + 'build-backend = "hatchling.build"', + ] + if flow: + lines += ["", "[tool.crewai]", 'type = "flow"'] + if extra: + lines += ["", extra] + return "\n".join(lines) + "\n" + + +def _scaffold_standard_crew( + root: Path, + *, + name: str = "my_crew", + include_crew_py: bool = True, + include_agents_yaml: bool = True, + include_tasks_yaml: bool = True, + include_lockfile: bool = True, + pyproject: str | None = None, +) -> Path: + (root / "pyproject.toml").write_text(pyproject or _make_pyproject(name=name)) + if include_lockfile: + (root / "uv.lock").write_text("# dummy uv lockfile\n") + + pkg_dir = root / "src" / normalize_package_name(name) + pkg_dir.mkdir(parents=True) + (pkg_dir / "__init__.py").write_text("") + + if include_crew_py: + (pkg_dir 
/ "crew.py").write_text( + dedent( + """ + from crewai.project import CrewBase, crew + + @CrewBase + class MyCrew: + agents_config = "config/agents.yaml" + tasks_config = "config/tasks.yaml" + + @crew + def crew(self): + from crewai import Crew + return Crew(agents=[], tasks=[]) + """ + ).strip() + + "\n" + ) + + config_dir = pkg_dir / "config" + config_dir.mkdir() + if include_agents_yaml: + (config_dir / "agents.yaml").write_text("{}\n") + if include_tasks_yaml: + (config_dir / "tasks.yaml").write_text("{}\n") + + return pkg_dir + + +def _codes(validator: DeployValidator) -> set[str]: + return {r.code for r in validator.results} + + +def _run_without_import_check(root: Path) -> DeployValidator: + """Run validation with the subprocess-based import check stubbed out; + the classifier is exercised directly in its own tests below.""" + with patch.object(DeployValidator, "_check_module_imports", lambda self: None): + v = DeployValidator(project_root=root) + v.run() + return v + + +@pytest.mark.parametrize( + "project_name, expected", + [ + ("my-crew", "my_crew"), + ("My Cool-Project", "my_cool_project"), + ("crew123", "crew123"), + ("crew.name!with$chars", "crewnamewithchars"), + ], +) +def test_normalize_package_name(project_name: str, expected: str) -> None: + assert normalize_package_name(project_name) == expected + + +def test_valid_standard_crew_project_passes(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path) + v = _run_without_import_check(tmp_path) + assert v.ok, f"expected clean run, got {v.results}" + + +def test_missing_pyproject_errors(tmp_path: Path) -> None: + v = _run_without_import_check(tmp_path) + assert "missing_pyproject" in _codes(v) + assert not v.ok + + +def test_invalid_pyproject_errors(tmp_path: Path) -> None: + (tmp_path / "pyproject.toml").write_text("this is not valid toml ====\n") + v = _run_without_import_check(tmp_path) + assert "invalid_pyproject" in _codes(v) + + +def test_missing_project_name_errors(tmp_path: Path) -> None: 
+ (tmp_path / "pyproject.toml").write_text( + '[project]\nversion = "0.1.0"\ndependencies = ["crewai>=1.14.0"]\n' + ) + v = _run_without_import_check(tmp_path) + assert "missing_project_name" in _codes(v) + + +def test_missing_lockfile_errors(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path, include_lockfile=False) + v = _run_without_import_check(tmp_path) + assert "missing_lockfile" in _codes(v) + + +def test_poetry_lock_is_accepted(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path, include_lockfile=False) + (tmp_path / "poetry.lock").write_text("# poetry lockfile\n") + v = _run_without_import_check(tmp_path) + assert "missing_lockfile" not in _codes(v) + + +def test_stale_lockfile_warns(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path) + # Make lockfile older than pyproject. + lock = tmp_path / "uv.lock" + pyproject = tmp_path / "pyproject.toml" + old_time = pyproject.stat().st_mtime - 60 + import os + + os.utime(lock, (old_time, old_time)) + v = _run_without_import_check(tmp_path) + assert "stale_lockfile" in _codes(v) + # Stale is a warning, so the run can still be ok (no errors). 
+ assert v.ok + + +def test_missing_package_dir_errors(tmp_path: Path) -> None: + # pyproject says name=my_crew but we only create src/other_pkg/ + (tmp_path / "pyproject.toml").write_text(_make_pyproject(name="my_crew")) + (tmp_path / "uv.lock").write_text("") + (tmp_path / "src" / "other_pkg").mkdir(parents=True) + v = _run_without_import_check(tmp_path) + codes = _codes(v) + assert "missing_package_dir" in codes + finding = next(r for r in v.results if r.code == "missing_package_dir") + assert "other_pkg" in finding.hint + + +def test_egg_info_only_errors_with_targeted_hint(tmp_path: Path) -> None: + """Regression for the case where only src/.egg-info/ exists.""" + (tmp_path / "pyproject.toml").write_text(_make_pyproject(name="odoo_pm_agents")) + (tmp_path / "uv.lock").write_text("") + (tmp_path / "src" / "odoo_pm_agents.egg-info").mkdir(parents=True) + v = _run_without_import_check(tmp_path) + finding = next(r for r in v.results if r.code == "missing_package_dir") + assert "egg-info" in finding.hint + + +def test_stale_egg_info_sibling_warns(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path) + (tmp_path / "src" / "my_crew.egg-info").mkdir() + v = _run_without_import_check(tmp_path) + assert "stale_egg_info" in _codes(v) + + +def test_missing_crew_py_errors(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path, include_crew_py=False) + v = _run_without_import_check(tmp_path) + assert "missing_crew_py" in _codes(v) + + +def test_missing_agents_yaml_errors(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path, include_agents_yaml=False) + v = _run_without_import_check(tmp_path) + assert "missing_agents_yaml" in _codes(v) + + +def test_missing_tasks_yaml_errors(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path, include_tasks_yaml=False) + v = _run_without_import_check(tmp_path) + assert "missing_tasks_yaml" in _codes(v) + + +def test_flow_project_requires_main_py(tmp_path: Path) -> None: + (tmp_path / "pyproject.toml").write_text( + 
_make_pyproject(name="my_flow", flow=True) + ) + (tmp_path / "uv.lock").write_text("") + (tmp_path / "src" / "my_flow").mkdir(parents=True) + v = _run_without_import_check(tmp_path) + assert "missing_flow_main" in _codes(v) + + +def test_flow_project_with_main_py_passes(tmp_path: Path) -> None: + (tmp_path / "pyproject.toml").write_text( + _make_pyproject(name="my_flow", flow=True) + ) + (tmp_path / "uv.lock").write_text("") + pkg = tmp_path / "src" / "my_flow" + pkg.mkdir(parents=True) + (pkg / "main.py").write_text("# flow entrypoint\n") + v = _run_without_import_check(tmp_path) + assert "missing_flow_main" not in _codes(v) + + +def test_hatchling_without_wheel_config_passes_when_pkg_dir_matches( + tmp_path: Path, +) -> None: + _scaffold_standard_crew( + tmp_path, pyproject=_make_pyproject(name="my_crew", hatchling=True) + ) + v = _run_without_import_check(tmp_path) + # src/my_crew/ exists, so hatch default should find it — no wheel error. + assert "hatch_wheel_target_missing" not in _codes(v) + + +def test_hatchling_with_explicit_wheel_config_passes(tmp_path: Path) -> None: + extra = ( + "[tool.hatch.build.targets.wheel]\n" + 'packages = ["src/my_crew"]' + ) + _scaffold_standard_crew( + tmp_path, + pyproject=_make_pyproject(name="my_crew", hatchling=True, extra=extra), + ) + v = _run_without_import_check(tmp_path) + assert "hatch_wheel_target_missing" not in _codes(v) + + +def test_classify_missing_openai_key_is_warning(tmp_path: Path) -> None: + v = DeployValidator(project_root=tmp_path) + v._classify_import_error( + "ImportError", + "Error importing native provider: 1 validation error for OpenAICompletion\n" + " Value error, OPENAI_API_KEY is required", + tb="", + ) + assert len(v.results) == 1 + result = v.results[0] + assert result.code == "llm_init_missing_key" + assert result.severity is Severity.WARNING + assert "OPENAI_API_KEY" in result.title + + +def test_classify_azure_extra_missing_is_error(tmp_path: Path) -> None: + v = 
DeployValidator(project_root=tmp_path) + v._classify_import_error( + "ImportError", + 'Azure AI Inference native provider not available, to install: `uv add "crewai[azure-ai-inference]"`', + tb="", + ) + assert "missing_provider_extra" in _codes(v) + + +def test_classify_keyerror_at_import_is_warning(tmp_path: Path) -> None: + """Regression for `KeyError: 'SERPLY_API_KEY'` raised at import time.""" + v = DeployValidator(project_root=tmp_path) + v._classify_import_error("KeyError", "'SERPLY_API_KEY'", tb="") + codes = _codes(v) + assert "env_var_read_at_import" in codes + + +def test_classify_no_crewbase_class_is_error(tmp_path: Path) -> None: + v = DeployValidator(project_root=tmp_path) + v._classify_import_error( + "ValueError", + "Crew class annotated with @CrewBase not found.", + tb="", + ) + assert "no_crewbase_class" in _codes(v) + + +def test_classify_no_flow_subclass_is_error(tmp_path: Path) -> None: + v = DeployValidator(project_root=tmp_path) + v._classify_import_error("ValueError", "No Flow subclass found in the module.", tb="") + assert "no_flow_subclass" in _codes(v) + + +def test_classify_stale_crewai_pin_attribute_error(tmp_path: Path) -> None: + """Regression for a stale crewai pin missing `_load_response_format`.""" + v = DeployValidator(project_root=tmp_path) + v._classify_import_error( + "AttributeError", + "'EmploymentServiceDecisionSupportSystemCrew' object has no attribute '_load_response_format'", + tb="", + ) + assert "stale_crewai_pin" in _codes(v) + + +def test_classify_unknown_error_is_fallback(tmp_path: Path) -> None: + v = DeployValidator(project_root=tmp_path) + v._classify_import_error("RuntimeError", "something weird happened", tb="") + assert "import_failed" in _codes(v) + + +def test_env_var_referenced_but_missing_warns(tmp_path: Path) -> None: + pkg = _scaffold_standard_crew(tmp_path) + (pkg / "tools.py").write_text( + 'import os\nkey = os.getenv("TAVILY_API_KEY")\n' + ) + import os + + # Make sure the test doesn't inherit the key 
from the host environment. + with patch.dict(os.environ, {}, clear=False): + os.environ.pop("TAVILY_API_KEY", None) + v = _run_without_import_check(tmp_path) + codes = _codes(v) + assert "env_vars_not_in_dotenv" in codes + + +def test_env_var_in_dotenv_does_not_warn(tmp_path: Path) -> None: + pkg = _scaffold_standard_crew(tmp_path) + (pkg / "tools.py").write_text( + 'import os\nkey = os.getenv("TAVILY_API_KEY")\n' + ) + (tmp_path / ".env").write_text("TAVILY_API_KEY=abc\n") + v = _run_without_import_check(tmp_path) + assert "env_vars_not_in_dotenv" not in _codes(v) + + +def test_old_crewai_pin_in_uv_lock_warns(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path) + (tmp_path / "uv.lock").write_text( + 'name = "crewai"\nversion = "1.10.0"\nsource = { registry = "..." }\n' + ) + v = _run_without_import_check(tmp_path) + assert "old_crewai_pin" in _codes(v) + + +def test_modern_crewai_pin_does_not_warn(tmp_path: Path) -> None: + _scaffold_standard_crew(tmp_path) + (tmp_path / "uv.lock").write_text( + 'name = "crewai"\nversion = "1.14.1"\nsource = { registry = "..." 
}\n' + ) + v = _run_without_import_check(tmp_path) + assert "old_crewai_pin" not in _codes(v) + + +def test_create_crew_aborts_on_validation_error(tmp_path: Path) -> None: + """`crewai deploy create` must not contact the API when validation fails.""" + from unittest.mock import MagicMock, patch as mock_patch + + from crewai.cli.deploy.main import DeployCommand + + with ( + mock_patch("crewai.cli.command.get_auth_token", return_value="tok"), + mock_patch("crewai.cli.deploy.main.get_project_name", return_value="p"), + mock_patch("crewai.cli.command.PlusAPI") as mock_api, + mock_patch( + "crewai.cli.deploy.main.validate_project" + ) as mock_validate, + ): + mock_validate.return_value = MagicMock(ok=False) + cmd = DeployCommand() + cmd.create_crew() + assert not cmd.plus_api_client.create_crew.called + del mock_api # silence unused-var lint \ No newline at end of file From 8312e1caadf1c3f48e81619bd374752de1b8451b Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 04:31:04 +0800 Subject: [PATCH 03/14] fix: keep Azure aclose a no-op when the async client was never built MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lazy-init refactor rewrote `aclose` to access the async client via `_get_async_client()`, which forces lazy construction. When an `AzureCompletion` is instantiated without credentials (the whole point of deferred init), that call raises `ValueError: "Azure API key is required"` during cleanup — including via `async with` / `__aexit__`. Access the cached `_async_client` attribute directly so cleanup on an uninitialized LLM is a harmless no-op. Add a regression test that enters and exits an `async with` block against a credentials-less `AzureCompletion`. 
--- .../src/crewai/llms/providers/azure/completion.py | 9 ++++++--- lib/crewai/tests/llms/azure/test_azure.py | 15 +++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index 249c22550b..156fa89840 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -1132,10 +1132,13 @@ async def aclose(self) -> None: """Close the async client and clean up resources. This ensures proper cleanup of the underlying aiohttp session - to avoid unclosed connector warnings. + to avoid unclosed connector warnings. Accesses the cached client + directly rather than going through `_get_async_client` so a + cleanup on an uninitialized LLM is a harmless no-op rather than + a credential-required error. """ - if hasattr(self._get_async_client(), "close"): - await self._get_async_client().close() + if self._async_client is not None and hasattr(self._async_client, "close"): + await self._async_client.close() async def __aenter__(self) -> Self: """Async context manager entry.""" diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py index a9dbfb3ee2..bee767157c 100644 --- a/lib/crewai/tests/llms/azure/test_azure.py +++ b/lib/crewai/tests/llms/azure/test_azure.py @@ -401,6 +401,21 @@ def test_azure_raises_error_when_api_key_missing(): llm._get_sync_client() +@pytest.mark.asyncio +async def test_azure_aclose_is_noop_when_uninitialized(): + """`aclose` (and `async with`) on an uninstantiated-client LLM must be + a harmless no-op, not force lazy construction that then raises for + missing credentials.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + with patch.dict(os.environ, {}, clear=True): + llm = AzureCompletion(model="gpt-4") + assert llm._async_client is None + await llm.aclose() + async with llm: + pass + 
+ def test_azure_endpoint_configuration(): """ Test that Azure endpoint configuration works with multiple environment variable names From 9eb45950e4e5b3e50024ec3d7cf47122e91c09e3 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 04:34:23 +0800 Subject: [PATCH 04/14] fix: avoid forcing lazy client construction in lightweight accessors Two sites that were mechanically rewritten by the lazy-getter regex shouldn't actually go through the lazy getter: - `BedrockCompletion._ensure_async_client` manages its own client lifecycle through `aiobotocore` inside an exit stack. Its trailing `return self._get_async_client()` was a redundant indirection through a stub method that doesn't even attempt to build a client. Return the cached attribute directly. - `GeminiCompletion._get_client_params` is a lightweight config accessor used at `to_config_dict()` time. Calling `_get_sync_client()` here forced client construction (and would raise `ValueError` when credentials aren't set) just to check the `vertexai` attribute. Read `self._client` directly and null-guard before the `hasattr` check. 
--- lib/crewai/src/crewai/llms/providers/bedrock/completion.py | 2 +- lib/crewai/src/crewai/llms/providers/gemini/completion.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py index 56072968f5..c7676c778f 100644 --- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py +++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py @@ -1198,7 +1198,7 @@ async def _ensure_async_client(self) -> Any: ) self._async_client = client self._async_client_initialized = True - return self._get_async_client() + return self._async_client async def _ahandle_converse( self, diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py index d9b8da7c49..c092f539d6 100644 --- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py @@ -244,8 +244,9 @@ def _get_client_params(self) -> dict[str, Any]: if ( hasattr(self, "client") - and hasattr(self._get_sync_client(), "vertexai") - and self._get_sync_client().vertexai + and self._client is not None + and hasattr(self._client, "vertexai") + and self._client.vertexai ): # Vertex AI configuration params.update( From 6f870786318b02411764f82be34ee7a88121ba42 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 04:36:03 +0800 Subject: [PATCH 05/14] test: update deploy_push CLI tests for new skip_validate kwarg `deploy_push` gained a `--skip-validate` flag that forwards to `DeployCommand.deploy()` as `skip_validate=False` by default. Update the two CLI tests that pin the exact call args. 
--- lib/crewai/tests/cli/test_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/crewai/tests/cli/test_cli.py b/lib/crewai/tests/cli/test_cli.py index ed74a60367..b324294b1f 100644 --- a/lib/crewai/tests/cli/test_cli.py +++ b/lib/crewai/tests/cli/test_cli.py @@ -367,7 +367,7 @@ def test_deploy_push(command, runner): result = runner.invoke(deploy_push, ["-u", uuid]) assert result.exit_code == 0 - mock_deploy.deploy.assert_called_once_with(uuid=uuid) + mock_deploy.deploy.assert_called_once_with(uuid=uuid, skip_validate=False) @mock.patch("crewai.cli.cli.DeployCommand") @@ -376,7 +376,7 @@ def test_deploy_push_no_uuid(command, runner): result = runner.invoke(deploy_push) assert result.exit_code == 0 - mock_deploy.deploy.assert_called_once_with(uuid=None) + mock_deploy.deploy.assert_called_once_with(uuid=None, skip_validate=False) @mock.patch("crewai.cli.cli.DeployCommand") From 77e66647b09a427ceb309b5c97af4ece4dc9d1c6 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 04:42:19 +0800 Subject: [PATCH 06/14] test: update test_create_llm_openai_missing_api_key for lazy init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `LLM(model="gpt-4o")` no longer raises at construction when `OPENAI_API_KEY` is missing — the descriptive error now surfaces when the client is actually built. Update the test to assert that contract: `create_llm` succeeds, and `llm._get_sync_client()` raises. 
--- lib/crewai/tests/utilities/test_llm_utils.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/crewai/tests/utilities/test_llm_utils.py b/lib/crewai/tests/utilities/test_llm_utils.py index 5d7d70b768..a32fdcbc93 100644 --- a/lib/crewai/tests/utilities/test_llm_utils.py +++ b/lib/crewai/tests/utilities/test_llm_utils.py @@ -119,10 +119,12 @@ def test_create_llm_with_invalid_type() -> None: def test_create_llm_openai_missing_api_key() -> None: - """Test that create_llm raises error when OpenAI API key is missing""" + """Credentials are validated lazily: `create_llm` succeeds, and the + descriptive error only surfaces when the client is actually built.""" with patch.dict(os.environ, {}, clear=True): + llm = create_llm(llm_value="gpt-4o") with pytest.raises((ValueError, ImportError)) as exc_info: - create_llm(llm_value="gpt-4o") + llm._get_sync_client() error_message = str(exc_info.value).lower() assert "openai_api_key" in error_message or "api_key" in error_message From ad6d7fa1982bb5926268c45ee807615c94477f21 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 04:46:39 +0800 Subject: [PATCH 07/14] fix: re-read Azure env vars in lazy client build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Azure's `_normalize_azure_fields` captures env vars at construction time. When `LLM(model="azure/...")` is instantiated before deployment env vars are set, `self.api_key` / `self.endpoint` freeze as `None` and the lazy client builder then always raises — defeating the point of deferred init for Azure. Re-read `AZURE_API_KEY` / `AZURE_ENDPOINT` (and friends) inside `_make_client_kwargs` when the fields are still unset, matching OpenAI's `_get_client_params` pattern. Runs the endpoint validator on any env-provided value so the same normalization applies. 
Add a regression test that constructs the LLM with no env vars set, then patches them in afterwards and asserts `_get_sync_client()` successfully builds a client and writes the resolved values back onto the LLM instance. --- .../crewai/llms/providers/azure/completion.py | 16 ++++++++++++ lib/crewai/tests/llms/azure/test_azure.py | 26 +++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index 156fa89840..fc68b42be0 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -155,6 +155,22 @@ def _build_async_client(self) -> Any: return AsyncChatCompletionsClient(**self._make_client_kwargs()) def _make_client_kwargs(self) -> dict[str, Any]: + # Re-read env vars so that a deferred build can pick up credentials + # that weren't set at instantiation time (e.g. LLM constructed at + # module import before deployment env vars were injected). + if not self.api_key: + self.api_key = os.getenv("AZURE_API_KEY") + if not self.endpoint: + endpoint = ( + os.getenv("AZURE_ENDPOINT") + or os.getenv("AZURE_OPENAI_ENDPOINT") + or os.getenv("AZURE_API_BASE") + ) + if endpoint: + self.endpoint = AzureCompletion._validate_and_fix_endpoint( + endpoint, self.model + ) + if not self.api_key: raise ValueError( "Azure API key is required. 
Set AZURE_API_KEY environment " diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py index bee767157c..d5efcfc696 100644 --- a/lib/crewai/tests/llms/azure/test_azure.py +++ b/lib/crewai/tests/llms/azure/test_azure.py @@ -416,6 +416,32 @@ async def test_azure_aclose_is_noop_when_uninitialized(): pass +def test_azure_lazy_build_reads_env_vars_set_after_construction(): + """When `LLM(model="azure/...")` is constructed before env vars are set, + the lazy client builder must re-read `AZURE_API_KEY` / `AZURE_ENDPOINT` + so the LLM actually works once credentials become available.""" + from crewai.llms.providers.azure.completion import AzureCompletion + + with patch.dict(os.environ, {}, clear=True): + llm = AzureCompletion(model="gpt-4") + assert llm.api_key is None + assert llm.endpoint is None + + with patch.dict( + os.environ, + { + "AZURE_API_KEY": "late-key", + "AZURE_ENDPOINT": "https://test.openai.azure.com/openai/deployments/gpt-4", + }, + clear=True, + ): + client = llm._get_sync_client() + assert client is not None + assert llm.api_key == "late-key" + assert llm.endpoint is not None + assert "test.openai.azure.com" in llm.endpoint + + def test_azure_endpoint_configuration(): """ Test that Azure endpoint configuration works with multiple environment variable names From 60e8a4a3644c894bc5e9c8d31bc9f9040b57d14f Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 04:59:18 +0800 Subject: [PATCH 08/14] fix: recompute is_azure_openai_endpoint after lazy endpoint resolve MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `_prepare_completion_params` uses `is_azure_openai_endpoint` to decide whether to include the `model` parameter in requests — Azure OpenAI endpoints embed the deployment name in the URL and reject a `model` field. 
When the endpoint was resolved lazily from env vars, the flag stayed at its pre-resolve `False` value, causing every lazily-inited Azure OpenAI request to include `model` and fail. Factor the classification into `_is_azure_openai_endpoint` and call it from both `_normalize_azure_fields` and `_make_client_kwargs`. Extend the lazy-build regression test to assert the flag flips to `True` once the endpoint is resolved. --- .../crewai/llms/providers/azure/completion.py | 26 ++++++++++++++----- lib/crewai/tests/llms/azure/test_azure.py | 6 ++++- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/azure/completion.py b/lib/crewai/src/crewai/llms/providers/azure/completion.py index fc68b42be0..4b8d842a5e 100644 --- a/lib/crewai/src/crewai/llms/providers/azure/completion.py +++ b/lib/crewai/src/crewai/llms/providers/azure/completion.py @@ -123,18 +123,23 @@ def _normalize_azure_fields(cls, data: Any) -> Any: data["endpoint"] = AzureCompletion._validate_and_fix_endpoint( data["endpoint"], model ) - parsed = urlparse(data["endpoint"]) - hostname = parsed.hostname or "" - data["is_azure_openai_endpoint"] = ( - hostname == "openai.azure.com" or hostname.endswith(".openai.azure.com") - ) and "/openai/deployments/" in data["endpoint"] - else: - data["is_azure_openai_endpoint"] = False + data["is_azure_openai_endpoint"] = AzureCompletion._is_azure_openai_endpoint( + data["endpoint"] + ) data["is_openai_model"] = any( prefix in model.lower() for prefix in ["gpt-", "o1-", "text-"] ) return data + @staticmethod + def _is_azure_openai_endpoint(endpoint: str | None) -> bool: + if not endpoint: + return False + hostname = urlparse(endpoint).hostname or "" + return ( + hostname == "openai.azure.com" or hostname.endswith(".openai.azure.com") + ) and "/openai/deployments/" in endpoint + @model_validator(mode="after") def _init_clients(self) -> AzureCompletion: """Eagerly build clients when credentials are available, otherwise @@ -170,6 
+175,13 @@ def _make_client_kwargs(self) -> dict[str, Any]: self.endpoint = AzureCompletion._validate_and_fix_endpoint( endpoint, self.model ) + # Recompute the routing flag now that the endpoint is known — + # _prepare_completion_params uses it to decide whether to + # include `model` in the request body (Azure OpenAI endpoints + # embed the deployment name in the URL and reject it). + self.is_azure_openai_endpoint = ( + AzureCompletion._is_azure_openai_endpoint(self.endpoint) + ) if not self.api_key: raise ValueError( diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py index d5efcfc696..f113bba297 100644 --- a/lib/crewai/tests/llms/azure/test_azure.py +++ b/lib/crewai/tests/llms/azure/test_azure.py @@ -419,13 +419,16 @@ async def test_azure_aclose_is_noop_when_uninitialized(): def test_azure_lazy_build_reads_env_vars_set_after_construction(): """When `LLM(model="azure/...")` is constructed before env vars are set, the lazy client builder must re-read `AZURE_API_KEY` / `AZURE_ENDPOINT` - so the LLM actually works once credentials become available.""" + so the LLM actually works once credentials become available, and the + `is_azure_openai_endpoint` routing flag must be recomputed off the + newly-resolved endpoint.""" from crewai.llms.providers.azure.completion import AzureCompletion with patch.dict(os.environ, {}, clear=True): llm = AzureCompletion(model="gpt-4") assert llm.api_key is None assert llm.endpoint is None + assert llm.is_azure_openai_endpoint is False with patch.dict( os.environ, @@ -440,6 +443,7 @@ def test_azure_lazy_build_reads_env_vars_set_after_construction(): assert llm.api_key == "late-key" assert llm.endpoint is not None assert "test.openai.azure.com" in llm.endpoint + assert llm.is_azure_openai_endpoint is True def test_azure_endpoint_configuration(): From 41403c09b449498204152faec36aab396b53977c Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 05:03:51 +0800 Subject: [PATCH 
09/14] fix: re-read Gemini env vars in lazy client build `_normalize_gemini_fields` captures `GOOGLE_API_KEY` / `GEMINI_API_KEY` / `GOOGLE_CLOUD_PROJECT` at construction time, so an LLM constructed before deployment env vars are set would freeze `self.api_key = None` and the lazy `_get_sync_client` build would then always auth-fail. Re-read the env vars inside `_get_sync_client` when `self._client` is None and the corresponding field is still unset, matching the pattern used for the other native providers. Add a regression test that constructs `GeminiCompletion` with no env vars set, patches them in afterwards, and asserts the lazy build succeeds and writes the resolved key back onto the LLM. --- .../crewai/llms/providers/gemini/completion.py | 8 ++++++++ lib/crewai/tests/llms/google/test_google.py | 17 +++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py index c092f539d6..1607b63950 100644 --- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py @@ -130,6 +130,14 @@ def _init_client(self) -> GeminiCompletion: def _get_sync_client(self) -> Any: if self._client is None: + # Re-read env vars so a deferred build can pick up credentials + # that weren't set at instantiation time. 
+ if not self.api_key: + self.api_key = os.getenv("GOOGLE_API_KEY") or os.getenv( + "GEMINI_API_KEY" + ) + if not self.project: + self.project = os.getenv("GOOGLE_CLOUD_PROJECT") self._client = self._initialize_client(self.use_vertexai) return self._client diff --git a/lib/crewai/tests/llms/google/test_google.py b/lib/crewai/tests/llms/google/test_google.py index 25b755daee..f6e94f89e3 100644 --- a/lib/crewai/tests/llms/google/test_google.py +++ b/lib/crewai/tests/llms/google/test_google.py @@ -64,6 +64,23 @@ def test_gemini_completion_module_is_imported(): assert hasattr(completion_mod, 'GeminiCompletion') +def test_gemini_lazy_build_reads_env_vars_set_after_construction(): + """When `LLM(model="gemini/...")` is constructed before env vars are set, + the lazy client builder must re-read `GOOGLE_API_KEY` / `GEMINI_API_KEY` + so the LLM works once credentials become available.""" + from crewai.llms.providers.gemini.completion import GeminiCompletion + + with patch.dict(os.environ, {}, clear=True): + llm = GeminiCompletion(model="gemini-1.5-pro") + assert llm.api_key is None + assert llm._client is None + + with patch.dict(os.environ, {"GEMINI_API_KEY": "late-key"}, clear=True): + client = llm._get_sync_client() + assert client is not None + assert llm.api_key == "late-key" + + def test_native_gemini_raises_error_when_initialization_fails(): """ Test that LLM raises ImportError when native Gemini completion fails. From 1dc310844c2bc5b7317313e380add5d4c2f86fe7 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 05:08:32 +0800 Subject: [PATCH 10/14] refactor: route Gemini async paths through _get_async_client `_ahandle_completion` and `_ahandle_streaming_completion` were calling `_get_sync_client()` directly. 
The other native providers (OpenAI, Anthropic, Azure) consistently route async code through `_get_async_client()`; matching that abstraction here keeps the contract consistent and lets a future async-specific override work without re-touching call sites. --- lib/crewai/src/crewai/llms/providers/gemini/completion.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py index 1607b63950..1b2fb26cb2 100644 --- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py +++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py @@ -1216,7 +1216,7 @@ async def _ahandle_completion( try: # The API accepts list[Content] but mypy is overly strict about variance contents_for_api: Any = contents - response = await self._get_sync_client().aio.models.generate_content( + response = await self._get_async_client().aio.models.generate_content( model=self.model, contents=contents_for_api, config=config, @@ -1257,7 +1257,7 @@ async def _ahandle_streaming_completion( # The API accepts list[Content] but mypy is overly strict about variance contents_for_api: Any = contents - stream = await self._get_sync_client().aio.models.generate_content_stream( + stream = await self._get_async_client().aio.models.generate_content_stream( model=self.model, contents=contents_for_api, config=config, From 760a3f2b05435b88edf8330c0323684f64d44f9f Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 05:13:57 +0800 Subject: [PATCH 11/14] test: assert Azure endpoint hostname instead of substring match MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CodeQL flagged the `"test.openai.azure.com" in llm.endpoint` substring check as incomplete URL sanitization — the substring could match in an arbitrary position. Parse the URL and assert against `urlparse(...).hostname` instead, which is the precise check we want. 
--- lib/crewai/tests/llms/azure/test_azure.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/crewai/tests/llms/azure/test_azure.py b/lib/crewai/tests/llms/azure/test_azure.py index f113bba297..d42e2d7fe2 100644 --- a/lib/crewai/tests/llms/azure/test_azure.py +++ b/lib/crewai/tests/llms/azure/test_azure.py @@ -2,6 +2,7 @@ import sys import types from unittest.mock import patch, MagicMock, Mock +from urllib.parse import urlparse import pytest from crewai.llm import LLM @@ -442,7 +443,7 @@ def test_azure_lazy_build_reads_env_vars_set_after_construction(): assert client is not None assert llm.api_key == "late-key" assert llm.endpoint is not None - assert "test.openai.azure.com" in llm.endpoint + assert urlparse(llm.endpoint).hostname == "test.openai.azure.com" assert llm.is_azure_openai_endpoint is True From 755b1f21e09bea4c01e04c2df7695c25a7a5cdea Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 05:25:38 +0800 Subject: [PATCH 12/14] fix: don't leak sync httpx.Client when building Anthropic async client `_build_async_client` called `_get_client_params()`, which under `interceptor` constructs a sync `httpx.Client` and stores it under `http_client`. The async builder then immediately overwrote that key with an `httpx.AsyncClient`, leaving the sync client allocated and unclosed. Add an `include_http_client` flag to `_get_client_params` (defaults True for the sync path); the async builder passes False so no sync client is constructed and only the async one is attached. 
--- .../llms/providers/anthropic/completion.py | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py index 9313f55dea..b627a85394 100644 --- a/lib/crewai/src/crewai/llms/providers/anthropic/completion.py +++ b/lib/crewai/src/crewai/llms/providers/anthropic/completion.py @@ -208,11 +208,15 @@ def _build_sync_client(self) -> Any: return Anthropic(**self._get_client_params()) def _build_async_client(self) -> Any: - async_client_params = self._get_client_params() + # Skip the sync httpx.Client that `_get_client_params` would + # otherwise construct under `interceptor`; we attach an async one + # below and would leak the sync one if both were built. + async_client_params = self._get_client_params(include_http_client=False) if self.interceptor: async_transport = AsyncHTTPTransport(interceptor=self.interceptor) - async_http_client = httpx.AsyncClient(transport=async_transport) - async_client_params["http_client"] = async_http_client + async_client_params["http_client"] = httpx.AsyncClient( + transport=async_transport + ) return AsyncAnthropic(**async_client_params) def _get_sync_client(self) -> Any: @@ -238,8 +242,15 @@ def to_config_dict(self) -> dict[str, Any]: config["timeout"] = self.timeout return config - def _get_client_params(self) -> dict[str, Any]: - """Get client parameters.""" + def _get_client_params(self, include_http_client: bool = True) -> dict[str, Any]: + """Get client parameters. + + Args: + include_http_client: When True (default) and an interceptor is + set, attach a sync ``httpx.Client``. The async builder + passes ``False`` so it can attach its own async client + without leaking a sync one. 
+ """ if self.api_key is None: self.api_key = os.getenv("ANTHROPIC_API_KEY") @@ -253,7 +264,7 @@ def _get_client_params(self) -> dict[str, Any]: "max_retries": self.max_retries, } - if self.interceptor: + if include_http_client and self.interceptor: transport = HTTPTransport(interceptor=self.interceptor) http_client = httpx.Client(transport=transport) client_params["http_client"] = http_client # type: ignore[assignment] From 4c3ff7ddaef640bb049dcb4b956955c0b322f1f0 Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 05:43:03 +0800 Subject: [PATCH 13/14] fix: narrow Bedrock _init_clients except to credential errors only The bare `except Exception` would silently swallow `TypeError`, `AttributeError`, and other real bugs in `_build_sync_client` with only a debug log. The intent is to defer on missing AWS credentials, which boto3 surfaces as `BotoCoreError` / `ClientError` (and `ValueError` for some validation paths). Catch only those; let everything else propagate so genuine failures stay loud. --- lib/crewai/src/crewai/llms/providers/bedrock/completion.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py index c7676c778f..5932b66f02 100644 --- a/lib/crewai/src/crewai/llms/providers/bedrock/completion.py +++ b/lib/crewai/src/crewai/llms/providers/bedrock/completion.py @@ -310,10 +310,14 @@ def _init_clients(self) -> BedrockCompletion: """Eagerly build the sync client when AWS credentials resolve, otherwise defer so ``LLM(model="bedrock/...")`` can be constructed at module import time even before deployment env vars are set. + + Only credential/SDK errors are caught — programming errors like + ``TypeError`` or ``AttributeError`` propagate so real bugs aren't + silently swallowed. 
""" try: self._client = self._build_sync_client() - except Exception as e: + except (BotoCoreError, ClientError, ValueError) as e: logging.debug("Deferring Bedrock client construction: %s", e) self._async_exit_stack = AsyncExitStack() if AIOBOTOCORE_AVAILABLE else None return self From 112196946e94c9b8afc6fa7860e390e4b6b08aae Mon Sep 17 00:00:00 2001 From: Greyson LaLonde Date: Sun, 12 Apr 2026 15:39:13 +0800 Subject: [PATCH 14/14] fix: match real provider error messages in missing-extra classifier The regex expected backtick-delimited install commands but the actual ImportError messages use plain double quotes. The test artificially injected backticks so it passed while production errors fell through to the less-specific llm_provider_init_failed classification. Update the regex to match the real format and tolerate backticks as a fallback. Fix the Azure test to use the exact string from the provider source. Add parametrized tests covering all four provider messages verbatim. --- lib/crewai/src/crewai/cli/deploy/validate.py | 9 ++++-- lib/crewai/tests/cli/deploy/test_validate.py | 32 +++++++++++++++++++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/lib/crewai/src/crewai/cli/deploy/validate.py b/lib/crewai/src/crewai/cli/deploy/validate.py index 0c9036c207..55246e1026 100644 --- a/lib/crewai/src/crewai/cli/deploy/validate.py +++ b/lib/crewai/src/crewai/cli/deploy/validate.py @@ -550,9 +550,12 @@ def _check_module_imports(self) -> None: def _classify_import_error(self, err_type: str, err_msg: str, tb: str) -> None: """Turn a raw import-time exception into a user-actionable finding.""" # Must be checked before the generic "native provider" branch below: - # the extras-missing message contains the same phrase. + # the extras-missing message contains the same phrase. Providers + # format the install command as plain text (`to install: uv add + # "crewai[extra]"`); also tolerate backtick-delimited variants. 
m = re.search( - r"(?P<pkg>[A-Za-z0-9_ -]+?)\s+native provider not available.*?`([^`]+)`", + r"(?P<pkg>[A-Za-z0-9_ -]+?)\s+native provider not available" + r".*?to install:\s*`?(?P<cmd>uv add [\"']crewai\[[^\]]+\][\"'])`?", err_msg, ) if m: @@ -560,7 +563,7 @@ def _classify_import_error(self, err_type: str, err_msg: str, tb: str) -> None: Severity.ERROR, "missing_provider_extra", f"{m.group('pkg').strip()} provider extra not installed", - hint=f"Run: {m.group(2)}", + hint=f"Run: {m.group('cmd')}", ) return diff --git a/lib/crewai/tests/cli/deploy/test_validate.py b/lib/crewai/tests/cli/deploy/test_validate.py index 10812e3f81..ff8b263767 100644 --- a/lib/crewai/tests/cli/deploy/test_validate.py +++ b/lib/crewai/tests/cli/deploy/test_validate.py @@ -286,13 +286,43 @@ def test_classify_missing_openai_key_is_warning(tmp_path: Path) -> None: def test_classify_azure_extra_missing_is_error(tmp_path: Path) -> None: + """The real message raised by the Azure provider module uses plain + double quotes around the install command (no backticks).
Match the + exact string that ships in the provider source so this test actually + guards the regex used in production.""" v = DeployValidator(project_root=tmp_path) v._classify_import_error( "ImportError", - 'Azure AI Inference native provider not available, to install: `uv add "crewai[azure-ai-inference]"`', + 'Azure AI Inference native provider not available, to install: uv add "crewai[azure-ai-inference]"', tb="", ) assert "missing_provider_extra" in _codes(v) + finding = next(r for r in v.results if r.code == "missing_provider_extra") + assert finding.title.startswith("Azure AI Inference") + assert 'uv add "crewai[azure-ai-inference]"' in finding.hint + + +@pytest.mark.parametrize( + "pkg_label, install_cmd", + [ + ("Anthropic", 'uv add "crewai[anthropic]"'), + ("AWS Bedrock", 'uv add "crewai[bedrock]"'), + ("Google Gen AI", 'uv add "crewai[google-genai]"'), + ], +) +def test_classify_missing_provider_extra_matches_real_messages( + tmp_path: Path, pkg_label: str, install_cmd: str +) -> None: + """Regression for the four provider error strings verbatim.""" + v = DeployValidator(project_root=tmp_path) + v._classify_import_error( + "ImportError", + f"{pkg_label} native provider not available, to install: {install_cmd}", + tb="", + ) + assert "missing_provider_extra" in _codes(v) + finding = next(r for r in v.results if r.code == "missing_provider_extra") + assert install_cmd in finding.hint def test_classify_keyerror_at_import_is_warning(tmp_path: Path) -> None: