From cd3fa00323e779b9d2180220c2062d5e15e44296 Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sat, 31 May 2025 17:49:57 +0000
Subject: [PATCH 1/2] I've added a new sample agent for yield analysis in
 manufacturing.

This agent can help you:
- Calculate yield and defect rates.
- Identify low-yield production stages.
- Suggest improvement actions based on low yield and common defect types.

The agent is structured with:
- `agent.py`: Defines the agent and its instructions.
- `tools.py`: Contains the core logic for the analysis functions.
- `README.md`: Provides documentation on features, usage, and customization.
- `__init__.py`: Makes the agent package importable.
- `requirements.txt`: Lists dependencies (currently none).
- `sample.session.json`: Shows an example interaction flow.
---
 .../samples/yield_analysis_agent/.gitkeep     |   0
 .../samples/yield_analysis_agent/README.md    |  69 ++++++++
 .../samples/yield_analysis_agent/__init__.py  |  19 ++
 .../samples/yield_analysis_agent/agent.py     |  94 ++++++++++
 .../yield_analysis_agent/requirements.txt     |   3 +
 .../yield_analysis_agent/sample.session.json  |  52 ++++++
 .../samples/yield_analysis_agent/tools.py     | 166 ++++++++++++++++++
 7 files changed, 403 insertions(+)
 create mode 100644 contributing/samples/yield_analysis_agent/.gitkeep
 create mode 100644 contributing/samples/yield_analysis_agent/README.md
 create mode 100644 contributing/samples/yield_analysis_agent/__init__.py
 create mode 100644 contributing/samples/yield_analysis_agent/agent.py
 create mode 100644 contributing/samples/yield_analysis_agent/requirements.txt
 create mode 100644 contributing/samples/yield_analysis_agent/sample.session.json
 create mode 100644 contributing/samples/yield_analysis_agent/tools.py

diff --git a/contributing/samples/yield_analysis_agent/.gitkeep b/contributing/samples/yield_analysis_agent/.gitkeep
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/contributing/samples/yield_analysis_agent/README.md b/contributing/samples/yield_analysis_agent/README.md
new file mode 100644
index 0000000000..792ed2b42f
--- /dev/null
+++ b/contributing/samples/yield_analysis_agent/README.md
@@ -0,0 +1,69 @@
+# Yield Analysis Agent
+
+This agent assists in performing yield analysis for manufacturing processes. It can calculate key yield metrics, identify stages with low yield, and suggest potential areas for improvement.
+
+## Features
+
+-   **Calculate Yield Metrics**: Computes yield rate and defect rate based on total units produced and defective units.
+-   **Identify Low-Yield Stages**: Pinpoints production stages that fall below a specified yield threshold.
+-   **Suggest Improvement Actions**: Offers potential actions to address low-yield stages and common defect types.
+
+## Prerequisites
+
+-   Google Agent Development Kit (ADK) installed.
+-   Access to a supported LLM (e.g., Gemini 1.5 Flash).
+-   Python environment where the ADK and its dependencies are available.
+
+## Tools
+
+The agent utilizes the following tools:
+
+1.  `calculate_yield_metrics(total_units: int, defective_units: int)`
+    -   **Description**: Calculates yield and defect rates.
+    -   **Args**:
+        -   `total_units` (int): Total number of units produced.
+        -   `defective_units` (int): Number of defective units.
+    -   **Returns**: A dictionary with `yield_rate` and `defect_rate`.
+
+2.  `identify_low_yield_stages(production_data_per_stage: list[dict], yield_threshold: float)`
+    -   **Description**: Identifies production stages performing below a yield threshold.
+    -   **Args**:
+        -   `production_data_per_stage` (list[dict]): Data for each stage, e.g., `[{'stage_name': 'Assembly', 'input_units': 100, 'output_units': 95}, ...]`.
+        -   `yield_threshold` (float): The minimum acceptable yield (e.g., 0.95 for 95%).
+    -   **Returns**: A dictionary with a list of `low_yield_stages`, including their names and calculated yields.
+
+3.  `suggest_improvement_actions(low_yield_stages: list[dict], common_defect_types: list[str])`
+    -   **Description**: Suggests actions to improve yield.
+    -   **Args**:
+        -   `low_yield_stages` (list[dict]): Output from `identify_low_yield_stages` or manually provided.
+        -   `common_defect_types` (list[str]): A list of common defect descriptions (e.g., `["Cracked casing", "Faulty sensor"]`).
+    -   **Returns**: A dictionary with a list of `suggested_actions`.
+
+## How to Run the Agent
+
+This agent is designed to be run within the Google ADK framework. Typically, you would use the ADK CLI or a custom script to serve and interact with the agent.
+
+**Example Interaction (Conceptual):**
+
+1.  **User**: "Calculate the yield if we produced 1000 widgets and 50 were defective."
+    -   **Agent (calls `calculate_yield_metrics`)**: "The yield rate is 95.0% and the defect rate is 5.0%."
+
+2.  **User**: "Analyze the following stage data with a 90% yield threshold: Stage A (Input: 200, Output: 185), Stage B (Input: 185, Output: 160), Stage C (Input: 160, Output: 155)."
+    -   **Agent (calls `identify_low_yield_stages`)**: "Stage B has a yield of approximately 86.49%, which is below the 90% threshold."
+
+3.  **User**: "Stage B is a problem, and we're seeing a lot of 'incomplete welds'. What should we do?"
+    -   **Agent (calls `suggest_improvement_actions`)**: "Based on low yield at Stage B and 'incomplete welds' defects, I suggest: Investigate root causes for low yield at stage: Stage B. Implement corrective actions for defect type: incomplete welds."
+
+## Agent Configuration
+
+-   **Model**: `gemini-1.5-flash` (configurable in `agent.py`)
+-   **Tools**: Defined in `tools.py`
+-   **Instructions**: The agent's behavior and tool usage guidelines are defined in the `instruction` parameter within `agent.py`.
+
+## Customization
+
+-   **Tools**: Modify or add new tools in `tools.py` to extend functionality (e.g., connect to a database for production data, perform statistical analysis).
+-   **Instructions**: Adjust the agent's instructions in `agent.py` to change its persona, specialize its knowledge, or modify how it uses tools.
+-   **Model**: Experiment with different LLM models compatible with the ADK.
+
+This agent provides a foundational example for yield analysis. Depending on the complexity of the manufacturing environment, further enhancements to tools, data integration, and analytical capabilities may be required.
diff --git a/contributing/samples/yield_analysis_agent/__init__.py b/contributing/samples/yield_analysis_agent/__init__.py
new file mode 100644
index 0000000000..9d7196a2c6
--- /dev/null
+++ b/contributing/samples/yield_analysis_agent/__init__.py
@@ -0,0 +1,19 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Yield Analysis Agent Sample."""
+
+from .agent import yield_analysis_agent
+
+__all__ = ["yield_analysis_agent"]
diff --git a/contributing/samples/yield_analysis_agent/agent.py b/contributing/samples/yield_analysis_agent/agent.py
new file mode 100644
index 0000000000..be74ed2dbf
--- /dev/null
+++ b/contributing/samples/yield_analysis_agent/agent.py
@@ -0,0 +1,94 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.adk import Agent
+from google.genai import types
+
+# Assuming tools.py is in the same directory
+from .tools import calculate_yield_metrics
+from .tools import identify_low_yield_stages
+from .tools import suggest_improvement_actions
+
+yield_analysis_agent = Agent(
+    model="gemini-1.5-flash",  # Using a recent model
+    name="yield_analysis_agent",
+    description=(
+        "An agent that performs yield analysis in a manufacturing context. "
+        "It can calculate yield and defect rates, identify low-yield production "
+        "stages, and suggest potential improvement actions."
+    ),
+    instruction="""
+    You are a manufacturing yield analysis assistant.
+    Your goal is to help users understand their production yield and identify areas for improvement.
+
+    Capabilities:
+    1.  **Calculate Yield Metrics**:
+        -   When asked to calculate yield, use the `calculate_yield_metrics` tool.
+        -   You require `total_units` and `defective_units` as input.
+        -   Ensure inputs are valid (e.g., total_units > 0, defective_units >= 0 and not exceeding total_units).
+        -   Report the calculated yield rate and defect rate.
+
+    2.  **Identify Low-Yield Stages**:
+        -   When asked to identify problematic stages, use the `identify_low_yield_stages` tool.
+        -   You require `production_data_per_stage` (a list of dicts, each with 'stage_name', 'input_units', 'output_units') and a `yield_threshold`.
+        -   Ensure inputs are valid (e.g., input_units > 0, output_units >= 0 and not exceeding input_units, 0 < yield_threshold <= 1).
+        -   Report the stages that fall below the specified yield threshold.
+
+    3.  **Suggest Improvement Actions**:
+        -   When asked for improvement suggestions, use the `suggest_improvement_actions` tool.
+        -   You can take `low_yield_stages` (output from the previous tool or user-provided) and `common_defect_types` as input.
+        -   Provide actionable suggestions based on the inputs.
+
+    Interaction Flow:
+    -   Start by understanding the user's specific request (e.g., "calculate yield for product X", "find bottlenecks in line Y", "suggest how to reduce waste").
+    -   Request necessary data if not provided. Be clear about the format required for each tool.
+    -   Call the appropriate tool(s) to perform the analysis.
+    -   Present the results clearly to the user.
+    -   If multiple analyses are requested (e.g., calculate yield and then identify low-yield stages), perform them sequentially, using results from one step as input for the next if applicable.
+    -   Maintain a helpful and analytical tone.
+    -   If inputs are invalid, explain the error and ask for correct data. Do not attempt to proceed with invalid data.
+    -   Store results of tool calls in the tool_context.state if the tool populates it, for potential future reference within the session.
+    """,
+    tools=[
+        calculate_yield_metrics,
+        identify_low_yield_stages,
+        suggest_improvement_actions,
+    ],
+    generate_content_config=types.GenerateContentConfig(
+        safety_settings=[
+            types.SafetySetting(
+                category=types.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
+                threshold=types.HarmBlockThreshold.BLOCK_NONE, # Adjust as needed for manufacturing context
+            ),
+            types.SafetySetting(
+                category=types.HarmCategory.HARM_CATEGORY_HARASSMENT,
+                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            ),
+            types.SafetySetting(
+                category=types.HarmCategory.HARM_CATEGORY_HATE_SPEECH,
+                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            ),
+            types.SafetySetting(
+                category=types.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
+                threshold=types.HarmBlockThreshold.BLOCK_NONE,
+            ),
+        ]
+    ),
+)
+
+# Example of how to make the agent available for import, if needed by a main script.
+# This depends on how agents are typically run or imported in your framework.
+# If this agent is run directly or via a command-line tool that discovers agents,
+# this specific aliasing might not be strictly necessary but is good practice.
+root_agent = yield_analysis_agent
diff --git a/contributing/samples/yield_analysis_agent/requirements.txt b/contributing/samples/yield_analysis_agent/requirements.txt
new file mode 100644
index 0000000000..7509627d99
--- /dev/null
+++ b/contributing/samples/yield_analysis_agent/requirements.txt
@@ -0,0 +1,3 @@
+# No specific external dependencies for the basic version of this agent.
+# If tools are added that require packages like pandas, numpy, etc.,
+# list them here (e.g., pandas>=1.0.0).
diff --git a/contributing/samples/yield_analysis_agent/sample.session.json b/contributing/samples/yield_analysis_agent/sample.session.json
new file mode 100644
index 0000000000..494404776d
--- /dev/null
+++ b/contributing/samples/yield_analysis_agent/sample.session.json
@@ -0,0 +1,52 @@
+{
+  "title": "Yield Analysis Agent - Sample Session",
+  "turns": [
+    {
+      "role": "user",
+      "input": "Hello, I need to analyze our production yield."
+    },
+    {
+      "role": "agent",
+      "output": "Hello! I can help with that. What specific analysis would you like to perform? For example, I can calculate yield metrics, identify low-yield stages, or suggest improvement actions."
+    },
+    {
+      "role": "user",
+      "input": "Let's start by calculating the yield. We produced 1250 units of Product X, and 75 of them were found to be defective."
+    },
+    {
+      "role": "agent",
+      "tool_code": "calculate_yield_metrics(total_units=1250, defective_units=75)",
+      "tool_result": "{\"yield_rate\": 0.94, \"defect_rate\": 0.06}"
+    },
+    {
+      "role": "agent",
+      "output": "Okay, for Product X, with 1250 total units and 75 defective units, the yield rate is 94.0% and the defect rate is 6.0%."
+    },
+    {
+      "role": "user",
+      "input": "That's helpful. Now, can you help me identify if any stages are performing poorly? Here's the data for our main assembly line. Stage 1: input 1000, output 980. Stage 2: input 980, output 950. Stage 3: input 950, output 880. Stage 4: input 880, output 870. Let's use a yield threshold of 95%."
+    },
+    {
+      "role": "agent",
+      "tool_code": "identify_low_yield_stages(production_data_per_stage=[{'stage_name': 'Stage 1', 'input_units': 1000, 'output_units': 980}, {'stage_name': 'Stage 2', 'input_units': 980, 'output_units': 950}, {'stage_name': 'Stage 3', 'input_units': 950, 'output_units': 880}, {'stage_name': 'Stage 4', 'input_units': 880, 'output_units': 870}], yield_threshold=0.95)",
+      "tool_result": "{\"low_yield_stages\": [{\"stage_name\": \"Stage 3\", \"yield\": 0.9263157894736842, \"input_units\": 950, \"output_units\": 880}]}"
+    },
+    {
+      "role": "agent",
+      "output": "Based on your data and a 95% yield threshold, Stage 3 is underperforming with a yield of approximately 92.63%."
+    },
+    {
+      "role": "user",
+      "input": "Okay, Stage 3 is a concern. We've also noticed a high number of 'misaligned components' and 'incomplete soldering' defects recently. What can we do?"
+    },
+    {
+      "role": "agent",
+      "tool_code": "suggest_improvement_actions(low_yield_stages=[{'stage_name': 'Stage 3', 'yield': 0.9263157894736842}], common_defect_types=['misaligned components', 'incomplete soldering'])",
+      "tool_result": "{\"suggested_actions\": [\"Investigate root causes for low yield at stage: Stage 3.\", \"Implement corrective actions for defect type: misaligned components.\", \"Implement corrective actions for defect type: incomplete soldering.\"]}"
+    },
+    {
+      "role": "agent",
+      "output": "Considering the low yield at Stage 3 and the defect types 'misaligned components' and 'incomplete soldering', I suggest the following actions: \n- Investigate root causes for low yield at Stage 3. \n- Implement corrective actions for the 'misaligned components' defect. \n- Implement corrective actions for the 'incomplete soldering' defect."
+    }
+  ]
+}
diff --git a/contributing/samples/yield_analysis_agent/tools.py b/contributing/samples/yield_analysis_agent/tools.py
new file mode 100644
index 0000000000..6a436a404c
--- /dev/null
+++ b/contributing/samples/yield_analysis_agent/tools.py
@@ -0,0 +1,166 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from google.adk.tools.tool_context import ToolContext
+
+def calculate_yield_metrics(
+    total_units: int, defective_units: int, tool_context: ToolContext
+) -> dict:
+    """Calculates yield and defect rates from production data.
+
+    Args:
+        total_units: The total number of units produced.
+        defective_units: The number of defective units.
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing the yield rate and defect rate.
+        Returns an error message if total_units is zero.
+    """
+    if total_units <= 0:
+        return {"error": "Total units must be a positive number."}
+    if defective_units < 0:
+        return {"error": "Defective units cannot be negative."}
+    if defective_units > total_units:
+        return {"error": "Defective units cannot exceed total units."}
+
+    yield_rate = (total_units - defective_units) / total_units
+    defect_rate = defective_units / total_units
+
+    if 'calculations' not in tool_context.state:
+        tool_context.state['calculations'] = []
+    tool_context.state['calculations'].append({
+        "total_units": total_units,
+        "defective_units": defective_units,
+        "yield_rate": yield_rate,
+        "defect_rate": defect_rate
+    })
+
+    return {
+        "yield_rate": yield_rate,
+        "defect_rate": defect_rate,
+    }
+
+def identify_low_yield_stages(
+    production_data_per_stage: list[dict], yield_threshold: float, tool_context: ToolContext
+) -> dict:
+    """Identifies production stages with yield below a given threshold.
+
+    Args:
+        production_data_per_stage: A list of dictionaries, where each dictionary
+                                   represents a stage and contains 'stage_name',
+                                   'input_units', and 'output_units'.
+        yield_threshold: The minimum yield rate considered acceptable.
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing a list of low-yield stages.
+        Each item in the list includes the stage name and its calculated yield.
+        Returns an error message if input data is invalid.
+    """
+    low_yield_stages = []
+    if not isinstance(production_data_per_stage, list):
+        return {"error": "Production data per stage must be a list of dictionaries."}
+    if not (0 < yield_threshold <= 1):
+        return {"error": "Yield threshold must be between 0 (exclusive) and 1 (inclusive)."}
+
+    for stage_data in production_data_per_stage:
+        if not all(key in stage_data for key in ['stage_name', 'input_units', 'output_units']):
+            return {"error": "Each stage must have 'stage_name', 'input_units', and 'output_units'."}
+        if not isinstance(stage_data['input_units'], int) or not isinstance(stage_data['output_units'], int):
+            return {"error": "Input and output units must be integers."}
+        if stage_data['input_units'] <= 0:
+            return {"error": f"Input units for stage {stage_data['stage_name']} must be positive."}
+        if stage_data['output_units'] < 0:
+            return {"error": f"Output units for stage {stage_data['stage_name']} cannot be negative."}
+        if stage_data['output_units'] > stage_data['input_units']:
+            return {"error": f"Output units cannot exceed input units for stage {stage_data['stage_name']}."}
+
+        stage_yield = stage_data['output_units'] / stage_data['input_units']
+        if stage_yield < yield_threshold:
+            low_yield_stages.append({
+                "stage_name": stage_data['stage_name'],
+                "yield": stage_yield,
+                "input_units": stage_data['input_units'],
+                "output_units": stage_data['output_units'],
+            })
+
+    if 'low_yield_analysis' not in tool_context.state:
+        tool_context.state['low_yield_analysis'] = []
+    tool_context.state['low_yield_analysis'].append({
+        "yield_threshold": yield_threshold,
+        "identified_low_yield_stages": low_yield_stages
+    })
+
+    return {"low_yield_stages": low_yield_stages}
+
+def suggest_improvement_actions(
+    low_yield_stages: list[dict], common_defect_types: list[str], tool_context: ToolContext
+) -> dict:
+    """Suggests potential improvement actions based on low-yield stages and defect types.
+
+    Args:
+        low_yield_stages: A list of dictionaries representing low-yield stages,
+                          including 'stage_name' and 'yield'.
+        common_defect_types: A list of strings describing common defect types.
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing a list of suggested actions.
+    """
+    suggestions = []
+    if not isinstance(low_yield_stages, list):
+        return {"error": "Low yield stages must be a list."}
+    if not isinstance(common_defect_types, list):
+        return {"error": "Common defect types must be a list."}
+
+    for stage in low_yield_stages:
+        if not isinstance(stage, dict) or 'stage_name' not in stage:
+            return {"error": "Each low yield stage must be a dictionary with a 'stage_name'."}
+        suggestions.append(
+            f"Investigate root causes for low yield at stage: {stage['stage_name']}."
+        )
+
+    for defect in common_defect_types:
+        if not isinstance(defect, str):
+            return {"error": "Each defect type must be a string."}
+        suggestions.append(
+            f"Implement corrective actions for defect type: {defect}."
+        )
+
+    if not low_yield_stages and not common_defect_types:
+        suggestions.append(
+            "No specific low-yield stages or defect types provided. "
+            "Consider a general process review for potential improvements."
+        )
+    elif not low_yield_stages:
+        suggestions.append(
+            "No specific low-yield stages provided. "
+            "Focus on addressing the listed common defect types."
+        )
+    elif not common_defect_types:
+        suggestions.append(
+            "No common defect types provided. "
+            "Focus on investigating the identified low-yield stages."
+        )
+
+    if 'improvement_suggestions' not in tool_context.state:
+        tool_context.state['improvement_suggestions'] = []
+    tool_context.state['improvement_suggestions'].append({
+        "low_yield_stages_input": low_yield_stages,
+        "common_defect_types_input": common_defect_types,
+        "suggestions_provided": suggestions
+    })
+
+    return {"suggested_actions": suggestions}

From 05bf6956e84d994448d4eee6b41ddfbfacb8a73e Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Sun, 1 Jun 2025 22:24:50 +0000
Subject: [PATCH 2/2] feat: Enhance Yield Analysis Agent with multiple new
 capabilities

This commit significantly enhances my capabilities, adding several new features across multiple phases:

Phase 1: Advanced Analytics & Data Handling
- I can now work with data from CSV and Excel files.
- I've developed Statistical Process Control (SPC) tools to perform calculations.
- I've created a guided Root Cause Analysis (RCA) feature to help you investigate problems.

Phase 2: Action & Knowledge Integration
- I've enhanced my ability to suggest improvement actions by considering SPC and RCA outputs.
- I've implemented session-based action item tracking features.
- I've developed a way to query a simple, file-based knowledge base.

Phase 3: Predictive Capabilities (Initial Steps)
- I've created a feature for basic time-series anomaly detection.
- I've designed a feature for simple failure pattern identification from historical event data.

Phase 4: Refinement & Documentation
- I've updated my instructions to be more interactive regarding your configurable parameters.
- I've comprehensively reviewed and refined my overall instructions and persona for clarity and effectiveness.
- I've updated `README.md` to document all new features and data requirements.
- I've updated `sample.session.json` with a comprehensive example conversation showcasing the new capabilities.

I am now significantly more powerful, capable of a wider range of analyses, and more interactive. `requirements.txt` has been updated with `pandas` and `numpy`.
---
 .../samples/yield_analysis_agent/README.md    | 167 +++-
 .../samples/yield_analysis_agent/agent.py     | 152 +++-
 .../yield_analysis_agent/knowledge_base.json  |  61 ++
 .../yield_analysis_agent/requirements.txt     |   7 +-
 .../yield_analysis_agent/sample.session.json  | 142 +++-
 .../samples/yield_analysis_agent/tools.py     | 795 +++++++++++++++++-
 6 files changed, 1193 insertions(+), 131 deletions(-)
 create mode 100644 contributing/samples/yield_analysis_agent/knowledge_base.json

diff --git a/contributing/samples/yield_analysis_agent/README.md b/contributing/samples/yield_analysis_agent/README.md
index 792ed2b42f..51fe031de1 100644
--- a/contributing/samples/yield_analysis_agent/README.md
+++ b/contributing/samples/yield_analysis_agent/README.md
@@ -1,69 +1,150 @@
 # Yield Analysis Agent
 
-This agent assists in performing yield analysis for manufacturing processes. It can calculate key yield metrics, identify stages with low yield, and suggest potential areas for improvement.
+This is an advanced agent designed to assist with comprehensive manufacturing process analysis. It helps diagnose issues, understand process performance across various metrics, identify areas for improvement, consult a knowledge base, guide root cause analysis, track corrective actions, and more.
 
 ## Features
 
--   **Calculate Yield Metrics**: Computes yield rate and defect rate based on total units produced and defective units.
--   **Identify Low-Yield Stages**: Pinpoints production stages that fall below a specified yield threshold.
--   **Suggest Improvement Actions**: Offers potential actions to address low-yield stages and common defect types.
+This agent offers a wide range of analytical capabilities:
+
+-   **Data Input**: Load data from CSV or Excel files for various analyses.
+-   **Yield Calculation**: Compute overall yield and defect rates.
+-   **Stage-wise Yield Analysis**: Identify production stages performing below a specified yield threshold.
+-   **Statistical Process Control (SPC)**:
+    -   Calculate SPC metrics (mean, standard deviation).
+    -   Determine Upper and Lower Control Limits (UCL/LCL).
+    -   Identify out-of-control data points.
+-   **Time-Series Anomaly Detection**: Detect simple anomalies in numerical data based on rolling statistics or absolute thresholds.
+-   **Failure Pattern Identification**: Analyze historical event data to find patterns like failure type frequencies and time-to-failure after maintenance.
+-   **Guided Root Cause Analysis (RCA)**: Interactively guide users through the 5 Whys methodology to find root causes of problems.
+-   **Knowledge Base Querying**: Search a predefined knowledge base for common problems, causes, and solutions based on keywords.
+-   **Action Item Tracking**: Add, list, and update the status of action items within the current session.
+-   **Enhanced Suggestion Generation**: Provide improvement suggestions based on a combination of inputs from other analyses (yield, SPC, RCA).
 
 ## Prerequisites
 
 -   Google Agent Development Kit (ADK) installed.
 -   Access to a supported LLM (e.g., Gemini 1.5 Flash).
--   Python environment where the ADK and its dependencies are available.
+-   Python environment where the ADK and its dependencies (including `pandas` and `numpy`) are available.
 
-## Tools
+## Required Data / Inputs
 
-The agent utilizes the following tools:
-
-1.  `calculate_yield_metrics(total_units: int, defective_units: int)`
-    -   **Description**: Calculates yield and defect rates.
-    -   **Args**:
-        -   `total_units` (int): Total number of units produced.
-        -   `defective_units` (int): Number of defective units.
-    -   **Returns**: A dictionary with `yield_rate` and `defect_rate`.
-
-2.  `identify_low_yield_stages(production_data_per_stage: list[dict], yield_threshold: float)`
-    -   **Description**: Identifies production stages performing below a yield threshold.
-    -   **Args**:
-        -   `production_data_per_stage` (list[dict]): Data for each stage, e.g., `[{'stage_name': 'Assembly', 'input_units': 100, 'output_units': 95}, ...]`.
-        -   `yield_threshold` (float): The minimum acceptable yield (e.g., 0.95 for 95%).
-    -   **Returns**: A dictionary with a list of `low_yield_stages`, including their names and calculated yields.
-
-3.  `suggest_improvement_actions(low_yield_stages: list[dict], common_defect_types: list[str])`
-    -   **Description**: Suggests actions to improve yield.
-    -   **Args**:
-        -   `low_yield_stages` (list[dict]): Output from `identify_low_yield_stages` or manually provided.
-        -   `common_defect_types` (list[str]): A list of common defect descriptions (e.g., `["Cracked casing", "Faulty sensor"]`).
-    -   **Returns**: A dictionary with a list of `suggested_actions`.
+The agent can work with various types of data, typically provided by the user directly or by loading from files:
 
-## How to Run the Agent
+-   **Overall Production Numbers**: For `calculate_yield_metrics` (e.g., total units, defective units).
+-   **Stage-wise Production Data**: For `identify_low_yield_stages` (e.g., list of dictionaries with 'stage_name', 'input_units', 'output_units').
+-   **Time-Series Data**: For `calculate_spc_metrics_and_limits` and `detect_simple_anomalies` (e.g., list of numerical data like defect counts per batch, sensor readings).
+-   **Historical Event Logs**: For `identify_simple_failure_patterns` (e.g., list of dictionaries with 'timestamp', 'event_type', 'item_id').
+-   **Problem Statements**: For `guide_root_cause_analysis_5whys`.
+-   **Search Keywords**: For `query_knowledge_base`.
+-   **Action Item Details**: For `add_action_item` (description, owner, status) and `update_action_item_status` (action ID, new status).
 
-This agent is designed to be run within the Google ADK framework. Typically, you would use the ADK CLI or a custom script to serve and interact with the agent.
+## Tools
 
-**Example Interaction (Conceptual):**
+The agent utilizes the following tools defined in `tools.py`:
+
+1.  **`read_csv_data(...)`**
+    -   **Description**: Reads data from a CSV file for various analyses.
+    -   **Args**: `file_path` (str), optional column name parameters (e.g., `total_units_col`, `defective_units_col`, `stage_name_col`, `input_units_col`, `output_units_col`, a column for `data_points`, or columns for `timestamp`, `event_type`, `item_id`).
+    -   **Returns**: A dictionary containing the extracted data (e.g., total/defective units, list of stage data, list of numerical data points, list of event dictionaries) or an error.
+
+2.  **`read_excel_data(...)`**
+    -   **Description**: Reads data from an Excel file sheet for various analyses.
+    -   **Args**: `file_path` (str), `sheet_name` (str|int, default 0), optional column name parameters (similar to `read_csv_data`).
+    -   **Returns**: A dictionary containing the extracted data or an error.
+
+3.  **`calculate_yield_metrics(total_units: int, defective_units: int, ...)`**
+    -   **Description**: Calculates overall yield and defect rates.
+    -   **Args**: `total_units` (int), `defective_units` (int).
+    -   **Returns**: Dictionary with `yield_rate` and `defect_rate`.
+
+4.  **`identify_low_yield_stages(production_data_per_stage: list[dict], yield_threshold: float, ...)`**
+    -   **Description**: Identifies production stages performing below a specified yield threshold.
+    -   **Args**: `production_data_per_stage` (list of dicts), `yield_threshold` (float, user must specify, e.g., 0.90 for 90%).
+    -   **Returns**: Dictionary with a list of `low_yield_stages`.
+
+5.  **`calculate_spc_metrics_and_limits(data_points: list[float | int], control_limit_sigma: float = 3.0, ...)`**
+    -   **Description**: Calculates SPC metrics (mean, std dev) and control limits.
+    -   **Args**: `data_points` (list of numbers), `control_limit_sigma` (float, defaults to 3.0; agent should ask user for preference).
+    -   **Returns**: Dictionary with `mean`, `std_dev`, `upper_control_limit` (UCL), `lower_control_limit` (LCL).
+
+6.  **`identify_out_of_control_points(data_points: list[float | int], upper_control_limit: float, lower_control_limit: float, ...)`**
+    -   **Description**: Identifies data points outside given SPC control limits.
+    -   **Args**: `data_points` (list of numbers), `upper_control_limit` (float), `lower_control_limit` (float).
+    -   **Returns**: Dictionary with a list of `out_of_control_points`.
+
+7.  **`detect_simple_anomalies(data_points: list[float | int], window_size: int = 5, std_dev_threshold: float = 2.0, ...)`**
+    -   **Description**: Detects simple anomalies in time-series data.
+    -   **Args**: `data_points` (list of numbers). Optional, user-configurable args: `window_size` (int, default 5), `std_dev_threshold` (float, default 2.0), `absolute_upper_threshold` (float), `absolute_lower_threshold` (float). Agent should confirm these with user.
+    -   **Returns**: Dictionary with a list of `anomalies` (each with index, value, reason) and `parameters_used`.
+
+8.  **`identify_simple_failure_patterns(event_data: list[dict], maintenance_completed_event_type: str = "maintenance_completed", ...)`**
+    -   **Description**: Identifies simple failure patterns from historical event data.
+    -   **Args**: `event_data` (list of dicts with 'timestamp', 'event_type', 'item_id'), `maintenance_completed_event_type` (str, defaults to "maintenance_completed"; agent should ask user if their term is different).
+    -   **Returns**: Dictionary with `identified_patterns` (e.g., failure type counts, time-to-failure summaries).
+
+9.  **`guide_root_cause_analysis_5whys(problem_statement: str, previous_whys: list[dict] = None, ...)`**
+    -   **Description**: Interactively guides a 5 Whys root cause analysis.
+    -   **Args**: `problem_statement` (str), `previous_whys` (list of dicts, managed by agent during conversation).
+    -   **Returns**: Dictionary with `next_prompt_for_user` or `conclusion_prompt`, `current_depth`, and `analysis_summary`.
+
+10. **`query_knowledge_base(search_keywords: list[str], ...)`**
+    -   **Description**: Queries a local JSON knowledge base for common problems, causes, and solutions.
+    -   **Args**: `search_keywords` (list of strings).
+    -   **Returns**: Dictionary with a list of `results` (matching entries).
+    -   **Note**: The knowledge base is stored in `contributing/samples/yield_analysis_agent/knowledge_base.json` and can be customized by users.
+
+11. **`suggest_improvement_actions(...)`**
+    -   **Description**: Suggests potential improvement actions based on various analyses.
+    -   **Args (all optional)**: `low_yield_stages` (list), `common_defect_types` (list), `spc_out_of_control_points` (list), `rca_summary` (str).
+    -   **Returns**: Dictionary with a list of `suggested_actions`.
+
+12. **`add_action_item(description: str, owner: str = None, status: str = "open", ...)`**
+    -   **Description**: Adds a new action item to a session-based list.
+    -   **Args**: `description` (str), `owner` (str, optional), `status` (str, default "open").
+    -   **Returns**: Dictionary with the `action_item_added` (including its unique ID).
+
+13. **`list_action_items(status_filter: str = None, owner_filter: str = None, ...)`**
+    -   **Description**: Lists action items from the current session, with optional filters.
+    -   **Args**: `status_filter` (str, optional), `owner_filter` (str, optional).
+    -   **Returns**: Dictionary with a list of `action_items`.
+
+14. **`update_action_item_status(action_id: str, new_status: str, ...)`**
+    -   **Description**: Updates the status of an existing action item.
+    -   **Args**: `action_id` (str), `new_status` (str).
+    -   **Returns**: Dictionary indicating `success` and the `updated_action_item`.
+
+## How to Run the Agent
 
-1.  **User**: "Calculate the yield if we produced 1000 widgets and 50 were defective."
-    -   **Agent (calls `calculate_yield_metrics`)**: "The yield rate is 95.0% and the defect rate is 5.0%."
+This agent is designed to be run within the Google ADK framework. Use the ADK CLI or a custom script to serve and interact with the agent. The agent will guide you on required inputs for each capability.
 
-2.  **User**: "Analyze the following stage data with a 90% yield threshold: Stage A (Input: 200, Output: 185), Stage B (Input: 185, Output: 160), Stage C (Input: 160, Output: 155)."
-    -   **Agent (calls `identify_low_yield_stages`)**: "Stage B has a yield of approximately 86.49%, which is below the 90% threshold."
+**Example Interaction Flow (Conceptual):**
 
-3.  **User**: "Stage B is a problem, and we're seeing a lot of 'incomplete welds'. What should we do?"
-    -   **Agent (calls `suggest_improvement_actions`)**: "Based on low yield at Stage B and 'incomplete welds' defects, I suggest: Investigate root causes for low yield at stage: Stage B. Implement corrective actions for defect type: incomplete welds."
+1.  **User**: "I want to analyze yield data from `production_log.csv`."
+    -   **Agent**: "Okay, I can help with that. To read the CSV, I'll need the column names. For example, what's the column name for total units produced, and what's the column for defective units?"
+    -   *(User provides column names)*
+    -   **Agent (calls `read_csv_data`, then `calculate_yield_metrics`)**: "Thanks! Based on the data, the overall yield is 92.7%..."
+2.  **User**: "That seems low. Can we find out why?"
+    -   **Agent**: "We could perform a Root Cause Analysis using the 5 Whys method. Would you like to start with the problem 'Overall yield is 92.7%'?"
+    -   *(User agrees, agent proceeds with `guide_root_cause_analysis_5whys` over several turns)*
+3.  **Agent (after RCA)**: "...The RCA summary suggests a key cause is 'material inconsistency from supplier X'. I can also query our knowledge base for 'supplier material inconsistency'. Want to try that?"
+    -   *(User agrees, agent calls `query_knowledge_base`)*
+4.  **Agent**: "I found an entry suggesting 'Implement stricter incoming material inspection' and 'Work with supplier on quality control'. Based on this and our RCA, I can suggest some improvement actions. Ready?"
+    -   *(User agrees, agent calls `suggest_improvement_actions` with RCA summary)*
+5.  **Agent**: "Here are some suggestions: 1. Implement stricter incoming material inspection for Supplier X. 2. Schedule a meeting with Supplier X to discuss quality control. Would you like to add these as action items to track?"
+    -   *(User agrees, agent calls `add_action_item` for each)*
 
 ## Agent Configuration
 
--   **Model**: `gemini-1.5-flash` (configurable in `agent.py`)
--   **Tools**: Defined in `tools.py`
--   **Instructions**: The agent's behavior and tool usage guidelines are defined in the `instruction` parameter within `agent.py`.
+-   **Model**: `gemini-1.5-flash` (configurable in `agent.py`).
+-   **Tools**: All tools are defined in `tools.py`.
+-   **Instructions**: The agent's detailed behavior, including its persona, how it uses tools, how it handles parameters, and its interaction flow, is defined in the `instruction` parameter within `agent.py`.
+-   **Knowledge Base**: A simple JSON-based knowledge base is located at `contributing/samples/yield_analysis_agent/knowledge_base.json`. This can be expanded with more domain-specific information.
 
 ## Customization
 
--   **Tools**: Modify or add new tools in `tools.py` to extend functionality (e.g., connect to a database for production data, perform statistical analysis).
--   **Instructions**: Adjust the agent's instructions in `agent.py` to change its persona, specialize its knowledge, or modify how it uses tools.
+-   **Tools**: Modify or add new tools in `tools.py` to extend functionality.
+-   **Instructions**: Adjust the agent's instructions in `agent.py` to change its persona, specialize its knowledge, or modify how it uses tools and handles parameters.
+-   **Knowledge Base**: Add or edit entries in `knowledge_base.json` to improve the relevance of the `query_knowledge_base` tool.
 -   **Model**: Experiment with different LLM models compatible with the ADK.
 
-This agent provides a foundational example for yield analysis. Depending on the complexity of the manufacturing environment, further enhancements to tools, data integration, and analytical capabilities may be required.
+This agent provides a comprehensive suite of tools for manufacturing analysis. Depending on specific needs, further enhancements to tools, data integrations, and analytical capabilities can be developed.
diff --git a/contributing/samples/yield_analysis_agent/agent.py b/contributing/samples/yield_analysis_agent/agent.py
index be74ed2dbf..5823f2e5a8 100644
--- a/contributing/samples/yield_analysis_agent/agent.py
+++ b/contributing/samples/yield_analysis_agent/agent.py
@@ -16,54 +16,142 @@
 from google.genai import types
 
 # Assuming tools.py is in the same directory
-from .tools import calculate_yield_metrics
-from .tools import identify_low_yield_stages
-from .tools import suggest_improvement_actions
+from .tools import (
+    calculate_yield_metrics,
+    identify_low_yield_stages,
+    suggest_improvement_actions,
+    read_csv_data,
+    read_excel_data,
+    calculate_spc_metrics_and_limits,
+    identify_out_of_control_points,
+    guide_root_cause_analysis_5whys,
+    add_action_item,
+    list_action_items,
+    update_action_item_status,
+    query_knowledge_base,
+    detect_simple_anomalies,
+    identify_simple_failure_patterns,
+)
 
 yield_analysis_agent = Agent(
     model="gemini-1.5-flash",  # Using a recent model
     name="yield_analysis_agent",
     description=(
-        "An agent that performs yield analysis in a manufacturing context. "
-        "It can calculate yield and defect rates, identify low-yield production "
-        "stages, and suggest potential improvement actions."
+        "An expert manufacturing yield analysis and process improvement assistant. This agent helps "
+        "diagnose issues, understand process performance across various metrics (yield, SPC, anomalies, "
+        "failure patterns), identify areas for improvement, consult a knowledge base, guide root "
+        "cause analysis (5 Whys), and track corrective actions. It is analytical, data-driven, "
+        "and proactive in suggesting relevant analyses."
     ),
     instruction="""
-    You are a manufacturing yield analysis assistant.
-    Your goal is to help users understand their production yield and identify areas for improvement.
+    You are an expert manufacturing yield analysis and process improvement assistant.
+    Your goal is to help users diagnose issues, understand process performance, identify areas for improvement, and track corrective actions.
+    Be analytical, data-driven, and proactive in suggesting relevant analyses.
+
+    **Core Interaction Principles**:
+    1.  **Clarify Intent**: Start by understanding the user's specific request and the problem they are trying to solve.
+    2.  **Data Gathering**: Request necessary data if not provided. Be explicit about the format required for each tool or analysis.
+    3.  **Parameter Confirmation**: For tools with configurable parameters, unless the user has already provided specific values, always inform them of key parameters and their default values. Ask if they'd like to customize these for the current analysis. Use their values if provided; otherwise, proceed with the defaults. (Examples: yield thresholds, SPC sigma values, anomaly detection parameters, specific event type names for pattern analysis).
+    4.  **Tool Execution**: Call the appropriate tool(s) to perform the analysis based on the user's needs and the available data.
+    5.  **Clear Presentation**: Present results clearly. Use bullet points or numbered lists for multiple findings or suggestions. Explain any technical terms or metrics if the user seems unfamiliar. When multiple analyses are performed, provide a consolidated summary of findings if appropriate.
+    6.  **Error Handling**: If a tool returns an error, clearly explain the error to the user and ask for clarification or corrected inputs. Do not attempt to re-run the tool with the exact same erroneous inputs without user modification.
+    7.  **Proactive Suggestions**: Based on the user's initial request or data, if you see an opportunity to perform an additional relevant analysis that might provide further insights (e.g., SPC after calculating yield, or RCA after identifying a low-yield stage, or querying the knowledge base for common defect types), suggest this to the user as a next step.
+    8.  **Tool Chaining**: Leverage the output of one tool as input for another where logical. For example:
+        -   Data from `read_csv_data` or `read_excel_data` can feed into `calculate_yield_metrics`, `identify_low_yield_stages`, `calculate_spc_metrics_and_limits`, `detect_simple_anomalies`, or `identify_simple_failure_patterns`.
+        -   `identify_out_of_control_points` (from SPC) or `guide_root_cause_analysis_5whys` results can inform `suggest_improvement_actions`.
+        -   `query_knowledge_base` results can supplement `suggest_improvement_actions` or RCA discussions.
+    9.  **Session State**: Tool outputs and intermediate results may be stored in a session memory (`tool_context.state`). You can refer to these stored results if needed for subsequent analysis within the same session, but always confirm with the user if you are reusing data from a previous step to ensure it's still relevant to their current line of inquiry.
+
+    **Capabilities (Tools Available)**:
 
-    Capabilities:
     1.  **Calculate Yield Metrics**:
-        -   When asked to calculate yield, use the `calculate_yield_metrics` tool.
-        -   You require `total_units` and `defective_units` as input.
-        -   Ensure inputs are valid (e.g., total_units > 0, defective_units >= 0 and not exceeding total_units).
-        -   Report the calculated yield rate and defect rate.
+        -   Tool: `calculate_yield_metrics`
+        -   Requires: `total_units` (int), `defective_units` (int).
+        -   Output: Reports calculated yield rate and defect rate.
 
     2.  **Identify Low-Yield Stages**:
-        -   When asked to identify problematic stages, use the `identify_low_yield_stages` tool.
-        -   You require `production_data_per_stage` (a list of dicts, each with 'stage_name', 'input_units', 'output_units') and a `yield_threshold`.
-        -   Ensure inputs are valid (e.g., input_units > 0, output_units >= 0 and not exceeding input_units, 0 < yield_threshold <= 1).
-        -   Report the stages that fall below the specified yield threshold.
-
-    3.  **Suggest Improvement Actions**:
-        -   When asked for improvement suggestions, use the `suggest_improvement_actions` tool.
-        -   You can take `low_yield_stages` (output from the previous tool or user-provided) and `common_defect_types` as input.
-        -   Provide actionable suggestions based on the inputs.
-
-    Interaction Flow:
-    -   Start by understanding the user's specific request (e.g., "calculate yield for product X", "find bottlenecks in line Y", "suggest how to reduce waste").
-    -   Request necessary data if not provided. Be clear about the format required for each tool.
-    -   Call the appropriate tool(s) to perform the analysis.
-    -   Present the results clearly to the user.
-    -   If multiple analyses are requested (e.g., calculate yield and then identify low-yield stages), perform them sequentially, using results from one step as input for the next if applicable.
-    -   Maintain a helpful and analytical tone.
-    -   If inputs are invalid, explain the error and ask for correct data. Do not attempt to proceed with invalid data.
-    -   Store results of tool calls in the tool_context.state if the tool populates it, for potential future reference within the session.
+        -   Tool: `identify_low_yield_stages`
+        -   Requires: `production_data_per_stage` (list of dicts, each with 'stage_name', 'input_units', 'output_units').
+        -   Parameter: Always ask the user for their desired `yield_threshold` (e.g., 0.90 for 90%, 0.95 for 95%). Do not assume a default.
+        -   Output: Reports stages performing below the specified threshold.
+
+    3.  **Suggest Improvement Actions** (Enhanced):
+        -   Tool: `suggest_improvement_actions`
+        -   Inputs (all optional, provide any available):
+            -   `low_yield_stages` (list of dicts, e.g., from `identify_low_yield_stages`).
+            -   `common_defect_types` (list of strings).
+            -   `spc_out_of_control_points` (list of dicts, e.g., from `identify_out_of_control_points`).
+            -   `rca_summary` (string, e.g., a key finding from `guide_root_cause_analysis_5whys`).
+        -   Output: Provides targeted suggestions. The more information provided, the better the suggestions. You might suggest creating action items from these.
+
+    4.  **Read Data from Files**:
+        -   Tools: `read_csv_data`, `read_excel_data`.
+        -   Requires: `file_path`.
+        -   Interaction: Always confirm with the user the specific column names for the required data fields relevant to the intended analysis (e.g., `total_units_col`, `defective_units_col` for yield; `stage_name_col`, `input_units_col`, `output_units_col` for stage-wise yield; a column containing `data_points` for SPC or anomaly detection; `timestamp`, `event_type`, `item_id` columns for failure pattern analysis). For Excel, also confirm `sheet_name` if not the first sheet.
+        -   Output: Returns structured data that can be used as input for other analysis tools.
+
+    5.  **Statistical Process Control (SPC) Analysis**:
+        -   `calculate_spc_metrics_and_limits`:
+            -   Requires: `data_points` (list of numbers).
+            -   Parameter: Inform the user that `control_limit_sigma` defaults to 3.0. Ask if they want to use a different sigma value (e.g., 2.0 or 2.5 for warning limits). Use their value if provided.
+            -   Output: Reports mean, standard deviation, Upper Control Limit (UCL), and Lower Control Limit (LCL).
+        -   `identify_out_of_control_points`:
+            -   Requires: `data_points`, `upper_control_limit`, `lower_control_limit` (typically from the previous SPC step).
+            -   Output: Reports any out-of-control points. Explain their significance (e.g., potential special causes of variation).
+
+    6.  **Guided Root Cause Analysis (5 Whys)**:
+        -   Tool: `guide_root_cause_analysis_5whys`
+        -   **Initiation**: Ask for a clear `problem_statement`. Call tool with `problem_statement` and `previous_whys=None` (or an empty list).
+        -   **Iteration**: The tool returns `next_prompt_for_user`. Present this to the user. After getting their answer, call the tool again with the original `problem_statement` and an updated `previous_whys` list (append a new dict `{"why_question": "the_question_you_just_asked", "user_answer": "the_user_response"}` to this list).
+        -   **Tracking (Your Responsibility)**: You MUST maintain the history of questions asked and user answers for the current RCA session to correctly build the `previous_whys` list for each iterative call. Initialize an empty list for `previous_whys` when a new RCA starts.
+        -   **Conclusion**: The tool returns `conclusion_prompt` when 5 'Whys' are reached. Present this to the user. They may decide if a root cause is found. Also, present the `analysis_summary` provided by the tool.
+        -   If the user provides an empty or unhelpful answer, re-prompt them for a more specific cause for the last stated problem before calling the tool again.
+
+    7.  **Action Item Tracking (Session-based)**:
+        -   **Adding**: Use `add_action_item`. Requires `description`. Optional: `owner` (string), `status` (string, defaults to "open"). Inform user of the returned `id` for future reference.
+        -   **Listing**: Use `list_action_items`. Optional filters: `status_filter`, `owner_filter`.
+        -   **Updating**: Use `update_action_item_status`. Requires `action_id`, `new_status`. Confirm update to user.
+
+    8.  **Query Knowledge Base**:
+        -   Tool: `query_knowledge_base`
+        -   Requires: `search_keywords` (list of strings).
+        -   Output: Returns a list of matching entries (each may contain problem summary, causes, solutions). Present these findings. Inform if no matches are found. This can be used for general advice or to supplement other analyses.
+
+    9.  **Simple Anomaly Detection in Time-Series Data**:
+        -   Tool: `detect_simple_anomalies`
+        -   Requires: `data_points` (list of numbers, can be from a file or direct input).
+        -   Parameters: Before calling, mention key parameters and their defaults: `window_size` (default 5), `std_dev_threshold` (default 2.0). Also mention optional `absolute_upper_threshold` and `absolute_lower_threshold`. Ask if the user wants to specify different values.
+        -   Output: Returns a list of anomalies (each with index, value, reason). Present these and the parameters used for detection. Explain these are based on simple statistical deviations or threshold breaches.
+
+    10. **Simple Failure Pattern Identification**:
+        -   Tool: `identify_simple_failure_patterns`
+        -   Requires: `event_data` (list of dicts, each with 'timestamp', 'event_type', 'item_id'). Data can be loaded from files (confirm column mapping).
+        -   Parameter: Inform the user that the tool assumes `maintenance_completed_event_type` is "maintenance_completed". If their data uses a different term, ask them to provide it.
+        -   Output: Identifies failure type counts per item and average time-to-failure (TtF) in hours after maintenance. Present these patterns. Explain this can help understand common failures or typical operational periods.
+
+    **Interaction Flow Summary**:
+    -   Start by clarifying the user's goal.
+    -   Confirm necessary inputs and parameters (especially those with defaults) before calling tools.
+    -   If data is from files, use reading tools first, ensuring correct column mapping.
+    -   For multi-step processes (RCA, SPC), guide the user through each step, using outputs from one as inputs for the next where appropriate.
+    -   Proactively suggest further relevant analyses or the creation of action items.
+    -   Clearly present all findings and summaries.
     """,
     tools=[
         calculate_yield_metrics,
         identify_low_yield_stages,
         suggest_improvement_actions,
+        read_csv_data,
+        read_excel_data,
+        calculate_spc_metrics_and_limits,
+        identify_out_of_control_points,
+        guide_root_cause_analysis_5whys,
+        add_action_item,
+        list_action_items,
+        update_action_item_status,
+        query_knowledge_base,
+        detect_simple_anomalies,
+        identify_simple_failure_patterns,
     ],
     generate_content_config=types.GenerateContentConfig(
         safety_settings=[
diff --git a/contributing/samples/yield_analysis_agent/knowledge_base.json b/contributing/samples/yield_analysis_agent/knowledge_base.json
new file mode 100644
index 0000000000..654199d88c
--- /dev/null
+++ b/contributing/samples/yield_analysis_agent/knowledge_base.json
@@ -0,0 +1,61 @@
+[
+  {
+    "id": "kb001",
+    "keywords": ["solder", "incomplete", "bridging", "cold joint"],
+    "problem_summary": "Soldering Defects",
+    "possible_causes": [
+      "Incorrect soldering temperature",
+      "Contaminated surfaces",
+      "Oxidation",
+      "Incorrect solder paste application",
+      "Operator error"
+    ],
+    "suggested_solutions": [
+      "Verify and calibrate soldering iron/station temperature.",
+      "Ensure proper cleaning of components and PCBs before soldering.",
+      "Use appropriate flux and check for solder paste expiry/storage.",
+      "Review and improve solder paste stenciling or dispensing process.",
+      "Provide additional training or certification for operators.",
+      "Implement automated optical inspection (AOI) for solder joints."
+    ]
+  },
+  {
+    "id": "kb002",
+    "keywords": ["misaligned", "component", "placement", "offset"],
+    "problem_summary": "Component Misalignment",
+    "possible_causes": [
+      "Incorrect pick-and-place machine calibration",
+      "Worn nozzles or feeders on PnP machine",
+      "Poor quality PCB fiducials",
+      "Component shifting during reflow",
+      "Incorrect component data in PnP program"
+    ],
+    "suggested_solutions": [
+      "Perform regular calibration of the pick-and-place machine.",
+      "Inspect and replace worn machine parts (nozzles, feeders).",
+      "Ensure PCB fiducials are clear and correctly defined.",
+      "Optimize reflow profile to prevent component movement.",
+      "Verify component library and placement data in PnP machine programs.",
+      "Use vision systems for pre-placement and post-placement checks."
+    ]
+  },
+  {
+    "id": "kb003",
+    "keywords": ["calibration", "machine drift", "process parameter"],
+    "problem_summary": "Machine Calibration or Process Drift",
+    "possible_causes": [
+      "Scheduled maintenance overdue",
+      "Environmental changes (temperature, humidity)",
+      "Component wear and tear",
+      "Software glitches",
+      "Power fluctuations"
+    ],
+    "suggested_solutions": [
+      "Adhere to and verify preventive maintenance schedules.",
+      "Monitor and control environmental conditions in the production area.",
+      "Implement condition-based monitoring for critical machine parts.",
+      "Ensure software is up-to-date and stable; check logs for errors.",
+      "Use power conditioning equipment if fluctuations are common."
+    ]
+  }
+]
diff --git a/contributing/samples/yield_analysis_agent/requirements.txt b/contributing/samples/yield_analysis_agent/requirements.txt
index 7509627d99..1cc409702e 100644
--- a/contributing/samples/yield_analysis_agent/requirements.txt
+++ b/contributing/samples/yield_analysis_agent/requirements.txt
@@ -1,3 +1,4 @@
-# No specific external dependencies for the basic version of this agent.
-# If tools are added that require packages like pandas, numpy, etc.,
-# list them here (e.g., pandas>=1.0.0).
+# For data manipulation and reading CSV/Excel files
+pandas>=1.0.0
+# For numerical operations, used in SPC tools
+numpy>=1.20.0
diff --git a/contributing/samples/yield_analysis_agent/sample.session.json b/contributing/samples/yield_analysis_agent/sample.session.json
index 494404776d..b2b9074384 100644
--- a/contributing/samples/yield_analysis_agent/sample.session.json
+++ b/contributing/samples/yield_analysis_agent/sample.session.json
@@ -1,52 +1,164 @@
 {
-  "title": "Yield Analysis Agent - Sample Session",
+  "title": "Yield Analysis Agent - Comprehensive Sample Session",
   "turns": [
     {
       "role": "user",
-      "input": "Hello, I need to analyze our production yield."
+      "input": "Hello, I need to analyze our manufacturing data. I have a CSV file with daily production stats for Product Alpha."
     },
     {
       "role": "agent",
-      "output": "Hello! I can help with that. What specific analysis would you like to perform? For example, I can calculate yield metrics, identify low-yield stages, or suggest improvement actions."
+      "output": "Hello! I can help with that. I can analyze production yield, perform SPC, guide root cause analysis, detect anomalies, identify failure patterns, and more. To start, please provide the path to your CSV file. What are the column names for 'date', 'total_units_produced', and 'defective_units' in your file?"
     },
     {
       "role": "user",
-      "input": "Let's start by calculating the yield. We produced 1250 units of Product X, and 75 of them were found to be defective."
+      "input": "The path is './production_data_alpha.csv'. Columns are 'ProductionDate', 'TotalProduced', 'Defects'. Can you load it and then show me an SPC analysis of the defect counts?"
     },
     {
       "role": "agent",
-      "tool_code": "calculate_yield_metrics(total_units=1250, defective_units=75)",
-      "tool_result": "{\"yield_rate\": 0.94, \"defect_rate\": 0.06}"
+      "tool_code": "read_csv_data(file_path='./production_data_alpha.csv', total_units_col='TotalProduced', defective_units_col='Defects')",
+      "tool_result": "{\"comment\": \"Assuming 'Defects' column can be used as data_points for SPC. This tool primarily extracts total/defective or stage data. For SPC, user needs to confirm which column to use as data_points. For this example, let's imagine it returned a list of defect counts or the agent re-reads just that column for SPC if necessary.\", \"data_for_spc\": [5,6,8,5,7,9,12,10,11,8,6,7,9,10,14,8,7,6,9,10], \"total_units\": 2000, \"defective_units\": 180}"
     },
     {
       "role": "agent",
-      "output": "Okay, for Product X, with 1250 total units and 75 defective units, the yield rate is 94.0% and the defect rate is 6.0%."
+      "output": "Okay, I've notionally processed the data. From what I see, there are 20 daily defect counts: [5,6,8,5,7,9,12,10,11,8,6,7,9,10,14,8,7,6,9,10]. For the SPC analysis of these defect counts, I'll use a default control limit of 3.0 sigma. Would you like to use a different sigma value?"
     },
     {
       "role": "user",
-      "input": "That's helpful. Now, can you help me identify if any stages are performing poorly? Here's the data for our main assembly line. Stage 1: input 1000, output 980. Stage 2: input 980, output 950. Stage 3: input 950, output 880. Stage 4: input 880, output 870. Let's use a yield threshold of 95%."
+      "input": "No, 3.0 sigma is fine."
     },
     {
       "role": "agent",
-      "tool_code": "identify_low_yield_stages(production_data_per_stage=[{'stage_name': 'Stage 1', 'input_units': 1000, 'output_units': 980}, {'stage_name': 'Stage 2', 'input_units': 980, 'output_units': 950}, {'stage_name': 'Stage 3', 'input_units': 950, 'output_units': 880}, {'stage_name': 'Stage 4', 'input_units': 880, 'output_units': 870}], yield_threshold=0.95)",
-      "tool_result": "{\"low_yield_stages\": [{\"stage_name\": \"Stage 3\", \"yield\": 0.9263157894736842, \"input_units\": 950, \"output_units\": 880}]}"
+      "tool_code": "calculate_spc_metrics_and_limits(data_points=[5,6,8,5,7,9,12,10,11,8,6,7,9,10,14,8,7,6,9,10], control_limit_sigma=3.0)",
+      "tool_result": "{\"mean\": 8.7, \"std_dev\": 2.53, \"upper_control_limit\": 16.29, \"lower_control_limit\": 1.11, \"control_limit_sigma\": 3.0, \"data_points_analyzed\": 20}"
     },
     {
       "role": "agent",
-      "output": "Based on your data and a 95% yield threshold, Stage 3 is underperforming with a yield of approximately 92.63%."
+      "output": "For the defect counts, the mean is 8.7, standard deviation is 2.53. The Upper Control Limit (UCL) is 16.29 and the Lower Control Limit (LCL) is 1.11. Now, I'll check for out-of-control points."
+    },
+    {
+      "role": "agent",
+      "tool_code": "identify_out_of_control_points(data_points=[5,6,8,5,7,9,12,10,11,8,6,7,9,10,14,8,7,6,9,10], upper_control_limit=16.29, lower_control_limit=1.11)",
+      "tool_result": "{\"out_of_control_points\": [], \"ucl_used\": 16.29, \"lcl_used\": 1.11, \"data_points_checked\": 20}"
+    },
+    {
+      "role": "agent",
+      "output": "Good news! There are no out-of-control points in this dataset based on the 3-sigma limits. The process appears to be in statistical control regarding daily defect counts. However, there was one day with 14 defects, which is close to the UCL. Would you like to perform a 5 Whys RCA on why that day (index 14) had high defects?"
+    },
+    {
+      "role": "user",
+      "input": "Yes, let's do a 5 Whys on 'Day 14 had 14 defects'."
+    },
+    {
+      "role": "agent",
+      "tool_code": "guide_root_cause_analysis_5whys(problem_statement='Day 14 had 14 defects (near UCL)', previous_whys=[])",
+      "tool_result": "{\"next_prompt_for_user\": \"Why is 'Day 14 had 14 defects (near UCL)' happening?\", \"current_depth\": 1, \"analysis_summary\": \"Initial Problem: Day 14 had 14 defects (near UCL)\"}"
+    },
+    {
+      "role": "agent",
+      "output": "Okay, let's start the 5 Whys. Why is 'Day 14 had 14 defects (near UCL)' happening?"
     },
     {
       "role": "user",
-      "input": "Okay, Stage 3 is a concern. We've also noticed a high number of 'misaligned components' and 'incomplete soldering' defects recently. What can we do?"
+      "input": "We used a new batch of raw material X on that day."
+    },
+    {
+      "role": "agent",
+      "tool_code": "guide_root_cause_analysis_5whys(problem_statement='Day 14 had 14 defects (near UCL)', previous_whys=[{'why_question': \"Why is 'Day 14 had 14 defects (near UCL)' happening?\", 'user_answer': 'We used a new batch of raw material X on that day.'}])",
+      "tool_result": "{\"next_prompt_for_user\": \"Why did 'We used a new batch of raw material X on that day.' occur?\", \"current_depth\": 2, \"analysis_summary\": \"Initial Problem: Day 14 had 14 defects (near UCL)\\n1. Q: Why is 'Day 14 had 14 defects (near UCL)' happening? A: We used a new batch of raw material X on that day.\"}"
+    },
+    {
+      "role": "agent",
+      "output": "Okay. Why did 'We used a new batch of raw material X on that day.' occur?"
+    },
+    {
+      "role": "user",
+      "input": "Our usual supplier had a stockout, so we sourced from a secondary supplier."
+    },
+    {
+      "role": "agent",
+      "output": "(Agent continues 5 Whys for a few more turns. For brevity, skipping to a point where agent uses knowledge base and suggests actions)"
+    },
+    {
+      "role": "user",
+      "input": "The material from the secondary supplier might have had slightly different impurity levels. Is there anything in the knowledge base about 'impurity' or 'raw material variation'?"
+    },
+    {
+      "role": "agent",
+      "tool_code": "query_knowledge_base(search_keywords=['impurity', 'raw material variation'])",
+      "tool_result": "{\"results\": [{\"id\": \"kb004\", \"keywords\": [\"raw material\", \"variation\", \"impurity\", \"supplier\"], \"problem_summary\": \"Raw Material Variability Issues\", \"possible_causes\": [\"Inconsistent supplier quality\", \"Poor material specification\", \"Degradation during storage\"], \"suggested_solutions\": [\"Implement incoming material inspection\", \"Tighten material specifications with suppliers\", \"Audit secondary suppliers regularly\"]}]}"
+    },
+    {
+      "role": "agent",
+      "output": "Yes, I found an entry in the knowledge base for 'Raw Material Variability Issues'. It suggests solutions like: Implement incoming material inspection, Tighten material specifications with suppliers, and Audit secondary suppliers regularly. Based on this and our RCA, what actions should we consider?"
+    },
+    {
+      "role": "agent",
+      "tool_code": "suggest_improvement_actions(rca_summary='Potential issue with raw material from secondary supplier due to different impurity levels.', common_defect_types=['high defect count on day 14'])",
+      "tool_result": "{\"suggested_actions\": [\"Based on Root Cause Analysis: Address the findings summarized as: 'Potential issue with raw material from secondary supplier due to different impurity levels.'. Develop and implement targeted solutions.\", \"Address common defect type: Implement specific countermeasures for 'high defect count on day 14'.\"]}"
     },
     {
       "role": "agent",
-      "tool_code": "suggest_improvement_actions(low_yield_stages=[{'stage_name': 'Stage 3', 'yield': 0.9263157894736842}], common_defect_types=['misaligned components', 'incomplete soldering'])",
-      "tool_result": "{\"suggested_actions\": [\"Investigate root causes for low yield at stage: Stage 3.\", \"Implement corrective actions for defect type: misaligned components.\", \"Implement corrective actions for defect type: incomplete soldering.\"]}"
+      "output": "I suggest the following actions: \n- Based on our RCA: Address the potential issue with raw material from the secondary supplier due to different impurity levels. Develop and implement targeted solutions (like incoming inspection for new batches from this supplier). \n- Address the high defect count on day 14 by investigating if other factors were also at play besides the material."
+    },
+    {
+      "role": "user",
+      "input": "Okay, create an action item: 'Implement incoming inspection for raw material from secondary suppliers', assign it to 'QA Team'."
+    },
+    {
+      "role": "agent",
+      "tool_code": "add_action_item(description='Implement incoming inspection for raw material from secondary suppliers', owner='QA Team', status='open')",
+      "tool_result": "{\"success\": true, \"action_item_added\": {\"id\": \"uuid-xyz-123\", \"description\": \"Implement incoming inspection for raw material from secondary suppliers\", \"owner\": \"QA Team\", \"status\": \"open\", \"created_at\": \"timestamp\"}}"
+    },
+    {
+      "role": "agent",
+      "output": "Action item created with ID 'uuid-xyz-123': 'Implement incoming inspection for raw material from secondary suppliers', assigned to 'QA Team', status 'open'. Is there anything else, like analyzing sensor data for anomalies or looking at failure patterns from maintenance logs?"
+    },
+    {
+      "role": "user",
+      "input": "Yes, I have some temperature sensor data for Machine Z: [25,26,25,27,28,26,25,35,26,27]. Anything unusual? Use default parameters for anomaly detection."
+    },
+    {
+      "role": "agent",
+      "tool_code": "detect_simple_anomalies(data_points=[25,26,25,27,28,26,25,35,26,27], window_size=5, std_dev_threshold=2.0)",
+      "tool_result": "{\"anomalies\": [{\"index\": 7, \"value\": 35.0, \"reason\": \"Exceeded rolling upper bound (30.4<seg_74>, mean: 28.2, std: 1.1)\"}], \"parameters_used\": {\"window_size\": 5, \"std_dev_threshold\": 2.0, \"absolute_upper_threshold\": null, \"absolute_lower_threshold\": null, \"data_points_analyzed\": 10}}"
+    },
+    {
+      "role": "agent",
+      "output": "Anomaly detected in Machine Z's temperature data at index 7 with a value of 35.0. This was flagged because it exceeded the rolling upper bound of 30.4 (mean: 28.2, std: 1.1), using a window size of 5 and a 2.0 std dev threshold. This sudden spike might warrant investigation."
+    },
+    {
+      "role": "user",
+      "input": "Interesting. Now, let's look at some maintenance logs for Machine Z. I have a file 'machine_z_events.csv' with 'timestamp', 'event_type', 'item_id' columns."
+    },
+    {
+      "role": "agent",
+      "output": "Okay, I can load that using `read_csv_data` and then analyze it with `identify_simple_failure_patterns`. What is the event type string in your data that indicates a maintenance was completed (e.g., 'maintenance_done', 'PM_completed')?"
+    },
+    {
+      "role": "user",
+      "input": "It's 'PM_completed'."
+    },
+    {
+      "role": "agent",
+      "tool_code": "read_csv_data(file_path='./machine_z_events.csv', stage_name_col=null, input_units_col=null, output_units_col=null) # Simplified for brevity, assuming it returns event list",
+      "tool_result": "{\"production_data_per_stage\": null, \"comment\": \"This tool isn't ideal for generic event lists. Ideally, a dedicated event list reader or a more flexible read_csv_data is used. For this example, assume it returns the required event list format for the next tool.\", \"event_list_for_failure_pattern\": [{\"timestamp\":\"2023-01-10T10:00:00\",\"event_type\":\"motor_overheat\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-01-10T12:00:00\",\"event_type\":\"PM_completed\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-02-15T14:00:00\",\"event_type\":\"motor_overheat\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-02-15T16:00:00\",\"event_type\":\"PM_completed\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-03-10T09:00:00\",\"event_type\":\"sensor_failure\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-03-20T11:00:00\",\"event_type\":\"motor_overheat\",\"item_id\":\"Machine Z\"}]}"
+    },
+    {
+      "role": "agent",
+      "tool_code": "identify_simple_failure_patterns(event_data=[{\"timestamp\":\"2023-01-10T10:00:00\",\"event_type\":\"motor_overheat\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-01-10T12:00:00\",\"event_type\":\"PM_completed\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-02-15T14:00:00\",\"event_type\":\"motor_overheat\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-02-15T16:00:00\",\"event_type\":\"PM_completed\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-03-10T09:00:00\",\"event_type\":\"sensor_failure\",\"item_id\":\"Machine Z\"}, {\"timestamp\":\"2023-03-20T11:00:00\",\"event_type\":\"motor_overheat\",\"item_id\":\"Machine Z\"}], maintenance_completed_event_type='PM_completed')",
+      "tool_result": "{\"identified_patterns\": {\"failure_type_counts\": {\"Machine Z::motor_overheat\": 3, \"Machine Z::sensor_failure\": 1}, \"time_to_failure_after_maintenance_summary\": {\"Machine Z::motor_overheat\": {\"average_hours\": 794.0, \"count\": 2, \"all_hours\": [818.0, 770.0]}}, \"notes\": []}}"
+    },
+     {
+      "role": "agent",
+      "output": "Okay, I've analyzed the events for Machine Z using 'PM_completed' as the maintenance indicator. \nPatterns found: \n- Failure Type Counts: 'motor_overheat' on Machine Z occurred 3 times. 'sensor_failure' on Machine Z occurred 1 time. \n- Time to Failure after Maintenance for 'motor_overheat' on Machine Z: Average time is 794.0 hours, based on 2 instances (818.0 hours and 770.0 hours respectively after a 'PM_completed' event). \nThis suggests the motor overheating is a recurring issue for Machine Z."
+    },
+    {
+      "role": "user",
+      "input": "This has been very helpful. Thank you!"
     },
     {
       "role": "agent",
-      "output": "Considering the low yield at Stage 3 and the defect types 'misaligned components' and 'incomplete soldering', I suggest the following actions: \n- Investigate root causes for low yield at Stage 3. \n- Implement corrective actions for the 'misaligned components' defect. \n- Implement corrective actions for the 'incomplete soldering' defect."
+      "output": "You're welcome! I'm glad I could assist. Feel free to ask if you have more data to analyze or further questions."
     }
   ]
 }
diff --git a/contributing/samples/yield_analysis_agent/tools.py b/contributing/samples/yield_analysis_agent/tools.py
index 6a436a404c..beb68a6a63 100644
--- a/contributing/samples/yield_analysis_agent/tools.py
+++ b/contributing/samples/yield_analysis_agent/tools.py
@@ -13,6 +13,12 @@
 # limitations under the License.
 
 from google.adk.tools.tool_context import ToolContext
+import pandas as pd
+import io
+import numpy as np
+import uuid # Add this import for generating unique IDs
+import json # For knowledge base
+from collections import defaultdict # For failure patterns
 
 def calculate_yield_metrics(
     total_units: int, defective_units: int, tool_context: ToolContext
@@ -38,14 +44,15 @@ def calculate_yield_metrics(
     yield_rate = (total_units - defective_units) / total_units
     defect_rate = defective_units / total_units
 
-    if 'calculations' not in tool_context.state:
+    if tool_context and 'calculations' not in tool_context.state:
         tool_context.state['calculations'] = []
-    tool_context.state['calculations'].append({
-        "total_units": total_units,
-        "defective_units": defective_units,
-        "yield_rate": yield_rate,
-        "defect_rate": defect_rate
-    })
+    if tool_context:
+        tool_context.state['calculations'].append({
+            "total_units": total_units,
+            "defective_units": defective_units,
+            "yield_rate": yield_rate,
+            "defect_rate": defect_rate
+        })
 
     return {
         "yield_rate": yield_rate,
@@ -96,71 +103,783 @@ def identify_low_yield_stages(
                 "output_units": stage_data['output_units'],
             })
 
-    if 'low_yield_analysis' not in tool_context.state:
+    if tool_context and 'low_yield_analysis' not in tool_context.state:
         tool_context.state['low_yield_analysis'] = []
-    tool_context.state['low_yield_analysis'].append({
-        "yield_threshold": yield_threshold,
-        "identified_low_yield_stages": low_yield_stages
-    })
+    if tool_context:
+        tool_context.state['low_yield_analysis'].append({
+            "yield_threshold": yield_threshold,
+            "identified_low_yield_stages": low_yield_stages
+        })
 
     return {"low_yield_stages": low_yield_stages}
 
 def suggest_improvement_actions(
-    low_yield_stages: list[dict], common_defect_types: list[str], tool_context: ToolContext
+    low_yield_stages: list[dict] = None,
+    common_defect_types: list[str] = None,
+    spc_out_of_control_points: list[dict] = None, # New: e.g., [{"index": 5, "value": 15.0, "metric_name": "defect_count"}]
+    rca_summary: str = None, # New: e.g., "Root cause identified: Machine calibration drift for component X."
+    tool_context: ToolContext = None
 ) -> dict:
-    """Suggests potential improvement actions based on low-yield stages and defect types.
+    """Suggests potential improvement actions based on various analyses.
 
     Args:
-        low_yield_stages: A list of dictionaries representing low-yield stages,
-                          including 'stage_name' and 'yield'.
-        common_defect_types: A list of strings describing common defect types.
+        low_yield_stages: List of dictionaries representing low-yield stages.
+        common_defect_types: List of strings describing common defect types.
+        spc_out_of_control_points: List of out-of-control points from SPC analysis.
+        rca_summary: A summary string from a Root Cause Analysis.
         tool_context: The context for the tool.
 
     Returns:
         A dictionary containing a list of suggested actions.
     """
     suggestions = []
+    if low_yield_stages is None: low_yield_stages = []
+    if common_defect_types is None: common_defect_types = []
+    if spc_out_of_control_points is None: spc_out_of_control_points = []
+
+
     if not isinstance(low_yield_stages, list):
-        return {"error": "Low yield stages must be a list."}
+        return {"error": "Low yield stages must be a list if provided."}
     if not isinstance(common_defect_types, list):
-        return {"error": "Common defect types must be a list."}
+        return {"error": "Common defect types must be a list if provided."}
+    if not isinstance(spc_out_of_control_points, list):
+        return {"error": "SPC out-of-control points must be a list if provided."}
+    if rca_summary is not None and not isinstance(rca_summary, str):
+        return {"error": "RCA summary must be a string if provided."}
 
+    # Existing logic for low_yield_stages and common_defect_types
     for stage in low_yield_stages:
         if not isinstance(stage, dict) or 'stage_name' not in stage:
-            return {"error": "Each low yield stage must be a dictionary with a 'stage_name'."}
+            # Allow for flexibility if other keys like 'yield' are also expected
+            return {"error": "Each low yield stage must be a dictionary with at least a 'stage_name'."}
+        stage_info = stage['stage_name']
+        if 'yield' in stage:
+            stage_info += f" (Yield: {stage['yield']:.2%})"
         suggestions.append(
-            f"Investigate root causes for low yield at stage: {stage['stage_name']}."
+            f"Investigate root causes and implement corrective actions for low yield at stage: {stage_info}."
         )
 
     for defect in common_defect_types:
         if not isinstance(defect, str):
             return {"error": "Each defect type must be a string."}
         suggestions.append(
-            f"Implement corrective actions for defect type: {defect}."
+            f"Address common defect type: Implement specific countermeasures for '{defect}'."
         )
 
-    if not low_yield_stages and not common_defect_types:
+    # New logic for SPC OOC points
+    if spc_out_of_control_points:
         suggestions.append(
-            "No specific low-yield stages or defect types provided. "
-            "Consider a general process review for potential improvements."
+            "Address SPC out-of-control signals: Investigate the causes for the following out-of-control points:"
         )
-    elif not low_yield_stages:
+        for point in spc_out_of_control_points:
+            point_desc = f"- Index {point.get('index', 'N/A')}, Value {point.get('value', 'N/A')}"
+            if 'metric_name' in point: # Optional: if metric name is passed
+                 point_desc += f" for metric '{point['metric_name']}'"
+            suggestions.append(point_desc)
+        suggestions.append("Implement corrective actions to bring the process back into statistical control.")
+
+
+    # New logic for RCA summary
+    if rca_summary and rca_summary.strip():
         suggestions.append(
-            "No specific low-yield stages provided. "
-            "Focus on addressing the listed common defect types."
+            f"Based on Root Cause Analysis: Address the findings summarized as: '{rca_summary}'. Develop and implement targeted solutions."
         )
-    elif not common_defect_types:
+
+    if not suggestions:
         suggestions.append(
-            "No common defect types provided. "
-            "Focus on investigating the identified low-yield stages."
+            "No specific issues (low yield, defects, OOC points, or RCA summary) provided. "
+            "Consider a general process review or provide more data for targeted suggestions."
         )
 
-    if 'improvement_suggestions' not in tool_context.state:
-        tool_context.state['improvement_suggestions'] = []
-    tool_context.state['improvement_suggestions'].append({
-        "low_yield_stages_input": low_yield_stages,
-        "common_defect_types_input": common_defect_types,
-        "suggestions_provided": suggestions
-    })
+    if tool_context:
+        if 'improvement_suggestions' not in tool_context.state:
+            tool_context.state['improvement_suggestions'] = []
+        # Log more comprehensive input that led to these suggestions
+        current_inputs = {
+            "low_yield_stages_input": low_yield_stages,
+            "common_defect_types_input": common_defect_types,
+            "spc_ooc_points_input": spc_out_of_control_points,
+            "rca_summary_input": rca_summary,
+            "suggestions_provided": suggestions
+        }
+        tool_context.state['improvement_suggestions'].append(current_inputs)
 
     return {"suggested_actions": suggestions}
+
+def read_csv_data(
+    file_path: str,
+    total_units_col: str = None,
+    defective_units_col: str = None,
+    input_units_col: str = None,
+    output_units_col: str = None,
+    stage_name_col: str = None,
+    tool_context: ToolContext = None
+) -> dict:
+    """Reads production data from a CSV file.
+
+    The CSV can be used for overall yield (total_units_col, defective_units_col)
+    or for stage-wise yield (stage_name_col, input_units_col, output_units_col).
+
+    Args:
+        file_path: Path to the CSV file accessible by the agent.
+        total_units_col: Name of the column for total units (for overall yield).
+        defective_units_col: Name of the column for defective units (for overall yield).
+        input_units_col: Name of the column for input units (for stage-wise yield).
+        output_units_col: Name of the column for output units (for stage-wise yield).
+        stage_name_col: Name of the column for stage names (for stage-wise yield).
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing the extracted data or an error message.
+        For overall yield, it returns: {"total_units": X, "defective_units": Y}
+        For stage-wise yield, it returns: {"production_data_per_stage": [{"stage_name": S, "input_units": I, "output_units": O}, ...]}
+    """
+    try:
+        df = pd.read_csv(file_path)
+
+        if tool_context and 'file_reads' not in tool_context.state:
+            tool_context.state['file_reads'] = []
+        if tool_context:
+            tool_context.state['file_reads'].append({"file_path": file_path, "type": "csv"})
+
+        result = {}
+        if total_units_col and defective_units_col:
+            if total_units_col not in df.columns or defective_units_col not in df.columns:
+                return {"error": f"One or both columns ('{total_units_col}', '{defective_units_col}') not found in CSV."}
+            # Assuming one row of summary data or summing up if multiple rows
+            result["total_units"] = int(df[total_units_col].sum())
+            result["defective_units"] = int(df[defective_units_col].sum())
+        elif input_units_col and output_units_col and stage_name_col:
+            if not all(col in df.columns for col in [input_units_col, output_units_col, stage_name_col]):
+                return {"error": f"One or more columns ('{input_units_col}', '{output_units_col}', '{stage_name_col}') not found."}
+
+            production_data = []
+            for _, row in df.iterrows():
+                production_data.append({
+                    "stage_name": str(row[stage_name_col]),
+                    "input_units": int(row[input_units_col]),
+                    "output_units": int(row[output_units_col])
+                })
+            result["production_data_per_stage"] = production_data
+        else:
+            return {"error": "Please specify columns for either overall yield or stage-wise yield."
+
+        return result
+
+    except FileNotFoundError:
+        return {"error": f"CSV file not found at path: {file_path}"}
+    except pd.errors.EmptyDataError:
+        return {"error": "CSV file is empty."}
+    except Exception as e:
+        return {"error": f"Error reading CSV: {str(e)}"}
+
+def read_excel_data(
+    file_path: str,
+    sheet_name: str | int = 0,
+    total_units_col: str = None,
+    defective_units_col: str = None,
+    input_units_col: str = None,
+    output_units_col: str = None,
+    stage_name_col: str = None,
+    tool_context: ToolContext = None
+) -> dict:
+    """Reads production data from an Excel file.
+
+    The Excel sheet can be used for overall yield (total_units_col, defective_units_col)
+    or for stage-wise yield (stage_name_col, input_units_col, output_units_col).
+
+    Args:
+        file_path: Path to the Excel file accessible by the agent.
+        sheet_name: Name or index of the sheet to read (default is the first sheet).
+        total_units_col: Name of the column for total units.
+        defective_units_col: Name of the column for defective units.
+        input_units_col: Name of the column for input units.
+        output_units_col: Name of the column for output units.
+        stage_name_col: Name of the column for stage names.
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing the extracted data or an error message.
+    """
+    try:
+        df = pd.read_excel(file_path, sheet_name=sheet_name)
+
+        if tool_context and 'file_reads' not in tool_context.state:
+            tool_context.state['file_reads'] = []
+        if tool_context:
+            tool_context.state['file_reads'].append({"file_path": file_path, "type": "excel", "sheet": sheet_name})
+
+        result = {}
+        if total_units_col and defective_units_col:
+            if total_units_col not in df.columns or defective_units_col not in df.columns:
+                return {"error": f"One or both columns ('{total_units_col}', '{defective_units_col}') not found in Excel sheet."}
+            result["total_units"] = int(df[total_units_col].sum())
+            result["defective_units"] = int(df[defective_units_col].sum())
+        elif input_units_col and output_units_col and stage_name_col:
+            if not all(col in df.columns for col in [input_units_col, output_units_col, stage_name_col]):
+                return {"error": f"One or more columns ('{input_units_col}', '{output_units_col}', '{stage_name_col}') not found."}
+
+            production_data = []
+            for _, row in df.iterrows():
+                production_data.append({
+                    "stage_name": str(row[stage_name_col]),
+                    "input_units": int(row[input_units_col]),
+                    "output_units": int(row[output_units_col])
+                })
+            result["production_data_per_stage"] = production_data
+        else:
+            return {"error": "Please specify columns for either overall yield or stage-wise yield."
+
+        return result
+
+    except FileNotFoundError:
+        return {"error": f"Excel file not found at path: {file_path}"}
+    except pd.errors.EmptyDataError:
+        return {"error": "Excel sheet is empty."}
+    except Exception as e: # Could be bad sheet name, etc.
+        return {"error": f"Error reading Excel: {str(e)}"}
+
+def calculate_spc_metrics_and_limits(
+    data_points: list[float | int],
+    control_limit_sigma: float = 3.0,
+    tool_context: ToolContext = None
+) -> dict:
+    """Calculates SPC metrics (mean, std dev) and control limits for a given dataset.
+
+    Args:
+        data_points: A list of numerical data points (e.g., defect counts, measurements).
+        control_limit_sigma: The number of standard deviations to use for control limits (default is 3.0).
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing:
+        - mean: The average of the data points.
+        - std_dev: The standard deviation of the data points.
+        - upper_control_limit (UCL): Mean + (control_limit_sigma * std_dev).
+        - lower_control_limit (LCL): Mean - (control_limit_sigma * std_dev), cannot be less than 0 for count data.
+        - control_limit_sigma: The sigma value used.
+        Or an error message if data is insufficient.
+    """
+    if not isinstance(data_points, list) or len(data_points) < 2:
+        return {"error": "Insufficient data points. At least 2 data points are required."}
+    if not all(isinstance(x, (int, float)) for x in data_points):
+        return {"error": "All data points must be numerical (int or float)."}
+    if control_limit_sigma <= 0:
+        return {"error": "Control limit sigma must be a positive value."}
+
+    np_data = np.array(data_points)
+    mean = np.mean(np_data)
+    std_dev = np.std(np_data)
+
+    # Ensure LCL is not negative if data points suggest counts or positive measurements
+    # A more sophisticated check might involve parameterizing this behavior.
+    # For now, if all data points are >=0, LCL won't go below 0.
+    is_non_negative_data = all(x >= 0 for x in data_points)
+
+    ucl = mean + (control_limit_sigma * std_dev)
+    lcl = mean - (control_limit_sigma * std_dev)
+
+    if is_non_negative_data and lcl < 0:
+        lcl = 0.0
+
+    results = {
+        "mean": float(mean),
+        "std_dev": float(std_dev),
+        "upper_control_limit": float(ucl),
+        "lower_control_limit": float(lcl),
+        "control_limit_sigma": control_limit_sigma,
+        "data_points_analyzed": len(data_points)
+    }
+
+    if tool_context:
+        if 'spc_analysis' not in tool_context.state:
+            tool_context.state['spc_analysis'] = []
+        tool_context.state['spc_analysis'].append(results)
+
+    return results
+
+def identify_out_of_control_points(
+    data_points: list[float | int],
+    upper_control_limit: float,
+    lower_control_limit: float,
+    tool_context: ToolContext = None
+) -> dict:
+    """Identifies data points that fall outside the given control limits.
+
+    Args:
+        data_points: A list of numerical data points.
+        upper_control_limit: The upper control limit (UCL).
+        lower_control_limit: The lower control limit (LCL).
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing:
+        - out_of_control_points: A list of dicts, each with 'index' and 'value' of the OOC point.
+        - ucl_used: The UCL value used for the check.
+        - lcl_used: The LCL value used for the check.
+        Or an error message if inputs are invalid.
+    """
+    if not isinstance(data_points, list) or not data_points:
+        return {"error": "Data points list cannot be empty."}
+    if not all(isinstance(x, (int, float)) for x in data_points):
+        return {"error": "All data points must be numerical (int or float)."}
+    if not isinstance(upper_control_limit, (int, float)) or not isinstance(lower_control_limit, (int, float)):
+        return {"error": "Upper and Lower control limits must be numerical."}
+    if lower_control_limit > upper_control_limit:
+        return {"error": "Lower control limit cannot be greater than Upper control limit."}
+
+    ooc_points = []
+    for i, value in enumerate(data_points):
+        if value > upper_control_limit or value < lower_control_limit:
+            ooc_points.append({"index": i, "value": float(value)})
+
+    results = {
+        "out_of_control_points": ooc_points,
+        "ucl_used": upper_control_limit,
+        "lcl_used": lower_control_limit,
+        "data_points_checked": len(data_points)
+    }
+
+    if tool_context:
+        if 'spc_ooc_checks' not in tool_context.state:
+            tool_context.state['spc_ooc_checks'] = []
+        tool_context.state['spc_ooc_checks'].append(results)
+
+    return results
+
+def guide_root_cause_analysis_5whys(
+    problem_statement: str,
+    previous_whys: list[dict] = None, # Each dict: {"why_question": "...", "user_answer": "..."}
+    tool_context: ToolContext = None
+) -> dict:
+    """Guides the user through a 5 Whys root cause analysis.
+
+    This tool helps structure the 5 Whys process. The agent uses this tool
+    to ask the next 'Why?' question or suggest conclusion.
+
+    Args:
+        problem_statement: The initial problem to analyze.
+        previous_whys: A list of previous 'why' questions and user answers.
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing:
+        - next_prompt_for_user: The next question to ask the user (e.g., "Why did '{last_answer}' occur?").
+        - current_depth: The current number of 'Whys' asked.
+        - analysis_summary: A summary of the
+        Or, if depth is 5 or user indicates root cause found (logic to be handled by agent):
+        - conclusion_prompt: A suggestion to conclude or summarize.
+    """
+    if not isinstance(problem_statement, str) or not problem_statement.strip():
+        return {"error": "Problem statement cannot be empty."}
+
+    current_depth = len(previous_whys) if previous_whys else 0
+    analysis_path = [f"Initial Problem: {problem_statement}"]
+
+    if previous_whys:
+        for i, item in enumerate(previous_whys):
+            analysis_path.append(f"{i+1}. Q: {item['why_question']} A: {item['user_answer']}")
+
+    if tool_context:
+        if 'rca_sessions' not in tool_context.state:
+            tool_context.state['rca_sessions'] = []
+        # Log current state of this RCA
+        rca_log_entry = {
+            "problem": problem_statement,
+            "path": analysis_path,
+            "depth": current_depth
+        }
+        # Avoid appending duplicates if called multiple times for the same RCA state
+        # This simplistic check might need refinement for more robust state tracking
+        if not any(s == rca_log_entry for s in tool_context.state['rca_sessions']):
+             tool_context.state['rca_sessions'].append(rca_log_entry)
+
+
+    if current_depth >= 5:
+        return {
+            "conclusion_prompt": "We have reached 5 'Whys'. Do you feel we have identified a potential root cause, or would you like to delve deeper?",
+            "current_depth": current_depth,
+            "analysis_summary": "\n".join(analysis_path)
+        }
+
+    last_answer = problem_statement
+    if previous_whys:
+        last_answer = previous_whys[-1]["user_answer"]
+        if not isinstance(last_answer, str) or not last_answer.strip():
+            return {"error": f"The answer to 'Why #{current_depth}' was empty. Please provide a valid answer."}
+
+
+    next_question = f"Why did '{last_answer}' occur?"
+    if current_depth == 0 and problem_statement:
+        next_question = f"Why is '{problem_statement}' happening?"
+
+
+    return {
+        "next_prompt_for_user": next_question,
+        "current_depth": current_depth + 1, # Next depth will be current_depth + 1
+        "analysis_summary": "\n".join(analysis_path)
+    }
+
+def add_action_item(
+    description: str,
+    owner: str = None, # Optional: name or team responsible
+    status: str = "open", # Default status
+    tool_context: ToolContext = None
+) -> dict:
+    """Adds a new action item to the session's action list.
+
+    Args:
+        description: A clear description of the action item.
+        owner: Optional. The person or team responsible for the action.
+        status: Optional. The initial status of the action (e.g., "open", "in-progress").
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing the newly added action item with its ID,
+        or an error message.
+    """
+    if not isinstance(description, str) or not description.strip():
+        return {"error": "Action item description cannot be empty."}
+    if owner is not None and not isinstance(owner, str):
+        return {"error": "Owner must be a string if provided."}
+    if not isinstance(status, str) or not status.strip():
+        return {"error": "Status must be a non-empty string."}
+
+    action_id = str(uuid.uuid4()) # Generate a unique ID for the action item
+    action_item = {
+        "id": action_id,
+        "description": description,
+        "owner": owner,
+        "status": status,
+        "created_at": pd.Timestamp.now().isoformat() # Optional: timestamp
+    }
+
+    if tool_context:
+        if 'action_items' not in tool_context.state:
+            tool_context.state['action_items'] = []
+        tool_context.state['action_items'].append(action_item)
+        return {"success": True, "action_item_added": action_item}
+    else:
+        # This case should ideally not happen if tool_context is always passed by the agent
+        return {"error": "Tool context not available. Cannot save action item."}
+
+def list_action_items(
+    status_filter: str = None, # Optional: e.g., "open", "in-progress"
+    owner_filter: str = None,  # Optional: filter by owner
+    tool_context: ToolContext = None
+) -> dict:
+    """Lists action items from the session, with optional filters.
+
+    Args:
+        status_filter: Optional. Filter actions by this status.
+        owner_filter: Optional. Filter actions by this owner.
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing a list of action items, or an error message.
+    """
+    if tool_context and 'action_items' in tool_context.state:
+        actions = tool_context.state['action_items']
+
+        if status_filter:
+            if not isinstance(status_filter, str):
+                return {"error": "Status filter must be a string."}
+            actions = [a for a in actions if a.get('status', '').lower() == status_filter.lower()]
+
+        if owner_filter:
+            if not isinstance(owner_filter, str):
+                return {"error": "Owner filter must be a string."}
+            actions = [a for a in actions if a.get('owner', '').lower() == owner_filter.lower()]
+
+        return {"action_items": actions}
+    return {"action_items": []} # Return empty list if no actions or no context
+
+def update_action_item_status(
+    action_id: str,
+    new_status: str,
+    tool_context: ToolContext = None
+) -> dict:
+    """Updates the status of an existing action item.
+
+    Args:
+        action_id: The unique ID of the action item to update.
+        new_status: The new status for the action item (e.g., "in-progress", "completed", "cancelled").
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary indicating success or failure, and the updated item if successful.
+    """
+    if not isinstance(action_id, str) or not action_id.strip():
+        return {"error": "Action ID must be a non-empty string."}
+    if not isinstance(new_status, str) or not new_status.strip():
+        return {"error": "New status must be a non-empty string."}
+
+    if tool_context and 'action_items' in tool_context.state:
+        actions = tool_context.state['action_items']
+        for item in actions:
+            if item.get('id') == action_id:
+                item['status'] = new_status
+                item['updated_at'] = pd.Timestamp.now().isoformat() # Optional: timestamp
+                # No need to save back to tool_context.state['action_items'] as 'item' is a reference to an element within that list.
+                return {"success": True, "updated_action_item": item}
+        return {"error": f"Action item with ID '{action_id}' not found."}
+
+    return {"error": "No action items found in context or context unavailable."}
+
+def query_knowledge_base(
+    search_keywords: list[str],
+    tool_context: ToolContext = None
+) -> dict:
+    """Queries a simple knowledge base for problems, causes, and solutions.
+
+    Args:
+        search_keywords: A list of keywords to search for in the knowledge base.
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing a list of matching entries from the knowledge base
+        or an error message.
+    """
+    if not isinstance(search_keywords, list) or not search_keywords:
+        return {"error": "Search keywords list cannot be empty."}
+    if not all(isinstance(kw, str) for kw in search_keywords):
+        return {"error": "All search keywords must be strings."}
+
+    # In a real scenario, this path might be configurable or more robust.
+    kb_file_path = "contributing/samples/yield_analysis_agent/knowledge_base.json"
+
+    try:
+        with open(kb_file_path, 'r') as f:
+            knowledge_base_data = json.load(f)
+    except FileNotFoundError:
+        return {"error": f"Knowledge base file not found at {kb_file_path}."}
+    except json.JSONDecodeError:
+        return {"error": f"Error decoding JSON from knowledge base file {kb_file_path}."}
+    except Exception as e:
+        return {"error": f"An unexpected error occurred while reading the knowledge base: {str(e)}"}
+
+    matching_entries = []
+    search_keywords_lower = [kw.lower() for kw in search_keywords]
+
+    for entry in knowledge_base_data:
+        # Check if any search keyword is in the entry's keywords or problem summary
+        entry_text_corpus = " ".join(entry.get("keywords", [])).lower() +                             " " + entry.get("problem_summary", "").lower()
+
+        if any(kw_lower in entry_text_corpus for kw_lower in search_keywords_lower):
+            matching_entries.append(entry)
+            continue # Go to next entry if already matched
+
+        # Optionally, extend search to possible_causes and suggested_solutions
+        # For now, keeping it simpler by focusing on keywords and summary.
+
+    if tool_context:
+        if 'kb_queries' not in tool_context.state:
+            tool_context.state['kb_queries'] = []
+        tool_context.state['kb_queries'].append({
+            "keywords_used": search_keywords,
+            "matches_found": len(matching_entries)
+        })
+
+    if not matching_entries:
+        return {"message": "No matching entries found in the knowledge base for the given keywords.", "results": []}
+
+    return {"results": matching_entries}
+
+def detect_simple_anomalies(
+    data_points: list[float | int],
+    window_size: int = 5, # For rolling calculations
+    std_dev_threshold: float = 2.0, # Number of std devs to consider an anomaly
+    absolute_upper_threshold: float = None, # Optional absolute upper limit
+    absolute_lower_threshold: float = None, # Optional absolute lower limit
+    tool_context: ToolContext = None
+) -> dict:
+    """Detects simple anomalies in a list of numerical data points.
+
+    Anomalies can be identified based on deviation from a rolling mean
+    by a certain number of standard deviations, or by exceeding absolute thresholds.
+
+    Args:
+        data_points: A list of numerical data points (time-series).
+        window_size: The window size for calculating rolling mean and std dev. Min 2.
+        std_dev_threshold: Number of standard deviations from the rolling mean
+                           to classify a point as an anomaly.
+        absolute_upper_threshold: Optional. If a point exceeds this, it's an anomaly.
+        absolute_lower_threshold: Optional. If a point is below this, it's an anomaly.
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing:
+        - anomalies: A list of dicts, each with 'index', 'value', and 'reason' for the anomaly.
+        - parameters_used: A dict of the parameters used for detection.
+        Or an error message if inputs are invalid.
+    """
+    if not isinstance(data_points, list) or len(data_points) < window_size :
+        return {"error": f"Insufficient data points. At least {window_size} data points are required for window size {window_size}."}
+    if not all(isinstance(x, (int, float)) for x in data_points):
+        return {"error": "All data points must be numerical (int or float)."}
+    if not isinstance(window_size, int) or window_size < 2:
+        return {"error": "Window size must be an integer of at least 2."}
+    if not isinstance(std_dev_threshold, (int,float)) or std_dev_threshold <= 0:
+        return {"error": "Standard deviation threshold must be a positive number."}
+    if absolute_upper_threshold is not None and not isinstance(absolute_upper_threshold, (int,float)):
+        return {"error": "Absolute upper threshold must be a number if provided."}
+    if absolute_lower_threshold is not None and not isinstance(absolute_lower_threshold, (int,float)):
+         return {"error": "Absolute lower threshold must be a number if provided."}
+    if absolute_upper_threshold is not None and absolute_lower_threshold is not None and absolute_lower_threshold >= absolute_upper_threshold:
+        return {"error": "Absolute lower threshold cannot be greater than or equal to absolute upper threshold."}
+
+
+    anomalies = []
+    series = pd.Series(data_points)
+    rolling_mean = series.rolling(window=window_size, center=True).mean()
+    rolling_std = series.rolling(window=window_size, center=True).std()
+
+    # Pad rolling calculations for edges if center=True is used, or handle NaN
+    # For simplicity, we'll start comparing where rolling values are available
+    # More sophisticated padding (e.g. expanding window) could be used.
+
+    for i in range(len(data_points)):
+        point_value = data_points[i]
+        reason = []
+
+        # Check absolute thresholds first
+        if absolute_upper_threshold is not None and point_value > absolute_upper_threshold:
+            reason.append(f"Exceeded absolute upper threshold ({absolute_upper_threshold})")
+        if absolute_lower_threshold is not None and point_value < absolute_lower_threshold:
+            reason.append(f"Below absolute lower threshold ({absolute_lower_threshold})")
+
+        # Check rolling std dev based anomaly if no absolute breach or to add info
+        # Only check if rolling_mean and rolling_std are not NaN for that point
+        if pd.notna(rolling_mean[i]) and pd.notna(rolling_std[i]):
+            upper_bound = rolling_mean[i] + (std_dev_threshold * rolling_std[i])
+            lower_bound = rolling_mean[i] - (std_dev_threshold * rolling_std[i])
+
+            if point_value > upper_bound:
+                reason.append(f"Exceeded rolling upper bound ({upper_bound:.2f}, mean: {rolling_mean[i]:.2f}, std: {rolling_std[i]:.2f})")
+            elif point_value < lower_bound:
+                reason.append(f"Below rolling lower bound ({lower_bound:.2f}, mean: {rolling_mean[i]:.2f}, std: {rolling_std[i]:.2f})")
+
+        if reason:
+            anomalies.append({"index": i, "value": float(point_value), "reason": "; ".join(reason)})
+
+    parameters_used = {
+        "window_size": window_size,
+        "std_dev_threshold": std_dev_threshold,
+        "absolute_upper_threshold": absolute_upper_threshold,
+        "absolute_lower_threshold": absolute_lower_threshold,
+        "data_points_analyzed": len(data_points)
+    }
+
+    if tool_context:
+        if 'anomaly_detection_runs' not in tool_context.state:
+            tool_context.state['anomaly_detection_runs'] = []
+        tool_context.state['anomaly_detection_runs'].append({
+            "parameters": parameters_used,
+            "anomalies_found_count": len(anomalies)
+        })
+
+    return {"anomalies": anomalies, "parameters_used": parameters_used}
+
+from collections import defaultdict # Add this import
+
+def identify_simple_failure_patterns(
+    event_data: list[dict], # Expected keys: 'timestamp', 'event_type', 'item_id' (e.g., machine_id)
+    maintenance_completed_event_type: str = "maintenance_completed",
+    tool_context: ToolContext = None
+) -> dict:
+    """Identifies simple failure patterns from a list of historical events.
+
+    Example patterns:
+    - Time between 'maintenance_completed' and subsequent failures for the same item.
+    - Frequency of different failure types.
+
+    Args:
+        event_data: A list of dictionaries, each representing an event.
+                    Required keys: 'timestamp' (ISO format string or datetime object),
+                                   'event_type' (string, e.g., "failure_A", "maintenance_completed"),
+                                   'item_id' (string, e.g., machine ID or component ID).
+        maintenance_completed_event_type: The event_type string that signifies completion of maintenance.
+        tool_context: The context for the tool.
+
+    Returns:
+        A dictionary containing identified patterns or an error message.
+    """
+    if not isinstance(event_data, list) or not event_data:
+        return {"error": "Event data list cannot be empty."}
+
+    required_keys = ['timestamp', 'event_type', 'item_id']
+    for i, event in enumerate(event_data):
+        if not isinstance(event, dict) or not all(key in event for key in required_keys):
+            return {"error": f"Event at index {i} is missing one or more required keys: {required_keys}."}
+        try:
+            # Ensure timestamp is a pandas Timestamp for easier manipulation
+            event['timestamp'] = pd.to_datetime(event['timestamp'])
+        except Exception as e:
+            return {"error": f"Invalid timestamp format for event at index {i}: {event.get('timestamp')}. Error: {e}"}
+
+    # Sort events by item_id and then by timestamp
+    try:
+        event_data.sort(key=lambda x: (x['item_id'], x['timestamp']))
+    except Exception as e: # Catch potential issues if item_id is not comparable, though strings should be.
+        return {"error": f"Could not sort event data. Ensure item_ids are consistent. Error: {e}"}
+
+    patterns = {
+        "failure_type_counts": defaultdict(int),
+        "time_to_failure_after_maintenance": defaultdict(list), # item_id -> list of timedelta in hours
+        "notes": []
+    }
+
+    last_maintenance_time = {} # item_id -> timestamp of last maintenance
+
+    for event in event_data:
+        item_id = event['item_id']
+        event_type = event['event_type']
+        timestamp = event['timestamp']
+
+        # Count all event types that are not maintenance_completed as potential failures for frequency
+        if event_type != maintenance_completed_event_type:
+            patterns["failure_type_counts"][f"{item_id}::{event_type}"] += 1
+
+
+        if event_type == maintenance_completed_event_type:
+            last_maintenance_time[item_id] = timestamp
+        # If it's a failure event and there was a prior maintenance for this item
+        elif event_type != maintenance_completed_event_type and item_id in last_maintenance_time:
+            time_since_maintenance = timestamp - last_maintenance_time[item_id]
+            # Convert timedelta to hours for easier interpretation
+            hours_since_maintenance = time_since_maintenance.total_seconds() / 3600.0
+            patterns["time_to_failure_after_maintenance"][f"{item_id}::{event_type}"].append(round(hours_since_maintenance, 2))
+            # Remove last_maintenance_time for this item_id to ensure next failure is after next maintenance
+            # Or, if we want to see all failures after last maintenance, then don't delete.
+            # For "time *between* maintenance and next failure", deleting is appropriate.
+            del last_maintenance_time[item_id]
+
+
+    # Consolidate results for output
+    # Calculate average time to failure if data exists
+    avg_ttf_after_maintenance = {}
+    for key, times in patterns["time_to_failure_after_maintenance"].items():
+        if times:
+            avg_ttf_after_maintenance[key] = {"average_hours": round(np.mean(times),2), "count": len(times), "all_hours": times}
+
+    # Make failure_type_counts a regular dict for JSON serialization
+    patterns["failure_type_counts"] = dict(patterns["failure_type_counts"])
+    patterns["time_to_failure_after_maintenance_summary"] = avg_ttf_after_maintenance
+    del patterns["time_to_failure_after_maintenance"] # Remove raw list to simplify output
+
+    if not patterns["failure_type_counts"] and not patterns["time_to_failure_after_maintenance_summary"]:
+        patterns["notes"].append("No significant failure patterns identified with the provided data and logic.")
+        patterns["notes"].append(f"Ensure '{maintenance_completed_event_type}' event type is correctly specified and present for TtF calculations.")
+
+
+    if tool_context:
+        if 'failure_pattern_runs' not in tool_context.state:
+            tool_context.state['failure_pattern_runs'] = []
+        tool_context.state['failure_pattern_runs'].append({
+            "maintenance_event_type_used": maintenance_completed_event_type,
+            "events_analyzed_count": len(event_data),
+            "failure_types_found": len(patterns["failure_type_counts"]),
+            "ttf_calculations_count": len(patterns["time_to_failure_after_maintenance_summary"])
+        })
+
+    return {"identified_patterns": patterns}