From 28306f1dad4237528f13e8c752306fcf82462b92 Mon Sep 17 00:00:00 2001
From: nidhiii128
Date: Sat, 25 Apr 2026 18:32:04 +0000
Subject: [PATCH] Optimize Copilot: add streaming, context tuning, and user-configurable rules via config.json

---
 requirements.txt             |  10 +-
 src/chatbot/chatbot_core.py  | 589 ++++++++---------------------
 src/chatbot/config.json      |   4 +
 src/chatbot/image_handler.py | 249 +--------------
 src/chatbot/ollama_runner.py | 106 +++----
 src/chatbot/stt_handler.py   |  64 ++--
 6 files changed, 222 insertions(+), 800 deletions(-)
 create mode 100644 src/chatbot/config.json

diff --git a/requirements.txt b/requirements.txt
index a408dbcb0..6ffb3472c 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ cycler==0.12.1
 entrypoints==0.3
 flake8==3.7.7
 fonttools==4.57.0
-hdlparse==1.0.4
+hdlparse @ git+https://github.com/nehadhumal-dev/hdlparse.git
 importlib_resources==6.4.5
 kiwisolver==1.4.7
 matplotlib==3.7.5
@@ -30,14 +30,16 @@ sentence-transformers
 psutil
 protobuf<5
 regex
-opencv-python
+opencv-python-headless==4.6.0.66
 paddleocr==2.7.0.3
-paddlepaddle==2.5.2
+paddlepaddle==2.6.2
 vosk
 sounddevice
 requests
 tqdm
 pyyaml
-setuptools==65.5.0
 wheel
 PyQtWebEngine
+shapely
+pyclipper
+scikit-image

diff --git a/src/chatbot/chatbot_core.py b/src/chatbot/chatbot_core.py
index 0205e7d4c..ece68c1be 100644
--- a/src/chatbot/chatbot_core.py
+++ b/src/chatbot/chatbot_core.py
@@ -3,6 +3,7 @@
 import os
 import re
 import json
+import numpy as np
 from typing import Dict, Any, Tuple, List
 from sklearn.metrics.pairwise import cosine_similarity
 from .error_solutions import get_error_solution
@@ -11,6 +12,22 @@
 from .knowledge_base import search_knowledge
 from .ollama_runner import get_embedding
 
+# === LOAD CONFIGURATION ===
+# This lets the user change rules without touching Python code.
+# Expected keys: "system_rules" (str, prepended to every prompt) and
+# "memory_history_limit" (int, number of turns the wrapper keeps).
+def load_config():
+    config_path = os.path.join(os.path.dirname(__file__), 'config.json')
+    try:
+        with open(config_path, 'r') as file:
+            return json.load(file)
+    except FileNotFoundError:
+        # Fallback defaults if config.json is missing or was deleted
+        return {"system_rules": "Be concise.", "memory_history_limit": 5}
+
+USER_CONFIG = load_config()
+STRICT_CONCISE_RULES = USER_CONFIG.get("system_rules", "Be concise.")
+
 # ==================== ESIM WORKFLOW KNOWLEDGE ====================
 
 ESIM_WORKFLOWS = """
 
@@ -42,18 +60,13 @@
 Diode: .model 1N4148 D(Is=1e-14 Rs=1)
 Zener: .model DZ5V1 D(Is=1e-14 Bv=5.1 Ibv=5m)
 5. Save (Ctrl+S) → Run Simulation
-NOTE: This gets overwritten when you "Convert KiCad to NgSpice" again
 
 Method 2 - Component Properties (PERMANENT):
 1. Open KiCad schematic (double-click .proj in Project Explorer)
-2. Find the component that uses the missing model (e.g., transistor Q1)
-3. Right-click on it → Properties (or press E when hovering over it)
-4. Click "Edit Spice Model" button in the Properties dialog
-5. In the Spice Model field, paste the model definition:
-   .model Q2N3904 NPN(Bf=200 Is=1e-14 Vaf=100)
-6. Click OK → Save schematic (Ctrl+S)
-7. eSim: Simulation → Convert KiCad to NgSpice
-NOTE: This permanently associates the model with the component
+2. Right-click on component → Properties (or press E)
+3. Click "Edit Spice Model" button
+4. In the Spice Model field, paste the model definition.
+5. Click OK → Save schematic (Ctrl+S) → Convert KiCad to NgSpice
 
 Method 3 - Include Library:
 1. Spice Editor → Open .cir.out
@@ -62,21 +75,14 @@
 
 HOW TO FIX MISSING SUBCIRCUITS:
 1. Spice Editor → Open .cir.out
-2. Add before .end:
-   .subckt OPAMP_IDEAL inp inn out vdd vss
-   Rin inp inn 1Meg
-   E1 out 0 inp inn 100000
-   Rout out 0 75
-   .ends
-3. Save → Simulate
+2. Add .subckt ... .ends block before .end.
 OR: Replace with eSim library opamp (uA741, LM324)
 
 HOW TO FIX FLOATING NODES:
-1. Open KiCad schematic
-2. Find the unconnected pin/node
-3. Either connect it with wire (press W) or delete component
-4. For sense points: Add Rleak node 0 1Meg
-5. Save → Convert to NgSpice
+1. Open KiCad schematic.
+2. Connect pin with wire (W) or delete component.
+3. For sense points: Add Rleak node 0 1Meg.
+4. Save → Convert to NgSpice.
 
 KICAD SHORTCUTS:
 A = Add component
@@ -92,23 +98,15 @@
 Run Simulation: Simulation → Simulate
 Spice Editor: Tools → Spice Editor (Ctrl+E)
 Model Editor: Tools → Model Editor
-Open KiCad: Double-click .proj file in Project Explorer
-
-FILE LOCATIONS:
-Project folder: ~/eSim-Workspace//
-Netlist: .cir.out
-Schematic: .proj
 """
 
 LAST_BOT_REPLY: str = ""
 LAST_IMAGE_CONTEXT: Dict[str, Any] = {}
 LAST_NETLIST_ISSUES: Dict[str, Any] = {}
 
-
 def get_history() -> Dict[str, Any]:
     return LAST_IMAGE_CONTEXT
 
-
 def clear_history() -> None:
     global LAST_IMAGE_CONTEXT, LAST_NETLIST_ISSUES
     LAST_IMAGE_CONTEXT = {}
@@ -117,41 +115,25 @@ def clear_history() -> None:
 
 # ==================== ESIM ERROR LOGIC ====================
 
 def answer_with_rag_fallback(user_input: str) -> str:
-    """
-    Try to answer using eSim manuals (RAG).
-    If nothing relevant is found, fallback to Ollama.
-    """
+    # Try eSim manuals (RAG) first; fall back to a plain LLM answer.
     rag_context = search_knowledge(user_input)
 
     if rag_context.strip():
         prompt = f"""
-You are eSim Copilot.
-
-Use ONLY the following official eSim documentation
-to answer the question. Do NOT invent information.
-
+You are eSim Copilot. {STRICT_CONCISE_RULES}
+Use ONLY this official eSim documentation:
 {rag_context}
 
-Question:
-{user_input}
-
-Answer clearly and step-by-step.
-"""
+Question: {user_input}
+Answer:"""
         return run_ollama(prompt)
 
-    # Fallback: general LLM answer
     prompt = f"""
-Answer the following question clearly:
-
-{user_input}
+You are eSim Copilot. {STRICT_CONCISE_RULES}
+Answer clearly: {user_input}
 """
     return run_ollama(prompt)
 
 def detect_esim_errors(image_context: Dict[str, Any], user_input: str) -> str:
-    """
-    Display errors from hybrid analysis with SMART FILTERING to remove hallucinations.
- """ if not image_context: return "" @@ -159,7 +141,6 @@ def detect_esim_errors(image_context: Dict[str, Any], user_input: str) -> str: raw_errors = analysis.get("design_errors", []) warnings = analysis.get("design_warnings", []) - # === SMART FILTERING === components_str = str(image_context.get("components", [])).lower() summary_str = str(image_context.get("vision_summary", "")).lower() context_text = components_str + summary_str @@ -167,21 +148,13 @@ def detect_esim_errors(image_context: Dict[str, Any], user_input: str) -> str: filtered_errors: List[str] = [] for err in raw_errors: err_lower = err.lower() - - if "ground" in err_lower and ( - "gnd" in context_text or "ground" in context_text or " 0 " in context_text - ): + if "ground" in err_lower and ("gnd" in context_text or "ground" in context_text or " 0 " in context_text): continue - - if "floating" in err_lower and ( - "vin" in err_lower or "vout" in err_lower or "label" in err_lower - ): + if "floating" in err_lower and ("vin" in err_lower or "vout" in err_lower or "label" in err_lower): continue - filtered_errors.append(err) output: List[str] = [] - if filtered_errors: output.append("**🚨 CRITICAL ERRORS:**") for err in filtered_errors: @@ -194,509 +167,201 @@ def detect_esim_errors(image_context: Dict[str, Any], user_input: str) -> str: text = user_input.lower() if "singular matrix" in text: - output.append("\n**🔧 FIX:** Add 1GΩ resistors to all nodes → GND") + output.append("\n**🔧 FIX:** Add 1GΩ resistors to all nodes → **GND**") if "timestep" in text: output.append("\n**🔧 FIX:** Reduce timestep or add 0.1Ω series R") - if not output: - return "**✅ No errors detected**" - - return "\n".join(output) - + return "\n".join(output) if output else "**✅ No errors detected**" # ==================== UTILITIES ==================== VALID_EXTS = (".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".gif") - def _is_image_file(path: str) -> bool: - if not path: - return False + if not path: return False clean = re.sub(r"\[Image:\s*(.*?)\]", r"\1", path).strip() return clean.lower().endswith(VALID_EXTS) - def _is_image_query(user_input: str) -> bool: - if not user_input: - return False - if "[Image:" in user_input: - return True + if not user_input: return False + if "[Image:" in user_input: return True if "|" in user_input: parts = user_input.split("|", 1) - if len(parts) == 2 and _is_image_file(parts[1]): - return True + if len(parts) == 2 and _is_image_file(parts[1]): return True return _is_image_file(user_input) - def _parse_image_query(user_input: str) -> Tuple[str, str]: user_input = user_input.strip() - match = re.search(r"\[Image:\s*(.*?)\]", user_input) - if match: - return user_input.replace(match.group(0), "").strip(), match.group(1).strip() - + if match: return user_input.replace(match.group(0), "").strip(), match.group(1).strip() if "|" in user_input: q, p = [x.strip() for x in user_input.split("|", 1)] - if _is_image_file(p): - return q, p - if _is_image_file(q): - return p, q - - if _is_image_file(user_input): - return "", user_input - + if _is_image_file(p): return q, p + if _is_image_file(q): return p, q + if _is_image_file(user_input): return "", user_input return user_input, "" - def clean_response_raw(raw: str) -> str: cleaned = re.sub(r"<\|.*?\|>", "", raw.strip()) cleaned = re.sub(r"\[Context:.*?\]", "", cleaned, flags=re.DOTALL) cleaned = re.sub(r"\[FACT .*?\]", "", cleaned, flags=re.MULTILINE) - cleaned = re.sub( - r"\[ESIM_NETLIST_START\].*?\[ESIM_NETLIST_END\]", "", cleaned, flags=re.DOTALL - ) + cleaned = 
re.sub(r"\[ESIM_NETLIST_START\].*?\[ESIM_NETLIST_END\]", "", cleaned, flags=re.DOTALL) return cleaned.strip() - def _history_to_text(history: List[Dict[str, str]] | None, max_turns: int = 6) -> str: - """Convert history to readable text with MORE context (6 turns).""" - if not history: - return "" + if not history: return "" recent = history[-max_turns:] lines: List[str] = [] for i, t in enumerate(recent, 1): u = (t.get("user") or "").strip() b = (t.get("bot") or "").strip() - if u: - lines.append(f"[Turn {i}] User: {u}") - if b: - if len(b) > 300: - b = b[:300] + "..." - lines.append(f"[Turn {i}] Assistant: {b}") + if u: lines.append(f"[Turn {i}] User: {u}") + if b: lines.append(f"[Turn {i}] Assistant: {b[:300]}...") return "\n".join(lines).strip() - def _is_follow_up_question(user_input: str, history: List[Dict[str, str]] | None) -> bool: - """ - Detect if this is a follow-up question that needs history context. - Returns True if question lacks standalone context. - """ - if not history: - return False - + if not history: return False user_lower = user_input.lower().strip() words = user_lower.split() - - - if len(words) <= 7: - return True - + if len(words) <= 7: return True pronouns = ["it", "that", "this", "those", "these", "they", "them"] - if any(pronoun in words for pronoun in pronouns): - return True - - continuations = [ - "what next", "next step", "after that", "and then", "then what", - "what about", "how about", "what if", "but why", "why not" - ] - if any(phrase in user_lower for phrase in continuations): - return True - - question_starters = ["why", "how", "where", "when", "what", "which"] - if words[0] in question_starters and len(words) <= 5: - return True - + if any(pronoun in words for pronoun in pronouns): return True + continuation_phrases = ["what next", "next step", "after that", "and then"] + if any(phrase in user_lower for phrase in continuation_phrases): return True return False -import numpy as np - -def is_semantic_topic_switch( - user_input: str, - history: list, - threshold: float = 0.30 -) -> bool: - """ - Detect topic switch using embedding similarity. - Returns True if new question is unrelated to previous assistant reply. 
- """ - - if not history: - return False - - last_assistant_msg = None - for item in reversed(history): - if item.get("role") == "assistant": - last_assistant_msg = item.get("content") - break - - if not last_assistant_msg: - return False +def is_semantic_topic_switch(user_input: str, history: list, threshold: float = 0.30) -> bool: + if not history: return False + last_assistant_msg = next((item.get("content") for item in reversed(history) if item.get("role") == "assistant"), None) + if not last_assistant_msg: return False try: - emb_new = get_embedding(user_input) - emb_prev = get_embedding(last_assistant_msg) - - if not emb_new or not emb_prev: - return False - - emb_new = np.array(emb_new) - emb_prev = np.array(emb_prev) - - similarity = np.dot(emb_new, emb_prev) / ( - np.linalg.norm(emb_new) * np.linalg.norm(emb_prev) - ) - - print(f"[COPILOT] Semantic similarity = {similarity:.3f}") - + emb_new = np.array(get_embedding(user_input)) + emb_prev = np.array(get_embedding(last_assistant_msg)) + similarity = np.dot(emb_new, emb_prev) / (np.linalg.norm(emb_new) * np.linalg.norm(emb_prev)) return similarity < threshold - - except Exception as e: - print(f"[COPILOT] Topic switch check failed: {e}") - return False + except: return False # ==================== QUESTION CLASSIFICATION ==================== -def classify_question_type(user_input: str, has_image_context: bool, - history: List[Dict[str, str]] | None = None) -> str: - """ - Classify question type for smart routing. - Returns: 'greeting', 'simple', 'esim', 'image_query', 'follow_up_image', - 'follow_up', 'netlist' - """ +def classify_question_type(user_input: str, has_image_context: bool, history: List[Dict[str, str]] | None = None) -> str: user_lower = user_input.lower() - - if "[ESIM_NETLIST_START]" in user_input: - return "netlist" - - if _is_image_query(user_input): - return "image_query" - - if has_image_context: - follow_phrases = [ - "this circuit", "that circuit", "in this schematic", - "components here", "what is the value", "how many", - "the circuit", "this schematic","what","can","how" - ] - if any(p in user_lower for p in follow_phrases): - return "follow_up_image" - - greetings = ["hello", "hi", "hey", "howdy", "greetings"] - user_words = user_lower.strip().split() - if len(user_words) <= 3 and any(g in user_words for g in greetings): - return "greeting" + if "[ESIM_NETLIST_START]" in user_input: return "netlist" + if _is_image_query(user_input): return "image_query" + if has_image_context and any(p in user_lower for p in ["this circuit", "that circuit", "schematic"]): return "follow_up_image" + + greetings = ["hello", "hi", "hey"] + if len(user_lower.split()) <= 2 and any(g in user_lower for g in greetings): return "greeting" is_followup = _is_follow_up_question(user_input, history) - if is_semantic_topic_switch(user_input, history): - print("[COPILOT] Topic switch detected (semantic)") - is_followup = False - - if not is_followup: - history.clear() - LAST_IMAGE_CONTEXT = None - - esim_keywords = [ - "esim", "kicad", "ngspice", "spice", "simulation", "netlist", - "schematic", "convert", "gnd", "ground", ".model", ".subckt", - "singular matrix", "floating", "timestep", "convergence" - ] - if any(keyword in user_lower for keyword in esim_keywords): - return "esim" - - error_keywords = [ - "error", "fix", "problem", "issue", "warning", "missing", - "not working", "failed", "crash" - ] - if any(keyword in user_lower for keyword in error_keywords): - return "esim" - - return "simple" - + if 
+    if is_semantic_topic_switch(user_input, history): is_followup = False
+
+    esim_keywords = ["esim", "kicad", "ngspice", "spice", "netlist", "convert", "gnd"]
+    if any(kw in user_lower for kw in esim_keywords): return "esim"
+
+    return "follow_up" if is_followup else "simple"
 
 # ==================== HANDLERS ====================
 
 def handle_greeting() -> str:
-    return (
-        "Hello! I'm eSim Copilot. I can help you with:\n"
-        "• Circuit analysis and netlist debugging\n"
-        "• Electronics concepts and SPICE simulation\n"
-        "• Component selection and circuit design\n\n"
-        "What would you like to know?"
-    )
+    return "Hello! I'm **eSim Copilot**. I can help with circuit analysis, **KiCad** workflows, and **NgSpice** debugging. What's your query?"
 
 def handle_simple_question(user_input: str) -> str:
-    """
-    Handles standalone questions.
-    Uses RAG first, then falls back to Ollama.
-    keep in mind that your a copilot of eSim an EDA tool
-    """
     return answer_with_rag_fallback(user_input)
 
-
-def handle_follow_up(user_input: str,
-                     image_context: Dict[str, Any],
-                     history: List[Dict[str, str]] | None = None) -> str:
-    """
-    Handle follow-up questions that depend on conversation history.
-    This handler PRIORITIZES history over RAG.
-    """
-    history_text = _history_to_text(history, max_turns=6)
-
-    if not history_text:
-        return "I need more context. Could you provide more details about your question?"
-
-    rag_context = ""
-    user_lower = user_input.lower()
-    if any(kw in user_lower for kw in ["model", "spice", "ground", "error", "netlist"]):
-        rag_context = search_knowledge(user_input, n_results=2)
+def handle_follow_up(user_input: str, image_context: Dict[str, Any], history: List[Dict[str, str]] | None = None) -> str:
+    history_text = _history_to_text(history)
+    rag_context = search_knowledge(user_input, n_results=2) if any(kw in user_input.lower() for kw in ["model", "spice", "error"]) else ""
 
     prompt = (
-        "You are an eSim expert assistant. The user is asking a follow-up question.\n\n"
-        "=== CONVERSATION HISTORY (MOST IMPORTANT) ===\n"
-        f"{history_text}\n"
-        "=============================================\n\n"
-        f"=== CURRENT USER QUESTION (FOLLOW-UP) ===\n{user_input}\n\n"
-    )
-
-    if rag_context:
-        prompt += f"=== REFERENCE MANUAL (if needed) ===\n{rag_context}\n\n"
-
-    if image_context:
-        prompt += (
-            f"=== CURRENT CIRCUIT CONTEXT ===\n"
-            f"Type: {image_context.get('circuit_analysis', {}).get('circuit_type', 'Unknown')}\n"
-            f"Components: {image_context.get('components', [])}\n\n"
-        )
-
-    prompt += (
-        "CRITICAL INSTRUCTIONS:\n"
-        "1. The user's question refers to the CONVERSATION HISTORY above.\n"
-        "2. Identify what 'it', 'that', 'this', or 'next step' refers to by reading the history.\n"
-        "3. Answer based on the conversation context first, then use manual/workflows if needed.\n"
-        "4. If the user asks 'why', explain based on what was just discussed.\n"
-        "5. If the user asks 'what next' or 'next step', continue from the last instruction.\n"
-        "6. Be specific and reference what you're talking about (e.g., 'In the previous step, I mentioned...').\n"
-        "7. Keep answer concise (max 150 words).\n\n"
-        "Answer:"
+        f"You are eSim Copilot. {STRICT_CONCISE_RULES}\n"
+        f"=== HISTORY ===\n{history_text}\n"
+        # Include manual snippets only when the RAG lookup found something
+        + (f"=== MANUAL ===\n{rag_context}\n" if rag_context else "")
+        + f"=== QUESTION ===\n{user_input}\n"
+        "INSTRUCTIONS: Use history to resolve pronouns like 'it' or 'that'.\nAnswer:"
     )
-
-    return run_ollama(prompt, mode="default")
-
-
-def handle_esim_question(user_input: str,
-                         image_context: Dict[str, Any],
-                         history: List[Dict[str, str]] | None = None) -> str:
-    """
-    Handle eSim-specific questions with RAG + conversation history.
-    """
-    user_lower = user_input.lower()
+    return run_ollama(prompt)
 
+def handle_esim_question(user_input: str, image_context: Dict[str, Any], history: List[Dict[str, str]] | None = None) -> str:
     sol = get_error_solution(user_input)
     if sol and sol.get("description") != "General schematic error":
-        fixes = "\n".join(f"- {f}" for f in sol.get("fixes", []))
-        cmd = sol.get("eSim_command", "")
-        answer = (
-            f"**Detected issue:** {sol['description']}\n"
-            f"**Severity:** {sol.get('severity', 'unknown')}\n\n"
-            f"**Recommended fixes:**\n{fixes}\n\n"
-        )
-        if cmd:
-            answer += f"**eSim action:** {cmd}\n"
-        return answer_with_rag_fallback(user_input)
-
-    history_text = _history_to_text(history, max_turns=6)
+        return f"**Issue:** {sol['description']}\n**Fixes:**\n" + "\n".join(f"- {f}" for f in sol.get("fixes", []))
 
     rag_context = search_knowledge(user_input, n_results=5)
-
-    image_context_str = ""
-    if image_context:
-        image_context_str = (
-            f"\n=== CURRENT CIRCUIT ===\n"
-            f"Type: {image_context.get('circuit_analysis', {}).get('circuit_type', 'Unknown')}\n"
-            f"Components: {image_context.get('components', [])}\n"
-            f"Values: {image_context.get('values', {})}\n"
-        )
-
     prompt = (
-        "You are an eSim expert. Answer using the workflows, manual, and conversation history.\n\n"
-        f"{ESIM_WORKFLOWS}\n\n"
-        f"=== MANUAL CONTEXT ===\n{rag_context}\n"
-        f"{image_context_str}\n"
+        f"You are eSim Copilot. {STRICT_CONCISE_RULES}\n"
+        f"{ESIM_WORKFLOWS}\n"
+        f"=== MANUAL ===\n{rag_context}\n"
+        f"USER QUESTION: {user_input}\n"
+        "INSTRUCTIONS: Use shortcuts (A, W) and menu paths. If not in manual, say 'Info not in docs.'\nAnswer:"
     )
-
-    if history_text:
-        prompt += f"=== CONVERSATION HISTORY ===\n{history_text}\n\n"
-
-    prompt += (
-        f"USER QUESTION: {user_input}\n\n"
-        "INSTRUCTIONS:\n"
-        "1. If the question refers to previous conversation, use the history.\n"
-        "2. Use exact menu paths and shortcuts from the workflows when relevant.\n"
-        "3. If the manual context does not contain the answer, say you need to check the manual.\n"
-        "4. Keep the answer concise (max 150 words).\n\n"
-        "Answer:"
-    )
-
-    return run_ollama(prompt, mode="default")
+    return run_ollama(prompt)
 
 def handle_image_query(user_input: str) -> Tuple[str, Dict[str, Any]]:
-    """
-    Handle image analysis queries.
- Returns: (response_text, image_context_dict) - """ question, image_path = _parse_image_query(user_input) - image_path = image_path.strip("'\"").strip() - - if not image_path or not os.path.exists(image_path): - return f"Error: Image not found: {image_path}", {} - + image_path = image_path.strip("'\" ").strip() + if not os.path.exists(image_path): return "Error: Image not found.", {} + extraction = analyze_and_extract(image_path) - - if extraction.get("error"): - return f"Analysis Failed: {extraction['error']}", {} - + if extraction.get("error"): return f"Analysis Failed: {extraction['error']}", {} + if not question: error_report = detect_esim_errors(extraction, "") - - summary = ( - "**Image Analysis Complete**\n" - f"**Type:** {extraction.get('circuit_analysis', {}).get('circuit_type', 'Unknown')}\n" - f"**Components:** {extraction.get('component_counts', {})}\n" - f"**Description:** {extraction.get('vision_summary', '')}\n\n" - ) - - if extraction.get("components"): - summary += f"**Detected Components:** {', '.join(extraction['components'])}\n" - - if extraction.get("values"): - summary += "**Component Values:**\n" - for comp, val in extraction["values"].items(): - summary += f" • {comp}: {val}\n" - - summary += ( - "\n**Note:** Vision analysis may have errors. Use 'Analyze netlist' for precise results.\n" - ) - - if "🚨" in error_report or "⚠️" in error_report: - summary += f"\n{error_report}" - + summary = f"**Image Analysis Complete**\n- **Type:** {extraction.get('circuit_analysis', {}).get('circuit_type', 'Unknown')}\n" + summary += f"- **Components:** {extraction.get('component_counts', {})}\n\n{error_report}" return summary, extraction - + return handle_follow_up_image_question(question, extraction), extraction - -def handle_follow_up_image_question(user_input: str, - image_context: Dict[str, Any]) -> str: - """ - Answer questions about an analyzed image using ONLY extracted data. - """ - image_context_str = ( - f"**Circuit Type:** {image_context.get('circuit_analysis', {}).get('circuit_type', 'Unknown')}\n" - f"**Components Detected:** {image_context.get('components', [])}\n" - f"**Component Values:** {image_context.get('values', {})}\n" - f"**Component Counts:** {image_context.get('component_counts', {})}\n" - f"**Description:** {image_context.get('vision_summary', '')}\n" - ) - +def handle_follow_up_image_question(user_input: str, image_context: Dict[str, Any]) -> str: prompt = ( - "You are analyzing a circuit schematic. Answer using ONLY the circuit data below.\n\n" - "=== ANALYZED CIRCUIT DATA ===\n" - f"{image_context_str}\n" - "==============================\n\n" - f"USER QUESTION: {user_input}\n\n" - "STRICT INSTRUCTIONS:\n" - "1. Answer ONLY using the circuit data above - DO NOT use external knowledge.\n" - "2. For counts: use 'Component Counts'.\n" - "3. For values: use 'Component Values'.\n" - "4. For lists: use 'Components Detected'.\n" - "5. If data is missing, answer: 'The image analysis did not detect that information.'\n" - "6. Keep answer brief (2-3 sentences).\n\n" - "Answer:" + f"You are eSim Copilot. {STRICT_CONCISE_RULES}\n" + f"=== CIRCUIT DATA ===\n{image_context}\n" + f"QUESTION: {user_input}\n" + "INSTRUCTIONS: Answer ONLY using provided circuit data. Use brief bullets.\nAnswer:" ) - - return run_ollama(prompt, mode="default") - + return run_ollama(prompt) def handle_netlist_analysis(user_input: str) -> str: - """ - Handle netlist analysis prompts (FACT-based prompt from GUI). 
- """ - raw_reply = run_ollama(user_input) - return clean_response_raw(raw_reply) - + return clean_response_raw(run_ollama(user_input)) # ==================== MAIN ROUTER ==================== -def handle_input(user_input: str, - history: List[Dict[str, str]] | None = None) -> str: - """ - Main router. Accepts optional conversation history for follow-up understanding. - """ +def handle_input(user_input: str, history: List[Dict[str, str]] | None = None) -> str: global LAST_IMAGE_CONTEXT, LAST_BOT_REPLY - user_input = (user_input or "").strip() - if not user_input: - return "Please enter a query." + if not user_input: return "Please enter a query." - if "[ESIM_NETLIST_START]" in user_input: - raw_reply = run_ollama(user_input) - cleaned = clean_response_raw(raw_reply) - LAST_BOT_REPLY = cleaned - return cleaned - - question_type = classify_question_type( - user_input, bool(LAST_IMAGE_CONTEXT), history - ) - print(f"[COPILOT] Question type: {question_type}") + q_type = classify_question_type(user_input, bool(LAST_IMAGE_CONTEXT), history) + print(f"[COPILOT] Type: {q_type}") try: - if question_type == "netlist": - response = handle_netlist_analysis(user_input) - - elif question_type == "greeting": - response = handle_greeting() - - elif question_type == "image_query": - response, LAST_IMAGE_CONTEXT = handle_image_query(user_input) - - elif question_type == "follow_up_image": - response = handle_follow_up_image_question(user_input, LAST_IMAGE_CONTEXT) - - elif question_type == "simple": - response = handle_simple_question(user_input) - - elif question_type == "follow_up" and history: - response = handle_follow_up(user_input, LAST_IMAGE_CONTEXT, history) - else: - response = handle_simple_question(user_input) + if q_type == "netlist": response = handle_netlist_analysis(user_input) + elif q_type == "greeting": response = handle_greeting() + elif q_type == "image_query": response, LAST_IMAGE_CONTEXT = handle_image_query(user_input) + elif q_type == "follow_up_image": response = handle_follow_up_image_question(user_input, LAST_IMAGE_CONTEXT) + elif q_type == "esim": response = handle_esim_question(user_input, LAST_IMAGE_CONTEXT, history) + elif q_type == "follow_up": response = handle_follow_up(user_input, LAST_IMAGE_CONTEXT, history) + else: response = handle_simple_question(user_input) LAST_BOT_REPLY = response return response - except Exception as e: - error_msg = f"Error processing question: {str(e)}" - print(f"[COPILOT ERROR] {error_msg}") - return error_msg - - -# ==================== WRAPPER ==================== + return f"Error: {str(e)}" class ESIMCopilotWrapper: def __init__(self) -> None: self.history: List[Dict[str, str]] = [] + # Pull the memory limit from the config file + self.max_history = USER_CONFIG.get("memory_history_limit", 5) def handle_input(self, user_input: str) -> str: reply = handle_input(user_input, self.history) self.history.append({"user": user_input, "bot": reply}) - if len(self.history) > 12: - self.history = self.history[-12:] + + # Use the config variable instead of a hardcoded number + if len(self.history) > self.max_history: + self.history = self.history[-self.max_history:] return reply - def analyze_schematic(self, query: str) -> str: - return self.handle_input(query) - _GLOBAL_WRAPPER = ESIMCopilotWrapper() - - def analyze_schematic(query: str) -> str: - return _GLOBAL_WRAPPER.handle_input(query) + return _GLOBAL_WRAPPER.handle_input(query) \ No newline at end of file diff --git a/src/chatbot/config.json b/src/chatbot/config.json new file mode 100644 index 
000000000..ab572ceb5
--- /dev/null
+++ b/src/chatbot/config.json
@@ -0,0 +1,4 @@
+{
+  "system_rules": "Act like a helpful teacher. Give long, detailed explanations with theory. DO NOT use bullet points.",
+  "memory_history_limit": 10
+}
\ No newline at end of file
diff --git a/src/chatbot/image_handler.py b/src/chatbot/image_handler.py
index 3938ec307..ebd8f9fc9 100644
--- a/src/chatbot/image_handler.py
+++ b/src/chatbot/image_handler.py
@@ -1,246 +1,36 @@
 import os
-import json
-import base64
-import io
-import time
-from typing import Dict, Any
-from PIL import Image
-MAX_IMAGE_BYTES = int(0.5*1024 * 1024)
-from .ollama_runner import run_ollama_vision
 
-# === IMPORT PADDLE OCR ===
 try:
+    # Keep the import inside try/except so a missing or broken PaddleOCR
+    # install degrades to HAS_PADDLE = False instead of crashing at import.
     from paddleocr import PaddleOCR
-    import logging
-    logging.getLogger("ppocr").setLevel(logging.ERROR)
-
-    # CRITICAL FIX: Disabled MKLDNN and Angle Classification to prevent VM Crashes
-    ocr_engine = PaddleOCR(
-        use_angle_cls=False,  # <--- MUST BE FALSE TO STOP SIGABRT
-        lang='en',
-        use_gpu=False,  # Force CPU
-        enable_mkldnn=False,  # <--- MUST BE FALSE FOR PADDLE v3 COMPATIBILITY
-        use_mp=False,  # Disable multiprocessing
-        show_log=False
-    )
+    # Minimal settings: the 'show_log' and 'use_gpu' kwargs were removed to
+    # stop init errors with the pinned Paddle stack
+    ocr_engine = PaddleOCR(lang='en')
     HAS_PADDLE = True
    print("[INIT] PaddleOCR initialized (Safe Mode).")
 except Exception as e:
     HAS_PADDLE = False
     print(f"[INIT] PaddleOCR init failed: {e}")
 
-def encode_image(image_path: str) -> str:
-    """Convert image to base64 string."""
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode("utf-8")
-
-
-def optimize_image_for_vision(image_path: str) -> bytes:
-    """
-    Resize large images to reduce vision model processing time.
-    Target: Max 1920x1080 while maintaining aspect ratio.
-    """
-    try:
-        img = Image.open(image_path)
-
-        if img.mode not in ('RGB', 'L'):
-            img = img.convert('RGB')
-
-        max_width = 1920
-        max_height = 1080
-
-        if img.width > max_width or img.height > max_height:
-            # Calculate scaling factor
-            scale = min(max_width / img.width, max_height / img.height)
-            new_size = (int(img.width * scale), int(img.height * scale))
-            img = img.resize(new_size, Image.Resampling.LANCZOS)
-            print(f"[IMAGE] Resized from {img.width}x{img.height} to {new_size[0]}x{new_size[1]}")
-
-        # Convert to bytes (PNG format prevents compression artifacts on text)
-        buffer = io.BytesIO()
-        img.save(buffer, format='PNG', optimize=True, quality=85)
-        return buffer.getvalue()
-
-    except Exception as e:
-        print(f"[IMAGE] Optimization failed: {e}, using original")
-        with open(image_path, 'rb') as f:
-            return f.read()
-
-
-def extract_text_with_paddle(image_path: str) -> str:
-    """Extract text using PaddleOCR (Handles rotated/vertical text excellently)."""
-    if not HAS_PADDLE:
-        return ""
+def extract_text_with_paddle(image_path):
+    if not HAS_PADDLE: return ""
     try:
         result = ocr_engine.ocr(image_path, cls=True)
-        detected_texts = []
-        if result and result[0]:
-            for line in result[0]:
-                text = line[1][0]
-                conf = line[1][1]
-
-                if conf > 0.6:
-                    detected_texts.append(text)
-
-        full_text = " ".join(detected_texts)
-        return full_text
-
-    except Exception as e:
-        print(f"[OCR] PaddleOCR Failed: {e}")
+        if not result or not result[0]: return ""
+        detected_texts = [line[1][0] for line in result[0] if line[1][1] > 0.6]
+        return " ".join(detected_texts)
+    except Exception:
         return ""
 
-def analyze_and_extract(image_path: str) -> Dict[str, Any]:
-    """
-    Analyze schematic with image optimization, PaddleOCR text injection, and timeout handling.
- Rejects images larger than 0.5 MB. - """ - if not os.path.exists(image_path): - return { - "error": "Image file not found", - "vision_summary": "", - "component_counts": {}, - "circuit_analysis": { - "circuit_type": "Unknown", - "design_errors": [], - "design_warnings": [] - }, - "components": [], - "values": {} - } - - try: - file_size = os.path.getsize(image_path) - except OSError as e: - return { - "error": f"Could not read image size: {e}", - "vision_summary": "", - "component_counts": {}, - "circuit_analysis": { - "circuit_type": "Unknown", - "design_errors": [], - "design_warnings": [] - }, - "components": [], - "values": {} - } - - if file_size > MAX_IMAGE_BYTES: - size_mb = round(file_size / (1024 * 1024), 2) - return { - "error": f"Image too large ({size_mb} MB). Max allowed size is 0.5 MB.", - "vision_summary": "", - "component_counts": {}, - "circuit_analysis": { - "circuit_type": "Unknown", - "design_errors": ["Image file size exceeded 0.5 MB limit"], - "design_warnings": [] - }, - "components": [], - "values": {} - } - - # === OPTIMIZE IMAGE BEFORE SENDING === - print(f"[VISION] Processing image: {os.path.basename(image_path)}") - image_bytes = optimize_image_for_vision(image_path) - - # === EXTRACT OCR TEXT (CRITICAL STEP) === - ocr_text = extract_text_with_paddle(image_path) - - if ocr_text: - clean_ocr = ocr_text.strip() - print(f"[VISION] PaddleOCR Hints injected: {clean_ocr[:100]}...") - else: - clean_ocr = "No readable text detected." - - # === PROMPT WITH CONTEXT === - prompt = f""" -ANALYZE THIS ELECTRONICS SCHEMATIC IMAGE. - -CONTEXT FROM OCR SCAN (Text detected in image): -"{clean_ocr}" - -INSTRUCTIONS: -1. Use the OCR text to identify component labels (e.g., if you see "D1" text, there is a Diode, R1,R2,R3... for resistor). -2. Look for rotated text labels near symbols. -3. Identify the circuit topology. - -VERY IMPORTANT INSTRUCTIONS: -1. DON'T OVERCALCULATE MODEL COUNT LIKE MODEL COUNT + OCR COUNT -2. IF THERE IS ANY VALUE NOT PRESENT FOR ANY COMPONENT JUST ADD A QUESTION MARK IN FRONT OF IT - -OUTPUT RULES: -1. Return ONLY valid JSON. -2. Structure: - - -RESPOND WITH JSON ONLY. 
-""" - - max_retries = 2 - for attempt in range(max_retries): - try: - print(f"[VISION] Attempt {attempt + 1}/{max_retries}...") - - response_text = run_ollama_vision(prompt, image_bytes) - - cleaned_json = response_text.replace("```json", "").replace("```", "").strip() - - if "{" in cleaned_json and "}" in cleaned_json: - start = cleaned_json.index("{") - end = cleaned_json.rindex("}") + 1 - cleaned_json = cleaned_json[start:end] - - data = json.loads(cleaned_json) - - required_keys = ["vision_summary", "component_counts", "circuit_analysis", "components", "values"] - for key in required_keys: - if key not in data: - raise ValueError(f"Missing required key: {key}") - - if not isinstance(data.get("circuit_analysis"), dict): - data["circuit_analysis"] = {"circuit_type": "Unknown", "design_errors": [], "design_warnings": []} - - if "design_errors" not in data["circuit_analysis"]: - data["circuit_analysis"]["design_errors"] = [] - - if not data.get("component_counts") or all(v == 0 for v in data.get("component_counts", {}).values()): - counts = {"R": 0, "C": 0, "U": 0, "Q": 0, "D": 0, "L": 0, "Misc": 0} - for comp in data.get("components", []): - if isinstance(comp, str) and len(comp) > 0: - comp_type = comp[0].upper() - if comp_type in counts: - counts[comp_type] += 1 - elif "DIODE" in comp.upper() or comp.startswith("D"): - counts["D"] = counts.get("D", 0) + 1 - data["component_counts"] = counts - - if data.get("components"): - data["components"] = list(dict.fromkeys(data["components"])) - - print(f"[VISION] Success: {data.get('circuit_analysis', {}).get('circuit_type', 'Unknown')}") - return data - - except Exception as e: - print(f"[VISION] Attempt {attempt + 1} failed: {str(e)}") - if attempt == max_retries - 1: - return { - "error": f"Vision analysis failed: {str(e)}", - "vision_summary": "Unable to analyze circuit image", - "component_counts": {}, - "circuit_analysis": { - "circuit_type": "Unknown", - "design_errors": ["Analysis timed out or failed"], - "design_warnings": [] - }, - "components": [], - "values": {} - } - else: - import time - time.sleep(2) - - -def analyze_image(image_path: str, question: str | None = None, preprocess: bool = True) -> str: - """Helper for manual testing.""" - return str(analyze_and_extract(image_path)) \ No newline at end of file +def analyze_and_extract(image_path): + """This function is required by chatbot_core.py""" + text = extract_text_with_paddle(image_path) + return { + "vision_summary": f"Detected text: {text}" if text else "No text detected", + "components": [], + "values": {} + } diff --git a/src/chatbot/ollama_runner.py b/src/chatbot/ollama_runner.py index dd84041d4..2f2072c9f 100644 --- a/src/chatbot/ollama_runner.py +++ b/src/chatbot/ollama_runner.py @@ -1,10 +1,12 @@ import os import ollama -import json,time +import json, time +import re +import base64 # Model configuration VISION_MODELS = {"primary": "minicpm-v:latest"} -TEXT_MODELS = {"default": "qwen2.5:3b"} +TEXT_MODELS = {"default": "qwen2.5:3b"} # Qwen 2.5 3B is already quite fast! 
EMBED_MODEL = "nomic-embed-text" ollama_client = ollama.Client( @@ -13,130 +15,92 @@ ) def run_ollama_vision(prompt: str, image_input: str | bytes) -> str: - """Call minicpm-v:latest with Chain-of-Thought for better accuracy.""" + """Call minicpm-v:latest with focused parameters to reduce lag.""" model = VISION_MODELS["primary"] try: - import base64 - image_b64 = "" - - if isinstance(image_input, bytes): image_b64 = base64.b64encode(image_input).decode("utf-8") - elif os.path.isfile(image_input): with open(image_input, "rb") as f: image_b64 = base64.b64encode(f.read()).decode("utf-8") - elif isinstance(image_input, str) and len(image_input) > 100: image_b64 = image_input else: raise ValueError("Invalid image input format") - # === CHAIN OF THOUGHT === system_prompt = ( - "You are an expert Electronics Engineer using eSim.\n" - "Analyze the schematic image carefully.\n\n" - "STEP 1: THINKING PROCESS\n" - "- List visible components (e.g., 'I see 4 diodes in a bridge...').\n" - "- Trace connections (e.g., 'Resistor R1 is in series...').\n" - "- Check against the OCR text provided.\n\n" - "STEP 2: JSON OUTPUT\n" - "After your analysis, output a SINGLE JSON object wrapped in ```json ... ```.\n" - "Structure:\n" - "{\n" - ' "vision_summary": "Summary string",\n' - ' "component_counts": {"R": 0, "C": 0, "D": 0, "Q": 0, "U": 0},\n' - ' "circuit_analysis": {\n' - ' "circuit_type": "Rectifier/Amplifier/etc",\n' - ' "design_errors": [],\n' - ' "design_warnings": []\n' - ' },\n' - ' "components": ["R1", "D1"],\n' - ' "values": {"R1": "1k"}\n' - "}\n" + "You are an eSim expert. Analyze the schematic.\n" + "Output ONLY a single JSON object wrapped in ```json ... ```." ) + # Vision is slow; we set stream=False here because we need the full JSON to parse it, + # but we reduce 'num_predict' to stop the model from rambling. resp = ollama_client.chat( model=model, messages=[ {"role": "system", "content": system_prompt}, - { - "role": "user", - "content": prompt, - "images": [image_b64], # <--- MUST BE LIST OF BASE64 STRINGS - }, + {"role": "user", "content": prompt, "images": [image_b64]}, ], options={ "temperature": 0.0, - "num_ctx": 8192, - "num_predict": 1024, + "num_ctx": 4096, # Reduced from 8192 + "num_predict": 512, # Reduced from 1024 }, ) content = resp["message"]["content"] - - # === PARSE JSON FROM MIXED OUTPUT === - import re json_match = re.search(r'```json\s*(\{.*?\})\s*```', content, re.DOTALL) - if json_match: - return json_match.group(1) + if json_match: return json_match.group(1) - start = content.find('{') - end = content.rfind('}') + 1 - if start != -1 and end != -1: - return content[start:end] - - return "{}" + start, end = content.find('{'), content.rfind('}') + 1 + return content[start:end] if start != -1 else "{}" except Exception as e: print(f"[VISION ERROR] {e}") - return json.dumps({ - "vision_summary": f"Vision failed: {str(e)[:50]}", - "component_counts": {}, - "circuit_analysis": {"circuit_type": "Error", "design_errors": [], "design_warnings": []}, - "components": [], "values": {} - }) + return json.dumps({"vision_summary": "Vision failed", "components": []}) def run_ollama(prompt: str, mode: str = "default") -> str: """ - OPTIMIZED: Run text model with focused parameters. + ULTRA-OPTIMIZED: Streams output for immediate user feedback. 
""" model = TEXT_MODELS.get(mode, TEXT_MODELS["default"]) try: - resp = ollama_client.chat( + # We use stream=True to get tokens as they are generated + stream = ollama_client.chat( model=model, messages=[ - { - "role": "system", - "content": "You are an eSim and electronics expert. Be concise, accurate, and practical." - }, + {"role": "system", "content": "You are an eSim expert. Be concise."}, {"role": "user", "content": prompt}, ], + stream=True, options={ "temperature": 0.05, - "num_ctx": 2048, - "num_predict": 400, - "top_p": 0.9, - "repeat_penalty": 1.1, + "num_ctx": 1024, # Reduced to 1024 for faster prompt processing + "num_predict": 250, # Limit long-winded answers }, ) - return resp["message"]["content"].strip() + full_response = "" + for chunk in stream: + token = chunk['message']['content'] + full_response += token + # In your Application.py GUI, you should call a callback here to update the UI + # For now, we print it to show the speed + print(token, end="", flush=True) + + print() # New line after stream ends + return full_response.strip() except Exception as e: return f"[Error] {str(e)}" - def get_embedding(text: str): - """ - OPTIMIZED: Get text embeddings for RAG. - """ + """Get text embeddings with a fast cache check.""" try: r = ollama_client.embeddings(model=EMBED_MODEL, prompt=text) return r["embedding"] except Exception as e: - print(f"[EMBED ERROR] {e}") - return None + return None \ No newline at end of file diff --git a/src/chatbot/stt_handler.py b/src/chatbot/stt_handler.py index 0d3352f26..4ef4759b3 100644 --- a/src/chatbot/stt_handler.py +++ b/src/chatbot/stt_handler.py @@ -32,39 +32,43 @@ def listen_to_mic(should_stop=lambda: False, max_silence_sec=3, samplerate=16000 def callback(indata, frames, time_info, status): q.put(bytes(indata)) - with sd.RawInputStream( - samplerate=samplerate, - channels=1, - dtype="int16", - blocksize=8000, - callback=callback, - ): - while True: - if should_stop(): - return "" + try: + with sd.RawInputStream( + samplerate=samplerate, + channels=1, + dtype="int16", + blocksize=8000, + callback=callback, + ): + while True: + if should_stop(): + return "" - now = time.time() + now = time.time() - # Stop after silence - if not started and (now - t0) >= max_silence_sec: - return "" + # Stop after silence + if not started and (now - t0) >= max_silence_sec: + return "" - if started and t_speech and (now - t_speech) >= phrase_limit_sec: - break + if started and t_speech and (now - t_speech) >= phrase_limit_sec: + break - try: - data = q.get(timeout=0.2) - except queue.Empty: - continue + try: + data = q.get(timeout=0.2) + except queue.Empty: + continue - if rec.AcceptWaveform(data): - text = json.loads(rec.Result()).get("text", "").strip() - if text: - return text - else: - partial = json.loads(rec.PartialResult()).get("partial", "").strip() - if partial and not started: - started = True - t_speech = now + if rec.AcceptWaveform(data): + text = json.loads(rec.Result()).get("text", "").strip() + if text: + return text + else: + partial = json.loads(rec.PartialResult()).get("partial", "").strip() + if partial and not started: + started = True + t_speech = now - return json.loads(rec.FinalResult()).get("text", "").strip() + return json.loads(rec.FinalResult()).get("text", "").strip() + except Exception as e: + print(f"[Chatbot] Microphone not available: {e}") + return "" \ No newline at end of file