diff --git a/.bumpversion.toml b/.bumpversion.toml
new file mode 100644
index 0000000..235db6a
--- /dev/null
+++ b/.bumpversion.toml
@@ -0,0 +1,54 @@
+[tool.bumpversion]
+# Current version. Stored here, not read from pyproject.toml; the search patterns below look for this exact value
+current_version = "0.7.0"
+
+# Version parsing pattern (major.minor.patch)
+parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
+
+# Version serialization format
+serialize = ["{major}.{minor}.{patch}"]
+
+# Default search and replace templates
+search = "{current_version}"
+replace = "{new_version}"
+
+# Git integration
+commit = true
+commit_args = ""
+tag = true
+tag_name = "v{new_version}"
+tag_message = "Release version {new_version}"
+sign_tags = false
+
+# Allow a dirty working tree (if needed)
+allow_dirty = false
+
+# Commit message template
+message = "chore: bump version from {current_version} to {new_version}"
+
+# Files whose version string must be updated
+[[tool.bumpversion.files]]
+filename = "pyproject.toml"
+search = 'version = "{current_version}"'
+replace = 'version = "{new_version}"'
+
+[[tool.bumpversion.files]]
+filename = "src/__version__.py"
+search = '__version__ = "{current_version}"'
+replace = '__version__ = "{new_version}"'
+
+# Note: docs/changelog.md is now automatically generated by scripts/generate-changelog.py
+# It's called by scripts/bump-version.sh before bump-my-version runs
+
+# Version part definitions
+[tool.bumpversion.parts.major]
+# Numeric part starting at 0; no `values` list, so it can be bumped past 9
+first_value = "0"
+
+[tool.bumpversion.parts.minor]
+# Numeric part starting at 0; no `values` list, so it can be bumped past 9
+first_value = "0"
+
+[tool.bumpversion.parts.patch]
+# Numeric part starting at 0; no `values` list, so it can be bumped past 9
+first_value = "0"
diff --git a/.github/workflows/docker-build.yml b/.github/workflows/docker-build.yml
new file mode 100644
index 0000000..8de2b7b
--- /dev/null
+++ b/.github/workflows/docker-build.yml
@@ -0,0 +1,296 @@
+name: Build and Push Docker Images
+
+# Builds the minimal, standard and full images. Images are pushed for every
+# event except pull requests; version tags (v*) additionally create a release.
+on:
+  push:
+    branches:
+      - main
+    tags:
+      - 'v*'
+  pull_request:
+    branches:
+      - main
+  workflow_dispatch:
+
+env:
+  DOCKER_USER: royisme
+  DOCKER_REGISTRY: docker.io
+
+jobs:
+  validate-version:
+    name: Validate Version Consistency
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Validate version consistency
+        run: |
+          echo "=== Version Validation ==="
+
+          # Get version from pyproject.toml (first match only)
+          PROJECT_VERSION=$(grep -m1 '^version = ' pyproject.toml | cut -d'"' -f2)
+          echo "pyproject.toml version: $PROJECT_VERSION"
+
+          # Get version from __version__.py (first match only)
+          VERSION_PY=$(grep -m1 '__version__ = ' src/__version__.py | cut -d'"' -f2)
+          echo "__version__.py version: $VERSION_PY"
+
+          # Two empty strings compare equal, so fail explicitly when a lookup
+          # found nothing instead of reporting a bogus "consistent" result.
+          if [[ -z "$PROJECT_VERSION" || -z "$VERSION_PY" ]]; then
+            echo "❌ Error: Could not determine version!"
+            echo "  pyproject.toml: '$PROJECT_VERSION'"
+            echo "  __version__.py: '$VERSION_PY'"
+            exit 1
+          fi
+
+          # Validate Python version file
+          if [[ "$PROJECT_VERSION" != "$VERSION_PY" ]]; then
+            echo "❌ Error: Version mismatch!"
+            echo "  pyproject.toml: $PROJECT_VERSION"
+            echo "  __version__.py: $VERSION_PY"
+            exit 1
+          fi
+
+          # If this is a tag push, validate tag version
+          if [[ $GITHUB_REF == refs/tags/* ]]; then
+            TAG_VERSION=${GITHUB_REF#refs/tags/v}
+            echo "Git tag version: v$TAG_VERSION"
+
+            if [[ "$PROJECT_VERSION" != "$TAG_VERSION" ]]; then
+              echo "❌ Error: Version mismatch with tag!"
+              echo "  pyproject.toml: $PROJECT_VERSION"
+              echo "  Git tag: $TAG_VERSION"
+              exit 1
+            fi
+          fi
+
+          echo "✅ All versions consistent: $PROJECT_VERSION"
+
+  build-minimal:
+    needs: validate-version
+    name: Build Minimal Image
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # QEMU is required to execute linux/arm64 build steps on the amd64 runner.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ env.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_HUB_TOKEN }}
+
+      - name: Extract metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.DOCKER_USER }}/codebase-rag
+          tags: |
+            type=ref,event=branch,suffix=-minimal
+            type=ref,event=pr,suffix=-minimal
+            type=semver,pattern={{version}},suffix=-minimal
+            type=semver,pattern={{major}}.{{minor}},suffix=-minimal
+            type=raw,value=minimal,enable={{is_default_branch}}
+            type=raw,value=minimal-latest,enable={{is_default_branch}}
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: docker/Dockerfile.minimal
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          # Per-image scope so the three build jobs do not overwrite each other's cache.
+          cache-from: type=gha,scope=minimal
+          cache-to: type=gha,mode=max,scope=minimal
+          platforms: linux/amd64,linux/arm64
+
+  build-standard:
+    needs: validate-version
+    name: Build Standard Image
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # QEMU is required to execute linux/arm64 build steps on the amd64 runner.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ env.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_HUB_TOKEN }}
+
+      - name: Extract metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.DOCKER_USER }}/codebase-rag
+          tags: |
+            type=ref,event=branch,suffix=-standard
+            type=ref,event=pr,suffix=-standard
+            type=semver,pattern={{version}},suffix=-standard
+            type=semver,pattern={{major}}.{{minor}},suffix=-standard
+            type=raw,value=standard,enable={{is_default_branch}}
+            type=raw,value=standard-latest,enable={{is_default_branch}}
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: docker/Dockerfile.standard
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          # Per-image scope so the three build jobs do not overwrite each other's cache.
+          cache-from: type=gha,scope=standard
+          cache-to: type=gha,mode=max,scope=standard
+          platforms: linux/amd64,linux/arm64
+
+  build-full:
+    needs: validate-version
+    name: Build Full Image
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # QEMU is required to execute linux/arm64 build steps on the amd64 runner.
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Log in to Docker Hub
+        if: github.event_name != 'pull_request'
+        uses: docker/login-action@v3
+        with:
+          username: ${{ env.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_HUB_TOKEN }}
+
+      - name: Extract metadata
+        id: meta
+        uses: docker/metadata-action@v5
+        with:
+          images: ${{ env.DOCKER_USER }}/codebase-rag
+          tags: |
+            type=ref,event=branch,suffix=-full
+            type=ref,event=pr,suffix=-full
+            type=semver,pattern={{version}},suffix=-full
+            type=semver,pattern={{major}}.{{minor}},suffix=-full
+            type=raw,value=full,enable={{is_default_branch}}
+            type=raw,value=full-latest,enable={{is_default_branch}}
+            type=raw,value=latest,enable={{is_default_branch}}
+
+      - name: Build and push
+        uses: docker/build-push-action@v5
+        with:
+          context: .
+          file: docker/Dockerfile.full
+          push: ${{ github.event_name != 'pull_request' }}
+          tags: ${{ steps.meta.outputs.tags }}
+          labels: ${{ steps.meta.outputs.labels }}
+          # Per-image scope so the three build jobs do not overwrite each other's cache.
+          cache-from: type=gha,scope=full
+          cache-to: type=gha,mode=max,scope=full
+          platforms: linux/amd64,linux/arm64
+
+  create-release:
+    name: Create GitHub Release
+    if: startsWith(github.ref, 'refs/tags/v')
+    needs: [build-minimal, build-standard, build-full]
+    runs-on: ubuntu-latest
+    # Creating a release needs write access; the default token may be read-only.
+    permissions:
+      contents: write
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # metadata-action's {{version}} pattern drops the leading "v", so images
+      # are tagged e.g. 0.7.0-minimal rather than v0.7.0-minimal.
+      - name: Derive image version from tag
+        id: version
+        run: echo "value=${GITHUB_REF_NAME#v}" >> "$GITHUB_OUTPUT"
+
+      - name: Create Release
+        uses: softprops/action-gh-release@v1
+        with:
+          generate_release_notes: true
+          body: |
+            ## Docker Images
+
+            ### Minimal (Code Graph only)
+            ```bash
+            docker pull royisme/codebase-rag:minimal
+            docker pull royisme/codebase-rag:${{ steps.version.outputs.value }}-minimal
+            ```
+
+            ### Standard (Code Graph + Memory)
+            ```bash
+            docker pull royisme/codebase-rag:standard
+            docker pull royisme/codebase-rag:${{ steps.version.outputs.value }}-standard
+            ```
+
+            ### Full (All Features)
+            ```bash
+            docker pull royisme/codebase-rag:full
+            docker pull royisme/codebase-rag:${{ steps.version.outputs.value }}-full
+            docker pull royisme/codebase-rag:latest
+            ```
+
+            ## Quick Start
+
+            See [documentation](https://code-graph.vantagecraft.dev) for detailed setup instructions.
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+  notify:
+    name: Notify Build Status
+    needs: [build-minimal, build-standard, build-full]
+    if: always()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Build Summary
+        env:
+          MINIMAL_RESULT: ${{ needs.build-minimal.result }}
+          STANDARD_RESULT: ${{ needs.build-standard.result }}
+          FULL_RESULT: ${{ needs.build-full.result }}
+        run: |
+          # Map a job result (success/failure/cancelled/skipped) to a summary label.
+          status() {
+            if [[ "$1" == "success" ]]; then
+              echo "✅ Built"
+            else
+              echo "❌ $1"
+            fi
+          }
+
+          {
+            echo "## 🐳 Docker Build Summary"
+            echo ""
+            echo "- **Minimal**: $(status "$MINIMAL_RESULT")"
+            echo "- **Standard**: $(status "$STANDARD_RESULT")"
+            echo "- **Full**: $(status "$FULL_RESULT")"
+            echo ""
+            echo "Images available at: https://hub.docker.com/r/royisme/codebase-rag"
+          } >> "$GITHUB_STEP_SUMMARY"
diff --git a/.github/workflows/docs-deploy.yml b/.github/workflows/docs-deploy.yml
new file mode 100644
index 0000000..875323f
--- /dev/null
+++ b/.github/workflows/docs-deploy.yml
@@ -0,0 +1,73 @@
+name: Deploy Documentation
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'docs/**'
+      - 'mkdocs.yml'
+      - '.github/workflows/docs-deploy.yml'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'docs/**'
+      - 'mkdocs.yml'
+  workflow_dispatch:
+
+permissions:
+  contents: read  # checkout only; no step in this workflow pushes to the repository
+  pages: write
+  id-token: write
+
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0  # Fetch all history for git info plugin
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+          cache: 'pip'
+
+      - name: Install dependencies
+        run: |
+          pip install --upgrade pip
+          pip install mkdocs-material
+          pip install mkdocs-minify-plugin
+          pip install mkdocs-git-revision-date-localized-plugin
+
+      - name: Build documentation
+        run: mkdocs build --strict
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: site
+
+  deploy:
+    if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request'
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
+
+      - name: Notify deployment
+        run: |
+          echo "📚 Documentation deployed successfully!"
+          echo "🔗 URL: ${{ steps.deployment.outputs.page_url }}"
diff --git a/GITHUB_PAGES_TROUBLESHOOTING.md b/GITHUB_PAGES_TROUBLESHOOTING.md
new file mode 100644
index 0000000..9bbb657
--- /dev/null
+++ b/GITHUB_PAGES_TROUBLESHOOTING.md
@@ -0,0 +1,347 @@
+# GitHub Pages 部署诊断和配置指南
+
+## 问题：编译成功但没有部署
+
+### 原因分析
+
+你的 `.github/workflows/docs-deploy.yml` 配置了：
+
+```yaml
+deploy:
+  if: github.ref == 'refs/heads/main' && github.event_name != 'pull_request'
+```
+
+**这意味着**：
+- ✅ 在 main 分支会部署
+- ❌ 推送到 feature 分支不会触发该 workflow（`push` 只监听 main）；在 feature 分支手动触发时只会 build，不会 deploy
+- ❌ PR 只会 build，不会 deploy
+
+### 当前状态检查
+
+1. **查看你当前在哪个分支**
+   ```bash
+   git branch
+   # 如果显示 claude/fix-docker-env-config-*，那就是在 feature 分支
+   ```
+
+2. **查看 GitHub Actions 运行记录**
+   - 访问：https://github.com/royisme/codebase-rag/actions
+   - 点击最近的 "Deploy Documentation" workflow
+   - 检查是否有 "deploy" job
+   - 如果只有 "build" job，说明条件不满足
+
+## 解决方案
+
+### 方案 1：合并到 main 分支（推荐）
+
+```bash
+# 1. 确保当前分支所有更改已提交
+git status
+
+# 2. 切换到 main 分支
+git checkout main
+
+# 3. 合并你的 feature 分支
+git merge claude/fix-docker-env-config-011CUqY1Y431FvqPZW6YAEhT
+
+# 4. 推送到远程
+git push origin main
+
+# 5. GitHub Actions 会自动触发，这次会执行 deploy job
+```
+
+**验证**：
+- 访问 Actions 页面
+- 应该看到 "build" 和 "deploy" 两个 job
+- deploy job 完成后，会显示部署 URL
+
+### 方案 2：手动触发 workflow
+
+如果你还在 feature 分支，但想测试部署：
+
+```text
+# 在 GitHub 网站上
+1. 访问：https://github.com/royisme/codebase-rag/actions/workflows/docs-deploy.yml
+2. 点击 "Run workflow" 按钮
+3. 选择 "main" 分支
+4. 点击 "Run workflow"
+```
+
+## GitHub Pages 设置配置
+
+### 必须配置的设置
+
+1. **访问仓库设置**
+   ```
+   https://github.com/royisme/codebase-rag/settings/pages
+   ```
+
+2. **Source 设置**
+   - ✅ 选择 "GitHub Actions"
+   - ❌ 不要选择 "Deploy from a branch"
+
+   ![Source Setting](https://docs.github.com/assets/cb-49683/mw-1440/images/help/pages/publishing-source-drop-down.webp)
+
+3. **Custom domain 设置**
+
+   **需要配置！** 因为你有 `docs/CNAME` 文件：
+
+   ```
+   Custom domain: code-graph.vantagecraft.dev
+   ☑ Enforce HTTPS
+   ```
+
+   **为什么需要？**
+   - 你的 `docs/CNAME` 文件内容是 `code-graph.vantagecraft.dev`
+   - 这告诉 GitHub Pages 你想使用自定义域名
+   - 必须在 Settings 中也配置这个域名
+
+### DNS 配置（必须）
+
+在你的域名服务商（vantagecraft.dev）配置：
+
+**CNAME 记录**：
+```
+类型: CNAME
+名称: code-graph
+目标: royisme.github.io
+TTL: 3600 (或自动)
+```
+
+**如何验证**：
+```bash
+# 检查 DNS 是否生效
+nslookup code-graph.vantagecraft.dev
+
+# 或者
+dig code-graph.vantagecraft.dev
+
+# 应该显示：
+# code-graph.vantagecraft.dev. IN CNAME royisme.github.io.
+```
+
+### 完整配置步骤
+
+#### Step 1: 配置 DNS（在域名服务商）
+
+```
+记录类型: CNAME
+主机记录: code-graph
+记录值: royisme.github.io
+TTL: 默认或3600
+```
+
+保存后等待 5-10 分钟生效。
+
+#### Step 2: 配置 GitHub Pages
+
+1. 访问：https://github.com/royisme/codebase-rag/settings/pages
+
+2. **Source 设置**：
+   - Source: GitHub Actions ✅
+
+3. **Custom domain 设置**：
+   - 输入：`code-graph.vantagecraft.dev`
+   - 点击 Save
+   - 等待 DNS 验证（可能需要几分钟）
+   - 验证成功后，勾选 "Enforce HTTPS"
+
+#### Step 3: 触发部署
+
+```bash
+# 方法 1: 合并到 main 分支（推荐）
+git checkout main
+git merge your-feature-branch
+git push origin main
+
+# 方法 2: 手动触发
+# 在 GitHub Actions 页面点击 "Run workflow"
+
+# 方法 3: 修改文档触发
+echo "test" >> docs/index.md
+git add docs/index.md
+git commit -m "docs: trigger deployment"
+git push origin main
+```
+
+#### Step 4: 验证部署
+
+1. **查看 GitHub Actions**
+   - https://github.com/royisme/codebase-rag/actions
+   - 应该看到 "build" 和 "deploy" 两个 job
+   - deploy job 状态应该是绿色 ✅
+
+2. **查看 Pages 设置**
+   - https://github.com/royisme/codebase-rag/settings/pages
+   - 应该显示："Your site is live at https://code-graph.vantagecraft.dev"
+
+3. **访问网站**
+   - https://code-graph.vantagecraft.dev
+   - 应该能看到文档
+
+## 常见问题排查
+
+### 问题 1: deploy job 不执行
+
+**症状**：只有 build job，没有 deploy job
+
+**原因**：
+- 不在 main 分支
+- 是 Pull Request
+
+**解决**：
+```bash
+git checkout main && git merge your-feature-branch
+git push origin main
+```
+
+### 问题 2: DNS check failed
+
+**症状**：GitHub Pages 显示 "DNS check unsuccessful"
+
+**原因**：DNS 记录未生效或配置错误
+
+**解决**：
+```bash
+# 1. 检查 DNS
+dig code-graph.vantagecraft.dev
+
+# 2. 确保返回 CNAME 记录指向 royisme.github.io
+# 3. 等待 DNS 传播（5-60分钟）
+# 4. 在 GitHub Pages 设置中点击 "Remove" 再重新添加域名
+```
+
+### 问题 3: 404 Not Found
+
+**症状**：访问域名显示 404
+
+**原因**：
+- 部署未完成
+- CNAME 文件缺失
+- 域名配置不一致
+
+**解决**：
+```bash
+# 1. 确认 docs/CNAME 文件存在
+cat docs/CNAME
+# 应该显示：code-graph.vantagecraft.dev
+
+# 2. 确认 GitHub Pages 设置中的 Custom domain 与 CNAME 一致
+
+# 3. 重新构建（空提交不改动 docs/** 或 mkdocs.yml，会被 workflow 的 paths 过滤器跳过，需手动触发）
+gh workflow run docs-deploy.yml --ref main
+gh run list --workflow=docs-deploy.yml --limit 1
+```
+
+### 问题 4: HTTPS 证书问题
+
+**症状**："Certificate error" 或 "Not secure"
+
+**原因**：GitHub 还在生成 HTTPS 证书
+
+**解决**：
+- 等待 1-24 小时
+- GitHub 会自动从 Let's Encrypt 获取证书
+- 在此期间可以用 HTTP 访问：http://code-graph.vantagecraft.dev
+
+### 问题 5: 部署成功但内容是旧的
+
+**症状**：网站内容没更新
+
+**解决**：
+```bash
+# 清除浏览器缓存
+# 或强制刷新：Ctrl+Shift+R (Windows/Linux) 或 Cmd+Shift+R (Mac)
+
+# 或等待 CDN 缓存过期（通常 10 分钟）
+```
+
+## 最佳实践
+
+### 1. 开发流程
+
+```bash
+# Feature 分支开发
+git checkout -b feature/docs-update
+# ... 修改文档 ...
+git commit -m "docs: update guide"
+git push origin feature/docs-update
+
+# 创建 PR → 在 PR 中会 build（但不 deploy）
+# 合并到 main → 自动 deploy
+
+# 或者直接在 main 分支开发（小改动）
+git checkout main
+# ... 修改 ...
+git commit -m "docs: fix typo"
+git push origin main  # 自动触发 deploy
+```
+
+### 2. 快速测试部署
+
+如果想快速看到部署效果：
+
+```bash
+# 1. 手动触发部署（空提交不改动 docs/** 或 mkdocs.yml，会被 paths 过滤器跳过，不会触发 workflow）
+gh workflow run docs-deploy.yml --ref main
+gh run list --workflow=docs-deploy.yml --limit 1
+
+# 2. 或修改任意文档
+echo "" >> docs/index.md
+git add docs/index.md
+git commit -m "docs: trigger deployment"
+git push origin main
+```
+
+### 3. 监控部署状态
+
+```bash
+# 使用 GitHub CLI
+gh run list --workflow=docs-deploy.yml
+
+# 查看最新运行
+gh run view --log
+
+# 或在浏览器中查看
+open https://github.com/royisme/codebase-rag/actions
+```
+
+## 配置检查清单
+
+使用这个清单确保所有配置正确：
+
+- [ ] **DNS 配置**
+  - [ ] CNAME 记录：code-graph → royisme.github.io
+  - [ ] DNS 已生效（用 dig/nslookup 验证）
+
+- [ ] **GitHub Pages 设置**
+  - [ ] Source: GitHub Actions
+  - [ ] Custom domain: code-graph.vantagecraft.dev
+  - [ ] DNS check: ✅ (绿色对勾)
+  - [ ] Enforce HTTPS: ☑ (勾选)
+
+- [ ] **代码仓库**
+  - [ ] docs/CNAME 文件存在，内容正确
+  - [ ] .github/workflows/docs-deploy.yml 存在
+  - [ ] 在 main 分支
+
+- [ ] **GitHub Actions**
+  - [ ] Workflow 权限正确（pages: write）
+  - [ ] 最近一次运行包含 deploy job
+  - [ ] deploy job 状态：✅ Success
+
+- [ ] **访问验证**
+  - [ ] https://code-graph.vantagecraft.dev 可访问
+  - [ ] HTTPS 证书有效
+  - [ ] 内容显示正确
+
+## 需要帮助？
+
+如果按照以上步骤仍有问题，请提供：
+
+1. 当前分支名：`git branch`
+2. GitHub Actions 运行日志截图
+3. GitHub Pages 设置页面截图
+4. DNS 查询结果：`dig code-graph.vantagecraft.dev`
+
+我会帮你进一步诊断！
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..18fcbd2
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,279 @@
+# Makefile for Code Graph Knowledge System
+# Provides convenient commands for Docker operations
+
+.PHONY: help docker-minimal docker-standard docker-full docker-full-with-ollama \
+        docker-build-minimal docker-build-standard docker-build-full docker-build-all \
+        docker-push docker-pull docker-clean docker-logs docker-stop \
+        dev-minimal dev-standard dev-full docs-serve docs-build docs-deploy health-check init-env
+
+# Docker Hub username
+DOCKER_USER ?= royisme
+
+# Default target
+help:
+	@echo "Code Graph Knowledge System - Docker Commands"
+	@echo "=============================================="
+	@echo ""
+	@echo "Quick Start:"
+	@echo "  make docker-minimal          - Start minimal deployment (Code Graph only, no LLM)"
+	@echo "  make docker-standard         - Start standard deployment (+ Memory, needs Embedding)"
+	@echo "  make docker-full             - Start full deployment (all features, needs LLM)"
+	@echo "  make docker-full-with-ollama - Start full deployment with local Ollama"
+	@echo ""
+	@echo "Build Commands:"
+	@echo "  make docker-build-minimal    - Build minimal image"
+	@echo "  make docker-build-standard   - Build standard image"
+	@echo "  make docker-build-full       - Build full image"
+	@echo "  make docker-build-all        - Build all images"
+	@echo ""
+	@echo "Management:"
+	@echo "  make docker-stop             - Stop all services"
+	@echo "  make docker-clean            - Stop and remove all containers/volumes"
+	@echo "  make docker-logs             - Show logs from all services"
+	@echo "  make docker-push             - Push all images to Docker Hub"
+	@echo "  make docker-pull             - Pull all images from Docker Hub"
+	@echo ""
+	@echo "Development:"
+	@echo "  make dev-minimal             - Start minimal in dev mode (mounted code)"
+	@echo "  make dev-standard            - Start standard in dev mode"
+	@echo "  make dev-full                - Start full in dev mode"
+	@echo ""
+	@echo "Documentation:"
+	@echo "  make docs-serve              - Serve documentation locally"
+	@echo "  make docs-build              - Build documentation"
+	@echo "  make docs-deploy             - Deploy documentation to vantagecraft.dev"
+	@echo ""
+
+# ============================================
+# Deployment Commands
+# ============================================
+
+docker-minimal:
+	@echo "🚀 Starting Minimal deployment (Code Graph only)..."
+	@echo "   ✓ No LLM or Embedding required"
+	@echo "   ✓ Code Graph tools available"
+	@echo ""
+	docker-compose -f docker/docker-compose.minimal.yml up -d
+	@echo ""
+	@echo "✅ Minimal deployment started!"
+	@echo "   API: http://localhost:8000"
+	@echo "   Neo4j Browser: http://localhost:7474"
+	@echo ""
+	@echo "Check status: make docker-logs"
+
+docker-standard:
+	@echo "🚀 Starting Standard deployment (Code Graph + Memory)..."
+	@echo "   ⚠️  Embedding provider required (check .env)"
+	@echo "   ✓ Code Graph + Memory Store"
+	@echo ""
+	docker-compose -f docker/docker-compose.standard.yml up -d
+	@echo ""
+	@echo "✅ Standard deployment started!"
+	@echo "   API: http://localhost:8000"
+	@echo "   Neo4j Browser: http://localhost:7474"
+
+docker-full:
+	@echo "🚀 Starting Full deployment (All features)..."
+	@echo "   ⚠️  LLM + Embedding required (check .env)"
+	@echo "   ✓ Code Graph + Memory + Knowledge RAG"
+	@echo ""
+	docker-compose -f docker/docker-compose.full.yml up -d
+	@echo ""
+	@echo "✅ Full deployment started!"
+	@echo "   API: http://localhost:8000"
+	@echo "   Neo4j Browser: http://localhost:7474"
+
+docker-full-with-ollama:
+	@echo "🚀 Starting Full deployment with local Ollama..."
+	@echo "   ✓ Ollama will be started in Docker"
+	@echo "   ✓ All features enabled"
+	@echo ""
+	docker-compose -f docker/docker-compose.full.yml --profile with-ollama up -d
+	@echo ""
+	@echo "✅ Full deployment with Ollama started!"
+	@echo "   API: http://localhost:8000"
+	@echo "   Neo4j Browser: http://localhost:7474"
+	@echo "   Ollama: http://localhost:11434"
+	@echo ""
+	@echo "⏳ Ollama may take a few minutes to download models..."
+	@echo "   Check: docker logs codebase-rag-ollama-full -f"
+
+# ============================================
+# Build Commands
+# ============================================
+
+docker-build-minimal:
+	@echo "🔨 Building minimal image..."
+	docker-compose -f docker/docker-compose.minimal.yml build
+	@echo "✅ Minimal image built: royisme/codebase-rag:minimal"
+
+docker-build-standard:
+	@echo "🔨 Building standard image..."
+	docker-compose -f docker/docker-compose.standard.yml build
+	@echo "✅ Standard image built: royisme/codebase-rag:standard"
+
+docker-build-full:
+	@echo "🔨 Building full image..."
+	docker-compose -f docker/docker-compose.full.yml build
+	@echo "✅ Full image built: royisme/codebase-rag:full"
+
+docker-build-all: docker-build-minimal docker-build-standard docker-build-full
+	@echo ""
+	@echo "✅ All images built successfully!"
+
+# ============================================
+# Docker Hub Commands
+# ============================================
+
+docker-push: docker-build-all
+	@echo "📤 Pushing images to Docker Hub..."
+	docker tag royisme/codebase-rag:minimal royisme/codebase-rag:minimal-latest
+	docker tag royisme/codebase-rag:standard royisme/codebase-rag:standard-latest
+	docker tag royisme/codebase-rag:full royisme/codebase-rag:full-latest
+	docker push royisme/codebase-rag:minimal
+	docker push royisme/codebase-rag:minimal-latest
+	docker push royisme/codebase-rag:standard
+	docker push royisme/codebase-rag:standard-latest
+	docker push royisme/codebase-rag:full
+	docker push royisme/codebase-rag:full-latest
+	@echo "✅ All images pushed to Docker Hub!"
+
+docker-pull:
+	@echo "📥 Pulling images from Docker Hub..."
+	docker pull royisme/codebase-rag:minimal
+	docker pull royisme/codebase-rag:standard
+	docker pull royisme/codebase-rag:full
+	@echo "✅ All images pulled!"
+
+# ============================================
+# Management Commands
+# ============================================
+
+docker-stop:
+	@echo "🛑 Stopping all services..."
+	-docker-compose -f docker/docker-compose.minimal.yml down
+	-docker-compose -f docker/docker-compose.standard.yml down
+	-docker-compose -f docker/docker-compose.full.yml down
+	@echo "✅ All services stopped"
+
+docker-clean:  # Asks for confirmation, then removes containers and volumes (printf + read: 'read -p' is not POSIX sh)
+	@echo "🧹 Cleaning up all containers and volumes..."
+	@printf "This will remove all data. Continue? [y/N] "; read confirm; \
+	if [ "$$confirm" = "y" ] || [ "$$confirm" = "Y" ]; then \
+		docker-compose -f docker/docker-compose.minimal.yml down -v; \
+		docker-compose -f docker/docker-compose.standard.yml down -v; \
+		docker-compose -f docker/docker-compose.full.yml down -v; \
+		echo "✅ Cleanup complete"; \
+	else \
+		echo "❌ Cleanup cancelled"; \
+	fi
+
+docker-logs:
+	@echo "📋 Showing logs from all services..."
+	@echo "   Press Ctrl+C to exit"
+	@echo ""
+	@if docker ps | grep -q codebase-rag-mcp-minimal; then \
+		docker-compose -f docker/docker-compose.minimal.yml logs -f; \
+	elif docker ps | grep -q codebase-rag-mcp-standard; then \
+		docker-compose -f docker/docker-compose.standard.yml logs -f; \
+	elif docker ps | grep -q codebase-rag-mcp-full; then \
+		docker-compose -f docker/docker-compose.full.yml logs -f; \
+	else \
+		echo "❌ No services running. Start with: make docker-minimal"; \
+	fi
+
+# ============================================
+# Development Mode
+# ============================================
+
+docker-compose.dev.yml:  # Writes the dev override file to docker/docker-compose.dev.yml
+	@echo "Creating dev compose file..."
+	@echo "version: '3.8'" > docker/docker-compose.dev.yml
+	@echo "services:" >> docker/docker-compose.dev.yml
+	@echo "  mcp:" >> docker/docker-compose.dev.yml
+	@echo "    volumes:" >> docker/docker-compose.dev.yml
+	@echo "      - ..:/app:delegated  # Mount repo root; paths resolve relative to docker/" >> docker/docker-compose.dev.yml
+	@echo "    environment:" >> docker/docker-compose.dev.yml
+	@echo "      - DEBUG=true" >> docker/docker-compose.dev.yml
+	@echo "      - PYTHONDONTWRITEBYTECODE=1" >> docker/docker-compose.dev.yml
+
+dev-minimal: docker-compose.dev.yml
+	@echo "🔧 Starting minimal in development mode..."
+	docker-compose -f docker/docker-compose.minimal.yml -f docker/docker-compose.dev.yml up
+
+dev-standard: docker-compose.dev.yml
+	@echo "🔧 Starting standard in development mode..."
+	docker-compose -f docker/docker-compose.standard.yml -f docker/docker-compose.dev.yml up
+
+dev-full: docker-compose.dev.yml
+	@echo "🔧 Starting full in development mode..."
+	docker-compose -f docker/docker-compose.full.yml -f docker/docker-compose.dev.yml up
+
+# ============================================
+# Documentation Commands
+# ============================================
+
+docs-serve:  # Serves the docs locally, installing MkDocs and the plugins used by the CI build if missing
+	@echo "📚 Serving documentation locally..."
+	@if ! command -v mkdocs > /dev/null 2>&1; then \
+		echo "❌ MkDocs not installed. Installing..."; \
+		pip install mkdocs-material mkdocs-i18n mkdocs-minify-plugin mkdocs-git-revision-date-localized-plugin; \
+	fi
+	mkdocs serve
+
+docs-build:  # Builds the docs into site/, installing MkDocs and the plugins used by the CI build if missing
+	@echo "🔨 Building documentation..."
+	@if ! command -v mkdocs > /dev/null 2>&1; then \
+		echo "❌ MkDocs not installed. Installing..."; \
+		pip install mkdocs-material mkdocs-i18n mkdocs-minify-plugin mkdocs-git-revision-date-localized-plugin; \
+	fi
+	mkdocs build
+
+docs-deploy:
+	@echo "🚀 Deploying documentation to vantagecraft.dev..."
+	@echo "   Building documentation..."
+	mkdocs build
+	@echo "✅ Documentation built in site/ directory"
+	@echo ""
+	@echo "📝 Next steps for vantagecraft.dev deployment:"
+	@echo "   1. Upload site/ contents to your web server"
+	@echo "   2. Configure DNS: code-graph.vantagecraft.dev -> your server"
+	@echo "   3. Set up SSL certificate (recommended: Let's Encrypt)"
+	@echo ""
+	@echo "   Or use GitHub Pages:"
+	@echo "   - mkdocs gh-deploy"
+
+# ============================================
+# Utility Commands
+# ============================================
+
+health-check:  # Probes each service over HTTP; -f makes curl fail on HTTP error statuses
+	@echo "🏥 Checking service health..."
+	@echo ""
+	@echo "Neo4j:"
+	@curl -sf http://localhost:7474 > /dev/null && echo "  ✅ Running" || echo "  ❌ Not running"
+	@echo "API:"
+	@curl -sf http://localhost:8000/api/v1/health > /dev/null && echo "  ✅ Running" || echo "  ❌ Not running"
+	@if docker ps | grep -q ollama; then \
+		echo "Ollama:"; \
+		curl -sf http://localhost:11434/api/tags > /dev/null && echo "  ✅ Running" || echo "  ❌ Not running"; \
+	fi
+
+init-env:  # Copies the chosen template to .env; exits non-zero on an unknown mode
+	@echo "📝 Initializing environment file..."
+	@echo "Which deployment mode? [minimal/standard/full]"
+	@read mode; \
+	if [ "$$mode" = "minimal" ]; then \
+		cp docker/.env.template/.env.minimal .env; \
+		echo "✅ Created .env for minimal deployment"; \
+	elif [ "$$mode" = "standard" ]; then \
+		cp docker/.env.template/.env.standard .env; \
+		echo "✅ Created .env for standard deployment"; \
+		echo "⚠️  Don't forget to configure EMBEDDING_PROVIDER"; \
+	elif [ "$$mode" = "full" ]; then \
+		cp docker/.env.template/.env.full .env; \
+		echo "✅ Created .env for full deployment"; \
+		echo "⚠️  Don't forget to configure LLM_PROVIDER and EMBEDDING_PROVIDER"; \
+	else \
+		echo "❌ Invalid mode. Choose: minimal, standard, or full"; exit 1; \
+	fi
diff --git a/docker-compose.yml b/docker-compose.yml
index 01f2d62..698b3c6 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,15 +1,23 @@
+# Default Docker Compose - Points to Minimal deployment
+# For other modes, use:
+#   - docker-compose -f docker/docker-compose.standard.yml up
+#   - docker-compose -f docker/docker-compose.full.yml up
+# Or use Makefile commands:
+#   - make docker-minimal
+#   - make docker-standard
+#   - make docker-full
+
 version: '3.8'
 
 services:
-  # Neo4j Database
   neo4j:
     image: neo4j:5.15-community
     container_name: codebase-rag-neo4j
     ports:
-      - "7474:7474"  # HTTP
-      - "7687:7687"  # Bolt
+      - "${NEO4J_HTTP_PORT:-7474}:7474"
+      - "${NEO4J_BOLT_PORT:-7687}:7687"
     environment:
-      - NEO4J_AUTH=neo4j/password123
+      - NEO4J_AUTH=${NEO4J_USER:-neo4j}/${NEO4J_PASSWORD:-password}
       - NEO4J_PLUGINS=["apoc"]
       - NEO4J_dbms_security_procedures_unrestricted=apoc.*
       - NEO4J_dbms_security_procedures_allowlist=apoc.*
@@ -22,86 +30,41 @@ services:
       - neo4j_import:/var/lib/neo4j/import
       - neo4j_plugins:/plugins
     healthcheck:
-      test: ["CMD-SHELL", "cypher-shell -u neo4j -p password123 'RETURN 1' || exit 1"]
+      test: ["CMD-SHELL", "cypher-shell -u ${NEO4J_USER:-neo4j} -p ${NEO4J_PASSWORD:-password} 'RETURN 1' || exit 1"]
       interval: 10s
       timeout: 5s
       retries: 5
       start_period: 30s
     networks:
-      - codebase-rag-network
-    restart: unless-stopped
-
-  # Ollama (Optional - for local LLM)
-  ollama:
-    image: ollama/ollama:latest
-    container_name: codebase-rag-ollama
-    ports:
-      - "11434:11434"
-    volumes:
-      - ollama_data:/root/.ollama
-    environment:
-      - OLLAMA_HOST=0.0.0.0
-    networks:
-      - codebase-rag-network
+      - codebase-rag
     restart: unless-stopped
-    profiles:
-      - with-ollama
 
-  # Application
-  app:
+  mcp:
     build:
       context: .
-      dockerfile: Dockerfile
-    container_name: codebase-rag-app
+      dockerfile: docker/Dockerfile.minimal
+    image: royisme/codebase-rag:minimal
+    container_name: codebase-rag-mcp
     ports:
-      - "8000:8000"
+      - "${APP_PORT:-8000}:8000"
     environment:
-      # Neo4j Configuration
+      - APP_NAME=${APP_NAME:-Code Graph Knowledge System}
+      - DEBUG=${DEBUG:-false}
+      - HOST=0.0.0.0
+      - PORT=8000
+      - DEPLOYMENT_MODE=minimal
+      - ENABLE_KNOWLEDGE_RAG=false
+      - ENABLE_AUTO_EXTRACTION=false
+      - ENABLE_MEMORY_SEARCH=false
       - NEO4J_URI=bolt://neo4j:7687
-      - NEO4J_USER=neo4j
-      - NEO4J_PASSWORD=password123
-      - NEO4J_DATABASE=neo4j
-
-      # LLM Provider (ollama, openai, gemini, openrouter)
-      - LLM_PROVIDER=ollama
-      - EMBEDDING_PROVIDER=ollama
-
-      # Ollama Configuration (if using ollama)
-      - OLLAMA_BASE_URL=http://ollama:11434
-      - OLLAMA_MODEL=llama3.2
-      - OLLAMA_EMBEDDING_MODEL=nomic-embed-text
-
-      # OpenAI Configuration (if using openai)
-      # - OPENAI_API_KEY=your-key-here
-      # - OPENAI_MODEL=gpt-4
-      # - OPENAI_EMBEDDING_MODEL=text-embedding-3-small
-
-      # Gemini Configuration (if using gemini)
-      # - GOOGLE_API_KEY=your-key-here
-      # - GEMINI_MODEL=gemini-pro
-      # - GEMINI_EMBEDDING_MODEL=models/embedding-001
-
-      # Application Configuration
-      - APP_NAME=Code Graph Knowledge System
-      - APP_VERSION=0.5.0
-      - LOG_LEVEL=INFO
-      - ENABLE_MONITORING=true
-
-      # Timeouts
-      - CONNECTION_TIMEOUT=30
-      - OPERATION_TIMEOUT=300
-      - LARGE_DOCUMENT_TIMEOUT=600
-
-      # Chunking
-      - CHUNK_SIZE=512
-      - CHUNK_OVERLAP=50
-
-      # Search
-      - TOP_K=10
-      - VECTOR_DIMENSION=384
+      - NEO4J_USER=${NEO4J_USER:-neo4j}
+      - NEO4J_PASSWORD=${NEO4J_PASSWORD:-password}
+      - NEO4J_DATABASE=${NEO4J_DATABASE:-neo4j}
+      - CONNECTION_TIMEOUT=${CONNECTION_TIMEOUT:-30}
+      - OPERATION_TIMEOUT=${OPERATION_TIMEOUT:-120}
     volumes:
+      - ${REPOS_PATH:-./repos}:/repos
       - ./data:/data
-      - /tmp/repos:/tmp/repos
       - ./logs:/app/logs
     depends_on:
       neo4j:
@@ -113,7 +76,7 @@ services:
       retries: 3
       start_period: 40s
     networks:
-      - codebase-rag-network
+      - codebase-rag
     restart: unless-stopped
 
 volumes:
@@ -125,9 +88,7 @@ volumes:
     driver: local
   neo4j_plugins:
     driver: local
-  ollama_data:
-    driver: local
 
 networks:
-  codebase-rag-network:
+  codebase-rag:
     driver: bridge
diff --git a/docker/.env.template/.env.full b/docker/.env.template/.env.full
new file mode 100644
index 0000000..992c768
--- /dev/null
+++ b/docker/.env.template/.env.full
@@ -0,0 +1,79 @@
+# Full Deployment Configuration
+# All Features - LLM + Embedding required
+
+# Application
+APP_NAME=Code Graph Knowledge System
+DEBUG=false
+APP_PORT=8000
+ENABLE_MONITORING=true
+
+# Neo4j Configuration
+NEO4J_HTTP_PORT=7474
+NEO4J_BOLT_PORT=7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_secure_password_here
+NEO4J_DATABASE=neo4j
+
+# Repository path
+REPOS_PATH=./repos
+
+# LLM Provider (required for full features)
+# Options: ollama, openai, gemini, openrouter
+LLM_PROVIDER=ollama
+EMBEDDING_PROVIDER=ollama
+
+# ============================================
+# Ollama Configuration (local or docker)
+# ============================================
+# Use this if running Ollama in Docker (with-ollama profile):
+OLLAMA_BASE_URL=http://ollama:11434
+# Use this if running Ollama on your host machine (outside Docker):
+# OLLAMA_BASE_URL=http://host.docker.internal:11434
+
+OLLAMA_PORT=11434
+OLLAMA_MODEL=llama3.2
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# ============================================
+# OpenAI Configuration (alternative; note: docker-compose.full.yml does not forward OPENAI_BASE_URL to the container)
+# ============================================
+# LLM_PROVIDER=openai
+# EMBEDDING_PROVIDER=openai
+# OPENAI_API_KEY=your_openai_api_key_here
+# OPENAI_MODEL=gpt-4
+# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+# OPENAI_BASE_URL=https://api.openai.com/v1
+
+# ============================================
+# Google Gemini Configuration (alternative)
+# ============================================
+# LLM_PROVIDER=gemini
+# EMBEDDING_PROVIDER=gemini
+# GOOGLE_API_KEY=your_google_api_key_here
+# GEMINI_MODEL=gemini-pro
+# GEMINI_EMBEDDING_MODEL=models/embedding-001
+
+# ============================================
+# OpenRouter Configuration (alternative; note: docker-compose.full.yml does not forward OPENROUTER_MAX_TOKENS to the container)
+# ============================================
+# LLM_PROVIDER=openrouter
+# EMBEDDING_PROVIDER=openrouter
+# OPENROUTER_API_KEY=your_openrouter_api_key_here
+# OPENROUTER_MODEL=openai/gpt-3.5-turbo
+# OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
+# OPENROUTER_MAX_TOKENS=2048
+
+# Model Parameters
+TEMPERATURE=0.1
+MAX_TOKENS=2048
+
+# RAG Settings
+CHUNK_SIZE=512
+CHUNK_OVERLAP=50
+TOP_K=10
+VECTOR_DIMENSION=384
+
+# Timeouts (in seconds)
+CONNECTION_TIMEOUT=30
+OPERATION_TIMEOUT=300
+LARGE_DOCUMENT_TIMEOUT=600
diff --git a/docker/.env.template/.env.minimal b/docker/.env.template/.env.minimal
new file mode 100644
index 0000000..0ee172e
--- /dev/null
+++ b/docker/.env.template/.env.minimal
@@ -0,0 +1,23 @@
+# Minimal Deployment Configuration
+# Code Graph Only - No LLM or Embedding required
+
+# Application
+APP_NAME=Code Graph Knowledge System
+DEBUG=false
+APP_PORT=8000
+
+# Neo4j Configuration
+NEO4J_HTTP_PORT=7474
+NEO4J_BOLT_PORT=7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_secure_password_here
+NEO4J_DATABASE=neo4j
+
+# Repository path (local path to mount)
+REPOS_PATH=./repos
+
+# Timeouts (in seconds)
+CONNECTION_TIMEOUT=30
+OPERATION_TIMEOUT=120
+
+# Note: Minimal mode does not require LLM or Embedding configuration
diff --git a/docker/.env.template/.env.standard b/docker/.env.template/.env.standard
new file mode 100644
index 0000000..4d2546d
--- /dev/null
+++ b/docker/.env.template/.env.standard
@@ -0,0 +1,42 @@
+# Standard Deployment Configuration
+# Code Graph + Memory Store - Embedding required
+
+# Application
+APP_NAME=Code Graph Knowledge System
+DEBUG=false
+APP_PORT=8000
+
+# Neo4j Configuration
+NEO4J_HTTP_PORT=7474
+NEO4J_BOLT_PORT=7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_secure_password_here
+NEO4J_DATABASE=neo4j
+
+# Repository path
+REPOS_PATH=./repos
+
+# Embedding Provider (required for memory vector search)
+# Options: ollama, openai, gemini, huggingface
+EMBEDDING_PROVIDER=ollama
+
+# Ollama Configuration (if using local Ollama on host)
+OLLAMA_BASE_URL=http://host.docker.internal:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# OpenAI Configuration (alternative)
+# EMBEDDING_PROVIDER=openai
+# OPENAI_API_KEY=your_openai_api_key_here
+# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+
+# Gemini Configuration (alternative; note: docker-compose.standard.yml does not forward these variables to the container)
+# EMBEDDING_PROVIDER=gemini
+# GOOGLE_API_KEY=your_google_api_key_here
+# GEMINI_EMBEDDING_MODEL=models/embedding-001
+
+# Vector Settings (presumably must match the embedding model's output dimension - verify when switching models)
+VECTOR_DIMENSION=384
+
+# Timeouts
+CONNECTION_TIMEOUT=30
+OPERATION_TIMEOUT=120
diff --git a/docker/Dockerfile.base b/docker/Dockerfile.base
new file mode 100644
index 0000000..262d191
--- /dev/null
+++ b/docker/Dockerfile.base
@@ -0,0 +1,71 @@
+# Base Docker image for Code Graph Knowledge System
+# Multi-stage build for optimized image size
+
+FROM python:3.13-slim as builder
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+# Install uv for faster dependency management
+RUN pip install uv
+
+# Set work directory
+WORKDIR /app
+
+# Copy dependency files
+COPY pyproject.toml ./
+COPY README.md ./
+
+# Install Python dependencies
+RUN uv pip install --system -e .
+
+# ============================================
+# Final stage
+# ============================================
+FROM python:3.13-slim
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PATH="/app:${PATH}"
+
+# Install runtime dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+# Create non-root user
+RUN useradd -m -u 1000 appuser && \
+    mkdir -p /app /data /repos && \
+    chown -R appuser:appuser /app /data /repos
+
+# Set work directory
+WORKDIR /app
+
+# Copy Python packages from builder
+COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/python3.13/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+# Copy application code
+COPY --chown=appuser:appuser . .
+
+# Switch to non-root user
+USER appuser
+
+# Expose port
+EXPOSE 8000
+
+# Health check
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD curl -f http://localhost:8000/api/v1/health || exit 1
diff --git a/docker/Dockerfile.full b/docker/Dockerfile.full
new file mode 100644
index 0000000..04d8962
--- /dev/null
+++ b/docker/Dockerfile.full
@@ -0,0 +1,63 @@
+# Full Docker image - All features (LLM + Embedding required)
+FROM python:3.13-slim as builder
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1
+
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN pip install uv
+
+WORKDIR /app
+
+# Copy source files needed for package installation
+COPY pyproject.toml ./
+COPY api ./api
+COPY core ./core
+COPY services ./services
+COPY monitoring ./monitoring
+COPY mcp_tools ./mcp_tools
+COPY start.py start_mcp.py mcp_server.py config.py main.py ./
+
+# Install the package and its dependencies
+RUN uv pip install --system .
+
+# ============================================
+# Final stage
+# ============================================
+FROM python:3.13-slim
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    DEPLOYMENT_MODE=full
+
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN useradd -m -u 1000 appuser && \
+    mkdir -p /app /data /repos && \
+    chown -R appuser:appuser /app /data /repos
+
+WORKDIR /app
+
+COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/python3.13/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+COPY --chown=appuser:appuser . .
+
+USER appuser
+
+EXPOSE 8000
+
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD curl -f http://localhost:8000/api/v1/health || exit 1
+
+# Start in full mode
+CMD ["python", "start_mcp.py", "--mode=full"]
diff --git a/docker/Dockerfile.minimal b/docker/Dockerfile.minimal
new file mode 100644
index 0000000..bc39713
--- /dev/null
+++ b/docker/Dockerfile.minimal
@@ -0,0 +1,64 @@
+# Minimal Docker image - Code Graph only (No LLM required)
+FROM python:3.13-slim as builder
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1
+
+# Install system dependencies
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN pip install uv
+
+WORKDIR /app
+
+# Copy source files needed for package installation
+COPY pyproject.toml ./
+COPY api ./api
+COPY core ./core
+COPY services ./services
+COPY monitoring ./monitoring
+COPY mcp_tools ./mcp_tools
+COPY start.py start_mcp.py mcp_server.py config.py main.py ./
+
+# Install the package and its dependencies
+RUN uv pip install --system .
+
+# ============================================
+# Final stage
+# ============================================
+FROM python:3.13-slim
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    DEPLOYMENT_MODE=minimal
+
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN useradd -m -u 1000 appuser && \
+    mkdir -p /app /data /repos && \
+    chown -R appuser:appuser /app /data /repos
+
+WORKDIR /app
+
+COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/python3.13/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+COPY --chown=appuser:appuser . .
+
+USER appuser
+
+EXPOSE 8000
+
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD curl -f http://localhost:8000/api/v1/health || exit 1
+
+# Start in minimal mode
+CMD ["python", "start_mcp.py", "--mode=minimal"]
diff --git a/docker/Dockerfile.standard b/docker/Dockerfile.standard
new file mode 100644
index 0000000..d7e6ba7
--- /dev/null
+++ b/docker/Dockerfile.standard
@@ -0,0 +1,63 @@
+# Standard Docker image - Code Graph + Memory (Embedding required)
+FROM python:3.13-slim as builder
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    PIP_NO_CACHE_DIR=1
+
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    build-essential \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN pip install uv
+
+WORKDIR /app
+
+# Copy source files needed for package installation
+COPY pyproject.toml ./
+COPY api ./api
+COPY core ./core
+COPY services ./services
+COPY monitoring ./monitoring
+COPY mcp_tools ./mcp_tools
+COPY start.py start_mcp.py mcp_server.py config.py main.py ./
+
+# Install the package and its dependencies
+RUN uv pip install --system .
+
+# ============================================
+# Final stage
+# ============================================
+FROM python:3.13-slim
+
+ENV PYTHONUNBUFFERED=1 \
+    PYTHONDONTWRITEBYTECODE=1 \
+    DEPLOYMENT_MODE=standard
+
+RUN apt-get update && apt-get install -y \
+    git \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN useradd -m -u 1000 appuser && \
+    mkdir -p /app /data /repos && \
+    chown -R appuser:appuser /app /data /repos
+
+WORKDIR /app
+
+COPY --from=builder /usr/local/lib/python3.13/site-packages /usr/local/lib/python3.13/site-packages
+COPY --from=builder /usr/local/bin /usr/local/bin
+
+COPY --chown=appuser:appuser . .
+
+USER appuser
+
+EXPOSE 8000
+
+HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
+    CMD curl -f http://localhost:8000/api/v1/health || exit 1
+
+# Start in standard mode
+CMD ["python", "start_mcp.py", "--mode=standard"]
diff --git a/docker/docker-compose.full.yml b/docker/docker-compose.full.yml
new file mode 100644
index 0000000..ceeb807
--- /dev/null
+++ b/docker/docker-compose.full.yml
@@ -0,0 +1,160 @@
+version: '3.8'
+
+# Full deployment - All features
+# LLM + Embedding required
+
+services:
+  # Neo4j Database
+  neo4j:
+    image: neo4j:5.15-community
+    container_name: codebase-rag-neo4j-full
+    ports:
+      - "${NEO4J_HTTP_PORT:-7474}:7474"
+      - "${NEO4J_BOLT_PORT:-7687}:7687"
+    environment:
+      - NEO4J_AUTH=${NEO4J_USER:-neo4j}/${NEO4J_PASSWORD:-password}
+      - NEO4J_PLUGINS=["apoc"]
+      - NEO4J_dbms_security_procedures_unrestricted=apoc.*
+      - NEO4J_dbms_security_procedures_allowlist=apoc.*
+      - NEO4J_dbms_memory_heap_initial__size=512m
+      - NEO4J_dbms_memory_heap_max__size=2G
+      - NEO4J_dbms_memory_pagecache_size=512m
+    volumes:
+      - neo4j_full_data:/data
+      - neo4j_full_logs:/logs
+      - neo4j_full_import:/var/lib/neo4j/import
+      - neo4j_full_plugins:/plugins
+    healthcheck:
+      test: ["CMD-SHELL", "cypher-shell -u ${NEO4J_USER:-neo4j} -p ${NEO4J_PASSWORD:-password} 'RETURN 1' || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+    networks:
+      - codebase-rag-full
+    restart: unless-stopped
+
+  # Ollama (optional, for local LLM)
+  ollama:
+    image: ollama/ollama:latest
+    container_name: codebase-rag-ollama-full
+    ports:
+      - "${OLLAMA_PORT:-11434}:11434"
+    volumes:
+      - ollama_full_data:/root/.ollama
+    environment:
+      - OLLAMA_HOST=0.0.0.0
+    networks:
+      - codebase-rag-full
+    restart: unless-stopped
+    profiles:
+      - with-ollama
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: all
+              capabilities: [gpu]
+
+  # MCP Server - Full mode
+  mcp:
+    build:
+      context: ..
+      dockerfile: docker/Dockerfile.full
+    image: royisme/codebase-rag:full
+    container_name: codebase-rag-mcp-full
+    ports:
+      - "${APP_PORT:-8000}:8000"
+    environment:
+      # Application
+      - APP_NAME=${APP_NAME:-Code Graph Knowledge System}
+      - DEBUG=${DEBUG:-false}
+      - HOST=0.0.0.0
+      - PORT=8000
+
+      # Deployment mode
+      - DEPLOYMENT_MODE=full
+      - ENABLE_KNOWLEDGE_RAG=true
+      - ENABLE_AUTO_EXTRACTION=true
+      - ENABLE_MEMORY_SEARCH=true
+      - ENABLE_MONITORING=${ENABLE_MONITORING:-true}
+
+      # Neo4j Configuration
+      - NEO4J_URI=bolt://neo4j:7687
+      - NEO4J_USER=${NEO4J_USER:-neo4j}
+      - NEO4J_PASSWORD=${NEO4J_PASSWORD:-password}
+      - NEO4J_DATABASE=${NEO4J_DATABASE:-neo4j}
+
+      # LLM Provider
+      - LLM_PROVIDER=${LLM_PROVIDER:-ollama}
+      - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER:-ollama}
+
+      # Ollama Configuration (for with-ollama profile)
+      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://ollama:11434}
+      - OLLAMA_MODEL=${OLLAMA_MODEL:-llama3.2}
+      - OLLAMA_EMBEDDING_MODEL=${OLLAMA_EMBEDDING_MODEL:-nomic-embed-text}
+
+      # OpenAI Configuration (alternative)
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - OPENAI_MODEL=${OPENAI_MODEL:-gpt-4}
+      - OPENAI_EMBEDDING_MODEL=${OPENAI_EMBEDDING_MODEL:-text-embedding-3-small}
+
+      # Gemini Configuration (alternative)
+      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-}
+      - GEMINI_MODEL=${GEMINI_MODEL:-gemini-pro}
+      - GEMINI_EMBEDDING_MODEL=${GEMINI_EMBEDDING_MODEL:-models/embedding-001}
+
+      # OpenRouter Configuration (alternative)
+      - OPENROUTER_API_KEY=${OPENROUTER_API_KEY:-}
+      - OPENROUTER_MODEL=${OPENROUTER_MODEL:-openai/gpt-3.5-turbo}
+      - OPENROUTER_BASE_URL=${OPENROUTER_BASE_URL:-https://openrouter.ai/api/v1}
+
+      # Model Parameters
+      - TEMPERATURE=${TEMPERATURE:-0.1}
+      - MAX_TOKENS=${MAX_TOKENS:-2048}
+
+      # RAG Settings
+      - CHUNK_SIZE=${CHUNK_SIZE:-512}
+      - CHUNK_OVERLAP=${CHUNK_OVERLAP:-50}
+      - TOP_K=${TOP_K:-10}
+      - VECTOR_DIMENSION=${VECTOR_DIMENSION:-384}
+
+      # Timeouts
+      - CONNECTION_TIMEOUT=${CONNECTION_TIMEOUT:-30}
+      - OPERATION_TIMEOUT=${OPERATION_TIMEOUT:-300}
+      - LARGE_DOCUMENT_TIMEOUT=${LARGE_DOCUMENT_TIMEOUT:-600}
+    volumes:
+      - ${REPOS_PATH:-./repos}:/repos
+      - ./data:/data
+      - ./logs:/app/logs
+    depends_on:
+      neo4j:
+        condition: service_healthy
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+    networks:
+      - codebase-rag-full
+    restart: unless-stopped
+
+volumes:
+  neo4j_full_data:
+    driver: local
+  neo4j_full_logs:
+    driver: local
+  neo4j_full_import:
+    driver: local
+  neo4j_full_plugins:
+    driver: local
+  ollama_full_data:
+    driver: local
+
+networks:
+  codebase-rag-full:
+    driver: bridge
diff --git a/docker/docker-compose.minimal.yml b/docker/docker-compose.minimal.yml
new file mode 100644
index 0000000..90df7a1
--- /dev/null
+++ b/docker/docker-compose.minimal.yml
@@ -0,0 +1,97 @@
+version: '3.8'
+
+# Minimal deployment - Code Graph only
+# No LLM or Embedding required
+
+services:
+  # Neo4j Database
+  neo4j:
+    image: neo4j:5.15-community
+    container_name: codebase-rag-neo4j-minimal
+    ports:
+      - "${NEO4J_HTTP_PORT:-7474}:7474"  # HTTP
+      - "${NEO4J_BOLT_PORT:-7687}:7687"  # Bolt
+    environment:
+      - NEO4J_AUTH=${NEO4J_USER:-neo4j}/${NEO4J_PASSWORD:-password}
+      - NEO4J_PLUGINS=["apoc"]
+      - NEO4J_dbms_security_procedures_unrestricted=apoc.*
+      - NEO4J_dbms_security_procedures_allowlist=apoc.*
+      - NEO4J_dbms_memory_heap_initial__size=512m
+      - NEO4J_dbms_memory_heap_max__size=2G
+      - NEO4J_dbms_memory_pagecache_size=512m
+    volumes:
+      - neo4j_minimal_data:/data
+      - neo4j_minimal_logs:/logs
+      - neo4j_minimal_import:/var/lib/neo4j/import
+      - neo4j_minimal_plugins:/plugins
+    healthcheck:
+      test: ["CMD-SHELL", "cypher-shell -u ${NEO4J_USER:-neo4j} -p ${NEO4J_PASSWORD:-password} 'RETURN 1' || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+    networks:
+      - codebase-rag-minimal
+    restart: unless-stopped
+
+  # MCP Server - Minimal mode
+  mcp:
+    build:
+      context: ..
+      dockerfile: docker/Dockerfile.minimal
+    image: royisme/codebase-rag:minimal
+    container_name: codebase-rag-mcp-minimal
+    ports:
+      - "${APP_PORT:-8000}:8000"
+    environment:
+      # Application
+      - APP_NAME=${APP_NAME:-Code Graph Knowledge System}
+      - DEBUG=${DEBUG:-false}
+      - HOST=0.0.0.0
+      - PORT=8000
+
+      # Deployment mode
+      - DEPLOYMENT_MODE=minimal
+      - ENABLE_KNOWLEDGE_RAG=false
+      - ENABLE_AUTO_EXTRACTION=false
+      - ENABLE_MEMORY_SEARCH=false
+
+      # Neo4j Configuration
+      - NEO4J_URI=bolt://neo4j:7687
+      - NEO4J_USER=${NEO4J_USER:-neo4j}
+      - NEO4J_PASSWORD=${NEO4J_PASSWORD:-password}
+      - NEO4J_DATABASE=${NEO4J_DATABASE:-neo4j}
+
+      # Timeouts
+      - CONNECTION_TIMEOUT=${CONNECTION_TIMEOUT:-30}
+      - OPERATION_TIMEOUT=${OPERATION_TIMEOUT:-120}
+    volumes:
+      - ${REPOS_PATH:-./repos}:/repos  # Mount repository path
+      - ./data:/data
+      - ./logs:/app/logs
+    depends_on:
+      neo4j:
+        condition: service_healthy
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+    networks:
+      - codebase-rag-minimal
+    restart: unless-stopped
+
+volumes:
+  neo4j_minimal_data:
+    driver: local
+  neo4j_minimal_logs:
+    driver: local
+  neo4j_minimal_import:
+    driver: local
+  neo4j_minimal_plugins:
+    driver: local
+
+networks:
+  codebase-rag-minimal:
+    driver: bridge
diff --git a/docker/docker-compose.standard.yml b/docker/docker-compose.standard.yml
new file mode 100644
index 0000000..9cebbd0
--- /dev/null
+++ b/docker/docker-compose.standard.yml
@@ -0,0 +1,111 @@
+version: '3.8'
+
+# Standard deployment - Code Graph + Memory Store
+# Embedding required, LLM optional
+
+services:
+  # Neo4j Database
+  neo4j:
+    image: neo4j:5.15-community
+    container_name: codebase-rag-neo4j-standard
+    ports:
+      - "${NEO4J_HTTP_PORT:-7474}:7474"
+      - "${NEO4J_BOLT_PORT:-7687}:7687"
+    environment:
+      - NEO4J_AUTH=${NEO4J_USER:-neo4j}/${NEO4J_PASSWORD:-password}
+      - NEO4J_PLUGINS=["apoc"]
+      - NEO4J_dbms_security_procedures_unrestricted=apoc.*
+      - NEO4J_dbms_security_procedures_allowlist=apoc.*
+      - NEO4J_dbms_memory_heap_initial__size=512m
+      - NEO4J_dbms_memory_heap_max__size=2G
+      - NEO4J_dbms_memory_pagecache_size=512m
+    volumes:
+      - neo4j_standard_data:/data
+      - neo4j_standard_logs:/logs
+      - neo4j_standard_import:/var/lib/neo4j/import
+      - neo4j_standard_plugins:/plugins
+    healthcheck:
+      test: ["CMD-SHELL", "cypher-shell -u ${NEO4J_USER:-neo4j} -p ${NEO4J_PASSWORD:-password} 'RETURN 1' || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 30s
+    networks:
+      - codebase-rag-standard
+    restart: unless-stopped
+
+  # MCP Server - Standard mode
+  mcp:
+    build:
+      context: ..
+      dockerfile: docker/Dockerfile.standard
+    image: royisme/codebase-rag:standard
+    container_name: codebase-rag-mcp-standard
+    ports:
+      - "${APP_PORT:-8000}:8000"
+    environment:
+      # Application
+      - APP_NAME=${APP_NAME:-Code Graph Knowledge System}
+      - DEBUG=${DEBUG:-false}
+      - HOST=0.0.0.0
+      - PORT=8000
+
+      # Deployment mode
+      - DEPLOYMENT_MODE=standard
+      - ENABLE_KNOWLEDGE_RAG=false
+      - ENABLE_AUTO_EXTRACTION=false
+      - ENABLE_MEMORY_SEARCH=true
+
+      # Neo4j Configuration
+      - NEO4J_URI=bolt://neo4j:7687
+      - NEO4J_USER=${NEO4J_USER:-neo4j}
+      - NEO4J_PASSWORD=${NEO4J_PASSWORD:-password}
+      - NEO4J_DATABASE=${NEO4J_DATABASE:-neo4j}
+
+      # Embedding Provider (required for memory search)
+      - EMBEDDING_PROVIDER=${EMBEDDING_PROVIDER:-ollama}
+      - OLLAMA_BASE_URL=${OLLAMA_BASE_URL:-http://host.docker.internal:11434}
+      - OLLAMA_EMBEDDING_MODEL=${OLLAMA_EMBEDDING_MODEL:-nomic-embed-text}
+
+      # OpenAI Embedding (alternative)
+      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
+      - OPENAI_EMBEDDING_MODEL=${OPENAI_EMBEDDING_MODEL:-text-embedding-3-small}
+
+      # Timeouts
+      - CONNECTION_TIMEOUT=${CONNECTION_TIMEOUT:-30}
+      - OPERATION_TIMEOUT=${OPERATION_TIMEOUT:-120}
+
+      # Vector settings
+      - VECTOR_DIMENSION=${VECTOR_DIMENSION:-384}
+    volumes:
+      - ${REPOS_PATH:-./repos}:/repos
+      - ./data:/data
+      - ./logs:/app/logs
+    depends_on:
+      neo4j:
+        condition: service_healthy
+    extra_hosts:
+      - "host.docker.internal:host-gateway"  # Access host services (e.g., Ollama on host)
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/health"]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+      start_period: 40s
+    networks:
+      - codebase-rag-standard
+    restart: unless-stopped
+
+volumes:
+  neo4j_standard_data:
+    driver: local
+  neo4j_standard_logs:
+    driver: local
+  neo4j_standard_import:
+    driver: local
+  neo4j_standard_plugins:
+    driver: local
+
+networks:
+  codebase-rag-standard:
+    driver: bridge
diff --git a/docs/CNAME b/docs/CNAME
new file mode 100644
index 0000000..0e88ad8
--- /dev/null
+++ b/docs/CNAME
@@ -0,0 +1 @@
+code-graph.vantagecraft.dev
diff --git a/docs/api/mcp-tools.md b/docs/api/mcp-tools.md
new file mode 100644
index 0000000..dac4011
--- /dev/null
+++ b/docs/api/mcp-tools.md
@@ -0,0 +1,1457 @@
+# MCP Tools Reference
+
+Complete reference for all 30 Model Context Protocol (MCP) tools available in the Code Graph Knowledge System.
+
+- **MCP Server Version**: 2.0.0
+- **MCP SDK Version**: `mcp>=1.1.0`
+- **Total Tools**: 30
+
+## Overview
+
+The MCP server gives AI assistants (such as Claude Desktop or VS Code with MCP) direct access to the Code Graph Knowledge System through the official Model Context Protocol SDK.
+
+**Key Features**:
+- 30 specialized tools across 6 categories
+- Session management for tracking context
+- Streaming support for long-running operations
+- Multi-transport capability (stdio, SSE, WebSocket)
+- Standard MCP protocol compliance
+
+**Architecture**:
+- Main server: `mcp_server.py` (310 lines - modular design)
+- Tool handlers: `mcp_tools/` package (organized by category)
+- Official SDK: `mcp>=1.1.0`
+
+---
+
+## Installation & Setup
+
+### Start MCP Server
+
+```bash
+# Using start script
+python start_mcp.py
+
+# Using uv (recommended)
+uv run mcp_client
+```
+
+### Claude Desktop Configuration
+
+Add to `~/Library/Application Support/Claude/claude_desktop_config.json` (macOS; on Windows use `%APPDATA%\Claude\claude_desktop_config.json`):
+
+```json
+{
+  "mcpServers": {
+    "code-graph": {
+      "command": "python",
+      "args": ["/path/to/codebase-rag/start_mcp.py"],
+      "env": {
+        "NEO4J_URI": "bolt://localhost:7687",
+        "NEO4J_USER": "neo4j",
+        "NEO4J_PASSWORD": "your-password"
+      }
+    }
+  }
+}
+```
+
+### VS Code MCP Extension
+
+Configure in `.vscode/mcp.json`:
+
+```json
+{
+  "servers": {
+    "code-graph": {
+      "command": "python", "args": ["/path/to/codebase-rag/start_mcp.py"]
+    }
+  }
+}
+```
+
+---
+
+## Tool Categories
+
+| Category | Tools | Description |
+|----------|-------|-------------|
+| **Knowledge Base** | 5 | Query and manage knowledge graph |
+| **Code Graph** | 4 | Repository analysis and context |
+| **Memory Store** | 7 | Project knowledge persistence |
+| **Memory Extraction** | 5 | Automatic memory extraction (v0.7) |
+| **Task Management** | 6 | Async task monitoring |
+| **System** | 3 | Schema and statistics |
+
+---
+
+## Knowledge Base Tools (5)
+
+Tools for querying and managing the knowledge graph.
+
+### 1. query_knowledge
+
+Query the knowledge base using Neo4j GraphRAG.
+
+**Input Parameters**:
+```typescript
+{
+  question: string;        // Required: Question to ask
+  mode?: "hybrid" | "graph_only" | "vector_only";  // Default: "hybrid"
+}
+```
+
+**Query Modes**:
+- `hybrid` (recommended): Graph traversal + vector search
+- `graph_only`: Use only graph relationships
+- `vector_only`: Use only vector similarity
+
+**Example**:
+```json
+{
+  "question": "How does authentication work in this system?",
+  "mode": "hybrid"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "answer": "The system uses JWT-based authentication with refresh tokens...",
+  "source_nodes": [
+    {
+      "text": "JWT implementation details...",
+      "score": 0.92,
+      "metadata": {"title": "Auth Guide"}
+    }
+  ],
+  "mode": "hybrid"
+}
+```
+
+---
+
+### 2. search_similar_nodes
+
+Search for similar nodes using vector similarity.
+
+**Input Parameters**:
+```typescript
+{
+  query: string;     // Required: Search query
+  top_k?: number;    // Default: 10, Range: 1-50
+}
+```
+
+**Example**:
+```json
+{
+  "query": "database configuration",
+  "top_k": 10
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "results": [
+    {
+      "text": "Database connection settings...",
+      "score": 0.89,
+      "metadata": {"title": "Config Guide"}
+    }
+  ]
+}
+```
+
+---
+
+### 3. add_document
+
+Add a document to the knowledge base.
+
+**Input Parameters**:
+```typescript
+{
+  content: string;       // Required: Document content
+  title?: string;        // Optional: Document title
+  metadata?: object;     // Optional: Additional metadata
+}
+```
+
+**Size Handling**:
+- **Small documents (<10KB)**: Processed synchronously
+- **Large documents (>=10KB)**: Processed asynchronously with task_id
+
+**Example**:
+```json
+{
+  "content": "This is the document content with important information...",
+  "title": "Architecture Guide",
+  "metadata": {
+    "author": "Team",
+    "tags": ["architecture", "design"]
+  }
+}
+```
+
+**Response (Small)**:
+```json
+{
+  "success": true,
+  "message": "Document added successfully",
+  "document_id": "doc-abc123",
+  "chunks_created": 5
+}
+```
+
+**Response (Large)**:
+```json
+{
+  "success": true,
+  "task_id": "task-xyz789",
+  "message": "Document processing queued",
+  "processing_async": true
+}
+```
+
+---
+
+### 4. add_file
+
+Add a file to the knowledge base.
+
+**Input Parameters**:
+```typescript
+{
+  file_path: string;  // Required: Absolute path to file
+}
+```
+
+**Supported file types**: Text files, code files, markdown, etc.
+
+**Example**:
+```json
+{
+  "file_path": "/absolute/path/to/document.md"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "File added successfully",
+  "file_path": "/absolute/path/to/document.md",
+  "chunks_created": 8
+}
+```
+
+---
+
+### 5. add_directory
+
+Add all files from a directory to the knowledge base.
+
+**Input Parameters**:
+```typescript
+{
+  directory_path: string;   // Required: Absolute directory path
+  recursive?: boolean;      // Default: true
+}
+```
+
+**Example**:
+```json
+{
+  "directory_path": "/absolute/path/to/docs",
+  "recursive": true
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Directory processed",
+  "files_processed": 23,
+  "total_chunks": 156
+}
+```
+
+---
+
+## Code Graph Tools (4)
+
+Tools for repository analysis and code understanding.
+
+### 1. code_graph_ingest_repo
+
+Ingest a code repository into the graph database.
+
+**Input Parameters**:
+```typescript
+{
+  local_path: string;              // Required: Local repository path
+  repo_url?: string;               // Optional: Repository URL
+  mode?: "full" | "incremental";  // Default: "incremental"
+}
+```
+
+**Ingestion Modes**:
+- `full`: Complete re-ingestion (slow but thorough)
+- `incremental`: Only changed files (60x faster)
+
+**Extracts**:
+- File nodes
+- Symbol nodes (functions, classes)
+- IMPORTS relationships
+- Code structure
+
+**Example**:
+```json
+{
+  "local_path": "/path/to/repository",
+  "mode": "incremental"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "task_id": "ing-20250115-103045-abc12345",
+  "status": "done",
+  "message": "Successfully ingested 125 files",
+  "files_processed": 125,
+  "mode": "incremental"
+}
+```
+
+---
+
+### 2. code_graph_related
+
+Find files related to a query using fulltext search.
+
+**Input Parameters**:
+```typescript
+{
+  query: string;      // Required: Search query
+  repo_id: string;    // Required: Repository identifier
+  limit?: number;     // Default: 30, Range: 1-100
+}
+```
+
+**Example**:
+```json
+{
+  "query": "authentication jwt token",
+  "repo_id": "myproject",
+  "limit": 30
+}
+```
+
+**Response**:
+```json
+{
+  "nodes": [
+    {
+      "type": "file",
+      "ref": "ref://file/src/auth/jwt.py",
+      "path": "src/auth/jwt.py",
+      "lang": "python",
+      "score": 0.92,
+      "summary": "JWT authentication implementation with token generation"
+    }
+  ],
+  "query": "authentication jwt token",
+  "repo_id": "myproject"
+}
+```
+
+---
+
+### 3. code_graph_impact
+
+Analyze impact of changes to a file (reverse dependencies).
+
+**Input Parameters**:
+```typescript
+{
+  repo_id: string;      // Required: Repository identifier
+  file_path: string;    // Required: File path to analyze
+  depth?: number;       // Default: 2, Range: 1-5
+}
+```
+
+**Use Cases**:
+- Understanding blast radius of changes
+- Finding code that needs updating
+- Identifying critical files with many dependents
+
+**Example**:
+```json
+{
+  "repo_id": "myproject",
+  "file_path": "src/auth/jwt.py",
+  "depth": 2
+}
+```
+
+**Response**:
+```json
+{
+  "nodes": [
+    {
+      "type": "file",
+      "path": "src/api/auth_routes.py",
+      "lang": "python",
+      "repoId": "myproject",
+      "relationship": "IMPORTS",
+      "depth": 1,
+      "score": 0.85,
+      "ref": "ref://file/src/api/auth_routes.py",
+      "summary": "Auth API routes (imports jwt.py)"
+    }
+  ],
+  "file": "src/auth/jwt.py",
+  "repo_id": "myproject",
+  "depth": 2
+}
+```
+
+---
+
+### 4. context_pack
+
+Build a context pack for AI agents within a token budget.
+
+**Input Parameters**:
+```typescript
+{
+  repo_id: string;                             // Required: Repository ID
+  stage?: "plan" | "review" | "implement";    // Default: "implement"
+  budget?: number;                             // Default: 1500, Range: 500-10000
+  keywords?: string;                           // Optional: Focus keywords
+  focus?: string;                              // Optional: Focus file paths
+}
+```
+
+**Stages**:
+- `plan`: Project overview and high-level architecture
+- `review`: Code review focus with detailed analysis
+- `implement`: Implementation details and code snippets
+
+**Example**:
+```json
+{
+  "repo_id": "myproject",
+  "stage": "implement",
+  "budget": 2000,
+  "keywords": "authentication, jwt, middleware"
+}
+```
+
+**Response**:
+```json
+{
+  "items": [
+    {
+      "kind": "file",
+      "title": "src/auth/jwt.py",
+      "summary": "JWT authentication with token generation and validation",
+      "ref": "ref://file/src/auth/jwt.py",
+      "extra": {"lang": "python", "score": 0.92}
+    }
+  ],
+  "budget_used": 1850,
+  "budget_limit": 2000,
+  "stage": "implement",
+  "repo_id": "myproject"
+}
+```
+
+---
+
+## Memory Store Tools (7)
+
+Tools for project knowledge persistence and management.
+
+### 1. add_memory
+
+Add a new memory to the project knowledge base.
+
+**Input Parameters**:
+```typescript
+{
+  project_id: string;                          // Required
+  memory_type: MemoryType;                     // Required
+  title: string;                               // Required (max 200 chars)
+  content: string;                             // Required
+  reason?: string;                             // Optional: Rationale
+  tags?: string[];                             // Optional: Tags
+  importance?: number;                         // Default: 0.5, Range: 0-1
+  related_refs?: string[];                     // Optional: ref:// handles
+}
+
+type MemoryType = "decision" | "preference" | "experience" | "convention" | "plan" | "note";
+```
+
+**Memory Types**:
+- `decision`: Architecture choices, tech stack selection
+- `preference`: Coding style, tool preferences
+- `experience`: Problems encountered and solutions
+- `convention`: Team rules, naming conventions
+- `plan`: Future improvements, TODOs
+- `note`: Other important information
+
+**Example**:
+```json
+{
+  "project_id": "myapp",
+  "memory_type": "decision",
+  "title": "Use JWT for authentication",
+  "content": "Decided to use JWT tokens instead of session-based auth",
+  "reason": "Need stateless authentication for mobile clients",
+  "tags": ["auth", "architecture"],
+  "importance": 0.9,
+  "related_refs": ["ref://file/src/auth/jwt.py"]
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "memory_id": "mem-abc123-def456",
+  "project_id": "myapp",
+  "message": "Memory added successfully"
+}
+```
+
+---
+
+### 2. search_memories
+
+Search project memories with filters.
+
+**Input Parameters**:
+```typescript
+{
+  project_id: string;           // Required
+  query?: string;               // Optional: Search text
+  memory_type?: MemoryType;     // Optional: Filter by type
+  tags?: string[];              // Optional: Filter by tags
+  min_importance?: number;      // Default: 0.0, Range: 0-1
+  limit?: number;               // Default: 20, Range: 1-100
+}
+```
+
+**Example**:
+```json
+{
+  "project_id": "myapp",
+  "query": "authentication security",
+  "memory_type": "decision",
+  "min_importance": 0.7,
+  "limit": 20
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "memories": [
+    {
+      "memory_id": "mem-abc123",
+      "memory_type": "decision",
+      "title": "Use JWT for authentication",
+      "content": "Decided to use JWT tokens...",
+      "importance": 0.9,
+      "tags": ["auth", "architecture"],
+      "created_at": "2025-01-15T10:30:00Z"
+    }
+  ],
+  "total": 1
+}
+```
+
+---
+
+### 3. get_memory
+
+Get a specific memory by ID with full details.
+
+**Input Parameters**:
+```typescript
+{
+  memory_id: string;  // Required
+}
+```
+
+**Example**:
+```json
+{
+  "memory_id": "mem-abc123-def456"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "memory": {
+    "memory_id": "mem-abc123-def456",
+    "project_id": "myapp",
+    "memory_type": "decision",
+    "title": "Use JWT for authentication",
+    "content": "Decided to use JWT tokens...",
+    "reason": "Need stateless authentication...",
+    "tags": ["auth", "architecture"],
+    "importance": 0.9,
+    "related_refs": ["ref://file/src/auth/jwt.py"],
+    "created_at": "2025-01-15T10:30:00Z",
+    "updated_at": "2025-01-15T10:30:00Z",
+    "is_superseded": false
+  }
+}
+```
+
+---
+
+### 4. update_memory
+
+Update an existing memory (partial update supported).
+
+**Input Parameters**:
+```typescript
+{
+  memory_id: string;        // Required
+  title?: string;           // Optional
+  content?: string;         // Optional
+  reason?: string;          // Optional
+  tags?: string[];          // Optional
+  importance?: number;      // Optional: Range: 0-1
+}
+```
+
+**Example**:
+```json
+{
+  "memory_id": "mem-abc123",
+  "importance": 0.95,
+  "tags": ["auth", "security", "critical"]
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Memory updated successfully",
+  "memory_id": "mem-abc123"
+}
+```
+
+---
+
+### 5. delete_memory
+
+Delete a memory (soft delete - data retained).
+
+**Input Parameters**:
+```typescript
+{
+  memory_id: string;  // Required
+}
+```
+
+**Example**:
+```json
+{
+  "memory_id": "mem-abc123"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Memory deleted successfully",
+  "memory_id": "mem-abc123"
+}
+```
+
+---
+
+### 6. supersede_memory
+
+Create a new memory that supersedes an old one (preserves history).
+
+**Input Parameters**:
+```typescript
+{
+  old_memory_id: string;        // Required
+  new_memory_type: MemoryType;  // Required
+  new_title: string;            // Required
+  new_content: string;          // Required
+  new_reason?: string;          // Optional
+  new_tags?: string[];          // Optional
+  new_importance?: number;      // Default: 0.5, Range: 0-1
+}
+```
+
+**Use Case**: When decisions change or better solutions are found.
+
+**Example**:
+```json
+{
+  "old_memory_id": "mem-abc123",
+  "new_memory_type": "decision",
+  "new_title": "Use PostgreSQL instead of MySQL",
+  "new_content": "Switched to PostgreSQL for better JSON support",
+  "new_reason": "Need advanced JSON querying capabilities",
+  "new_importance": 0.8
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "old_memory_id": "mem-abc123",
+  "new_memory_id": "mem-xyz789",
+  "message": "Memory superseded successfully"
+}
+```
+
+---
+
+### 7. get_project_summary
+
+Get summary of all memories for a project, organized by type.
+
+**Input Parameters**:
+```typescript
+{
+  project_id: string;  // Required
+}
+```
+
+**Example**:
+```json
+{
+  "project_id": "myapp"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "project_id": "myapp",
+  "total_memories": 42,
+  "by_type": {
+    "decision": {
+      "count": 12,
+      "top_memories": [
+        {
+          "memory_id": "mem-abc123",
+          "title": "Use JWT for authentication",
+          "importance": 0.9
+        }
+      ]
+    },
+    "preference": {"count": 8},
+    "experience": {"count": 15},
+    "convention": {"count": 5},
+    "plan": {"count": 2}
+  }
+}
+```
+
+---
+
+## Memory Extraction Tools (5)
+
+Automatic memory extraction from various sources (v0.7).
+
+### 1. extract_from_conversation
+
+Extract memories from conversation using LLM analysis.
+
+**Input Parameters**:
+```typescript
+{
+  project_id: string;                 // Required
+  conversation: Array<{               // Required
+    role: string;
+    content: string;
+  }>;
+  auto_save?: boolean;                // Default: false
+}
+```
+
+**Auto-save**: If true, automatically saves memories with confidence >= 0.7
+
+**Example**:
+```json
+{
+  "project_id": "myapp",
+  "conversation": [
+    {"role": "user", "content": "Should we use Redis or Memcached?"},
+    {"role": "assistant", "content": "Let's use Redis because it supports data persistence"}
+  ],
+  "auto_save": false
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "extracted_memories": [
+    {
+      "memory_type": "decision",
+      "title": "Use Redis for caching",
+      "content": "Decided to use Redis instead of Memcached",
+      "reason": "Redis supports data persistence",
+      "confidence": 0.85,
+      "auto_saved": false,
+      "memory_id": null
+    }
+  ],
+  "total_extracted": 1,
+  "auto_saved_count": 0
+}
+```
+
+---
+
+### 2. extract_from_git_commit
+
+Extract memories from git commit using LLM analysis.
+
+**Input Parameters**:
+```typescript
+{
+  project_id: string;           // Required
+  commit_sha: string;           // Required
+  commit_message: string;       // Required
+  changed_files: string[];      // Required
+  auto_save?: boolean;          // Default: false
+}
+```
+
+**Identifies**:
+- Feature additions → `decision`
+- Bug fixes → `experience`
+- Refactoring → `experience`/`convention`
+- Breaking changes → high importance `decision`
+
+**Example**:
+```json
+{
+  "project_id": "myapp",
+  "commit_sha": "abc123def456",
+  "commit_message": "feat: add JWT authentication\n\nImplemented JWT-based auth",
+  "changed_files": ["src/auth/jwt.py", "src/middleware/auth.py"],
+  "auto_save": true
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "extracted_memories": [
+    {
+      "memory_type": "decision",
+      "title": "Implement JWT authentication",
+      "content": "Added JWT-based authentication system",
+      "confidence": 0.92,
+      "auto_saved": true,
+      "memory_id": "mem-xyz789"
+    }
+  ]
+}
+```
+
+---
+
+### 3. extract_from_code_comments
+
+Extract memories from code comments in source file.
+
+**Input Parameters**:
+```typescript
+{
+  project_id: string;    // Required
+  file_path: string;     // Required: Path to source file
+}
+```
+
+**Marker Mappings**:
+- `TODO:` → `plan`
+- `FIXME:` / `BUG:` → `experience`
+- `NOTE:` / `IMPORTANT:` → `convention`
+- `DECISION:` → `decision`
+
+**Example**:
+```json
+{
+  "project_id": "myapp",
+  "file_path": "/path/to/project/src/service.py"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "extracted_memories": [
+    {
+      "memory_type": "plan",
+      "title": "TODO: Add rate limiting",
+      "content": "Need to implement rate limiting for API endpoints",
+      "line_number": 45,
+      "auto_saved": true,
+      "memory_id": "mem-plan123"
+    }
+  ],
+  "total_extracted": 1
+}
+```
+
+---
+
+### 4. suggest_memory_from_query
+
+Suggest creating a memory from a knowledge base query and its answer.
+
+**Input Parameters**:
+```typescript
+{
+  project_id: string;    // Required
+  query: string;         // Required: User query
+  answer: string;        // Required: LLM answer
+}
+```
+
+**Use Cases**:
+- Frequently asked questions
+- Important architectural information
+- Non-obvious solutions or workarounds
+
+**Example**:
+```json
+{
+  "project_id": "myapp",
+  "query": "How does the authentication work?",
+  "answer": "The system uses JWT tokens with refresh token rotation..."
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "should_save": true,
+  "confidence": 0.88,
+  "suggested_memory": {
+    "memory_type": "note",
+    "title": "Authentication mechanism",
+    "content": "System uses JWT with refresh token rotation",
+    "importance": 0.7
+  }
+}
+```
+
+---
+
+### 5. batch_extract_from_repository
+
+Batch extract memories from an entire repository.
+
+**Input Parameters**:
+```typescript
+{
+  project_id: string;           // Required
+  repo_path: string;            // Required: Path to git repo
+  max_commits?: number;         // Default: 50, Range: 1-200
+  file_patterns?: string[];     // Optional: e.g., ["*.py", "*.js"]
+}
+```
+
+**Analyzes**:
+- Recent git commits (configurable count)
+- Code comments in source files
+- Documentation files (README, CHANGELOG, etc.)
+
+**Note**: Long-running operation (may take several minutes).
+
+**Example**:
+```json
+{
+  "project_id": "myapp",
+  "repo_path": "/path/to/repository",
+  "max_commits": 50,
+  "file_patterns": ["*.py", "*.js"]
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "summary": {
+    "commits_analyzed": 50,
+    "files_scanned": 125,
+    "total_extracted": 23,
+    "by_source": {
+      "git_commits": 12,
+      "code_comments": 11
+    },
+    "by_type": {
+      "decision": 5,
+      "experience": 8,
+      "plan": 10
+    }
+  },
+  "execution_time_seconds": 45.2
+}
+```
+
+---
+
+## Task Management Tools (6)
+
+Tools for monitoring asynchronous task execution.
+
+### 1. get_task_status
+
+Get status of a specific task.
+
+**Input Parameters**:
+```typescript
+{
+  task_id: string;  // Required
+}
+```
+
+**Example**:
+```json
+{
+  "task_id": "task-abc123"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "task_id": "task-abc123",
+  "status": "SUCCESS",
+  "progress": 100.0,
+  "message": "Task completed successfully",
+  "result": {
+    "chunks_created": 15
+  }
+}
+```
+
+**Status Values**: `PENDING`, `PROCESSING`, `SUCCESS`, `FAILED`, `CANCELLED`
+
+---
+
+### 2. watch_task
+
+Monitor a task in real time until completion (with timeout).
+
+**Input Parameters**:
+```typescript
+{
+  task_id: string;           // Required
+  timeout?: number;          // Default: 300, Range: 10-600 (seconds)
+  poll_interval?: number;    // Default: 2, Range: 1-10 (seconds)
+}
+```
+
+**Example**:
+```json
+{
+  "task_id": "task-abc123",
+  "timeout": 300,
+  "poll_interval": 2
+}
+```
+
+**Response** (Streaming):
+```json
+{
+  "success": true,
+  "task_id": "task-abc123",
+  "final_status": "SUCCESS",
+  "progress_history": [
+    {"timestamp": "2025-01-15T10:30:00Z", "progress": 0.0, "status": "PENDING"},
+    {"timestamp": "2025-01-15T10:30:05Z", "progress": 25.0, "status": "PROCESSING"},
+    {"timestamp": "2025-01-15T10:30:10Z", "progress": 100.0, "status": "SUCCESS"}
+  ],
+  "result": {"chunks_created": 15}
+}
+```
+
+---
+
+### 3. watch_tasks
+
+Monitor multiple tasks until all complete.
+
+**Input Parameters**:
+```typescript
+{
+  task_ids: string[];        // Required
+  timeout?: number;          // Default: 300, Range: 10-600
+  poll_interval?: number;    // Default: 2, Range: 1-10
+}
+```
+
+**Example**:
+```json
+{
+  "task_ids": ["task-abc123", "task-xyz789"],
+  "timeout": 300,
+  "poll_interval": 2
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "tasks": {
+    "task-abc123": {
+      "status": "SUCCESS",
+      "progress": 100.0,
+      "result": {"chunks_created": 15}
+    },
+    "task-xyz789": {
+      "status": "SUCCESS",
+      "progress": 100.0,
+      "result": {"chunks_created": 22}
+    }
+  },
+  "all_completed": true
+}
+```
+
+---
+
+### 4. list_tasks
+
+List tasks with optional status filter.
+
+**Input Parameters**:
+```typescript
+{
+  status_filter?: "pending" | "running" | "completed" | "failed";
+  limit?: number;  // Default: 20, Range: 1-100
+}
+```
+
+**Example**:
+```json
+{
+  "status_filter": "running",
+  "limit": 20
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "tasks": [
+    {
+      "task_id": "task-abc123",
+      "status": "PROCESSING",
+      "progress": 45.0,
+      "created_at": "2025-01-15T10:30:00Z"
+    }
+  ],
+  "total": 1
+}
+```
+
+---
+
+### 5. cancel_task
+
+Cancel a pending or running task.
+
+**Input Parameters**:
+```typescript
+{
+  task_id: string;  // Required
+}
+```
+
+**Example**:
+```json
+{
+  "task_id": "task-abc123"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Task cancelled successfully",
+  "task_id": "task-abc123"
+}
+```
+
+---
+
+### 6. get_queue_stats
+
+Get task queue statistics.
+
+**Input Parameters**: None
+
+**Example**:
+```json
+{}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "pending": 5,
+  "running": 2,
+  "completed": 142,
+  "failed": 6,
+  "total": 155,
+  "queue_active": true
+}
+```
+
+---
+
+## System Tools (3)
+
+System information and management tools.
+
+### 1. get_graph_schema
+
+Get Neo4j graph schema (node labels, relationship types, statistics).
+
+**Input Parameters**: None
+
+**Example**:
+```json
+{}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "node_labels": ["Document", "Chunk", "Entity", "Memory", "Project", "File", "Repo"],
+  "relationship_types": ["HAS_CHUNK", "MENTIONS", "RELATES_TO", "BELONGS_TO"],
+  "statistics": {
+    "node_count": 1523,
+    "relationship_count": 4567
+  }
+}
+```
+
+---
+
+### 2. get_statistics
+
+Get knowledge base statistics.
+
+**Input Parameters**: None
+
+**Example**:
+```json
+{}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "total_nodes": 1523,
+  "total_relationships": 4567,
+  "document_count": 45,
+  "chunk_count": 892,
+  "entity_count": 586,
+  "memory_count": 42,
+  "file_count": 125
+}
+```
+
+---
+
+### 3. clear_knowledge_base
+
+**⚠️ DANGEROUS**: Clear all data from knowledge base.
+
+**Input Parameters**:
+```typescript
+{
+  confirmation: string;  // Required: Must be "yes"
+}
+```
+
+**Example**:
+```json
+{
+  "confirmation": "yes"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Knowledge base cleared",
+  "nodes_deleted": 1523,
+  "relationships_deleted": 4567
+}
+```
+
+---
+
+## Resources
+
+MCP resources provide dynamic data access.
+
+### Available Resources
+
+1. **knowledge://config** - System configuration and settings
+2. **knowledge://status** - Current system status and health
+
+**Access via MCP**: Resources are accessed through the MCP protocol, not as tools.
+
+---
+
+## Prompts
+
+MCP prompts provide query suggestions.
+
+### suggest_queries
+
+Generate suggested queries for the knowledge graph.
+
+**Arguments**:
+- `domain`: Domain to focus on (general, code, documentation, sql, architecture)
+
+**Example Domains**:
+- `general`: General system questions
+- `code`: Code-specific queries
+- `documentation`: Documentation queries
+- `sql`: Database schema queries
+- `architecture`: Architecture questions
+
+---
+
+## Error Handling
+
+All tools follow a consistent error response format.
+
+### Success Response
+
+```json
+{
+  "success": true,
+  "...": "tool-specific data"
+}
+```
+
+### Error Response
+
+```json
+{
+  "success": false,
+  "error": "Detailed error message",
+  "error_type": "ValidationError | NotFoundError | ServiceError"
+}
+```
+
+### Common Error Types
+
+**Validation Error**:
+```json
+{
+  "success": false,
+  "error": "Invalid memory_type. Must be one of: decision, preference, experience, convention, plan, note"
+}
+```
+
+**Not Found Error**:
+```json
+{
+  "success": false,
+  "error": "Memory not found: mem-abc123"
+}
+```
+
+**Service Error**:
+```json
+{
+  "success": false,
+  "error": "Failed to connect to Neo4j database"
+}
+```
+
+---
+
+## Best Practices
+
+### Memory Management
+
+1. **Importance Scoring**:
+   - 0.9-1.0: Critical decisions, security findings
+   - 0.7-0.8: Important architectural choices
+   - 0.5-0.6: Preferences and conventions
+   - 0.3-0.4: Plans and future work
+
+2. **Tagging Strategy**:
+   - Use domain tags: `auth`, `database`, `api`
+   - Use type tags: `security`, `performance`, `bug`
+   - Use status tags: `critical`, `deprecated`
+
+3. **When to Use Extraction**:
+   - Use `extract_from_conversation` for Q&A sessions
+   - Use `extract_from_git_commit` for commit hooks
+   - Use `extract_from_code_comments` for code reviews
+   - Use `batch_extract_from_repository` for initial setup
+
+### Task Monitoring
+
+1. Use `watch_task` for single long-running operations
+2. Use `watch_tasks` for batch operations
+3. Set appropriate timeouts based on operation size
+4. Use `cancel_task` to stop unnecessary work
+
+### Code Graph
+
+1. Use `incremental` mode for regular updates (60x faster)
+2. Use `full` mode for initial ingestion or major changes
+3. Use `context_pack` to stay within token limits
+4. Use `code_graph_impact` analysis before making changes
+
+---
+
+**Last Updated**: 2025-01-15
+**MCP Server Version**: 2.0.0
+**Total Tools**: 30
diff --git a/docs/api/python-sdk.md b/docs/api/python-sdk.md
new file mode 100644
index 0000000..47aaa52
--- /dev/null
+++ b/docs/api/python-sdk.md
@@ -0,0 +1,1466 @@
+# Python SDK Guide
+
+Complete guide for using the Code Graph Knowledge System services directly in Python applications.
+
+**Version**: 1.0.0
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Installation](#installation)
+- [Core Services](#core-services)
+- [Neo4jKnowledgeService](#neo4jknowledgeservice)
+- [MemoryStore](#memorystore)
+- [GraphService](#graphservice)
+- [CodeIngestor](#codeingestor)
+- [TaskQueue](#taskqueue)
+- [Configuration](#configuration)
+- [Examples](#examples)
+- [Error Handling](#error-handling)
+- [Best Practices](#best-practices)
+
+---
+
+## Overview
+
+The Python SDK provides direct access to all system services without going through the REST API or MCP. This is ideal for:
+
+- Building custom integrations
+- Embedding knowledge graph capabilities in applications
+- Batch processing scripts
+- Custom AI agents
+- Testing and development
+
+**Key Services**:
+- `Neo4jKnowledgeService`: Knowledge graph and RAG
+- `MemoryStore`: Project memory persistence
+- `GraphService`: Low-level Neo4j operations
+- `CodeIngestor`: Repository ingestion
+- `TaskQueue`: Asynchronous task management
+
+---
+
+## Installation
+
+### Requirements
+
+```bash
+# Python 3.10+
+python --version
+
+# Install dependencies
+pip install -e .
+
+# Or with uv (recommended)
+uv pip install -e .
+```
+
+### Dependencies
+
+```text
+# Core dependencies
+neo4j>=5.0.0
+llama-index-core>=0.10.0
+llama-index-graph-stores-neo4j>=0.2.0
+fastapi>=0.104.0
+pydantic>=2.0.0
+```
+
+### Environment Setup
+
+Create a `.env` file:
+
+```bash
+# Neo4j Configuration
+NEO4J_URI=bolt://localhost:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your-password
+NEO4J_DATABASE=neo4j
+
+# LLM Provider (ollama/openai/gemini/openrouter)
+LLM_PROVIDER=ollama
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_MODEL=llama2
+
+# Embedding Provider
+EMBEDDING_PROVIDER=ollama
+EMBEDDING_MODEL=nomic-embed-text
+
+# Optional: OpenAI
+OPENAI_API_KEY=sk-...
+OPENAI_MODEL=gpt-4
+
+# Optional: Google Gemini
+GOOGLE_API_KEY=...
+GEMINI_MODEL=gemini-pro
+
+# Optional: OpenRouter
+OPENROUTER_API_KEY=...
+OPENROUTER_MODEL=anthropic/claude-3-opus
+```
+
+---
+
+## Core Services
+
+### Import Services
+
+```python
+from services.neo4j_knowledge_service import Neo4jKnowledgeService
+from services.memory_store import MemoryStore, memory_store
+from services.graph_service import Neo4jGraphService, graph_service
+from services.code_ingestor import CodeIngestor, get_code_ingestor
+from services.task_queue import TaskQueue, task_queue
+from config import settings
+```
+
+### Service Initialization Pattern
+
+All services follow async initialization:
+
+```python
+import asyncio
+
+async def main():
+    # Create service instance
+    service = Neo4jKnowledgeService()
+
+    # Initialize (connect to Neo4j, setup LLM, etc.)
+    success = await service.initialize()
+
+    if not success:
+        print("Failed to initialize service")
+        return
+
+    # Use service
+    result = await service.query("How does this work?")
+    print(result)
+
+asyncio.run(main())
+```
+
+---
+
+## Neo4jKnowledgeService
+
+Primary service for knowledge graph operations with LlamaIndex integration.
+
+### Initialization
+
+```python
+from services.neo4j_knowledge_service import Neo4jKnowledgeService
+
+# Create instance
+knowledge_service = Neo4jKnowledgeService()
+
+# Initialize (async)
+await knowledge_service.initialize()
+```
+
+### Key Methods
+
+#### query()
+
+Query knowledge base using GraphRAG.
+
+```python
+async def query(
+    question: str,
+    mode: str = "hybrid"
+) -> Dict[str, Any]:
+    """
+    Query knowledge base.
+
+    Args:
+        question: Question to ask
+        mode: "hybrid" | "graph_only" | "vector_only"
+
+    Returns:
+        {
+            "success": bool,
+            "answer": str,
+            "source_nodes": List[Dict],
+            "mode": str
+        }
+    """
+```
+
+**Example**:
+```python
+result = await knowledge_service.query(
+    question="How does authentication work?",
+    mode="hybrid"
+)
+
+if result["success"]:
+    print(f"Answer: {result['answer']}")
+    print(f"Sources: {len(result['source_nodes'])}")
+```
+
+#### search_similar_nodes()
+
+Vector similarity search.
+
+```python
+async def search_similar_nodes(
+    query: str,
+    top_k: int = 10
+) -> Dict[str, Any]:
+    """
+    Search similar nodes using vector similarity.
+
+    Args:
+        query: Search query
+        top_k: Number of results (1-50)
+
+    Returns:
+        {
+            "success": bool,
+            "results": List[Dict],
+            "query": str
+        }
+    """
+```
+
+**Example**:
+```python
+result = await knowledge_service.search_similar_nodes(
+    query="database configuration",
+    top_k=10
+)
+
+for node in result["results"]:
+    print(f"Score: {node['score']:.2f} - {node['text'][:100]}")
+```
+
+#### add_document()
+
+Add document to knowledge base.
+
+```python
+async def add_document(
+    content: str,
+    title: str = "Untitled",
+    metadata: Optional[Dict[str, Any]] = None
+) -> Dict[str, Any]:
+    """
+    Add document to knowledge base.
+
+    Args:
+        content: Document content
+        title: Document title
+        metadata: Additional metadata
+
+    Returns:
+        {
+            "success": bool,
+            "document_id": str,
+            "chunks_created": int
+        }
+    """
+```
+
+**Example**:
+```python
+result = await knowledge_service.add_document(
+    content="""
+    Authentication System Design
+
+    The system uses JWT tokens for stateless authentication.
+    Refresh tokens are stored in Redis with 7-day expiration.
+    """,
+    title="Auth Design",
+    metadata={
+        "author": "Team",
+        "tags": ["auth", "design"]
+    }
+)
+
+print(f"Document ID: {result['document_id']}")
+print(f"Chunks created: {result['chunks_created']}")
+```
+
+#### add_file()
+
+Add file to knowledge base.
+
+```python
+async def add_file(
+    file_path: str
+) -> Dict[str, Any]:
+    """
+    Add file to knowledge base.
+
+    Args:
+        file_path: Absolute path to file
+
+    Returns:
+        {
+            "success": bool,
+            "file_path": str,
+            "chunks_created": int
+        }
+    """
+```
+
+**Example**:
+```python
+result = await knowledge_service.add_file(
+    file_path="/path/to/documentation.md"
+)
+```
+
+#### add_directory()
+
+Add directory of files to knowledge base.
+
+```python
+async def add_directory(
+    directory_path: str,
+    recursive: bool = True,
+    file_extensions: Optional[List[str]] = None
+) -> Dict[str, Any]:
+    """
+    Add directory to knowledge base.
+
+    Args:
+        directory_path: Absolute directory path
+        recursive: Process subdirectories
+        file_extensions: File extensions to include (e.g., [".md", ".txt"])
+
+    Returns:
+        {
+            "success": bool,
+            "files_processed": int,
+            "total_chunks": int
+        }
+    """
+```
+
+**Example**:
+```python
+result = await knowledge_service.add_directory(
+    directory_path="/path/to/docs",
+    recursive=True,
+    file_extensions=[".md", ".txt"]
+)
+
+print(f"Processed {result['files_processed']} files")
+```
+
+#### get_graph_schema()
+
+Get graph schema information.
+
+```python
+async def get_graph_schema() -> Dict[str, Any]:
+    """
+    Get Neo4j graph schema.
+
+    Returns:
+        {
+            "success": bool,
+            "node_labels": List[str],
+            "relationship_types": List[str],
+            "statistics": Dict
+        }
+    """
+```
+
+#### get_statistics()
+
+Get knowledge base statistics.
+
+```python
+async def get_statistics() -> Dict[str, Any]:
+    """
+    Get knowledge base statistics.
+
+    Returns:
+        {
+            "success": bool,
+            "total_nodes": int,
+            "total_relationships": int,
+            "document_count": int,
+            "chunk_count": int
+        }
+    """
+```
+
+#### clear_knowledge_base()
+
+**⚠️ DANGEROUS**: Clear all knowledge base data.
+
+```python
+async def clear_knowledge_base() -> Dict[str, Any]:
+    """Clear all data from knowledge base."""
+```
+
+---
+
+## MemoryStore
+
+Project memory persistence for AI agents.
+
+### Initialization
+
+```python
+from services.memory_store import memory_store
+
+# Initialize (async)
+await memory_store.initialize()
+```
+
+### Key Methods
+
+#### add_memory()
+
+Add a new memory.
+
+```python
+async def add_memory(
+    project_id: str,
+    memory_type: str,  # "decision" | "preference" | "experience" | "convention" | "plan" | "note"
+    title: str,
+    content: str,
+    reason: Optional[str] = None,
+    tags: Optional[List[str]] = None,
+    importance: float = 0.5,
+    related_refs: Optional[List[str]] = None
+) -> Dict[str, Any]:
+    """
+    Add memory to project.
+
+    Returns:
+        {
+            "success": bool,
+            "memory_id": str,
+            "project_id": str
+        }
+    """
+```
+
+**Example**:
+```python
+result = await memory_store.add_memory(
+    project_id="myapp",
+    memory_type="decision",
+    title="Use JWT for authentication",
+    content="Decided to use JWT tokens instead of session-based auth",
+    reason="Need stateless authentication for mobile clients",
+    tags=["auth", "architecture"],
+    importance=0.9,
+    related_refs=["ref://file/src/auth/jwt.py"]
+)
+
+memory_id = result["memory_id"]
+```
+
+#### search_memories()
+
+Search memories with filters.
+
+```python
+async def search_memories(
+    project_id: str,
+    query: Optional[str] = None,
+    memory_type: Optional[str] = None,
+    tags: Optional[List[str]] = None,
+    min_importance: float = 0.0,
+    limit: int = 20
+) -> Dict[str, Any]:
+    """
+    Search project memories.
+
+    Returns:
+        {
+            "success": bool,
+            "memories": List[Dict],
+            "total": int
+        }
+    """
+```
+
+**Example**:
+```python
+result = await memory_store.search_memories(
+    project_id="myapp",
+    query="authentication security",
+    memory_type="decision",
+    min_importance=0.7,
+    limit=20
+)
+
+for memory in result["memories"]:
+    print(f"{memory['title']} (importance: {memory['importance']})")
+```
+
+#### get_memory()
+
+Get specific memory by ID.
+
+```python
+async def get_memory(
+    memory_id: str
+) -> Dict[str, Any]:
+    """
+    Get memory by ID.
+
+    Returns:
+        {
+            "success": bool,
+            "memory": Dict  # Full memory details
+        }
+    """
+```
+
+#### update_memory()
+
+Update existing memory.
+
+```python
+async def update_memory(
+    memory_id: str,
+    title: Optional[str] = None,
+    content: Optional[str] = None,
+    reason: Optional[str] = None,
+    tags: Optional[List[str]] = None,
+    importance: Optional[float] = None
+) -> Dict[str, Any]:
+    """Update memory (partial update supported)."""
+```
+
+**Example**:
+```python
+await memory_store.update_memory(
+    memory_id=memory_id,
+    importance=0.95,
+    tags=["auth", "security", "critical"]
+)
+```
+
+#### delete_memory()
+
+Delete memory (soft delete).
+
+```python
+async def delete_memory(
+    memory_id: str
+) -> Dict[str, Any]:
+    """Delete memory (soft delete - data retained)."""
+```
+
+#### supersede_memory()
+
+Create a new memory that supersedes an existing one.
+
+```python
+async def supersede_memory(
+    old_memory_id: str,
+    new_memory_data: Dict[str, Any]
+) -> Dict[str, Any]:
+    """
+    Create memory that supersedes old one.
+
+    Args:
+        old_memory_id: ID of memory to supersede
+        new_memory_data: Data for new memory
+            {
+                "memory_type": str,
+                "title": str,
+                "content": str,
+                "reason": str,
+                "tags": List[str],
+                "importance": float
+            }
+
+    Returns:
+        {
+            "success": bool,
+            "old_memory_id": str,
+            "new_memory_id": str
+        }
+    """
+```
+
+**Example**:
+```python
+result = await memory_store.supersede_memory(
+    old_memory_id="mem-abc123",
+    new_memory_data={
+        "memory_type": "decision",
+        "title": "Use PostgreSQL instead of MySQL",
+        "content": "Switched to PostgreSQL for better JSON support",
+        "reason": "Need advanced JSON querying capabilities",
+        "importance": 0.8
+    }
+)
+```
+
+#### get_project_summary()
+
+Get project memory summary.
+
+```python
+async def get_project_summary(
+    project_id: str
+) -> Dict[str, Any]:
+    """
+    Get project memory summary.
+
+    Returns:
+        {
+            "success": bool,
+            "project_id": str,
+            "total_memories": int,
+            "by_type": Dict  # Breakdown by memory type
+        }
+    """
+```
+
+---
+
+## GraphService
+
+Low-level Neo4j graph operations.
+
+### Initialization
+
+```python
+from services.graph_service import graph_service
+
+# Connect to Neo4j
+await graph_service.connect()
+```
+
+### Key Methods
+
+#### execute_cypher()
+
+Execute Cypher query.
+
+```python
+def execute_cypher(
+    query: str,
+    parameters: Optional[Dict[str, Any]] = None
+) -> GraphQueryResult:
+    """
+    Execute Cypher query.
+
+    Args:
+        query: Cypher query string
+        parameters: Query parameters
+
+    Returns:
+        GraphQueryResult with nodes, relationships, paths
+    """
+```
+
+**Example**:
+```python
+result = graph_service.execute_cypher(
+    query="""
+    MATCH (n:Memory {project_id: $project_id})
+    WHERE n.importance > $min_importance
+    RETURN n
+    LIMIT 10
+    """,
+    parameters={
+        "project_id": "myapp",
+        "min_importance": 0.7
+    }
+)
+
+for node in result.nodes:
+    print(f"Node: {node.properties['title']}")
+```
+
+#### create_node()
+
+Create a node.
+
+```python
+def create_node(
+    labels: List[str],
+    properties: Dict[str, Any]
+) -> str:
+    """
+    Create node.
+
+    Args:
+        labels: Node labels
+        properties: Node properties
+
+    Returns:
+        Node ID
+    """
+```
+
+**Example**:
+```python
+node_id = graph_service.create_node(
+    labels=["CustomNode", "Entity"],
+    properties={
+        "name": "Example",
+        "value": 42,
+        "created_at": datetime.utcnow().isoformat()
+    }
+)
+```
+
+#### create_relationship()
+
+Create a relationship.
+
+```python
+def create_relationship(
+    start_node_id: str,
+    end_node_id: str,
+    relationship_type: str,
+    properties: Optional[Dict[str, Any]] = None
+) -> str:
+    """Create relationship between nodes."""
+```
+
+#### fulltext_search()
+
+Perform fulltext search on files.
+
+```python
+def fulltext_search(
+    query_text: str,
+    repo_id: str,
+    limit: int = 50
+) -> List[Dict[str, Any]]:
+    """
+    Fulltext search on files.
+
+    Args:
+        query_text: Search query
+        repo_id: Repository ID
+        limit: Max results
+
+    Returns:
+        List of file matches with paths and languages
+    """
+```
+
+**Example**:
+```python
+files = graph_service.fulltext_search(
+    query_text="authentication jwt",
+    repo_id="myproject",
+    limit=30
+)
+
+for file in files:
+    print(f"{file['path']} ({file['lang']})")
+```
+
+#### impact_analysis()
+
+Analyze impact of file changes.
+
+```python
+def impact_analysis(
+    repo_id: str,
+    file_path: str,
+    depth: int = 2,
+    limit: int = 50
+) -> List[Dict[str, Any]]:
+    """
+    Analyze file impact (reverse dependencies).
+
+    Args:
+        repo_id: Repository ID
+        file_path: File path to analyze
+        depth: Traversal depth
+        limit: Max results
+
+    Returns:
+        List of dependent files
+    """
+```
+
+---
+
+## CodeIngestor
+
+Repository code ingestion service.
+
+### Initialization
+
+```python
+from services.code_ingestor import get_code_ingestor
+from services.graph_service import graph_service
+
+# Initialize graph service first
+await graph_service.connect()
+
+# Get code ingestor
+code_ingestor = get_code_ingestor(graph_service)
+```
+
+### Key Methods
+
+#### scan_files()
+
+Scan repository files.
+
+```python
+def scan_files(
+    repo_path: str,
+    include_globs: List[str],
+    exclude_globs: List[str]
+) -> List[Dict[str, Any]]:
+    """
+    Scan files in repository.
+
+    Args:
+        repo_path: Repository path
+        include_globs: Include patterns (e.g., ["**/*.py"])
+        exclude_globs: Exclude patterns (e.g., ["**/node_modules/**"])
+
+    Returns:
+        List of file information dictionaries
+    """
+```
+
+**Example**:
+```python
+files = code_ingestor.scan_files(
+    repo_path="/path/to/repository",
+    include_globs=["**/*.py", "**/*.ts", "**/*.tsx"],
+    exclude_globs=["**/node_modules/**", "**/.git/**", "**/__pycache__/**"]
+)
+
+print(f"Found {len(files)} files")
+```
+
+#### ingest_files()
+
+Ingest files into Neo4j graph.
+
+```python
+def ingest_files(
+    repo_id: str,
+    files: List[Dict[str, Any]]
+) -> Dict[str, Any]:
+    """
+    Ingest files into Neo4j.
+
+    Args:
+        repo_id: Repository identifier
+        files: List of file info from scan_files()
+
+    Returns:
+        {
+            "success": bool,
+            "files_processed": int,
+            "nodes_created": int
+        }
+    """
+```
+
+**Example**:
+```python
+result = code_ingestor.ingest_files(
+    repo_id="myproject",
+    files=files
+)
+
+print(f"Processed {result['files_processed']} files")
+print(f"Created {result['nodes_created']} nodes")
+```
+
+---
+
+## TaskQueue
+
+Asynchronous task queue management.
+
+### Initialization
+
+```python
+from services.task_queue import task_queue, TaskStatus
+
+# Start task queue
+await task_queue.start()
+```
+
+### Key Methods
+
+#### submit_task()
+
+Submit a task to the queue.
+
+```python
+async def submit_task(
+    task_func: Callable,
+    task_kwargs: Dict[str, Any],
+    task_name: str,
+    task_type: str,
+    metadata: Optional[Dict[str, Any]] = None,
+    priority: int = 0
+) -> str:
+    """
+    Submit task to queue.
+
+    Args:
+        task_func: Function to execute
+        task_kwargs: Function arguments
+        task_name: Task name
+        task_type: Task type
+        metadata: Additional metadata
+        priority: Priority (higher = more important)
+
+    Returns:
+        Task ID
+    """
+```
+
+**Example**:
+```python
+from services.task_processors import process_document_task
+
+task_id = await task_queue.submit_task(
+    task_func=process_document_task,
+    task_kwargs={
+        "document_content": "Large document content...",
+        "title": "Large Doc"
+    },
+    task_name="Process Large Document",
+    task_type="document_processing",
+    metadata={"source": "api"},
+    priority=5
+)
+
+print(f"Task submitted: {task_id}")
+```
+
+#### get_task_status()
+
+Get task status.
+
+```python
+def get_task_status(
+    task_id: str
+) -> Optional[TaskResult]:
+    """
+    Get task status.
+
+    Returns:
+        TaskResult or None if not found
+    """
+```
+
+**Example**:
+```python
+task_result = task_queue.get_task_status(task_id)
+
+if task_result:
+    print(f"Status: {task_result.status.value}")
+    print(f"Progress: {task_result.progress}%")
+
+    if task_result.status == TaskStatus.SUCCESS:
+        print(f"Result: {task_result.result}")
+    elif task_result.status == TaskStatus.FAILED:
+        print(f"Error: {task_result.error}")
+```
+
+#### cancel_task()
+
+Cancel a task.
+
+```python
+async def cancel_task(
+    task_id: str
+) -> bool:
+    """Cancel task. Returns True if cancelled."""
+```
+
+#### get_queue_stats()
+
+Get queue statistics.
+
+```python
+def get_queue_stats() -> Dict[str, int]:
+    """
+    Get queue statistics.
+
+    Returns:
+        {
+            "pending": int,
+            "running": int,
+            "completed": int,
+            "failed": int
+        }
+    """
+```
+
+---
+
+## Configuration
+
+Access configuration settings.
+
+```python
+from config import settings
+
+# Neo4j settings
+print(settings.neo4j_uri)
+print(settings.neo4j_database)
+
+# LLM settings
+print(settings.llm_provider)
+print(settings.ollama_model)
+print(settings.temperature)
+
+# Embedding settings
+print(settings.embedding_provider)
+print(settings.embedding_model)
+
+# Timeouts
+print(settings.connection_timeout)
+print(settings.operation_timeout)
+print(settings.large_document_timeout)
+
+# Chunk settings
+print(settings.chunk_size)
+print(settings.chunk_overlap)
+print(settings.top_k)
+```
+
+### Get Current Model Info
+
+```python
+from config import get_current_model_info
+
+model_info = get_current_model_info()
+print(f"LLM: {model_info['llm']}")
+print(f"Embedding: {model_info['embedding']}")
+```
+
+---
+
+## Examples
+
+### Complete Knowledge Base Example
+
+```python
+import asyncio
+from services.neo4j_knowledge_service import Neo4jKnowledgeService
+
+async def main():
+    # Initialize service
+    service = Neo4jKnowledgeService()
+    await service.initialize()
+
+    # Add documents
+    await service.add_document(
+        content="JWT authentication guide...",
+        title="Auth Guide",
+        metadata={"tags": ["auth"]}
+    )
+
+    # Query
+    result = await service.query(
+        question="How does authentication work?",
+        mode="hybrid"
+    )
+
+    print(f"Answer: {result['answer']}")
+
+    # Search
+    search_results = await service.search_similar_nodes(
+        query="authentication",
+        top_k=5
+    )
+
+    for node in search_results["results"]:
+        print(f"- {node['text'][:100]}")
+
+asyncio.run(main())
+```
+
+### Memory Management Example
+
+```python
+import asyncio
+from services.memory_store import memory_store
+
+async def main():
+    # Initialize
+    await memory_store.initialize()
+
+    # Add decision
+    result = await memory_store.add_memory(
+        project_id="myapp",
+        memory_type="decision",
+        title="Use Redis for caching",
+        content="Decided to use Redis instead of Memcached",
+        reason="Redis supports data persistence",
+        importance=0.8,
+        tags=["cache", "architecture"]
+    )
+
+    memory_id = result["memory_id"]
+
+    # Search memories
+    search_result = await memory_store.search_memories(
+        project_id="myapp",
+        query="caching",
+        min_importance=0.5
+    )
+
+    for memory in search_result["memories"]:
+        print(f"{memory['title']}: {memory['content']}")
+
+    # Get project summary
+    summary = await memory_store.get_project_summary("myapp")
+    print(f"Total memories: {summary['total_memories']}")
+    print(f"By type: {summary['by_type']}")
+
+asyncio.run(main())
+```
+
+### Repository Ingestion Example
+
+```python
+import asyncio
+from services.graph_service import graph_service
+from services.code_ingestor import get_code_ingestor
+from services.git_utils import git_utils
+
+async def main():
+    # Connect to Neo4j
+    await graph_service.connect()
+
+    # Get code ingestor
+    code_ingestor = get_code_ingestor(graph_service)
+
+    # Get repository ID
+    repo_path = "/path/to/repository"
+    repo_id = git_utils.get_repo_id_from_path(repo_path)
+
+    # Scan files
+    files = code_ingestor.scan_files(
+        repo_path=repo_path,
+        include_globs=["**/*.py", "**/*.ts"],
+        exclude_globs=["**/node_modules/**", "**/.git/**"]
+    )
+
+    print(f"Found {len(files)} files")
+
+    # Ingest into Neo4j
+    result = code_ingestor.ingest_files(
+        repo_id=repo_id,
+        files=files
+    )
+
+    print(f"Success: {result['success']}")
+    print(f"Files processed: {result['files_processed']}")
+
+    # Search code
+    search_results = graph_service.fulltext_search(
+        query_text="authentication",
+        repo_id=repo_id,
+        limit=10
+    )
+
+    for file in search_results:
+        print(f"- {file['path']} ({file['lang']})")
+
+asyncio.run(main())
+```
+
+### Task Queue Example
+
+```python
+import asyncio
+from services.task_queue import task_queue, TaskStatus
+from services.task_processors import process_document_task
+
+async def main():
+    # Start task queue
+    await task_queue.start()
+
+    # Submit task
+    task_id = await task_queue.submit_task(
+        task_func=process_document_task,
+        task_kwargs={
+            "document_content": "Large document content...",
+            "title": "Large Doc"
+        },
+        task_name="Process Large Document",
+        task_type="document_processing"
+    )
+
+    print(f"Task submitted: {task_id}")
+
+    # Monitor task
+    while True:
+        task_result = task_queue.get_task_status(task_id)
+
+        if not task_result:
+            break
+
+        print(f"Status: {task_result.status.value}, Progress: {task_result.progress}%")
+
+        if task_result.status in [TaskStatus.SUCCESS, TaskStatus.FAILED]:
+            break
+
+        await asyncio.sleep(2)
+
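+    if task_result is None:
+        # get_task_status() returns None when the task is not found
+        print("Task not found")
+    elif task_result.status == TaskStatus.SUCCESS:
+        print(f"Result: {task_result.result}")
+    else:
+        print(f"Error: {task_result.error}")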
+
+    # Get queue stats
+    stats = task_queue.get_queue_stats()
+    print(f"Queue stats: {stats}")
+
+asyncio.run(main())
+```
+
+---
+
+## Error Handling
+
+Most service methods return a dictionary with a `success` flag; failed calls also include an `error` message.
+
+### Standard Response Format
+
+```python
+# Success
+{
+    "success": True,
+    "...": "service-specific data"
+}
+
+# Error
+{
+    "success": False,
+    "error": "Error message"
+}
+```
+
+### Handling Errors
+
+```python
+result = await knowledge_service.query("question")
+
+if not result.get("success"):
+    error_msg = result.get("error", "Unknown error")
+    print(f"Error: {error_msg}")
+    # Handle error
+else:
+    # Process result
+    answer = result["answer"]
+```
+
+### Exception Handling
+
+```python
+try:
+    await knowledge_service.initialize()
+except Exception as e:
+    logger.error(f"Failed to initialize: {e}")
+    # Handle exception
+```
+
+---
+
+## Best Practices
+
+### 1. Always Initialize Services
+
+```python
+# Good
+service = Neo4jKnowledgeService()
+await service.initialize()
+
+# Bad - will fail
+service = Neo4jKnowledgeService()
+await service.query("question")  # Error: not initialized
+```
+
+### 2. Check Success Status
+
+```python
+# Good
+result = await service.query("question")
+if result["success"]:
+    print(result["answer"])
+else:
+    print(f"Error: {result['error']}")
+
+# Bad
+result = await service.query("question")
+print(result["answer"])  # May crash if error occurred
+```
+
+### 3. Use Context Managers for Neo4j Sessions
+
+```python
+# Good
+async with graph_service.driver.session() as session:
+    result = await session.run("MATCH (n) RETURN n LIMIT 10")
+    # Session automatically closed
+
+# Bad
+session = graph_service.driver.session()
+result = await session.run("MATCH (n) RETURN n LIMIT 10")
+# Session not closed - memory leak
+```
+
+### 4. Set Appropriate Timeouts
+
+```python
+from config import settings
+
+# Adjust timeouts for large operations
+settings.operation_timeout = 300  # 5 minutes
+settings.large_document_timeout = 600  # 10 minutes
+
+service = Neo4jKnowledgeService()
+await service.initialize()
+```
+
+### 5. Handle Large Documents Asynchronously
+
+```python
+# For large documents, use task queue
+if len(document_content) > 10_000:
+    task_id = await task_queue.submit_task(
+        task_func=process_document_task,
+        task_kwargs={"document_content": document_content},
+        task_name="Process Large Doc",
+        task_type="document_processing"
+    )
+    # Monitor task_id
+else:
+    # Process directly
+    await knowledge_service.add_document(content=document_content)
+```
+
+### 6. Batch Operations
+
+```python
+# Good - batch insert
+files = code_ingestor.scan_files(repo_path, include_globs, exclude_globs)
+result = code_ingestor.ingest_files(repo_id, files)
+
+# Bad - individual inserts
+for file in files:
+    code_ingestor.ingest_files(repo_id, [file])  # Slow!
+```
+
+### 7. Use Memory Store for Long-term Knowledge
+
+```python
+# Store important decisions
+await memory_store.add_memory(
+    project_id="myapp",
+    memory_type="decision",
+    title="Architecture decision",
+    content="Detailed rationale...",
+    importance=0.9  # High importance
+)
+
+# Search when needed
+memories = await memory_store.search_memories(
+    project_id="myapp",
+    memory_type="decision",
+    min_importance=0.7
+)
+```
+
+### 8. Clean Up Resources
+
+```python
+# Close connections when done
+await graph_service.driver.close()
+
+# Or use application lifecycle hooks
+async def startup():
+    await knowledge_service.initialize()
+    await memory_store.initialize()
+    await task_queue.start()
+
+async def shutdown():
+    await graph_service.driver.close()
+    await task_queue.stop()
+```
+
+---
+
+## Performance Tips
+
+### 1. Connection Pooling
+
+The Neo4j driver handles connection pooling automatically. Reuse service instances:
+
+```python
+# Good - single instance
+service = Neo4jKnowledgeService()
+await service.initialize()
+
+for question in questions:
+    await service.query(question)
+
+# Bad - multiple instances
+for question in questions:
+    service = Neo4jKnowledgeService()
+    await service.initialize()  # Expensive!
+    await service.query(question)
+```
+
+### 2. Batch Queries
+
+```python
+# Good - batch cypher query
+query = """
+UNWIND $items as item
+CREATE (n:Node {name: item.name, value: item.value})
+"""
+graph_service.execute_cypher(query, {"items": items})
+
+# Bad - individual queries
+for item in items:
+    graph_service.execute_cypher(
+        "CREATE (n:Node {name: $name, value: $value})",
+        item
+    )
+```
+
+### 3. Use Incremental Repository Ingestion
+
+```python
+# 60x faster for updates
+from services.git_utils import git_utils
+
+if git_utils.is_git_repo(repo_path):
+    changed_files = git_utils.get_changed_files(repo_path)
+    files_to_process = filter_by_patterns(changed_files)
+else:
+    # Fall back to full scan
+    files_to_process = code_ingestor.scan_files(repo_path, ...)
+```
+
+### 4. Limit Result Sets
+
+```python
+# Always use limits for large datasets
+result = await knowledge_service.search_similar_nodes(
+    query="search term",
+    top_k=10  # Limit results
+)
+```
+
+---
+
+**Last Updated**: 2025-01-15
+**SDK Version**: 1.0.0
+**Python Version**: 3.10+
diff --git a/docs/api/rest.md b/docs/api/rest.md
new file mode 100644
index 0000000..3184249
--- /dev/null
+++ b/docs/api/rest.md
@@ -0,0 +1,1283 @@
+# REST API Reference
+
+Complete reference for Code Graph Knowledge System REST API endpoints.
+
+**Base URL**: `http://localhost:8000/api/v1`
+
+**Version**: 1.0.0
+
+## Authentication
+
+The API does not currently require authentication. Authentication may be added in a future version.
+
+---
+
+## Health & System
+
+### Get Health Status
+
+Get system health and service status.
+
+**Endpoint**: `GET /health`
+
+**Response**:
+```json
+{
+  "status": "healthy",
+  "services": {
+    "neo4j_knowledge_service": true,
+    "graph_service": true,
+    "task_queue": true
+  },
+  "version": "0.1.0"
+}
+```
+
+### Get System Configuration
+
+Get current system configuration.
+
+**Endpoint**: `GET /config`
+
+**Response**:
+```json
+{
+  "app_name": "Code Graph Knowledge System",
+  "version": "0.1.0",
+  "debug": false,
+  "llm_provider": "ollama",
+  "embedding_provider": "ollama",
+  "monitoring_enabled": true
+}
+```
+
+### Get Graph Schema
+
+Get Neo4j graph schema information.
+
+**Endpoint**: `GET /schema`
+
+**Response**:
+```json
+{
+  "success": true,
+  "node_labels": ["Document", "Chunk", "Entity", "Memory", "Project", "File", "Repo"],
+  "relationship_types": ["HAS_CHUNK", "MENTIONS", "RELATES_TO", "BELONGS_TO"],
+  "statistics": {
+    "node_count": 1523,
+    "relationship_count": 4567
+  }
+}
+```
+
+### Get Statistics
+
+Get knowledge base statistics.
+
+**Endpoint**: `GET /statistics`
+
+**Response**:
+```json
+{
+  "success": true,
+  "total_nodes": 1523,
+  "total_relationships": 4567,
+  "document_count": 45,
+  "chunk_count": 892,
+  "entity_count": 586
+}
+```
+
+### Get Prometheus Metrics
+
+Get system metrics in Prometheus format.
+
+**Endpoint**: `GET /metrics`
+
+**Response**: Plain text Prometheus metrics
+```
+# HELP http_requests_total Total HTTP requests
+# TYPE http_requests_total counter
+http_requests_total{method="GET",endpoint="/health"} 1234
+
+# HELP neo4j_nodes_total Total nodes in Neo4j
+# TYPE neo4j_nodes_total gauge
+neo4j_nodes_total 1523
+```
+
+### Clear Knowledge Base
+
+**⚠️ DANGEROUS**: Clear all data from knowledge base.
+
+**Endpoint**: `DELETE /clear`
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Knowledge base cleared",
+  "nodes_deleted": 1523,
+  "relationships_deleted": 4567
+}
+```
+
+---
+
+## Knowledge Base
+
+### Query Knowledge Base
+
+Query the knowledge base using GraphRAG.
+
+**Endpoint**: `POST /knowledge/query`
+
+**Request Body**:
+```json
+{
+  "question": "How does authentication work in this system?",
+  "mode": "hybrid"
+}
+```
+
+**Parameters**:
+- `question` (string, required): Question to ask
+- `mode` (string, optional): Query mode
+  - `hybrid` (default): Graph traversal + vector search
+  - `graph_only`: Only graph relationships
+  - `vector_only`: Only vector similarity
+
+**Response**:
+```json
+{
+  "success": true,
+  "answer": "The system uses JWT-based authentication...",
+  "source_nodes": [
+    {
+      "text": "JWT implementation details...",
+      "score": 0.92,
+      "metadata": {
+        "title": "Authentication Guide",
+        "source": "docs/auth.md"
+      }
+    }
+  ],
+  "mode": "hybrid"
+}
+```
+
+### Search Similar Nodes
+
+Search for similar nodes using vector similarity.
+
+**Endpoint**: `POST /knowledge/search`
+
+**Request Body**:
+```json
+{
+  "query": "database configuration",
+  "top_k": 10
+}
+```
+
+**Parameters**:
+- `query` (string, required): Search query
+- `top_k` (integer, optional): Number of results (default: 10, max: 50)
+
+**Response**:
+```json
+{
+  "success": true,
+  "results": [
+    {
+      "text": "Database connection settings...",
+      "score": 0.89,
+      "metadata": {
+        "title": "Configuration Guide",
+        "type": "document"
+      }
+    }
+  ],
+  "query": "database configuration",
+  "top_k": 10
+}
+```
+
+### Add Document
+
+Add a document to knowledge base.
+
+**Endpoint**: `POST /documents`
+
+**Request Body**:
+```json
+{
+  "content": "This is the document content...",
+  "title": "My Document",
+  "metadata": {
+    "author": "John Doe",
+    "tags": ["tutorial", "api"]
+  }
+}
+```
+
+**Parameters**:
+- `content` (string, required): Document content
+- `title` (string, optional): Document title
+- `metadata` (object, optional): Additional metadata
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Document added successfully",
+  "document_id": "doc-abc123",
+  "chunks_created": 5
+}
+```
+
+**Note**: Large documents (>10KB) are processed asynchronously, and the response includes a `task_id`.
+
+### Add File
+
+Add a file to knowledge base.
+
+**Endpoint**: `POST /documents/file`
+
+**Request Body**:
+```json
+{
+  "file_path": "/absolute/path/to/file.txt"
+}
+```
+
+**Parameters**:
+- `file_path` (string, required): Absolute path to file
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "File added successfully",
+  "file_path": "/absolute/path/to/file.txt",
+  "chunks_created": 8
+}
+```
+
+### Add Directory
+
+Add all files from directory to knowledge base.
+
+**Endpoint**: `POST /documents/directory`
+
+**Request Body**:
+```json
+{
+  "directory_path": "/absolute/path/to/directory",
+  "recursive": true,
+  "file_patterns": ["*.md", "*.txt"]
+}
+```
+
+**Parameters**:
+- `directory_path` (string, required): Absolute directory path
+- `recursive` (boolean, optional): Process subdirectories (default: true)
+- `file_patterns` (array, optional): File patterns to include
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Directory processed",
+  "files_processed": 23,
+  "total_chunks": 156
+}
+```
+
+---
+
+## Memory Management
+
+Memory Store provides project knowledge persistence for AI agents.
+
+### Add Memory
+
+Add a new memory to project knowledge base.
+
+**Endpoint**: `POST /memory/add`
+
+**Request Body**:
+```json
+{
+  "project_id": "myapp",
+  "memory_type": "decision",
+  "title": "Use JWT for authentication",
+  "content": "Decided to use JWT tokens instead of session-based auth",
+  "reason": "Need stateless authentication for mobile clients",
+  "tags": ["auth", "architecture"],
+  "importance": 0.9,
+  "related_refs": ["ref://file/src/auth/jwt.py"]
+}
+```
+
+**Parameters**:
+- `project_id` (string, required): Project identifier
+- `memory_type` (string, required): Type of memory
+  - `decision`: Architecture choices, tech stack
+  - `preference`: Coding style, tool preferences
+  - `experience`: Problems and solutions
+  - `convention`: Team rules, naming patterns
+  - `plan`: Future improvements, TODOs
+  - `note`: Other important information
+- `title` (string, required): Short title (max 200 chars)
+- `content` (string, required): Detailed content
+- `reason` (string, optional): Rationale or explanation
+- `tags` (array, optional): Tags for categorization
+- `importance` (number, optional): Importance score 0-1 (default: 0.5)
+- `related_refs` (array, optional): Related ref:// handles
+
+**Response**:
+```json
+{
+  "success": true,
+  "memory_id": "mem-abc123-def456",
+  "project_id": "myapp",
+  "message": "Memory added successfully"
+}
+```
+
+### Search Memories
+
+Search project memories with filters.
+
+**Endpoint**: `POST /memory/search`
+
+**Request Body**:
+```json
+{
+  "project_id": "myapp",
+  "query": "authentication",
+  "memory_type": "decision",
+  "tags": ["auth"],
+  "min_importance": 0.7,
+  "limit": 20
+}
+```
+
+**Parameters**:
+- `project_id` (string, required): Project identifier
+- `query` (string, optional): Search query text
+- `memory_type` (string, optional): Filter by memory type
+- `tags` (array, optional): Filter by tags
+- `min_importance` (number, optional): Minimum importance (default: 0.0)
+- `limit` (integer, optional): Max results (default: 20, max: 100)
+
+**Response**:
+```json
+{
+  "success": true,
+  "memories": [
+    {
+      "memory_id": "mem-abc123",
+      "memory_type": "decision",
+      "title": "Use JWT for authentication",
+      "content": "Decided to use JWT tokens...",
+      "reason": "Need stateless authentication...",
+      "tags": ["auth", "architecture"],
+      "importance": 0.9,
+      "created_at": "2025-01-15T10:30:00Z",
+      "updated_at": "2025-01-15T10:30:00Z"
+    }
+  ],
+  "total": 1,
+  "query": "authentication"
+}
+```
+
+### Get Memory
+
+Get a specific memory by ID.
+
+**Endpoint**: `GET /memory/{memory_id}`
+
+**Response**:
+```json
+{
+  "success": true,
+  "memory": {
+    "memory_id": "mem-abc123",
+    "project_id": "myapp",
+    "memory_type": "decision",
+    "title": "Use JWT for authentication",
+    "content": "Decided to use JWT tokens...",
+    "reason": "Need stateless authentication...",
+    "tags": ["auth", "architecture"],
+    "importance": 0.9,
+    "related_refs": ["ref://file/src/auth/jwt.py"],
+    "created_at": "2025-01-15T10:30:00Z",
+    "updated_at": "2025-01-15T10:30:00Z",
+    "is_superseded": false,
+    "superseded_by": null
+  }
+}
+```
+
+### Update Memory
+
+Update an existing memory.
+
+**Endpoint**: `PUT /memory/{memory_id}`
+
+**Request Body**:
+```json
+{
+  "title": "Updated title",
+  "importance": 0.95,
+  "tags": ["auth", "security", "critical"]
+}
+```
+
+**Parameters**: All fields are optional; only the provided fields are updated.
+- `title` (string): Update title
+- `content` (string): Update content
+- `reason` (string): Update reason
+- `tags` (array): Update tags
+- `importance` (number): Update importance
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Memory updated successfully",
+  "memory_id": "mem-abc123"
+}
+```
+
+### Delete Memory
+
+Delete a memory (soft delete).
+
+**Endpoint**: `DELETE /memory/{memory_id}`
+
+**Response**:
+```json
+{
+  "success": true,
+  "message": "Memory deleted successfully",
+  "memory_id": "mem-abc123"
+}
+```
+
+### Supersede Memory
+
+Create a new memory that supersedes an old one.
+
+**Endpoint**: `POST /memory/supersede`
+
+**Request Body**:
+```json
+{
+  "old_memory_id": "mem-abc123",
+  "new_memory_type": "decision",
+  "new_title": "Use PostgreSQL instead of MySQL",
+  "new_content": "Switched to PostgreSQL for better JSON support",
+  "new_reason": "Need advanced JSON querying capabilities",
+  "new_tags": ["database", "architecture"],
+  "new_importance": 0.8
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "old_memory_id": "mem-abc123",
+  "new_memory_id": "mem-xyz789",
+  "message": "Memory superseded successfully"
+}
+```
+
+### Get Project Summary
+
+Get summary of all memories for a project.
+
+**Endpoint**: `GET /memory/project/{project_id}/summary`
+
+**Response**:
+```json
+{
+  "success": true,
+  "project_id": "myapp",
+  "total_memories": 42,
+  "by_type": {
+    "decision": {
+      "count": 12,
+      "top_memories": [
+        {
+          "memory_id": "mem-abc123",
+          "title": "Use JWT for authentication",
+          "importance": 0.9,
+          "created_at": "2025-01-15T10:30:00Z"
+        }
+      ]
+    },
+    "preference": {"count": 8, "top_memories": []},
+    "experience": {"count": 15, "top_memories": []},
+    "convention": {"count": 5, "top_memories": []},
+    "plan": {"count": 2, "top_memories": []}
+  }
+}
+```
+
+### Memory Health Check
+
+Check memory store health.
+
+**Endpoint**: `GET /memory/health`
+
+**Response**:
+```json
+{
+  "service": "memory_store",
+  "status": "healthy",
+  "initialized": true,
+  "extraction_enabled": true
+}
+```
+
+---
+
+## Memory Extraction (v0.7)
+
+Automatic memory extraction from various sources.
+
+### Extract from Conversation
+
+Extract memories from conversation using LLM analysis.
+
+**Endpoint**: `POST /memory/extract/conversation`
+
+**Request Body**:
+```json
+{
+  "project_id": "myapp",
+  "conversation": [
+    {"role": "user", "content": "Should we use Redis or Memcached?"},
+    {"role": "assistant", "content": "Let's use Redis because it supports data persistence"}
+  ],
+  "auto_save": false
+}
+```
+
+**Parameters**:
+- `project_id` (string, required): Project identifier
+- `conversation` (array, required): Conversation messages
+- `auto_save` (boolean, optional): Auto-save high-confidence memories (default: false)
+
+**Response**:
+```json
+{
+  "success": true,
+  "extracted_memories": [
+    {
+      "memory_type": "decision",
+      "title": "Use Redis for caching",
+      "content": "Decided to use Redis instead of Memcached",
+      "reason": "Redis supports data persistence",
+      "confidence": 0.85,
+      "auto_saved": false,
+      "memory_id": null
+    }
+  ],
+  "total_extracted": 1,
+  "auto_saved_count": 0
+}
+```
+
+### Extract from Git Commit
+
+Extract memories from git commit using LLM analysis.
+
+**Endpoint**: `POST /memory/extract/commit`
+
+**Request Body**:
+```json
+{
+  "project_id": "myapp",
+  "commit_sha": "abc123def456",
+  "commit_message": "feat: add JWT authentication\n\nImplemented JWT-based auth for stateless API",
+  "changed_files": ["src/auth/jwt.py", "src/middleware/auth.py"],
+  "auto_save": true
+}
+```
+
+**Parameters**:
+- `project_id` (string, required): Project identifier
+- `commit_sha` (string, required): Git commit SHA
+- `commit_message` (string, required): Full commit message
+- `changed_files` (array, required): List of changed file paths
+- `auto_save` (boolean, optional): Auto-save high-confidence memories
+
+**Response**:
+```json
+{
+  "success": true,
+  "extracted_memories": [
+    {
+      "memory_type": "decision",
+      "title": "Implement JWT authentication",
+      "content": "Added JWT-based authentication system",
+      "confidence": 0.92,
+      "auto_saved": true,
+      "memory_id": "mem-xyz789"
+    }
+  ]
+}
+```
+
+### Extract from Code Comments
+
+Extract memories from code comments in a source file.
+
+**Endpoint**: `POST /memory/extract/comments`
+
+**Request Body**:
+```json
+{
+  "project_id": "myapp",
+  "file_path": "/path/to/project/src/service.py"
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "extracted_memories": [
+    {
+      "memory_type": "plan",
+      "title": "TODO: Add rate limiting",
+      "content": "Need to implement rate limiting for API endpoints",
+      "line_number": 45,
+      "auto_saved": true,
+      "memory_id": "mem-plan123"
+    }
+  ],
+  "total_extracted": 1
+}
+```
+
+### Suggest Memory from Query
+
+Suggest creating a memory from a knowledge base query and its answer.
+
+**Endpoint**: `POST /memory/suggest`
+
+**Request Body**:
+```json
+{
+  "project_id": "myapp",
+  "query": "How does the authentication work?",
+  "answer": "The system uses JWT tokens with refresh token rotation..."
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "should_save": true,
+  "confidence": 0.88,
+  "suggested_memory": {
+    "memory_type": "note",
+    "title": "Authentication mechanism",
+    "content": "System uses JWT with refresh token rotation",
+    "importance": 0.7
+  }
+}
+```
+
+### Batch Extract from Repository
+
+Batch extract memories from an entire repository.
+
+**Endpoint**: `POST /memory/extract/batch`
+
+**Request Body**:
+```json
+{
+  "project_id": "myapp",
+  "repo_path": "/path/to/repository",
+  "max_commits": 50,
+  "file_patterns": ["*.py", "*.js"]
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "summary": {
+    "commits_analyzed": 50,
+    "files_scanned": 125,
+    "total_extracted": 23,
+    "by_source": {
+      "git_commits": 12,
+      "code_comments": 11
+    },
+    "by_type": {
+      "decision": 5,
+      "experience": 8,
+      "plan": 10
+    }
+  },
+  "execution_time_seconds": 45.2
+}
+```
+
+---
+
+## Code Graph
+
+Code graph analysis endpoints for repository understanding.
+
+### Ingest Repository
+
+Ingest a code repository into the graph database.
+
+**Endpoint**: `POST /ingest/repo`
+
+**Request Body**:
+```json
+{
+  "local_path": "/path/to/repository",
+  "repo_url": null,
+  "branch": "main",
+  "mode": "incremental",
+  "include_globs": ["**/*.py", "**/*.ts", "**/*.tsx"],
+  "exclude_globs": ["**/node_modules/**", "**/.git/**"],
+  "since_commit": null
+}
+```
+
+**Parameters**:
+- `local_path` (string, optional): Local repository path
+- `repo_url` (string, optional): Repository URL to clone
+- `branch` (string, optional): Branch name (default: "main")
+- `mode` (string, optional): Ingestion mode
+  - `full`: Complete re-ingestion
+  - `incremental`: Only changed files (60x faster)
+- `include_globs` (array, optional): File patterns to include
+- `exclude_globs` (array, optional): File patterns to exclude
+- `since_commit` (string, optional): For incremental mode
+
+**Response**:
+```json
+{
+  "task_id": "ing-20250115-103045-abc12345",
+  "status": "done",
+  "message": "Successfully ingested 125 files",
+  "files_processed": 125,
+  "mode": "incremental",
+  "changed_files_count": 8
+}
+```
+
+### Get Related Files
+
+Find files related to a query using fulltext search.
+
+**Endpoint**: `GET /graph/related?query={query}&repoId={repoId}&limit={limit}`
+
+**Parameters**:
+- `query` (string, required): Search query
+- `repoId` (string, required): Repository identifier
+- `limit` (integer, optional): Max results (default: 30, max: 100)
+
+**Response**:
+```json
+{
+  "nodes": [
+    {
+      "type": "file",
+      "ref": "ref://file/src/auth/jwt.py",
+      "path": "src/auth/jwt.py",
+      "lang": "python",
+      "score": 0.92,
+      "summary": "JWT authentication implementation"
+    }
+  ],
+  "query": "authentication",
+  "repo_id": "myproject"
+}
+```
+
+### Impact Analysis
+
+Analyze impact of changes to a file (reverse dependencies).
+
+**Endpoint**: `GET /graph/impact?repoId={repoId}&file={file}&depth={depth}&limit={limit}`
+
+**Parameters**:
+- `repoId` (string, required): Repository identifier
+- `file` (string, required): File path to analyze
+- `depth` (integer, optional): Traversal depth (default: 2, max: 5)
+- `limit` (integer, optional): Max results (default: 50, max: 100)
+
+**Response**:
+```json
+{
+  "nodes": [
+    {
+      "type": "file",
+      "path": "src/api/auth_routes.py",
+      "lang": "python",
+      "repoId": "myproject",
+      "relationship": "IMPORTS",
+      "depth": 1,
+      "score": 0.85,
+      "ref": "ref://file/src/api/auth_routes.py",
+      "summary": "Auth API routes (imports jwt.py)"
+    }
+  ],
+  "file": "src/auth/jwt.py",
+  "repo_id": "myproject",
+  "depth": 2
+}
+```
+
+### Context Pack
+
+Build a context pack within a token budget.
+
+**Endpoint**: `GET /context/pack?repoId={repoId}&stage={stage}&budget={budget}&keywords={keywords}&focus={focus}`
+
+**Parameters**:
+- `repoId` (string, required): Repository identifier
+- `stage` (string, optional): Development stage (default: "plan")
+  - `plan`: Project overview
+  - `review`: Code review focus
+  - `implement`: Implementation details
+- `budget` (integer, optional): Token budget (default: 1500, max: 10000)
+- `keywords` (string, optional): Comma-separated keywords
+- `focus` (string, optional): Comma-separated focus paths
+
+**Response**:
+```json
+{
+  "items": [
+    {
+      "kind": "file",
+      "title": "src/auth/jwt.py",
+      "summary": "JWT authentication implementation with token generation and validation",
+      "ref": "ref://file/src/auth/jwt.py",
+      "extra": {
+        "lang": "python",
+        "score": 0.92
+      }
+    }
+  ],
+  "budget_used": 1450,
+  "budget_limit": 1500,
+  "stage": "implement",
+  "repo_id": "myproject",
+  "category_counts": {
+    "file": 8,
+    "symbol": 12
+  }
+}
+```
+
+---
+
+## Task Management
+
+Asynchronous task queue management.
+
+### Create Task
+
+Create a new task.
+
+**Endpoint**: `POST /tasks/`
+
+**Request Body**:
+```json
+{
+  "task_type": "document_processing",
+  "task_name": "Process large document",
+  "payload": {
+    "document_content": "...",
+    "title": "Large Doc"
+  },
+  "priority": 0,
+  "metadata": {
+    "source": "api"
+  }
+}
+```
+
+**Valid task types**:
+- `document_processing`
+- `schema_parsing`
+- `knowledge_graph_construction`
+- `batch_processing`
+
+**Response**:
+```json
+{
+  "task_id": "task-abc123",
+  "status": "created"
+}
+```
+
+### Get Task Status
+
+Get the status of a specific task.
+
+**Endpoint**: `GET /tasks/{task_id}`
+
+**Response**:
+```json
+{
+  "task_id": "task-abc123",
+  "status": "SUCCESS",
+  "progress": 100.0,
+  "message": "Task completed successfully",
+  "result": {
+    "chunks_created": 15,
+    "document_id": "doc-xyz789"
+  },
+  "error": null,
+  "created_at": "2025-01-15T10:30:00Z",
+  "started_at": "2025-01-15T10:30:05Z",
+  "completed_at": "2025-01-15T10:30:45Z",
+  "metadata": {
+    "source": "api"
+  }
+}
+```
+
+**Status values**:
+- `PENDING`: Waiting in queue
+- `PROCESSING`: Currently running
+- `SUCCESS`: Completed successfully
+- `FAILED`: Failed with error
+- `CANCELLED`: Cancelled by user
+
+### List Tasks
+
+List tasks with optional filtering.
+
+**Endpoint**: `GET /tasks/?status={status}&page={page}&page_size={page_size}&task_type={task_type}`
+
+**Parameters**:
+- `status` (string, optional): Filter by status
+- `page` (integer, optional): Page number (default: 1)
+- `page_size` (integer, optional): Page size (default: 20, max: 100)
+- `task_type` (string, optional): Filter by task type
+
+**Response**:
+```json
+{
+  "tasks": [
+    {
+      "task_id": "task-abc123",
+      "status": "SUCCESS",
+      "progress": 100.0,
+      "message": "Completed",
+      "created_at": "2025-01-15T10:30:00Z"
+    }
+  ],
+  "total": 42,
+  "page": 1,
+  "page_size": 20
+}
+```
+
+### Cancel Task
+
+Cancel a pending or running task.
+
+**Endpoint**: `DELETE /tasks/{task_id}`
+
+**Response**:
+```json
+{
+  "message": "Task cancelled successfully",
+  "task_id": "task-abc123"
+}
+```
+
+### Get Task Statistics
+
+Get task queue statistics.
+
+**Endpoint**: `GET /tasks/stats/overview`
+
+**Response**:
+```json
+{
+  "total_tasks": 156,
+  "pending_tasks": 5,
+  "processing_tasks": 2,
+  "completed_tasks": 142,
+  "failed_tasks": 6,
+  "cancelled_tasks": 1
+}
+```
+
+### Retry Task
+
+Retry a failed or cancelled task.
+
+**Endpoint**: `POST /tasks/{task_id}/retry`
+
+**Response**:
+```json
+{
+  "message": "Task retried successfully",
+  "original_task_id": "task-abc123",
+  "new_task_id": "task-xyz789"
+}
+```
+
+### Get Queue Status
+
+Get current queue status.
+
+**Endpoint**: `GET /tasks/queue/status`
+
+**Response**:
+```json
+{
+  "running_tasks": 2,
+  "max_concurrent_tasks": 5,
+  "available_slots": 3,
+  "queue_active": true
+}
+```
+
+---
+
+## SQL Parsing
+
+SQL parsing and analysis endpoints.
+
+### Parse SQL Statement
+
+Parse and analyze a SQL statement.
+
+**Endpoint**: `POST /sql/parse`
+
+**Request Body**:
+```json
+{
+  "sql": "SELECT * FROM users WHERE id = 1",
+  "dialect": "mysql"
+}
+```
+
+**Supported dialects**: `mysql`, `postgresql`, `oracle`, `sqlserver`
+
+**Response**:
+```json
+{
+  "success": true,
+  "parsed": {
+    "statement_type": "SELECT",
+    "tables": ["users"],
+    "columns": ["*"],
+    "where_conditions": ["id = 1"]
+  }
+}
+```
+
+### Validate SQL Syntax
+
+Validate SQL syntax.
+
+**Endpoint**: `POST /sql/validate`
+
+**Request Body**:
+```json
+{
+  "sql": "SELECT * FROM users",
+  "dialect": "mysql"
+}
+```
+
+**Response**:
+```json
+{
+  "valid": true,
+  "errors": []
+}
+```
+
+### Convert SQL Dialect
+
+Convert SQL between dialects.
+
+**Endpoint**: `POST /sql/convert?sql={sql}&from_dialect={from_dialect}&to_dialect={to_dialect}`
+
+**Response**:
+```json
+{
+  "success": true,
+  "original_sql": "SELECT * FROM users LIMIT 10",
+  "converted_sql": "SELECT TOP 10 * FROM users",
+  "from_dialect": "mysql",
+  "to_dialect": "sqlserver"
+}
+```
+
+### Parse SQL Schema
+
+Parse a SQL schema with automatic dialect detection.
+
+**Endpoint**: `POST /sql/parse-schema`
+
+**Request Body**:
+```json
+{
+  "schema_content": "CREATE TABLE users (id INT PRIMARY KEY, name VARCHAR(100));",
+  "file_path": null
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "dialect": "mysql",
+  "tables": [
+    {
+      "name": "users",
+      "columns": [
+        {"name": "id", "type": "INT", "primary_key": true},
+        {"name": "name", "type": "VARCHAR(100)", "primary_key": false}
+      ]
+    }
+  ],
+  "relationships": []
+}
+```
+
+---
+
+## Real-time Monitoring (SSE)
+
+Server-Sent Events for real-time task monitoring.
+
+### Monitor Single Task
+
+Stream updates for a specific task.
+
+**Endpoint**: `GET /sse/task/{task_id}`
+
+**Response**: SSE stream
+```
+data: {"task_id": "task-abc123", "status": "PROCESSING", "progress": 25.0}
+
+data: {"task_id": "task-abc123", "status": "PROCESSING", "progress": 50.0}
+
+data: {"task_id": "task-abc123", "status": "SUCCESS", "progress": 100.0}
+```
+
+### Monitor All Tasks
+
+Stream updates for all tasks.
+
+**Endpoint**: `GET /sse/tasks?status={status}`
+
+**Parameters**:
+- `status` (string, optional): Filter by status
+
+**Response**: SSE stream
+```
+data: {"event": "task_update", "task_id": "task-1", "status": "PROCESSING"}
+
+data: {"event": "task_update", "task_id": "task-2", "status": "SUCCESS"}
+```
+
+### Get SSE Statistics
+
+Get active SSE connection statistics.
+
+**Endpoint**: `GET /sse/stats`
+
+**Response**:
+```json
+{
+  "active_connections": 5,
+  "task_streams": 3,
+  "global_streams": 2
+}
+```
+
+---
+
+## Error Handling
+
+All endpoints follow a consistent error response format.
+
+### Error Response Format
+
+```json
+{
+  "detail": "Error message describing what went wrong"
+}
+```
+
+### HTTP Status Codes
+
+- `200 OK`: Success
+- `201 Created`: Resource created
+- `400 Bad Request`: Invalid request parameters
+- `404 Not Found`: Resource not found
+- `500 Internal Server Error`: Server error
+
+### Common Errors
+
+**Invalid Parameters**:
+```json
+{
+  "detail": "Invalid task type. Must be one of: document_processing, schema_parsing, knowledge_graph_construction, batch_processing"
+}
+```
+
+**Resource Not Found**:
+```json
+{
+  "detail": "Task not found"
+}
+```
+
+**Service Error**:
+```json
+{
+  "detail": "Failed to initialize Neo4j connection"
+}
+```
+
+---
+
+## Rate Limits
+
+Currently no rate limits are enforced. This may change in future versions.
+
+## Pagination
+
+Endpoints that return lists support pagination:
+- `page`: Page number (default: 1)
+- `page_size`: Items per page (default: 20, max: 100)
+
+Response includes:
+- `total`: Total item count
+- `page`: Current page
+- `page_size`: Items per page
+
+---
+
+**Last Updated**: 2025-01-15
+**API Version**: 1.0.0
+**Documentation Version**: 1.0
diff --git a/docs/architecture/components.md b/docs/architecture/components.md
new file mode 100644
index 0000000..3babc32
--- /dev/null
+++ b/docs/architecture/components.md
@@ -0,0 +1,1492 @@
+# System Components
+
+## Table of Contents
+
+- [Overview](#overview)
+- [API Layer Components](#api-layer-components)
+- [Service Layer Components](#service-layer-components)
+- [Storage Components](#storage-components)
+- [Utility Components](#utility-components)
+- [MCP Server Components](#mcp-server-components)
+- [Component Dependencies](#component-dependencies)
+
+## Overview
+
+The Code Graph Knowledge System consists of multiple specialized components organized in layers. This document describes each component in detail: its responsibilities, interfaces, and interactions with other components.
+
+```mermaid
+graph TB
+    subgraph "API Layer"
+        FastAPI[FastAPI Application]
+        MCP[MCP Server]
+        Routes[API Routes]
+        Middleware[Middleware Stack]
+    end
+
+    subgraph "Service Layer"
+        KnowServ[Knowledge Service]
+        MemServ[Memory Store]
+        GraphServ[Graph Service]
+        TaskQ[Task Queue]
+        CodeIng[Code Ingestor]
+        MemExt[Memory Extractor]
+    end
+
+    subgraph "Supporting Services"
+        SQLParse[SQL Parser]
+        Ranker[Result Ranker]
+        PackBuild[Context Pack Builder]
+        GitUtil[Git Utilities]
+        Metrics[Metrics Service]
+    end
+
+    subgraph "Storage"
+        Neo4j[(Neo4j)]
+        SQLite[(SQLite)]
+        FileSystem[File System]
+    end
+
+    FastAPI --> Routes
+    FastAPI --> Middleware
+    Routes --> KnowServ
+    Routes --> MemServ
+    Routes --> GraphServ
+    Routes --> TaskQ
+
+    MCP --> KnowServ
+    MCP --> MemServ
+    MCP --> GraphServ
+
+    KnowServ --> Neo4j
+    MemServ --> Neo4j
+    GraphServ --> Neo4j
+    TaskQ --> SQLite
+    CodeIng --> GraphServ
+    MemExt --> MemServ
+
+    style FastAPI fill:#4CAF50
+    style MCP fill:#2196F3
+    style Neo4j fill:#f9a825
+```
+
+## API Layer Components
+
+### FastAPI Application
+
+**Files**: `main.py`, `core/app.py`
+
+**Purpose**: Main web server providing RESTful API endpoints
+
+**Key Responsibilities**:
+- HTTP request handling
+- Route management
+- Middleware processing
+- Static file serving
+- API documentation (OpenAPI/Swagger)
+
+**Configuration**:
+```python
+app = FastAPI(
+    title="Code Graph Knowledge Service",
+    version="1.0.0",
+    lifespan=lifespan,  # Startup/shutdown hooks
+    docs_url="/docs",
+    redoc_url="/redoc"
+)
+```
+
+**Dependencies**:
+- All service layer components
+- Configuration settings
+- Middleware stack
+- Exception handlers
+
+**Startup Sequence**:
+1. Load configuration from environment
+2. Initialize logging system
+3. Initialize all services via lifespan manager
+4. Setup middleware
+5. Register routes
+6. Mount static files
+7. Integrate monitoring UI (if enabled)
+
+**Shutdown Sequence**:
+1. Stop accepting new requests
+2. Stop task queue
+3. Close Memory Store
+4. Close Knowledge Service
+5. Close database connections
+
+### API Routes
+
+**Files**: `core/routes.py`, `api/*.py`
+
+**Purpose**: Organize and register all API endpoints
+
+**Route Modules**:
+
+#### 1. Main Routes (`api/routes.py`)
+```python
+# Health check
+GET /api/v1/health
+
+# Knowledge base operations
+POST /api/v1/knowledge/query
+POST /api/v1/knowledge/search
+POST /api/v1/documents/add
+POST /api/v1/documents/file
+POST /api/v1/documents/directory
+
+# SQL parsing
+POST /api/v1/sql/parse
+POST /api/v1/sql/schema/upload
+
+# Code graph
+POST /api/v1/code/ingest
+POST /api/v1/code/search
+POST /api/v1/code/related
+POST /api/v1/code/impact
+POST /api/v1/code/context-pack
+```
+
+#### 2. Memory Routes (`api/memory_routes.py`)
+```python
+# Memory management
+POST /api/v1/memory/add
+POST /api/v1/memory/search
+GET /api/v1/memory/{memory_id}
+PUT /api/v1/memory/{memory_id}
+DELETE /api/v1/memory/{memory_id}
+POST /api/v1/memory/supersede
+GET /api/v1/memory/project/{project_id}/summary
+
+# Memory extraction (v0.7)
+POST /api/v1/memory/extract/conversation
+POST /api/v1/memory/extract/commit
+POST /api/v1/memory/extract/comments
+POST /api/v1/memory/suggest
+POST /api/v1/memory/extract/batch
+```
+
+#### 3. Task Routes (`api/task_routes.py`)
+```python
+# Task management
+GET /api/v1/tasks/{task_id}
+GET /api/v1/tasks
+POST /api/v1/tasks/{task_id}/cancel
+GET /api/v1/queue/stats
+```
+
+#### 4. SSE Routes (`api/sse_routes.py`)
+```python
+# Server-Sent Events for real-time updates
+GET /api/v1/sse/task/{task_id}
+GET /api/v1/sse/tasks
+GET /api/v1/sse/stats
+```
+
+#### 5. WebSocket Routes (`api/websocket_routes.py`)
+```python
+# WebSocket connections
+WS /api/v1/ws/task/{task_id}
+```
+
+**Request/Response Models**:
+```python
+# Example: Document addition
+class DocumentAddRequest(BaseModel):
+    content: str
+    title: str = "Untitled"
+    metadata: Optional[Dict[str, Any]] = None
+
+class DocumentAddResponse(BaseModel):
+    success: bool
+    document_id: Optional[str] = None
+    message: str
+    error: Optional[str] = None
+```
+
+### Middleware Stack
+
+**File**: `core/middleware.py`
+
+**Purpose**: Process all requests/responses with cross-cutting concerns
+
+**Middleware Components**:
+
+#### 1. CORS Middleware
+```python
+CORSMiddleware(
+    allow_origins=settings.cors_origins,
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
+```
+
+**Purpose**: Handle cross-origin requests for web clients
+
+#### 2. GZip Middleware
+```python
+GZipMiddleware(minimum_size=1000)
+```
+
+**Purpose**: Compress responses for bandwidth optimization
+
+#### 3. Request Logging Middleware
+```python
+@app.middleware("http")
+async def log_requests(request: Request, call_next):
+    start_time = time.time()
+    response = await call_next(request)
+    duration = time.time() - start_time
+    logger.info(f"{request.method} {request.url.path} {response.status_code} {duration:.3f}s")
+    return response
+```
+
+**Purpose**: Log all HTTP requests with timing information
+
+#### 4. Error Handling Middleware
+```python
+@app.exception_handler(Exception)
+async def global_exception_handler(request: Request, exc: Exception):
+    logger.error(f"Unhandled exception: {exc}")
+    return JSONResponse(
+        status_code=500,
+        content={"detail": "Internal server error"}
+    )
+```
+
+**Purpose**: Catch and handle all uncaught exceptions
+
+### Lifespan Manager
+
+**File**: `core/lifespan.py`
+
+**Purpose**: Manage application startup and shutdown lifecycle
+
+**Initialization Sequence**:
+```python
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # Startup
+    logger.info("Starting services...")
+
+    # 1. Initialize Neo4j Knowledge Service
+    await neo4j_knowledge_service.initialize()
+
+    # 2. Initialize Memory Store
+    await memory_store.initialize()
+
+    # 3. Initialize Task Processors
+    processor_registry.initialize_default_processors(neo4j_knowledge_service)
+
+    # 4. Start Task Queue
+    await task_queue.start()
+
+    yield
+
+    # Shutdown
+    await task_queue.stop()
+    await memory_store.close()
+    await neo4j_knowledge_service.close()
+```
+
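+**Design Pattern**: Async context manager — startup logic runs before `yield` and shutdown logic after it. Note that, as written above, the shutdown steps run only on a clean exit; wrap the `yield` in `try`/`finally` to guarantee cleanup even on errors.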
+
+## Service Layer Components
+
+### Knowledge Service
+
+**File**: `services/neo4j_knowledge_service.py`
+
+**Purpose**: Primary service for knowledge graph operations using LlamaIndex
+
+**Key Capabilities**:
+- Document processing and chunking
+- Vector embedding generation
+- Knowledge graph construction
+- RAG-based query answering
+- Semantic similarity search
+
+**Architecture**:
+```python
+class Neo4jKnowledgeService:
+    def __init__(self):
+        self.graph_store = None          # Neo4j graph store
+        self.knowledge_index = None      # LlamaIndex KnowledgeGraphIndex
+        self.query_engine = None         # RAG query engine
+        self._initialized = False
+```
+
+**Initialization Flow**:
+```mermaid
+sequenceDiagram
+    participant Client
+    participant KnowServ as Knowledge Service
+    participant LlamaIndex
+    participant Neo4j
+
+    Client->>KnowServ: initialize()
+    KnowServ->>KnowServ: _create_llm()
+    KnowServ->>KnowServ: _create_embed_model()
+    KnowServ->>Neo4j: Connect via Neo4jGraphStore
+    Neo4j-->>KnowServ: Connection established
+    KnowServ->>LlamaIndex: Configure Settings
+    KnowServ->>LlamaIndex: Create KnowledgeGraphIndex
+    LlamaIndex-->>KnowServ: Index ready
+    KnowServ->>KnowServ: Create query engine
+    KnowServ-->>Client: Initialized
+```
+
+**Core Methods**:
+
+#### 1. Document Addition
+```python
+async def add_document(
+    self,
+    content: str,
+    title: str = "Untitled",
+    metadata: Optional[Dict[str, Any]] = None
+) -> Dict[str, Any]:
+    """Add document to knowledge graph"""
+    # 1. Create LlamaIndex Document
+    document = Document(text=content, metadata={...})
+
+    # 2. Insert into index (creates nodes, embeddings, relationships)
+    await asyncio.to_thread(self.knowledge_index.insert, document)
+
+    # 3. Return result with document ID
+    return {"success": True, "document_id": doc_id}
+```
+
+#### 2. Query Processing (RAG)
+```python
+async def query(
+    self,
+    question: str,
+    top_k: int = 5
+) -> Dict[str, Any]:
+    """Query knowledge base with RAG"""
+    # 1. Use query engine to retrieve relevant context
+    # 2. Generate answer using LLM with context
+    response = await asyncio.to_thread(
+        self.query_engine.query,
+        question
+    )
+
+    # 3. Return answer with source nodes
+    return {
+        "success": True,
+        "answer": str(response),
+        "sources": [node.metadata for node in response.source_nodes]
+    }
+```
+
+#### 3. Semantic Search
+```python
+async def search_similar(
+    self,
+    query: str,
+    top_k: int = 5
+) -> Dict[str, Any]:
+    """Find similar documents using vector search"""
+    # 1. Generate query embedding
+    # 2. Search Neo4j vector index
+    # 3. Return ranked results
+```
+
+**LLM Provider Support**:
+```python
+def _create_llm(self):
+    provider = settings.llm_provider
+
+    if provider == "ollama":
+        return Ollama(model=settings.ollama_model, ...)
+    elif provider == "openai":
+        return OpenAI(model=settings.openai_model, ...)
+    elif provider == "gemini":
+        return Gemini(model=settings.gemini_model, ...)
+    elif provider == "openrouter":
+        return OpenRouter(model=settings.openrouter_model, ...)
+```
+
+**Embedding Model Support**:
+```python
+def _create_embed_model(self):
+    provider = settings.embedding_provider
+
+    if provider == "ollama":
+        return OllamaEmbedding(model_name=settings.ollama_embedding_model)
+    elif provider == "openai":
+        return OpenAIEmbedding(model=settings.openai_embedding_model)
+    elif provider == "gemini":
+        return GeminiEmbedding(model_name=settings.gemini_embedding_model)
+    elif provider == "huggingface":
+        return HuggingFaceEmbedding(model_name=settings.huggingface_embedding_model)
+```
+
+**Configuration**:
+```python
+# Global LlamaIndex settings
+Settings.llm = self._create_llm()
+Settings.embed_model = self._create_embed_model()
+Settings.chunk_size = settings.chunk_size
+Settings.chunk_overlap = settings.chunk_overlap
+Settings.node_parser = SimpleNodeParser.from_defaults()
+```
+
+### Memory Store
+
+**File**: `services/memory_store.py`
+
+**Purpose**: Persistent project knowledge management for AI agents
+
+**Memory Types**:
+```python
+MemoryType = Literal[
+    "decision",      # Architecture choices, tech decisions
+    "preference",    # Coding styles, tool preferences
+    "experience",    # Problems and solutions
+    "convention",    # Team rules, naming conventions
+    "plan",          # Future improvements, TODOs
+    "note"           # Other important information
+]
+```
+
+**Data Model**:
+```python
+class Memory:
+    id: str                          # Unique identifier
+    project_id: str                  # Project namespace
+    memory_type: MemoryType          # Type of memory
+    title: str                       # Short description
+    content: str                     # Main content
+    reason: Optional[str]            # Rationale/context
+    importance: float                # 0.0-1.0 score
+    tags: List[str]                  # Categorization tags
+    created_at: datetime             # Creation timestamp
+    updated_at: datetime             # Last update
+    is_active: bool                  # Soft delete flag
+    superseded_by: Optional[str]     # Replacement memory ID
+```
+
+**Graph Schema**:
+```cypher
+// Nodes
+(:Memory {
+    id: string,
+    project_id: string,
+    memory_type: string,
+    title: string,
+    content: string,
+    reason: string,
+    importance: float,
+    tags: [string],
+    created_at: datetime,
+    updated_at: datetime,
+    is_active: boolean,
+    superseded_by: string
+})
+
+(:Project {
+    id: string,
+    name: string,
+    created_at: datetime
+})
+
+// Relationships
+(Memory)-[:BELONGS_TO]->(Project)
+(Memory)-[:RELATES_TO]->(Memory)
+(Memory)-[:SUPERSEDES]->(Memory)
+```
+
+**Core Operations**:
+
+#### 1. Add Memory
+```python
+async def add_memory(
+    self,
+    project_id: str,
+    memory_type: MemoryType,
+    title: str,
+    content: str,
+    reason: Optional[str] = None,
+    importance: float = 0.5,
+    tags: Optional[List[str]] = None
+) -> Dict[str, Any]:
+    """Add new project memory"""
+    # 1. Generate unique ID
+    # 2. Create Memory node in Neo4j
+    # 3. Link to Project
+    # 4. Create fulltext indexes
+    # 5. Return memory details
+```
+
+#### 2. Search Memories
+```python
+async def search_memories(
+    self,
+    project_id: str,
+    query: Optional[str] = None,
+    memory_type: Optional[MemoryType] = None,
+    tags: Optional[List[str]] = None,
+    min_importance: float = 0.0,
+    limit: int = 10
+) -> List[Dict[str, Any]]:
+    """Search project memories with filters"""
+    # 1. Build Cypher query with filters
+    # 2. Use fulltext search if query provided
+    # 3. Filter by type, tags, importance
+    # 4. Order by relevance and importance
+    # 5. Return ranked results
+```
+
+#### 3. Supersede Memory
+```python
+async def supersede_memory(
+    self,
+    old_memory_id: str,
+    new_title: str,
+    new_content: str,
+    ...
+) -> Dict[str, Any]:
+    """Replace old memory with new version"""
+    # 1. Create new memory
+    # 2. Mark old memory as superseded
+    # 3. Create SUPERSEDES relationship
+    # 4. Maintain history chain
+```
+
+**Indexes**:
+```cypher
+// Constraints
+CREATE CONSTRAINT memory_id_unique IF NOT EXISTS
+FOR (m:Memory) REQUIRE m.id IS UNIQUE;
+
+// Fulltext search
+CREATE FULLTEXT INDEX memory_search IF NOT EXISTS
+FOR (m:Memory) ON EACH [m.title, m.content, m.reason, m.tags];
+```
+
+### Graph Service
+
+**File**: `services/graph_service.py`
+
+**Purpose**: Direct Neo4j graph database operations
+
+**Key Capabilities**:
+- Raw Cypher query execution
+- Code graph management
+- Schema operations
+- Batch operations
+- Transaction management
+
+**Architecture**:
+```python
+class Neo4jGraphService:
+    def __init__(self):
+        self.driver = None           # Neo4j driver
+        self._connected = False
+```
+
+**Core Methods**:
+
+#### 1. Execute Query
+```python
+async def execute_query(
+    self,
+    cypher: str,
+    parameters: Optional[Dict[str, Any]] = None
+) -> GraphQueryResult:
+    """Execute Cypher query and return results"""
+    with self.driver.session(database=settings.neo4j_database) as session:
+        result = session.run(cypher, parameters)
+        return self._process_result(result)
+```
+
+#### 2. Create Nodes
+```python
+def create_node(
+    self,
+    labels: List[str],
+    properties: Dict[str, Any]
+) -> GraphNode:
+    """Create graph node"""
+    cypher = f"""
+    CREATE (n:{':'.join(labels)})
+    SET n = $properties
+    RETURN n
+    """
+    # Execute and return node
+```
+
+#### 3. Create Relationships
+```python
+def create_relationship(
+    self,
+    start_node_id: str,
+    end_node_id: str,
+    relationship_type: str,
+    properties: Optional[Dict[str, Any]] = None
+) -> GraphRelationship:
+    """Create relationship between nodes"""
+    cypher = """
+    MATCH (a), (b)
+    WHERE a.id = $start_id AND b.id = $end_id
+    CREATE (a)-[r:$rel_type]->(b)
+    SET r = $properties
+    RETURN r
+    """
+    # Execute and return relationship
+```
+
+**Code Graph Schema**:
+```cypher
+// Repository structure
+(:Repo {id: string, name: string, path: string})
+(:File {repoId: string, path: string, lang: string, content: string})
+(:Symbol {id: string, name: string, type: string, line: int})
+
+// Code entities
+(:Function {id: string, name: string, params: [string], returns: string})
+(:Class {id: string, name: string, methods: [string]})
+(:Table {id: string, name: string, columns: [string]})
+
+// Relationships
+(File)-[:BELONGS_TO]->(Repo)
+(Symbol)-[:DEFINED_IN]->(File)
+(Symbol)-[:CALLS]->(Symbol)
+(Symbol)-[:INHERITS]->(Symbol)
+(Symbol)-[:USES]->(Symbol)
+```
+
+### Task Queue
+
+**File**: `services/task_queue.py`
+
+**Purpose**: Asynchronous background task processing with persistence
+
+**Design Pattern**: Producer-Consumer with SQLite persistence
+
+**Architecture**:
+```python
+class TaskQueue:
+    def __init__(self, max_concurrent_tasks: int = 3):
+        self.max_concurrent_tasks = max_concurrent_tasks
+        self.tasks: Dict[str, TaskResult] = {}           # In-memory cache
+        self.running_tasks: Dict[str, asyncio.Task] = {} # Active tasks
+        self.task_semaphore = asyncio.Semaphore(max_concurrent_tasks)
+        self._storage = None                              # SQLite storage
+        self._worker_id = str(uuid.uuid4())              # Worker identity
+```
+
+**Task Lifecycle**:
+```mermaid
+stateDiagram-v2
+    [*] --> PENDING: Task created
+    PENDING --> PROCESSING: Worker picks up
+    PROCESSING --> SUCCESS: Completed
+    PROCESSING --> FAILED: Error occurred
+    PROCESSING --> CANCELLED: User cancelled
+    SUCCESS --> [*]
+    FAILED --> [*]
+    CANCELLED --> [*]
+
+    note right of PROCESSING
+        Progress updates
+        sent via SSE/WebSocket
+    end note
+```
+
+**Task Status**:
+```python
+class TaskStatus(Enum):
+    PENDING = "pending"         # Queued, not started
+    PROCESSING = "processing"   # Currently running
+    SUCCESS = "success"         # Completed successfully
+    FAILED = "failed"          # Error occurred
+    CANCELLED = "cancelled"    # User cancelled
+```
+
+**Task Result**:
+```python
+@dataclass
+class TaskResult:
+    task_id: str
+    status: TaskStatus
+    progress: float = 0.0                    # 0.0 to 1.0
+    message: str = ""                        # Status message
+    result: Optional[Dict[str, Any]] = None  # Final result
+    error: Optional[str] = None              # Error details
+    created_at: datetime = field(default_factory=datetime.now)
+    started_at: Optional[datetime] = None
+    completed_at: Optional[datetime] = None
+    metadata: Dict[str, Any] = field(default_factory=dict)  # Task-specific data
+```
+
+**Core Operations**:
+
+#### 1. Submit Task
+```python
+async def submit_task(
+    self,
+    task_func: Callable,
+    *args,
+    task_type: str = "generic",
+    **kwargs
+) -> str:
+    """Submit new task for processing"""
+    # 1. Generate task ID
+    task_id = str(uuid.uuid4())
+
+    # 2. Create TaskResult
+    task_result = TaskResult(
+        task_id=task_id,
+        status=TaskStatus.PENDING,
+        metadata={"type": task_type}
+    )
+
+    # 3. Store in SQLite
+    await self._storage.store_task(task_result)
+
+    # 4. Cache in memory
+    self.tasks[task_id] = task_result
+
+    # 5. Worker will pick up automatically
+    return task_id
+```
+
+#### 2. Process Tasks (Worker)
+```python
+async def _process_pending_tasks(self):
+    """Background worker to process pending tasks"""
+    while True:
+        try:
+            # 1. Get pending tasks from SQLite
+            pending = await self._storage.get_pending_tasks(limit=10)
+
+            # 2. Process each task
+            for task in pending:
+                if len(self.running_tasks) < self.max_concurrent_tasks:
+                    await self._execute_task(task)
+
+            # 3. Wait before next poll
+            await asyncio.sleep(1)
+
+        except asyncio.CancelledError:
+            break
+```
+
+#### 3. Execute Task
+```python
+async def _execute_task(self, task: TaskResult):
+    """Execute single task with error handling"""
+    async with self.task_semaphore:
+        try:
+            # 1. Update status to PROCESSING
+            task.status = TaskStatus.PROCESSING
+            task.started_at = datetime.now()
+            await self._storage.update_task_status(task.task_id, task.status)
+
+            # 2. Get processor for task type
+            processor = processor_registry.get_processor(task.metadata["type"])
+
+            # 3. Execute processor
+            result = await processor.process(task)
+
+            # 4. Update status to SUCCESS
+            task.status = TaskStatus.SUCCESS
+            task.result = result
+            task.completed_at = datetime.now()
+
+        except Exception as e:
+            # Update status to FAILED
+            task.status = TaskStatus.FAILED
+            task.error = str(e)
+
+        finally:
+            # Save to storage
+            await self._storage.update_task(task)
+```
+
+**Progress Tracking**:
+```python
+async def update_progress(
+    self,
+    task_id: str,
+    progress: float,
+    message: str
+):
+    """Update task progress"""
+    task = self.tasks.get(task_id)
+    if task:
+        task.progress = progress
+        task.message = message
+        await self._storage.update_task(task)
+
+        # Notify SSE/WebSocket listeners
+        await self._notify_listeners(task_id, task)
+```
+
+### Code Ingestor
+
+**File**: `services/code_ingestor.py`
+
+**Purpose**: Parse and ingest code repositories into graph structure
+
+**Supported Languages**:
+- Python
+- JavaScript/TypeScript
+- Java
+- Go
+- C/C++
+- SQL
+
+**Ingestion Process**:
+```mermaid
+sequenceDiagram
+    participant Client
+    participant Ingestor
+    participant Parser
+    participant GraphService
+    participant Neo4j
+
+    Client->>Ingestor: ingest_repository(path)
+    Ingestor->>Ingestor: Scan directory
+    loop For each file
+        Ingestor->>Parser: parse_file(file, language)
+        Parser-->>Ingestor: AST + Symbols
+        Ingestor->>GraphService: create_file_node()
+        Ingestor->>GraphService: create_symbol_nodes()
+        Ingestor->>GraphService: create_relationships()
+        GraphService->>Neo4j: Cypher queries
+    end
+    Ingestor-->>Client: Ingestion complete
+```
+
+**Core Methods**:
+
+#### 1. Ingest Repository
+```python
+async def ingest_repository(
+    self,
+    repo_path: str,
+    repo_name: Optional[str] = None
+) -> Dict[str, Any]:
+    """Ingest entire code repository"""
+    # 1. Create Repo node
+    # 2. Walk directory tree
+    # 3. Parse each file
+    # 4. Create graph structure
+    # 5. Return statistics
+```
+
+#### 2. Parse File
+```python
+def parse_file(self, file_path: str, language: str) -> ParseResult:
+    """Parse code file and extract symbols"""
+    if language == "python":
+        return self._parse_python(file_path)
+    elif language == "javascript":
+        return self._parse_javascript(file_path)
+    # ... other languages
+```
+
+#### 3. Extract Symbols
+```python
+def _parse_python(self, file_path: str) -> ParseResult:
+    """Parse Python file using AST"""
+    import ast
+
+    with open(file_path) as f:
+        tree = ast.parse(f.read())
+
+    symbols = []
+    for node in ast.walk(tree):
+        if isinstance(node, ast.FunctionDef):
+            symbols.append({
+                "type": "function",
+                "name": node.name,
+                "line": node.lineno
+            })
+        elif isinstance(node, ast.ClassDef):
+            symbols.append({
+                "type": "class",
+                "name": node.name,
+                "line": node.lineno
+            })
+
+    return ParseResult(symbols=symbols, relationships=[])
+```
+
+### Memory Extractor
+
+**File**: `services/memory_extractor.py`
+
+**Purpose**: Automatically extract memories from various sources (v0.7)
+
+**Extraction Sources**:
+1. Conversation analysis
+2. Git commit mining
+3. Code comment extraction
+4. Query/answer analysis
+5. Batch repository analysis
+
+**Core Methods**:
+
+#### 1. Extract from Conversation
+```python
+async def extract_from_conversation(
+    self,
+    project_id: str,
+    conversation: List[Dict[str, str]],
+    auto_save: bool = False
+) -> List[Dict[str, Any]]:
+    """Extract memories from AI conversation"""
+    # 1. Format conversation for LLM
+    # 2. Use LLM to identify decisions, learnings
+    # 3. Generate memory objects
+    # 4. Optionally auto-save high-confidence memories
+```
+
+#### 2. Extract from Git Commit
+```python
+async def extract_from_git_commit(
+    self,
+    project_id: str,
+    commit_sha: str,
+    commit_message: str,
+    changed_files: List[str],
+    auto_save: bool = False
+) -> List[Dict[str, Any]]:
+    """Extract memories from git commit"""
+    # 1. Analyze commit message
+    # 2. Analyze changed files
+    # 3. Use LLM to extract decisions/experiences
+    # 4. Generate memories with context
+```
+
+#### 3. Extract from Code Comments
+```python
+async def extract_from_code_comments(
+    self,
+    project_id: str,
+    file_path: str
+) -> List[Dict[str, Any]]:
+    """Mine TODO, FIXME, NOTE markers"""
+    # 1. Parse file for comment markers
+    # 2. Extract context around markers
+    # 3. Classify as plan/note/experience
+    # 4. Generate memory objects
+```
+
+### Task Processors
+
+**File**: `services/task_processors.py`
+
+**Purpose**: Implement specific task processing logic
+
+**Processor Registry**:
+```python
+class ProcessorRegistry:
+    def __init__(self):
+        self.processors: Dict[str, TaskProcessor] = {}
+
+    def register(self, task_type: str, processor: TaskProcessor):
+        """Register processor for task type"""
+        self.processors[task_type] = processor
+
+    def get_processor(self, task_type: str) -> TaskProcessor:
+        """Get processor for task type"""
+        return self.processors.get(task_type)
+```
+
+**Built-in Processors**:
+
+#### 1. Document Processor
+```python
+class DocumentProcessor(TaskProcessor):
+    async def process(self, task: TaskResult) -> Dict[str, Any]:
+        """Process document ingestion task"""
+        # 1. Read document from file/content
+        # 2. Call knowledge service
+        # 3. Update progress
+        # 4. Return result
+```
+
+#### 2. Directory Processor
+```python
+class DirectoryProcessor(TaskProcessor):
+    async def process(self, task: TaskResult) -> Dict[str, Any]:
+        """Process batch directory ingestion"""
+        # 1. List files in directory
+        # 2. Filter by patterns
+        # 3. Process each file
+        # 4. Update progress incrementally
+        # 5. Return summary
+```
+
+#### 3. Code Ingestion Processor
+```python
+class CodeIngestionProcessor(TaskProcessor):
+    async def process(self, task: TaskResult) -> Dict[str, Any]:
+        """Process code repository ingestion"""
+        # 1. Call code ingestor
+        # 2. Track progress per file
+        # 3. Return ingestion statistics
+```
+
+## Storage Components
+
+### Neo4j Graph Database
+
+**Purpose**: Primary storage for all graph data
+
+**Node Types**:
+```cypher
+// Knowledge graph
+:Document, :Entity, :Chunk
+
+// Memory store
+:Memory, :Project
+
+// Code graph
+:Repo, :File, :Symbol, :Function, :Class, :Table
+
+// SQL schema
+:Database, :Table, :Column
+```
+
+**Indexes**:
+```cypher
+// Constraints
+CREATE CONSTRAINT FOR (d:Document) REQUIRE d.id IS UNIQUE;
+CREATE CONSTRAINT FOR (m:Memory) REQUIRE m.id IS UNIQUE;
+CREATE CONSTRAINT FOR (r:Repo) REQUIRE r.id IS UNIQUE;
+
+// Fulltext indexes
+CREATE FULLTEXT INDEX memory_search FOR (m:Memory)
+    ON EACH [m.title, m.content, m.reason, m.tags];
+
+CREATE FULLTEXT INDEX file_text FOR (f:File)
+    ON EACH [f.path, f.lang];
+
+// Vector index
+CALL db.index.vector.createNodeIndex(
+    'knowledge_vectors',
+    'Document',
+    'embedding',
+    1536,
+    'cosine'
+);
+```
+
+### SQLite Task Storage
+
+**File**: `services/task_storage.py`
+
+**Purpose**: Persistent storage for task queue
+
+**Schema**:
+```sql
+CREATE TABLE tasks (
+    task_id TEXT PRIMARY KEY,
+    status TEXT NOT NULL,
+    task_type TEXT NOT NULL,
+    progress REAL DEFAULT 0.0,
+    message TEXT,
+    result TEXT,  -- JSON
+    error TEXT,
+    metadata TEXT,  -- JSON
+    created_at TEXT NOT NULL,
+    started_at TEXT,
+    completed_at TEXT,
+    worker_id TEXT,
+    locked_at TEXT
+);
+
+CREATE INDEX idx_status ON tasks(status);
+CREATE INDEX idx_created ON tasks(created_at);
+CREATE INDEX idx_worker ON tasks(worker_id);
+```
+
+**Concurrency Control**:
+```python
+async def get_pending_tasks(self, limit: int = 10) -> List[TaskResult]:
+    """Get and lock pending tasks"""
+    # Claim tasks atomically with a single UPDATE ... RETURNING (SQLite has no SELECT ... FOR UPDATE)
+    query = """
+    UPDATE tasks
+    SET worker_id = ?, locked_at = ?
+    WHERE task_id IN (
+        SELECT task_id FROM tasks
+        WHERE status = 'pending'
+        AND (locked_at IS NULL OR locked_at < datetime('now', '-5 minutes'))
+        ORDER BY created_at
+        LIMIT ?
+    )
+    RETURNING *
+    """
+```
+
+## Utility Components
+
+### SQL Parser
+
+**File**: `services/sql_parser.py`
+
+**Purpose**: Parse SQL queries and extract metadata
+
+**Capabilities**:
+- SQL syntax parsing
+- Table/column extraction
+- Query type detection
+- Dependency analysis
+
+### Result Ranker
+
+**File**: `services/ranker.py`
+
+**Purpose**: Rank search results by relevance
+
+**Ranking Factors**:
+- Vector similarity score
+- Graph distance
+- Metadata match
+- Recency
+
+### Context Pack Builder
+
+**File**: `services/pack_builder.py`
+
+**Purpose**: Generate context packages for AI tools
+
+**Output Format**:
+```python
+{
+    "files": [
+        {"path": "src/main.py", "content": "...", "relevance": 0.95},
+        {"path": "src/utils.py", "content": "...", "relevance": 0.87}
+    ],
+    "symbols": [
+        {"name": "process_data", "type": "function", "file": "src/main.py"}
+    ],
+    "relationships": [
+        {"from": "main.py", "to": "utils.py", "type": "imports"}
+    ],
+    "metadata": {
+        "total_files": 2,
+        "total_lines": 450,
+        "languages": ["python"]
+    }
+}
+```
+
+### Git Utilities
+
+**File**: `services/git_utils.py`
+
+**Purpose**: Git repository operations
+
+**Capabilities**:
+- Commit history retrieval
+- Diff extraction
+- Branch operations
+- File change tracking
+
+## MCP Server Components
+
+### MCP Server Main
+
+**File**: `mcp_server.py`
+
+**Purpose**: Model Context Protocol server using official SDK
+
+**Architecture**:
+```python
+# Official MCP SDK
+from mcp.server import Server
+from mcp.server.models import InitializationOptions
+
+app = Server("code-graph-knowledge")
+
+# Tool registration
+@app.list_tools()
+async def list_tools() -> list[Tool]:
+    return get_tool_definitions()
+
+# Tool execution
+@app.call_tool()
+async def call_tool(name: str, arguments: dict) -> Sequence[TextContent]:
+    # Route to appropriate handler
+    handler = tool_handlers.get(name)
+    result = await handler(arguments)
+    return [TextContent(type="text", text=format_result(result))]
+```
+
+**Tool Categories** (30 tools total):
+
+#### 1. Knowledge Base Tools (5)
+- `query_knowledge`: RAG-based Q&A
+- `search_similar_nodes`: Vector similarity search
+- `add_document`: Add document from content
+- `add_file`: Add document from file
+- `add_directory`: Batch directory processing
+
+#### 2. Code Graph Tools (4)
+- `code_graph_ingest_repo`: Ingest repository
+- `code_graph_related`: Find related code
+- `code_graph_impact`: Impact analysis
+- `context_pack`: Generate AI context
+
+#### 3. Memory Tools (7)
+- `add_memory`: Create memory
+- `search_memories`: Search with filters
+- `get_memory`: Get by ID
+- `update_memory`: Modify memory
+- `delete_memory`: Soft delete
+- `supersede_memory`: Replace with new version
+- `get_project_summary`: Project overview
+
+#### 4. Memory Extraction Tools (5)
+- `extract_from_conversation`: Analyze conversations
+- `extract_from_git_commit`: Mine commits
+- `extract_from_code_comments`: Extract from code
+- `suggest_memory_from_query`: Suggest from Q&A
+- `batch_extract_from_repository`: Batch analysis
+
+#### 5. Task Tools (6)
+- `get_task_status`: Check task status
+- `watch_task`: Monitor single task
+- `watch_tasks`: Monitor multiple tasks
+- `list_tasks`: List all tasks
+- `cancel_task`: Cancel task
+- `get_queue_stats`: Queue statistics
+
+#### 6. System Tools (3)
+- `get_graph_schema`: Neo4j schema
+- `get_statistics`: System stats
+- `clear_knowledge_base`: Clear data
+
+### MCP Tool Handlers
+
+**File**: `mcp_tools/*.py`
+
+**Modular Organization**:
+```
+mcp_tools/
+├── __init__.py              # Exports
+├── tool_definitions.py      # Tool schemas
+├── knowledge_handlers.py    # Knowledge operations
+├── code_handlers.py         # Code graph operations
+├── memory_handlers.py       # Memory operations
+├── task_handlers.py         # Task operations
+├── system_handlers.py       # System operations
+├── resources.py             # MCP resources
+├── prompts.py              # MCP prompts
+└── utils.py                # Shared utilities
+```
+
+**Handler Pattern**:
+```python
+async def handle_query_knowledge(arguments: dict) -> dict:
+    """Handle knowledge query request"""
+    # 1. Validate arguments
+    question = arguments.get("question")
+    if not question:
+        return {"success": False, "error": "Question required"}
+
+    # 2. Call service
+    result = await neo4j_knowledge_service.query(
+        question=question,
+        top_k=arguments.get("top_k", 5)
+    )
+
+    # 3. Return result
+    return result
+```
+
+## Component Dependencies
+
+### Dependency Graph
+
+```mermaid
+graph TB
+    subgraph "API Layer"
+        FastAPI
+        MCPServer[MCP Server]
+    end
+
+    subgraph "Service Layer"
+        KnowServ[Knowledge Service]
+        MemServ[Memory Store]
+        GraphServ[Graph Service]
+        TaskQ[Task Queue]
+        CodeIng[Code Ingestor]
+    end
+
+    subgraph "External"
+        Neo4j
+        LLM[LLM Providers]
+    end
+
+    FastAPI --> KnowServ
+    FastAPI --> MemServ
+    FastAPI --> GraphServ
+    FastAPI --> TaskQ
+
+    MCPServer --> KnowServ
+    MCPServer --> MemServ
+    MCPServer --> GraphServ
+    MCPServer --> TaskQ
+
+    KnowServ --> Neo4j
+    KnowServ --> LLM
+    MemServ --> Neo4j
+    GraphServ --> Neo4j
+    CodeIng --> GraphServ
+
+    TaskQ --> KnowServ
+    TaskQ --> CodeIng
+```
+
+### Initialization Order
+
+The initialization order below is critical for avoiding circular dependencies:
+
+```python
+# 1. Configuration (no dependencies)
+from config import settings
+
+# 2. Storage layer (no app dependencies)
+neo4j_connection = Neo4jGraphStore(...)
+
+# 3. Service layer (depends on storage)
+knowledge_service = Neo4jKnowledgeService()
+memory_store = MemoryStore()
+graph_service = Neo4jGraphService()
+
+# 4. Processors (depend on services)
+processor_registry.initialize_default_processors(knowledge_service)
+
+# 5. Task queue (depends on processors)
+await task_queue.start()
+
+# 6. API layer (depends on all services)
+app = create_app()
+```
+
+### Service Communication Patterns
+
+**1. Direct Method Calls** (within same process):
+```python
+# FastAPI route calls service
+result = await knowledge_service.query(question)
+```
+
+**2. Task Queue** (async operations):
+```python
+# Submit task for background processing
+task_id = await task_queue.submit_task(
+    task_func=process_large_document,
+    document_path=path
+)
+```
+
+**3. Event Streaming** (real-time updates):
+```python
+# SSE for task progress
+async def task_progress_stream(task_id: str):
+    while True:
+        task = task_queue.get_task(task_id)
+        yield f"data: {json.dumps(task.to_dict())}\n\n"
+        await asyncio.sleep(1)
+```
+
+## Component Configuration
+
+All components are configured via environment variables:
+
+```python
+# config.py
+class Settings(BaseSettings):
+    # Database
+    neo4j_uri: str = "bolt://localhost:7687"
+    neo4j_username: str = "neo4j"
+    neo4j_password: str = "password"
+
+    # LLM
+    llm_provider: str = "ollama"
+    ollama_model: str = "llama2"
+
+    # Timeouts
+    connection_timeout: int = 30
+    operation_timeout: int = 120
+
+    class Config:
+        env_file = ".env"
+```
+
+Components access configuration:
+```python
+from config import settings
+
+# Use in service
+self.timeout = settings.operation_timeout
+```
+
+## Testing Components
+
+Each component has corresponding tests:
+
+```
+tests/
+├── test_neo4j_knowledge_service.py
+├── test_memory_store.py
+├── test_graph_service.py
+├── test_task_queue.py
+├── test_code_ingestor.py
+└── test_mcp_handlers.py
+```
+
+**Test Patterns**:
+```python
+@pytest.mark.asyncio
+async def test_add_memory():
+    # Setup
+    memory_store = MemoryStore()
+    await memory_store.initialize()
+
+    # Execute
+    result = await memory_store.add_memory(
+        project_id="test",
+        memory_type="decision",
+        title="Test decision",
+        content="Test content"
+    )
+
+    # Assert
+    assert result["success"] == True
+    assert "memory_id" in result
+
+    # Cleanup
+    await memory_store.close()
+```
+
+## Conclusion
+
+The component architecture follows these principles:
+
+1. **Single Responsibility**: Each component has one clear purpose
+2. **Loose Coupling**: Components communicate via interfaces
+3. **High Cohesion**: Related functionality grouped together
+4. **Dependency Injection**: Services injected rather than created
+5. **Async-First**: All I/O operations are asynchronous
+6. **Testability**: Components designed for easy testing
+
+This modular design enables:
+- Independent development and testing
+- Easy component replacement
+- Clear debugging and troubleshooting
+- Scalable architecture evolution
diff --git a/docs/architecture/dataflow.md b/docs/architecture/dataflow.md
new file mode 100644
index 0000000..e4ce465
--- /dev/null
+++ b/docs/architecture/dataflow.md
@@ -0,0 +1,1770 @@
+# Data Flow and Processing Pipelines
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Document Processing Pipeline](#document-processing-pipeline)
+- [Knowledge Query Pipeline (RAG)](#knowledge-query-pipeline-rag)
+- [Memory Management Pipeline](#memory-management-pipeline)
+- [Code Graph Ingestion Pipeline](#code-graph-ingestion-pipeline)
+- [Task Processing Pipeline](#task-processing-pipeline)
+- [Real-time Update Pipeline](#real-time-update-pipeline)
+- [MCP Request Pipeline](#mcp-request-pipeline)
+- [Data Flow Patterns](#data-flow-patterns)
+
+## Overview
+
+This document describes how data flows through the Code Graph Knowledge System, detailing the processing pipelines for different operations. Understanding these flows is crucial for:
+
+- Debugging issues
+- Optimizing performance
+- Extending functionality
+- Understanding system behavior
+
+### Core Data Types
+
+```python
+# Document content
+Document = {
+    "id": str,
+    "content": str,
+    "title": str,
+    "metadata": dict,
+    "embedding": List[float],  # Vector representation
+    "chunks": List[str]        # Processed chunks
+}
+
+# Memory object
+Memory = {
+    "id": str,
+    "project_id": str,
+    "memory_type": str,        # decision/preference/experience/etc.
+    "title": str,
+    "content": str,
+    "importance": float,
+    "tags": List[str]
+}
+
+# Code symbol
+Symbol = {
+    "id": str,
+    "name": str,
+    "type": str,               # function/class/variable
+    "file_path": str,
+    "line": int,
+    "relationships": List[dict]
+}
+
+# Task result
+Task = {
+    "task_id": str,
+    "status": str,             # pending/processing/success/failed/cancelled
+    "progress": float,
+    "result": dict,
+    "error": Optional[str]
+}
+```
+
+## Document Processing Pipeline
+
+### Overview
+
+Documents go through multiple stages from upload to queryable knowledge:
+
+```mermaid
+graph TB
+    Start([User uploads document]) --> Validate[Validate & Parse]
+    Validate --> Size{Size check}
+
+    Size -->|Small < 10KB| DirectProc[Direct Processing]
+    Size -->|Medium 10-50KB| TaskQueue[Queue as Task]
+    Size -->|Large > 50KB| Reject[Return size limit error]
+
+    DirectProc --> Chunk[Text Chunking]
+    TaskQueue --> Worker[Task Worker]
+    Worker --> Chunk
+
+    Chunk --> Embed[Generate Embeddings]
+    Embed --> Extract[Extract Entities]
+    Extract --> BuildGraph[Build Knowledge Graph]
+    BuildGraph --> StoreNeo4j[Store in Neo4j]
+    StoreNeo4j --> Index[Create Vector Index]
+    Index --> Complete([Document ready for query])
+
+    Reject --> Error([Error response])
+
+    style DirectProc fill:#90EE90
+    style TaskQueue fill:#FFD700
+    style Reject fill:#FFB6C1
+```
+
+### Step-by-Step Flow
+
+#### 1. Document Upload
+
+**API Endpoint**: `POST /api/v1/documents/add`
+
+```python
+# Request
+{
+    "content": "This is the document content...",
+    "title": "My Document",
+    "metadata": {"author": "John", "topic": "AI"}
+}
+```
+
+**Validation**:
+```python
+# Size check
+content_size = len(content.encode('utf-8'))
+if content_size > settings.max_document_size:
+    raise HTTPException(413, "Document too large")
+
+# Content check
+if not content.strip():
+    raise HTTPException(400, "Empty content")
+```
+
+#### 2. Routing Decision
+
+Based on document size:
+
+```python
+if content_size < 10 * 1024:  # < 10KB
+    # Direct synchronous processing
+    result = await knowledge_service.add_document(content, title, metadata)
+    return {"success": True, "document_id": result["document_id"]}
+
+elif content_size < 50 * 1024:  # 10-50KB
+    # Background task processing
+    task_id = await submit_document_processing_task(content, title, metadata)
+    return {"task_id": task_id, "message": "Processing in background"}
+
+else:  # >= 50KB
+    # Reject - too large for direct processing
+    return {"error": "Document too large, use file upload or directory processing"}
+```
+
+#### 3. Text Chunking
+
+**LlamaIndex SimpleNodeParser**:
+
+```python
+from llama_index.core.node_parser import SimpleNodeParser
+
+parser = SimpleNodeParser.from_defaults(
+    chunk_size=512,        # Tokens per chunk
+    chunk_overlap=50       # Token overlap between adjacent chunks
+)
+
+# Create chunks
+chunks = parser.get_nodes_from_documents([document])
+```
+
+**Chunking Strategy**:
+```
+Original document (2000 chars):
+"Text here... [chunk 1: 512 chars]
+ ... [overlap: 50 chars]
+ ... [chunk 2: 512 chars]
+ ... [overlap: 50 chars]
+ ... [chunk 3: remainder]"
+
+Result: 3-4 chunks with overlapping context
+```
+
+**Why Chunking?**
+- LLM context window limits
+- Better semantic granularity
+- Improved retrieval accuracy
+- Parallel processing capability
+
+#### 4. Embedding Generation
+
+**Process**:
+```python
+# For each chunk
+embedding = await embed_model.get_text_embedding(chunk.text)
+# embedding: List[float] with length = vector_dimension (e.g., 1536)
+```
+
+**Provider-Specific**:
+
+**Ollama** (local):
+```python
+from llama_index.embeddings.ollama import OllamaEmbedding
+
+embed_model = OllamaEmbedding(
+    model_name="nomic-embed-text",
+    base_url="http://localhost:11434"
+)
+# Dimension: 768
+```
+
+**OpenAI**:
+```python
+from llama_index.embeddings.openai import OpenAIEmbedding
+
+embed_model = OpenAIEmbedding(
+    model="text-embedding-ada-002"
+)
+# Dimension: 1536
+```
+
+**Performance Considerations**:
+- Batch embeddings when possible
+- Cache embeddings for identical text
+- Monitor API rate limits
+- Use local models for privacy/cost
+
+#### 5. Entity Extraction
+
+**LlamaIndex Knowledge Graph Extraction**:
+
+```python
+# Configured in Knowledge Service
+from llama_index.core import KnowledgeGraphIndex
+
+# Extract entities and relationships
+index = KnowledgeGraphIndex.from_documents(
+    documents=[document],
+    max_triplets_per_chunk=10
+)
+```
+
+**Extracted Entities**:
+```python
+# Example triplets (subject, predicate, object)
+[
+    ("Python", "is_a", "Programming Language"),
+    ("Python", "supports", "Object-Oriented Programming"),
+    ("Django", "is_built_with", "Python")
+]
+```
+
+**Entity Types**:
+- Concepts (abstract ideas)
+- Technologies (tools, frameworks)
+- People (authors, contributors)
+- Organizations (companies, projects)
+- Events (releases, changes)
+
+#### 6. Graph Construction
+
+**Neo4j Storage**:
+
+```cypher
+// Create document node
+CREATE (d:Document {
+    id: $doc_id,
+    title: $title,
+    created_at: datetime(),
+    metadata: $metadata
+})
+
+// Create chunk nodes with embeddings
+CREATE (c:Chunk {
+    id: $chunk_id,
+    text: $chunk_text,
+    embedding: $embedding_vector,
+    chunk_index: $index
+})
+CREATE (d)-[:HAS_CHUNK]->(c)
+
+// Create entity nodes
+CREATE (e:Entity {
+    id: $entity_id,
+    name: $entity_name,
+    type: $entity_type
+})
+
+// Create relationships
+CREATE (c)-[:MENTIONS]->(e)
+CREATE (e1)-[:RELATES_TO {type: $relation_type}]->(e2)
+```
+
+**Graph Structure**:
+```mermaid
+graph TB
+    Doc[Document Node]
+    Doc --> Chunk1[Chunk 1 + embedding]
+    Doc --> Chunk2[Chunk 2 + embedding]
+    Doc --> Chunk3[Chunk 3 + embedding]
+
+    Chunk1 --> Entity1[Entity: Python]
+    Chunk1 --> Entity2[Entity: Django]
+    Chunk2 --> Entity2
+    Chunk2 --> Entity3[Entity: FastAPI]
+
+    Entity1 -.->|RELATES_TO| Entity2
+    Entity2 -.->|SIMILAR_TO| Entity3
+```
+
+#### 7. Vector Index Creation
+
+**Neo4j Vector Index**:
+
+```cypher
+// Create vector index (if not exists)
+CALL db.index.vector.createNodeIndex(
+    'knowledge_vectors',     // Index name
+    'Chunk',                 // Node label
+    'embedding',             // Property name
+    1536,                    // Vector dimension (must match the embedding model, e.g. 768 for nomic-embed-text)
+    'cosine'                 // Similarity metric
+)
+```
+
+**Index Operations**:
+- Automatically indexes new chunks
+- Enables fast similarity search
+- Supports approximate nearest neighbor (ANN)
+
+#### 8. Completion
+
+**Success Response**:
+```python
+{
+    "success": True,
+    "document_id": "doc_abc123",
+    "chunks_created": 4,
+    "entities_extracted": 12,
+    "processing_time": 2.5  # seconds
+}
+```
+
+### Error Handling
+
+**Common Errors**:
+
+```python
+try:
+    result = await knowledge_service.add_document(content, title)
+except EmbeddingError as e:
+    # LLM/embedding provider unavailable
+    return {"error": "Embedding service unavailable", "retry": True}
+
+except Neo4jError as e:
+    # Database connection issue
+    return {"error": "Database error", "retry": True}
+
+except ValidationError as e:
+    # Invalid input
+    return {"error": str(e), "retry": False}
+
+except TimeoutError as e:
+    # Operation timeout
+    return {"error": "Processing timeout", "retry": True}
+```
+
+## Knowledge Query Pipeline (RAG)
+
+### Overview
+
+Query processing combines vector search, graph traversal, and LLM generation:
+
+```mermaid
+graph TB
+    Query([User asks question]) --> Embed[Generate Query Embedding]
+    Embed --> VectorSearch[Vector Similarity Search]
+    VectorSearch --> TopK[Retrieve Top-K Chunks]
+    TopK --> GraphExpand[Expand via Graph Relationships]
+    GraphExpand --> Rerank[Rerank by Relevance]
+    Rerank --> BuildContext[Build Context Window]
+    BuildContext --> LLMPrompt[Create LLM Prompt]
+    LLMPrompt --> LLMGenerate[LLM Generate Answer]
+    LLMGenerate --> PostProcess[Post-process Response]
+    PostProcess --> Response([Return Answer + Sources])
+
+    style VectorSearch fill:#E6F3FF
+    style GraphExpand fill:#FFF4E6
+    style LLMGenerate fill:#F0E6FF
+```
+
+### Step-by-Step Flow
+
+#### 1. Query Embedding
+
+**API Endpoint**: `POST /api/v1/knowledge/query`
+
+```python
+# Request
+{
+    "question": "How does authentication work in the system?",
+    "top_k": 5
+}
+```
+
+**Generate embedding**:
+```python
+query_embedding = await embed_model.get_query_embedding(question)
+# Same embedding model as documents for consistency
+```
+
+#### 2. Vector Similarity Search
+
+**Neo4j Vector Query**:
+
+```cypher
+CALL db.index.vector.queryNodes(
+    'knowledge_vectors',      // Index name
+    $top_k,                   // Number of results
+    $query_embedding          // Query vector
+)
+YIELD node, score
+
+MATCH (doc:Document)-[:HAS_CHUNK]->(node)
+RETURN node.text as text,
+       doc.title as source,
+       score as similarity
+ORDER BY score DESC
+```
+
+**Result**:
+```python
+[
+    {
+        "text": "The system uses JWT tokens for authentication...",
+        "source": "Authentication Guide",
+        "similarity": 0.89
+    },
+    {
+        "text": "Users authenticate via POST /api/auth/login...",
+        "source": "API Documentation",
+        "similarity": 0.84
+    },
+    # ... more results
+]
+```
+
+#### 3. Graph-Based Expansion
+
+**Expand context via relationships**:
+
+```cypher
+// Get related chunks via entity relationships
+MATCH (chunk:Chunk)-[:MENTIONS]->(e:Entity)<-[:MENTIONS]-(related:Chunk)
+WHERE chunk.id IN $initial_chunk_ids
+  AND related.id NOT IN $initial_chunk_ids
+RETURN related.text as text,
+       COUNT(*) as connection_strength
+ORDER BY connection_strength DESC
+LIMIT 3
+```
+
+**Why expand?**
+- Capture related context
+- Find transitively related information
+- Improve answer completeness
+
+#### 4. Result Reranking
+
+**Combine scores**:
+
+```python
+def rerank_results(results: List[dict]) -> List[dict]:
+    """Rerank by multiple factors"""
+    for result in results:
+        score = (
+            result['similarity'] * 0.6 +      # Vector similarity
+            result['graph_score'] * 0.2 +     # Graph connectivity
+            result['recency_score'] * 0.1 +   # Document age
+            result['metadata_match'] * 0.1    # Metadata relevance
+        )
+        result['final_score'] = score
+
+    return sorted(results, key=lambda x: x['final_score'], reverse=True)
+```
+
+#### 5. Context Window Building
+
+**Create prompt context**:
+
+```python
+def build_context(chunks: List[dict], max_tokens: int = 2000) -> str:
+    """Build context staying within token limit"""
+    context_parts = []
+    total_tokens = 0
+
+    for chunk in chunks:
+        chunk_tokens = estimate_tokens(chunk['text'])
+        if total_tokens + chunk_tokens > max_tokens:
+            break
+
+        context_parts.append(f"[Source: {chunk['source']}]\n{chunk['text']}")
+        total_tokens += chunk_tokens
+
+    return "\n\n".join(context_parts)
+```
+
+#### 6. LLM Prompt Construction
+
+**Prompt Template**:
+
+```python
+prompt = f"""You are a helpful assistant. Answer the question based on the provided context.
+
+Context:
+{context}
+
+Question: {question}
+
+Answer: Provide a comprehensive answer based on the context above. If the context doesn't contain enough information to fully answer the question, say so.
+"""
+```
+
+**Advanced Prompting**:
+```python
+# With instructions
+prompt = f"""You are a technical documentation expert.
+
+Context from knowledge base:
+{context}
+
+User question: {question}
+
+Instructions:
+1. Answer based on the provided context
+2. Cite sources when possible
+3. If information is incomplete, state what's missing
+4. Use technical accuracy
+
+Answer:"""
+```
+
+#### 7. LLM Generation
+
+**Call LLM**:
+
+```python
+# OpenAI example
+response = await llm.acomplete(prompt)
+answer = response.text
+
+# Ollama example
+response = await llm.acomplete(prompt)
+answer = response.text
+```
+
+**Streaming Support**:
+```python
+async for token in llm.astream_complete(prompt):
+    yield token.text  # Stream to client
+```
+
+#### 8. Response Assembly
+
+**Final Response**:
+
+```json
+{
+    "success": true,
+    "answer": "The system uses JWT token-based authentication...",
+    "sources": [
+        {
+            "title": "Authentication Guide",
+            "relevance": 0.89,
+            "excerpt": "JWT tokens are used..."
+        },
+        {
+            "title": "API Documentation",
+            "relevance": 0.84,
+            "excerpt": "Login endpoint returns..."
+        }
+    ],
+    "metadata": {
+        "chunks_retrieved": 5,
+        "chunks_used": 3,
+        "processing_time": 1.2,
+        "model": "gpt-3.5-turbo"
+    }
+}
+```
+
+### Performance Optimization
+
+**Caching**:
+```python
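+# Cache query embeddings (functools.lru_cache would cache the one-shot coroutine, not its result)
+@alru_cache(maxsize=1000)  # async-aware LRU cache from the async-lru package
+async def get_cached_embedding(query: str) -> List[float]:
+    return await embed_model.get_query_embedding(query)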
+
+# Cache common queries
+query_cache = {}
+cache_key = f"{question}:{top_k}"
+if cache_key in query_cache:
+    return query_cache[cache_key]
+```
+
+**Parallel Processing**:
+```python
+# Parallel embedding and metadata lookup
+embedding_task = asyncio.create_task(generate_embedding(question))
+metadata_task = asyncio.create_task(get_metadata_filters(question))
+
+embedding = await embedding_task
+metadata = await metadata_task
+```
+
+## Memory Management Pipeline
+
+### Overview
+
+Memory lifecycle from creation to retrieval:
+
+```mermaid
+graph TB
+    Create([Create Memory]) --> Validate[Validate Input]
+    Validate --> Generate[Generate Memory ID]
+    Generate --> Store[Store in Neo4j]
+    Store --> Index[Index for Search]
+    Index --> Link[Link to Project]
+    Link --> Complete([Memory Ready])
+
+    Search([Search Request]) --> Parse[Parse Filters]
+    Parse --> Fulltext{Fulltext Search?}
+    Fulltext -->|Yes| FTS[Fulltext Query]
+    Fulltext -->|No| Filter[Filter Query]
+    FTS --> Merge[Merge Results]
+    Filter --> Merge
+    Merge --> Rank[Rank by Importance]
+    Rank --> Return([Return Results])
+
+    style Create fill:#90EE90
+    style Search fill:#FFD700
+```
+
+### Memory Creation Flow
+
+#### 1. Manual Memory Creation
+
+**API Endpoint**: `POST /api/v1/memory/add`
+
+```python
+{
+    "project_id": "myapp",
+    "memory_type": "decision",
+    "title": "Use PostgreSQL for main database",
+    "content": "Decided to use PostgreSQL instead of MySQL",
+    "reason": "Need advanced JSON support and better performance",
+    "importance": 0.9,
+    "tags": ["database", "architecture", "backend"]
+}
+```
+
+**Processing**:
+```python
+# 1. Validate
+if not 0 <= importance <= 1:
+    raise ValueError("Importance must be 0-1")
+
+# 2. Generate ID
+memory_id = f"mem_{uuid.uuid4().hex[:12]}"
+
+# 3. Store in Neo4j
+await memory_store.add_memory(
+    project_id=project_id,
+    memory_type=memory_type,
+    title=title,
+    content=content,
+    reason=reason,
+    importance=importance,
+    tags=tags
+)
+```
+
+**Neo4j Storage**:
+```cypher
+// Ensure project exists
+MERGE (p:Project {id: $project_id})
+
+// Create memory
+CREATE (m:Memory {
+    id: $memory_id,
+    project_id: $project_id,
+    memory_type: $memory_type,
+    title: $title,
+    content: $content,
+    reason: $reason,
+    importance: $importance,
+    tags: $tags,
+    created_at: datetime(),
+    updated_at: datetime(),
+    is_active: true
+})
+
+// Link to project
+CREATE (m)-[:BELONGS_TO]->(p)
+
+RETURN m
+```
+
+#### 2. Automatic Memory Extraction
+
+**Conversation Analysis Flow**:
+
+```mermaid
+sequenceDiagram
+    participant User
+    participant API
+    participant Extractor
+    participant LLM
+    participant MemStore
+
+    User->>API: Extract from conversation
+    API->>Extractor: analyze_conversation()
+    Extractor->>LLM: Identify decisions/learnings
+    LLM-->>Extractor: Extracted memories
+    Extractor->>Extractor: Score confidence
+    alt Auto-save enabled & high confidence
+        Extractor->>MemStore: Save memories
+    else
+        Extractor->>API: Return suggestions
+    end
+    API-->>User: Memories/suggestions
+```
+
+**LLM Prompt for Extraction**:
+```python
+prompt = f"""Analyze this conversation and extract important project knowledge.
+
+Conversation:
+{format_conversation(conversation)}
+
+Extract:
+1. Technical decisions and rationale
+2. Preferences and conventions
+3. Problems encountered and solutions
+4. Important insights or learnings
+
+For each item, provide:
+- Type (decision/preference/experience/note)
+- Title (short summary)
+- Content (detailed description)
+- Confidence score (0-1)
+
+Return as JSON list.
+"""
+```
+
+**Example Extraction Result**:
+```python
+[
+    {
+        "type": "decision",
+        "title": "Use Redis for caching",
+        "content": "Decided to add Redis caching layer for API responses",
+        "reason": "Reduce database load and improve response times",
+        "confidence": 0.85,
+        "importance": 0.8,
+        "tags": ["caching", "performance", "redis"]
+    }
+]
+```
+
+**Auto-save Logic**:
+```python
+if auto_save:
+    for memory in extracted_memories:
+        if memory['confidence'] > 0.7:  # High confidence threshold
+            await memory_store.add_memory(**memory)
+```
+
+### Memory Search Flow
+
+**API Endpoint**: `POST /api/v1/memory/search`
+
+```python
+{
+    "project_id": "myapp",
+    "query": "database decisions",
+    "memory_type": "decision",
+    "tags": ["database"],
+    "min_importance": 0.7,
+    "limit": 10
+}
+```
+
+**Search Strategy**:
+
+```cypher
+// Fulltext search with filters
+CALL db.index.fulltext.queryNodes('memory_search', $query)
+YIELD node as m, score
+
+WHERE m.project_id = $project_id
+  AND m.is_active = true
+  AND ($memory_type IS NULL OR m.memory_type = $memory_type)
+  AND m.importance >= $min_importance
+  AND ($tags IS NULL OR ANY(tag IN $tags WHERE tag IN m.tags))
+
+RETURN m, score
+ORDER BY score DESC, m.importance DESC
+LIMIT $limit
+```
+
+**Result Ranking**:
+```python
+def rank_memories(results: List[dict]) -> List[dict]:
+    """Rank by search score and importance"""
+    for result in results:
+        result['rank_score'] = (
+            result['search_score'] * 0.6 +
+            result['importance'] * 0.4
+        )
+    return sorted(results, key=lambda x: x['rank_score'], reverse=True)
+```
+
+### Memory Evolution (Supersede)
+
+**Supersede Flow**:
+
+```mermaid
+graph TB
+    Old[Old Memory: Use MySQL] --> Decision{Decision Changed}
+    Decision --> New[New Memory: Use PostgreSQL]
+    New --> Link[Create SUPERSEDES Relationship]
+    Link --> Deactivate[Mark Old as Superseded]
+    Deactivate --> History[Maintain History Chain]
+
+    style Old fill:#FFE6E6
+    style New fill:#E6FFE6
+```
+
+**Implementation**:
+```cypher
+// 1. Create new memory
+CREATE (new:Memory {
+    id: $new_id,
+    title: $new_title,
+    content: $new_content,
+    ...
+})
+
+// 2. Link to old memory (Cypher requires WITH between CREATE and MATCH)
+WITH new MATCH (old:Memory {id: $old_id})
+CREATE (new)-[:SUPERSEDES]->(old)
+
+// 3. Update old memory
+SET old.superseded_by = $new_id,
+    old.is_active = false
+
+RETURN new, old
+```
+
+**History Traversal**:
+```cypher
+// Get full evolution history
+MATCH path = (latest:Memory)-[:SUPERSEDES*]->(oldest:Memory)
+WHERE latest.id = $memory_id
+RETURN nodes(path) as history
+```
+
+## Code Graph Ingestion Pipeline
+
+### Overview
+
+Converting code repositories into queryable graph structure:
+
+```mermaid
+graph TB
+    Start([Repository Path]) --> Scan[Scan Directory Tree]
+    Scan --> Filter[Filter by Language]
+    Filter --> Parse[Parse Each File]
+
+    Parse --> AST[Generate AST]
+    AST --> Extract[Extract Symbols]
+    Extract --> Analyze[Analyze Relationships]
+
+    Analyze --> CreateNodes[Create Graph Nodes]
+    CreateNodes --> CreateRels[Create Relationships]
+    CreateRels --> Index[Create Indexes]
+    Index --> Complete([Repository Indexed])
+
+    style Parse fill:#E6F3FF
+    style Extract fill:#FFF4E6
+    style CreateNodes fill:#F0E6FF
+```
+
+### Step-by-Step Flow
+
+#### 1. Repository Scanning
+
+**API Endpoint**: `POST /api/v1/code/ingest`
+
+```python
+{
+    "repo_path": "/path/to/repository",
+    "repo_name": "myapp",
+    "file_patterns": ["*.py", "*.js"]  # Optional filters
+}
+```
+
+**Directory Walk**:
+```python
+def scan_repository(repo_path: str) -> List[str]:
+    """Scan directory and collect files"""
+    files = []
+
+    for root, dirs, filenames in os.walk(repo_path):
+        # Skip common ignored directories
+        dirs[:] = [d for d in dirs if d not in {
+            '.git', 'node_modules', '__pycache__', '.venv'
+        }]
+
+        for filename in filenames:
+            file_path = os.path.join(root, filename)
+            if should_process_file(file_path):
+                files.append(file_path)
+
+    return files
+```
+
+**Language Detection**:
+```python
+LANGUAGE_EXTENSIONS = {
+    '.py': 'python',
+    '.js': 'javascript',
+    '.ts': 'typescript',
+    '.java': 'java',
+    '.go': 'go',
+    '.cpp': 'cpp',
+    '.sql': 'sql'
+}
+
+language = LANGUAGE_EXTENSIONS.get(file_extension)
+```
+
+#### 2. File Parsing
+
+**Python Parsing Example**:
+
+```python
+import ast
+
+def parse_python_file(file_path: str) -> ParseResult:
+    """Parse Python file using AST"""
+    with open(file_path, 'r') as f:
+        source = f.read()
+
+    tree = ast.parse(source, filename=file_path)
+
+    symbols = []
+    relationships = []
+
+    for node in ast.walk(tree):
+        if isinstance(node, ast.FunctionDef):
+            # Extract function
+            symbols.append({
+                'type': 'function',
+                'name': node.name,
+                'line': node.lineno,
+                'params': [arg.arg for arg in node.args.args],
+                'decorators': [d.id for d in node.decorator_list if isinstance(d, ast.Name)]
+            })
+
+            # Extract function calls
+            for child in ast.walk(node):
+                if isinstance(child, ast.Call):
+                    if isinstance(child.func, ast.Name):
+                        relationships.append({
+                            'from': node.name,
+                            'to': child.func.id,
+                            'type': 'CALLS'
+                        })
+
+        elif isinstance(node, ast.ClassDef):
+            # Extract class
+            symbols.append({
+                'type': 'class',
+                'name': node.name,
+                'line': node.lineno,
+                'bases': [base.id for base in node.bases if isinstance(base, ast.Name)],
+                'methods': [m.name for m in node.body if isinstance(m, ast.FunctionDef)]
+            })
+
+            # Extract inheritance
+            for base in node.bases:
+                if isinstance(base, ast.Name):
+                    relationships.append({
+                        'from': node.name,
+                        'to': base.id,
+                        'type': 'INHERITS'
+                    })
+
+    return ParseResult(symbols=symbols, relationships=relationships)
+```
+
+**JavaScript Parsing Example**:
+
+```python
+# Using esprima or similar parser
+import esprima
+
+def parse_javascript_file(file_path: str) -> ParseResult:
+    """Parse JavaScript file"""
+    with open(file_path, 'r') as f:
+        source = f.read()
+
+    tree = esprima.parseScript(source, {'loc': True})
+
+    # Extract functions, classes, imports
+    # Similar to Python parsing
+```
+
+#### 3. Graph Construction
+
+**Create Repository Node**:
+```cypher
+CREATE (r:Repo {
+    id: $repo_id,
+    name: $repo_name,
+    path: $repo_path,
+    created_at: datetime(),
+    file_count: $file_count
+})
+```
+
+**Create File Nodes**:
+```cypher
+MATCH (r:Repo {id: $repo_id})  // Reuse the existing repo node instead of creating a duplicate
+CREATE (f:File {
+    repoId: $repo_id,
+    path: $file_path,
+    lang: $language,
+    content: $source_code,
+    lines: $line_count,
+    last_modified: datetime()
+})-[:BELONGS_TO]->(r)
+```
+
+**Create Symbol Nodes**:
+```cypher
+// Function (match the existing file node so no duplicate File is created)
+MATCH (f:File {path: $file_path})
+CREATE (s:Symbol:Function {
+    id: $symbol_id,
+    name: $function_name,
+    type: 'function',
+    line: $line_number,
+    params: $parameters,
+    file_path: $file_path
+})-[:DEFINED_IN]->(f)
+
+// Class (match the existing file node so no duplicate File is created)
+MATCH (f:File {path: $file_path})
+CREATE (s:Symbol:Class {
+    id: $symbol_id,
+    name: $class_name,
+    type: 'class',
+    line: $line_number,
+    methods: $method_list,
+    file_path: $file_path
+})-[:DEFINED_IN]->(f)
+```
+
+**Create Relationships**:
+```cypher
+// Function calls
+MATCH (caller:Function {name: $caller_name})
+MATCH (callee:Function {name: $callee_name})
+CREATE (caller)-[:CALLS]->(callee)
+
+// Class inheritance
+MATCH (child:Class {name: $child_name})
+MATCH (parent:Class {name: $parent_name})
+CREATE (child)-[:INHERITS]->(parent)
+
+// Module imports
+MATCH (file:File {path: $importer_path})
+MATCH (imported:File {path: $imported_path})
+CREATE (file)-[:IMPORTS]->(imported)
+```
+
+#### 4. Progress Tracking
+
+**Task Progress Updates**:
+```python
+async def ingest_repository_with_progress(repo_path: str, task_id: str):
+    """Ingest with progress updates"""
+    files = scan_repository(repo_path)
+    total_files = len(files)
+
+    for i, file_path in enumerate(files):
+        # Parse file
+        result = parse_file(file_path)
+
+        # Store in graph
+        await store_file_in_graph(result)
+
+        # Update progress
+        progress = (i + 1) / total_files
+        await task_queue.update_progress(
+            task_id,
+            progress,
+            f"Processed {i+1}/{total_files} files"
+        )
+```
+
+#### 5. Code Graph Queries
+
+**Find Related Code**:
+```cypher
+// Find all functions that call a specific function
+MATCH (caller:Function)-[:CALLS]->(target:Function {name: $function_name})
+RETURN caller.name, caller.file_path
+
+// Find class hierarchy
+MATCH path = (child:Class)-[:INHERITS*]->(parent:Class {name: $class_name})
+RETURN nodes(path)
+
+// Find all files that import a module
+MATCH (file:File)-[:IMPORTS]->(module:File {path: $module_path})
+RETURN file.path
+```
+
+**Impact Analysis**:
+```cypher
+// Find all code affected by changing a function
+MATCH (target:Function {name: $function_name})
+MATCH path = (dependent:Function)-[:CALLS*1..3]->(target)
+RETURN DISTINCT dependent.name, dependent.file_path, length(path) as depth
+ORDER BY depth
+```
+
+## Task Processing Pipeline
+
+### Overview
+
+Asynchronous task lifecycle:
+
+```mermaid
+stateDiagram-v2
+    [*] --> Submitted: User submits task
+    Submitted --> Stored: Store in SQLite
+    Stored --> Pending: Queue for processing
+
+    Pending --> Locked: Worker locks task
+    Locked --> Processing: Worker starts processing
+
+    Processing --> Success: Completed
+    Processing --> Failed: Error
+    Processing --> Cancelled: User cancels
+
+    Success --> [*]
+    Failed --> [*]
+    Cancelled --> [*]
+
+    Processing --> Processing: Progress updates
+```
+
+### Task Submission Flow
+
+**Submit Document Task**:
+
+```python
+async def submit_document_processing_task(
+    content: str,
+    title: str,
+    metadata: dict
+) -> str:
+    """Submit document processing as background task"""
+    # 1. Create task data
+    task_data = {
+        'type': 'document_processing',
+        'content': content,
+        'title': title,
+        'metadata': metadata
+    }
+
+    # 2. Submit to queue
+    task_id = await task_queue.submit_task(
+        task_func=process_document_task,
+        task_type='document_processing',
+        task_data=task_data
+    )
+
+    return task_id
+```
+
+**Task Storage**:
+
+```python
+# Store in SQLite
+await task_storage.store_task(
+    task_id=task_id,
+    status=TaskStatus.PENDING,
+    task_type='document_processing',
+    metadata=task_data
+)
+```
+
+### Worker Processing Flow
+
+**Worker Loop**:
+
+```python
+async def _process_pending_tasks(self):
+    """Background worker continuously processes tasks"""
+    while True:
+        try:
+            # 1. Get pending tasks (with lock)
+            pending = await self._storage.get_pending_tasks(limit=10)
+
+            # 2. Process each task (respecting concurrency limit)
+            for task in pending:
+                if len(self.running_tasks) < self.max_concurrent_tasks:
+                    # Start task processing
+                    asyncio.create_task(self._execute_task(task))
+
+            # 3. Wait before next poll
+            await asyncio.sleep(1)
+
+        except asyncio.CancelledError:
+            logger.info("Worker shutting down")
+            break
+        except Exception as e:
+            logger.error(f"Worker error: {e}")
+            await asyncio.sleep(5)  # Back off on error
+```
+
+**Task Execution**:
+
+```python
+async def _execute_task(self, task: TaskResult):
+    """Execute single task"""
+    async with self.task_semaphore:  # Limit concurrency
+        try:
+            # 1. Update status
+            task.status = TaskStatus.PROCESSING
+            task.started_at = datetime.now()
+            await self._storage.update_task_status(task.task_id, task.status)
+
+            # 2. Get appropriate processor
+            processor = processor_registry.get_processor(task.metadata['type'])
+
+            # 3. Execute with progress updates
+            result = await processor.process(task)
+
+            # 4. Mark success
+            task.status = TaskStatus.SUCCESS
+            task.result = result
+            task.completed_at = datetime.now()
+            task.progress = 1.0
+
+        except Exception as e:
+            # Mark failed
+            task.status = TaskStatus.FAILED
+            task.error = str(e)
+            task.completed_at = datetime.now()
+            logger.error(f"Task {task.task_id} failed: {e}")
+
+        finally:
+            # Save final state
+            await self._storage.update_task(task)
+            self.running_tasks.pop(task.task_id, None)
+```
+
+### Real-time Monitoring
+
+**SSE Stream**:
+
+```python
+@app.get("/api/v1/sse/task/{task_id}")
+async def stream_task_progress(task_id: str):
+    """Stream task progress via Server-Sent Events"""
+    async def event_generator():
+        while True:
+            # Get current task status
+            task = task_queue.get_task(task_id)
+
+            if task:
+                # Send update
+                yield {
+                    "event": "progress",
+                    "data": json.dumps({
+                        "task_id": task.task_id,
+                        "status": task.status.value,
+                        "progress": task.progress,
+                        "message": task.message
+                    })
+                }
+
+                # Stop if terminal state
+                if task.status in {TaskStatus.SUCCESS, TaskStatus.FAILED, TaskStatus.CANCELLED}:
+                    break
+
+            await asyncio.sleep(0.5)  # Poll interval
+
+    return EventSourceResponse(event_generator())
+```
+
+**WebSocket Updates**:
+
+```python
+@app.websocket("/api/v1/ws/task/{task_id}")
+async def task_websocket(websocket: WebSocket, task_id: str):
+    """WebSocket for real-time task updates"""
+    await websocket.accept()
+
+    try:
+        while True:
+            task = task_queue.get_task(task_id)
+
+            if task:
+                # Send update
+                await websocket.send_json({
+                    "task_id": task.task_id,
+                    "status": task.status.value,
+                    "progress": task.progress,
+                    "message": task.message
+                })
+
+                # Stop if done
+                if task.status in {TaskStatus.SUCCESS, TaskStatus.FAILED, TaskStatus.CANCELLED}:
+                    break
+
+            await asyncio.sleep(0.5)
+
+    except WebSocketDisconnect:
+        logger.info(f"Client disconnected from task {task_id}")
+```
+
+## Real-time Update Pipeline
+
+### Architecture
+
+```mermaid
+graph TB
+    Task[Task Processing] --> Update[Progress Update]
+    Update --> Queue[Update Queue]
+
+    Queue --> SSE[SSE Connections]
+    Queue --> WS[WebSocket Connections]
+    Queue --> MCP[MCP Watch Tools]
+
+    SSE --> Client1[Browser Client]
+    WS --> Client2[Web UI]
+    MCP --> Client3[Claude Desktop]
+
+    style Task fill:#E6F3FF
+    style Queue fill:#FFF4E6
+```
+
+### Update Flow
+
+**1. Task Progress Update**:
+
+```python
+# Inside task processor
+await task_queue.update_progress(
+    task_id=task_id,
+    progress=0.5,
+    message="Processed 50% of documents"
+)
+```
+
+**2. Broadcast to Listeners**:
+
+```python
+class TaskQueue:
+    async def update_progress(self, task_id: str, progress: float, message: str):
+        """Update progress and notify listeners"""
+        # Update task
+        task = self.tasks[task_id]
+        task.progress = progress
+        task.message = message
+
+        # Store in database
+        await self._storage.update_task(task)
+
+        # Notify SSE listeners
+        await self._notify_sse_listeners(task_id, task)
+
+        # Notify WebSocket listeners
+        await self._notify_ws_listeners(task_id, task)
+```
+
+**3. SSE Delivery**:
+
+```python
+async def _notify_sse_listeners(self, task_id: str, task: TaskResult):
+    """Send update to SSE clients"""
+    if task_id in self.sse_listeners:
+        event_data = {
+            "task_id": task_id,
+            "status": task.status.value,
+            "progress": task.progress,
+            "message": task.message
+        }
+
+        for queue in self.sse_listeners[task_id]:
+            await queue.put(event_data)
+```
+
+## MCP Request Pipeline
+
+### Overview
+
+Model Context Protocol request handling:
+
+```mermaid
+sequenceDiagram
+    participant Client as Claude Desktop
+    participant MCP as MCP Server
+    participant Handler as Tool Handler
+    participant Service as Service Layer
+    participant Neo4j
+
+    Client->>MCP: Call tool (query_knowledge)
+    MCP->>MCP: Validate request
+    MCP->>Handler: Route to handler
+    Handler->>Service: Call knowledge service
+    Service->>Neo4j: Execute query
+    Neo4j-->>Service: Results
+    Service-->>Handler: Processed result
+    Handler->>Handler: Format response
+    Handler-->>MCP: Tool result
+    MCP-->>Client: Return response
+```
+
+### Request Flow
+
+**1. Tool Invocation**:
+
+```python
+# Client (Claude Desktop) calls tool
+{
+    "method": "tools/call",
+    "params": {
+        "name": "query_knowledge",
+        "arguments": {
+            "question": "How does authentication work?",
+            "top_k": 5
+        }
+    }
+}
+```
+
+**2. Server Routing**:
+
+```python
+@app.call_tool()
+async def call_tool(name: str, arguments: dict) -> Sequence[TextContent]:
+    """Route tool call to appropriate handler"""
+    # Get handler for tool
+    handler = TOOL_HANDLERS.get(name)
+
+    if not handler:
+        raise ValueError(f"Unknown tool: {name}")
+
+    # Execute handler
+    result = await handler(arguments)
+
+    # Format response
+    return [TextContent(
+        type="text",
+        text=format_result(result)
+    )]
+```
+
+**3. Handler Execution**:
+
+```python
+async def handle_query_knowledge(arguments: dict) -> dict:
+    """Handle knowledge query"""
+    # Validate arguments
+    question = arguments.get("question")
+    if not question:
+        return {"success": False, "error": "Question required"}
+
+    top_k = arguments.get("top_k", 5)
+
+    # Call service
+    result = await neo4j_knowledge_service.query(
+        question=question,
+        top_k=top_k
+    )
+
+    return result
+```
+
+**4. Response Formatting**:
+
+```python
+def format_result(result: dict) -> str:
+    """Format result for MCP client"""
+    if not result.get("success"):
+        return f"Error: {result.get('error', 'Unknown error')}"
+
+    # Format answer with sources
+    answer = result.get("answer", "")
+    sources = result.get("sources", [])
+
+    formatted = f"Answer:\n{answer}\n\n"
+
+    if sources:
+        formatted += "Sources:\n"
+        for i, source in enumerate(sources, 1):
+            formatted += f"{i}. {source.get('title')} (relevance: {source.get('relevance', 0):.2f})\n"
+
+    return formatted
+```
+
+## Data Flow Patterns
+
+### Pattern 1: Synchronous Request-Response
+
+**Use Case**: Quick operations (< 1 second)
+
+```mermaid
+sequenceDiagram
+    Client->>API: POST /api/v1/memory/search
+    API->>MemoryStore: search_memories()
+    MemoryStore->>Neo4j: Cypher query
+    Neo4j-->>MemoryStore: Results
+    MemoryStore-->>API: Formatted results
+    API-->>Client: JSON response
+```
+
+**Example**: Memory search, graph queries, statistics
+
+### Pattern 2: Asynchronous Task Processing
+
+**Use Case**: Long operations (> 1 second)
+
+```mermaid
+sequenceDiagram
+    Client->>API: POST /api/v1/documents/directory
+    API->>TaskQueue: submit_task()
+    TaskQueue->>SQLite: Store task
+    TaskQueue-->>API: task_id
+    API-->>Client: {"task_id": "..."}
+
+    Note over TaskQueue,Worker: Background Processing
+
+    Worker->>SQLite: Get pending tasks
+    Worker->>Processor: process_directory()
+    Processor->>KnowServ: Add documents
+    Processor->>TaskQueue: Update progress
+
+    Client->>API: GET /api/v1/tasks/{task_id}
+    API->>TaskQueue: get_task()
+    TaskQueue-->>API: Task status
+    API-->>Client: {"status": "processing", "progress": 0.7}
+```
+
+**Example**: Directory processing, large document ingestion, repository ingestion
+
+### Pattern 3: Streaming Updates
+
+**Use Case**: Real-time progress monitoring
+
+```mermaid
+sequenceDiagram
+    Client->>API: GET /api/v1/sse/task/{task_id}
+    API-->>Client: SSE connection established
+
+    loop Every 500ms
+        Worker->>TaskQueue: Update progress
+        TaskQueue->>API: Broadcast update
+        API-->>Client: event: progress\ndata: {...}
+    end
+
+    Worker->>TaskQueue: Complete task
+    TaskQueue->>API: Final update
+    API-->>Client: event: complete\ndata: {...}
+    API->>API: Close connection
+```
+
+**Example**: Task monitoring, batch operations, long-running queries
+
+### Pattern 4: Batch Processing
+
+**Use Case**: Multiple operations with coordination
+
+```mermaid
+graph TB
+    Start([Batch Request]) --> Split[Split into Tasks]
+    Split --> T1[Task 1]
+    Split --> T2[Task 2]
+    Split --> T3[Task 3]
+
+    T1 --> Process1[Process]
+    T2 --> Process2[Process]
+    T3 --> Process3[Process]
+
+    Process1 --> Collect[Collect Results]
+    Process2 --> Collect
+    Process3 --> Collect
+
+    Collect --> Aggregate[Aggregate]
+    Aggregate --> Complete([Return Combined Result])
+```
+
+**Implementation**:
+```python
+async def batch_process_files(file_paths: List[str]) -> dict:
+    """Process multiple files in parallel"""
+    # Create tasks
+    tasks = [
+        asyncio.create_task(process_file(path))
+        for path in file_paths
+    ]
+
+    # Wait for all tasks
+    results = await asyncio.gather(*tasks, return_exceptions=True)
+
+    # Aggregate results
+    successful = [r for r in results if not isinstance(r, Exception)]
+    failed = [r for r in results if isinstance(r, Exception)]
+
+    return {
+        "total": len(file_paths),
+        "successful": len(successful),
+        "failed": len(failed),
+        "results": successful
+    }
+```
+
+## Performance Considerations
+
+### Database Query Optimization
+
+**Use Indexes**:
+```cypher
+// Always use indexed properties in WHERE clauses
+MATCH (m:Memory)
+WHERE m.id = $id  // Uses unique constraint index
+RETURN m
+
+// vs.
+
+MATCH (m:Memory)
+WHERE m.content CONTAINS $text  // Full scan - slow!
+RETURN m
+```
+
+**Limit Result Sets**:
+```cypher
+// Always use LIMIT
+MATCH (d:Document)
+RETURN d
+LIMIT 100  // Prevent returning entire database
+```
+
+**Use Query Hints**:
+```cypher
+// Force index usage
+MATCH (m:Memory)
+USING INDEX m:Memory(id)
+WHERE m.id = $id
+RETURN m
+```
+
+### Embedding Optimization
+
+**Batch Embeddings**:
+```python
+# Instead of one at a time
+for text in texts:
+    embedding = await embed_model.get_text_embedding(text)
+
+# Batch process
+embeddings = await embed_model.get_text_embeddings(texts)
+```
+
+**Cache Embeddings**:
+```python
+# Cache by content hash
+content_hash = hashlib.sha256(text.encode()).hexdigest()
+if content_hash in embedding_cache:
+    return embedding_cache[content_hash]
+```
+
+### Task Queue Optimization
+
+**Concurrency Tuning**:
+```python
+# Adjust based on resources
+task_queue = TaskQueue(max_concurrent_tasks=5)  # CPU-bound
+task_queue = TaskQueue(max_concurrent_tasks=20)  # I/O-bound
+```
+
+**Task Prioritization**:
+```python
+# High priority tasks first
+async def get_pending_tasks(self, priority_order: bool = True):
+    if priority_order:
+        query = "ORDER BY priority DESC, created_at ASC"
+    else:
+        query = "ORDER BY created_at ASC"
+```
+
+## Error Handling Patterns
+
+### Retry Pattern
+
+```python
+async def retry_on_error(func, max_retries=3, backoff=2):
+    """Retry with exponential backoff"""
+    for attempt in range(max_retries):
+        try:
+            return await func()
+        except TransientError as e:
+            if attempt == max_retries - 1:
+                raise
+            wait_time = backoff ** attempt
+            await asyncio.sleep(wait_time)
+```
+
+### Circuit Breaker
+
+```python
+class CircuitBreaker:
+    """Prevent cascading failures"""
+    def __init__(self, failure_threshold=5, timeout=60):
+        self.failure_count = 0
+        self.failure_threshold = failure_threshold
+        self.timeout = timeout
+        self.last_failure_time = None
+        self.state = "CLOSED"  # CLOSED, OPEN, HALF_OPEN
+
+    async def call(self, func):
+        if self.state == "OPEN":
+            if time.time() - self.last_failure_time > self.timeout:
+                self.state = "HALF_OPEN"
+            else:
+                raise CircuitBreakerOpen()
+
+        try:
+            result = await func()
+            if self.state == "HALF_OPEN":
+                self.state = "CLOSED"
+                self.failure_count = 0
+            return result
+        except Exception as e:
+            self.failure_count += 1
+            self.last_failure_time = time.time()
+
+            if self.failure_count >= self.failure_threshold:
+                self.state = "OPEN"
+            raise
+```
+
+## Conclusion
+
+The data flow architecture is designed for:
+
+1. **Efficiency**: Minimize processing time and resource usage
+2. **Reliability**: Handle errors gracefully with retries and fallbacks
+3. **Scalability**: Support both sync and async patterns
+4. **Observability**: Track all data transformations and state changes
+5. **Flexibility**: Support multiple ingestion and query patterns
+
+Understanding these flows enables:
+- Effective debugging
+- Performance optimization
+- System extension
+- Operational monitoring
diff --git a/docs/architecture/design.md b/docs/architecture/design.md
new file mode 100644
index 0000000..0ccf50b
--- /dev/null
+++ b/docs/architecture/design.md
@@ -0,0 +1,959 @@
+# System Design and Architecture
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Architecture Tiers](#architecture-tiers)
+- [Design Philosophy](#design-philosophy)
+- [System Architecture](#system-architecture)
+- [Technology Stack](#technology-stack)
+- [Design Decisions](#design-decisions)
+- [Scalability Considerations](#scalability-considerations)
+- [Security Architecture](#security-architecture)
+
+## Overview
+
+Code Graph Knowledge System is a Neo4j-based intelligent knowledge management system that combines:
+
+- **Vector Search**: Semantic similarity search using embeddings
+- **Graph Database**: Relationship-based knowledge representation
+- **LLM Integration**: Multiple provider support for AI-powered features
+- **RAG (Retrieval Augmented Generation)**: Context-aware question answering
+- **Code Graph Analysis**: Repository structure and dependency analysis
+- **Memory Management**: Persistent project knowledge for AI agents
+
+The system is designed as a **multi-tier architecture** where each tier builds upon the previous one, allowing users to adopt capabilities incrementally based on their needs.
+
+## Architecture Tiers
+
+The system implements a three-tier architecture, each providing distinct capabilities:
+
+```mermaid
+graph TB
+    subgraph "Tier 1: Minimal - Code Graph"
+        T1[Code Graph Service]
+        T1A[Repository Ingestion]
+        T1B[Code Search]
+        T1C[Impact Analysis]
+        T1D[Context Pack Generation]
+    end
+
+    subgraph "Tier 2: Standard - Memory"
+        T2[Memory Store Service]
+        T2A[Decision Tracking]
+        T2B[Preference Management]
+        T2C[Experience Recording]
+        T2D[Memory Extraction]
+    end
+
+    subgraph "Tier 3: Full - Knowledge RAG"
+        T3[Knowledge Service]
+        T3A[Document Processing]
+        T3B[Vector Search]
+        T3C[RAG Query Engine]
+        T3D[Graph Relationships]
+    end
+
+    T1 --> T2
+    T2 --> T3
+
+    style T1 fill:#e1f5e1
+    style T2 fill:#e3f2fd
+    style T3 fill:#fff9e6
+```
+
+### Tier 1: Minimal (Code Graph)
+
+**Purpose**: Static code analysis and repository understanding
+
+**Components**:
+- Code ingestor with multi-language support
+- Graph-based code structure representation
+- Symbol relationship tracking
+- Impact analysis engine
+
+**Use Cases**:
+- Understanding codebase structure
+- Finding related code components
+- Analyzing change impact
+- Generating context for AI tools
+
+**Resource Requirements**: Low (minimal LLM usage)
+
+### Tier 2: Standard (+ Memory)
+
+**Purpose**: Project knowledge persistence for AI agents
+
+**Components**:
+- Memory Store service with typed memories
+- Search and retrieval system
+- Automatic extraction from commits/comments
+- Memory evolution tracking (supersede mechanism)
+
+**Use Cases**:
+- Recording architectural decisions
+- Tracking team preferences
+- Learning from past problems
+- Maintaining consistency across sessions
+
+**Resource Requirements**: Medium (LLM for extraction features)
+
+### Tier 3: Full (+ Knowledge RAG)
+
+**Purpose**: Intelligent document processing and question answering
+
+**Components**:
+- LlamaIndex-based knowledge graph
+- Vector embedding generation
+- Multi-source document ingestion
+- RAG query engine with graph traversal
+
+**Use Cases**:
+- Natural language querying
+- Document-based question answering
+- Cross-document knowledge synthesis
+- Semantic search across knowledge base
+
+**Resource Requirements**: High (intensive LLM and embedding usage)
+
+## Design Philosophy
+
+### 1. Progressive Complexity
+
+The tier-based architecture allows users to:
+- Start with minimal features (Code Graph only)
+- Add memory capabilities when needed
+- Enable full RAG when ready for advanced features
+
+**Trade-off**: Increased system complexity vs. flexibility
+
+### 2. Multi-Provider Support
+
+Support for multiple LLM and embedding providers:
+- **Ollama**: Local deployment, privacy-focused
+- **OpenAI**: High quality, cloud-based
+- **Google Gemini**: Competitive performance
+- **OpenRouter**: Access to multiple models
+- **HuggingFace**: Open-source embeddings
+
+**Trade-off**: More configuration complexity vs. vendor flexibility
+
+### 3. Async-First Design
+
+All I/O operations are asynchronous:
+- Non-blocking request handling
+- Background task processing
+- Concurrent operation support
+
+**Trade-off**: Programming complexity vs. performance
+
+### 4. Service-Oriented Architecture
+
+Clear separation of concerns:
+- Each service has a single responsibility
+- Services communicate through well-defined interfaces
+- Easy to test and maintain
+
+**Trade-off**: More files/modules vs. maintainability
+
+## System Architecture
+
+### High-Level Architecture
+
+```mermaid
+graph TB
+    subgraph "Client Layer"
+        HTTP[HTTP/REST Clients]
+        MCP[MCP Clients<br/>Claude Desktop, VSCode]
+        UI[Web UI<br/>Monitoring Interface]
+    end
+
+    subgraph "API Layer"
+        FastAPI[FastAPI Server]
+        MCPS[MCP Server<br/>Official SDK]
+        SSE[Server-Sent Events]
+        WS[WebSocket]
+    end
+
+    subgraph "Service Layer"
+        KS[Knowledge Service<br/>LlamaIndex + Neo4j]
+        MS[Memory Store<br/>Project Knowledge]
+        GS[Graph Service<br/>Code Analysis]
+        TQ[Task Queue<br/>Async Processing]
+        ME[Memory Extractor<br/>Auto-extraction]
+    end
+
+    subgraph "Storage Layer"
+        Neo4j[(Neo4j Graph DB<br/>Vector Index)]
+        SQLite[(SQLite<br/>Task Persistence)]
+        FS[File System<br/>Temp Files]
+    end
+
+    subgraph "External Services"
+        LLM[LLM Providers<br/>Ollama/OpenAI/Gemini]
+        Embed[Embedding Models<br/>Vector Generation]
+    end
+
+    HTTP --> FastAPI
+    MCP --> MCPS
+    UI --> FastAPI
+
+    FastAPI --> KS
+    FastAPI --> MS
+    FastAPI --> GS
+    FastAPI --> TQ
+    FastAPI --> SSE
+    FastAPI --> WS
+
+    MCPS --> KS
+    MCPS --> MS
+    MCPS --> GS
+    MCPS --> TQ
+
+    KS --> Neo4j
+    MS --> Neo4j
+    GS --> Neo4j
+    TQ --> SQLite
+    TQ --> FS
+    ME --> MS
+
+    KS --> LLM
+    KS --> Embed
+    MS --> LLM
+    ME --> LLM
+
+    style FastAPI fill:#4CAF50
+    style MCPS fill:#2196F3
+    style Neo4j fill:#f9a825
+    style LLM fill:#9C27B0
+```
+
+### Component Layers
+
+#### 1. Client Layer
+
+**HTTP/REST Clients**:
+- Standard HTTP requests
+- JSON-based communication
+- OpenAPI/Swagger documentation
+
+**MCP Clients**:
+- Claude Desktop integration
+- VSCode with MCP extension
+- Custom MCP client implementations
+- Uses official MCP SDK protocol
+
+**Web UI**:
+- Real-time monitoring interface (NiceGUI)
+- Task status visualization
+- File upload and processing
+- WebSocket-based updates
+
+#### 2. API Layer
+
+**FastAPI Server** (`main.py`, `core/app.py`):
+- RESTful API endpoints
+- Async request handling
+- CORS middleware
+- GZip compression
+- Exception handling
+
+**MCP Server** (`mcp_server.py`, `start_mcp.py`):
+- 30 tools across 6 categories
+- Official MCP SDK implementation
+- Session management
+- Streaming support
+- Multi-transport (stdio, SSE, WebSocket)
+
+**Real-time Communication**:
+- Server-Sent Events for task monitoring
+- WebSocket for UI updates
+- Streaming responses for long operations
+
+#### 3. Service Layer
+
+**Knowledge Service** (`services/neo4j_knowledge_service.py`):
+- LlamaIndex KnowledgeGraphIndex integration
+- Vector embedding generation
+- Document processing and chunking
+- RAG query engine
+
+**Memory Store** (`services/memory_store.py`):
+- Project knowledge persistence
+- Typed memory system (decision/preference/experience/convention/plan/note)
+- Search with filters and importance scoring
+- Memory evolution (supersede mechanism)
+
+**Graph Service** (`services/graph_service.py`):
+- Code graph management
+- Cypher query execution
+- Schema management
+- Relationship traversal
+
+**Task Queue** (`services/task_queue.py`):
+- Async background processing
+- SQLite-based persistence
+- Concurrent task limiting
+- Status tracking and updates
+
+**Memory Extractor** (`services/memory_extractor.py`):
+- Conversation analysis
+- Git commit mining
+- Code comment extraction
+- Batch repository analysis
+
+#### 4. Storage Layer
+
+**Neo4j Graph Database**:
+- Knowledge graph storage
+- Native vector indexing
+- Relationship management
+- Fulltext search indexes
+
+**SQLite Database**:
+- Task queue persistence
+- Task status tracking
+- Worker coordination
+
+**File System**:
+- Temporary file storage
+- Large document handling
+- Upload processing
+
+## Technology Stack
+
+### Core Framework
+
+```yaml
+Web Framework:
+  - FastAPI: Async web framework
+  - Uvicorn: ASGI server
+  - Pydantic: Data validation
+
+MCP Integration:
+  - mcp>=1.1.0: Official Model Context Protocol SDK
+  - Custom handlers: Modular tool organization
+```
+
+### Database & Storage
+
+```yaml
+Graph Database:
+  - Neo4j 5.11+: Graph and vector storage (native vector indexes require 5.11 or later)
+  - APOC plugin: Advanced procedures
+  - Native vector index: Semantic search
+
+Task Persistence:
+  - SQLite: Lightweight task storage
+  - Async driver: Non-blocking operations
+```
+
+### AI & ML
+
+```yaml
+LLM Integration:
+  - LlamaIndex: RAG framework
+  - Ollama: Local LLM hosting
+  - OpenAI: GPT models
+  - Google Gemini: Gemini models
+  - OpenRouter: Multi-provider access
+
+Embedding Models:
+  - Ollama: nomic-embed-text
+  - OpenAI: text-embedding-ada-002
+  - Gemini: models/embedding-001
+  - HuggingFace: BAAI/bge-small-en-v1.5
+```
+
+### Developer Tools
+
+```yaml
+Code Quality:
+  - Black: Code formatting
+  - isort: Import sorting
+  - Ruff: Fast linting
+  - pytest: Testing framework
+
+Monitoring:
+  - Loguru: Structured logging
+  - NiceGUI: Web monitoring UI
+  - SSE: Real-time updates
+```
+
+## Design Decisions
+
+### 1. Neo4j as Primary Database
+
+**Decision**: Use Neo4j as the primary store for all domain data (knowledge, memory, code graph); only task-queue state is persisted separately in SQLite
+
+**Rationale**:
+- Native graph queries for relationships
+- Built-in vector indexing (v5.11+)
+- Fulltext search capabilities
+- ACID compliance
+- Scales well for graph traversal
+
+**Trade-offs**:
+- More complex than traditional SQL
+- Requires Neo4j infrastructure
+- Learning curve for Cypher queries
+- Higher memory usage
+
+**Alternatives Considered**:
+- PostgreSQL + pgvector: Good but weaker graph queries
+- Separate vector DB (Pinecone/Weaviate): Additional infrastructure
+- MongoDB: Poor relationship handling
+
+### 2. LlamaIndex for RAG
+
+**Decision**: Use LlamaIndex's KnowledgeGraphIndex
+
+**Rationale**:
+- Production-ready RAG framework
+- Neo4j integration out-of-the-box
+- Flexible node parser system
+- Active development and community
+
+**Trade-offs**:
+- Additional abstraction layer
+- Some LlamaIndex-specific patterns
+- Updates may require code changes
+
+**Alternatives Considered**:
+- LangChain: More complex, heavier
+- Custom RAG: More control but more work
+- Haystack: Less graph-oriented
+
+### 3. Async Task Queue
+
+**Decision**: Custom async task queue with SQLite persistence
+
+**Rationale**:
+- Simple deployment (no external queue)
+- Sufficient for single-server deployment
+- Task persistence across restarts
+- Direct integration with FastAPI
+
+**Trade-offs**:
+- Not distributed (single server only)
+- Limited throughput vs. Redis/RabbitMQ
+- SQLite lock contention possible
+
+**Alternatives Considered**:
+- Celery + Redis: Overkill for single server
+- RQ: Still requires Redis
+- Dramatiq: More dependencies
+
+### 4. Multi-Provider LLM Support
+
+**Decision**: Support multiple LLM and embedding providers
+
+**Rationale**:
+- Vendor independence
+- Local deployment option (Ollama)
+- Cost optimization
+- Feature comparison capability
+
+**Trade-offs**:
+- More configuration complexity
+- Testing burden across providers
+- Inconsistent behavior possible
+
+**Alternatives Considered**:
+- Single provider (OpenAI): Simple but vendor lock-in
+- LiteLLM proxy: Additional component
+
+### 5. MCP Server with Official SDK
+
+**Decision**: Migrate from FastMCP to official MCP SDK
+
+**Rationale**:
+- Official protocol compliance
+- Better long-term support
+- Advanced features (streaming, sessions)
+- Industry standard
+
+**Trade-offs**:
+- More verbose code
+- Lower-level API
+- Migration effort required
+
+**Alternatives Considered**:
+- Keep FastMCP: Simpler but less standard
+- Direct HTTP API only: Would miss Claude Desktop integration
+
+### 6. Tier-Based Architecture
+
+**Decision**: Three-tier progressive architecture
+
+**Rationale**:
+- Gradual adoption curve
+- Cost optimization (use only what's needed)
+- Clear feature boundaries
+- Independent scaling
+
+**Trade-offs**:
+- More complex initialization
+- Feature interdependencies
+- Documentation overhead
+
+**Alternatives Considered**:
+- All-or-nothing: Simpler but less flexible
+- Plugin system: More complex
+
+## Scalability Considerations
+
+### Current Architecture (Single Server)
+
+**Designed for**:
+- Small to medium teams (1-50 users)
+- Moderate query volume (<1000 req/hour)
+- Single deployment instance
+- Shared Neo4j database
+
+**Bottlenecks**:
+1. Neo4j connection pool
+2. Task queue concurrency limit
+3. LLM API rate limits
+4. Memory constraints for large documents
+
+### Horizontal Scaling Path
+
+```mermaid
+graph TB
+    subgraph "Load Balancer"
+        LB[Nginx / HAProxy]
+    end
+
+    subgraph "API Servers"
+        API1[FastAPI Instance 1]
+        API2[FastAPI Instance 2]
+        API3[FastAPI Instance N]
+    end
+
+    subgraph "MCP Servers"
+        MCP1[MCP Instance 1]
+        MCP2[MCP Instance 2]
+    end
+
+    subgraph "Shared Services"
+        Neo4j[(Neo4j Cluster)]
+        Redis[(Redis<br/>Task Queue)]
+        S3[Object Storage<br/>Documents]
+    end
+
+    LB --> API1
+    LB --> API2
+    LB --> API3
+
+    API1 --> Neo4j
+    API2 --> Neo4j
+    API3 --> Neo4j
+
+    API1 --> Redis
+    API2 --> Redis
+    API3 --> Redis
+
+    API1 --> S3
+    API2 --> S3
+    API3 --> S3
+
+    MCP1 --> Neo4j
+    MCP2 --> Neo4j
+```
+
+**Required Changes**:
+1. Replace SQLite task queue with Redis/RabbitMQ
+2. Use object storage (S3/MinIO) for file uploads
+3. Session management with Redis
+4. Neo4j clustering for HA
+5. Shared cache layer
+
+### Vertical Scaling
+
+**Immediate Improvements**:
+- Increase Neo4j memory (`server.memory.heap.max_size`; named `dbms.memory.heap.max_size` in Neo4j 4.x)
+- Tune vector index parameters
+- Optimize chunk sizes
+- Add Redis caching layer
+- Use faster embedding models
+
+### Performance Optimization
+
+**Database Level**:
+```cypher
+// Ensure proper indexes exist
+CREATE INDEX IF NOT EXISTS FOR (n:Document) ON (n.id);
+CREATE INDEX IF NOT EXISTS FOR (m:Memory) ON (m.project_id, m.importance);
+CREATE FULLTEXT INDEX IF NOT EXISTS FOR (m:Memory) ON EACH [m.title, m.content];
+
+// Vector index configuration
+CALL db.index.vector.createNodeIndex(
+  'knowledge_vectors',
+  'Document',
+  'embedding',
+  1536,  // Dimension: must equal the embedding model's output size (1536 matches OpenAI text-embedding-ada-002)
+  'cosine'
+);
+```
+
+**Application Level**:
+- Connection pooling
+- Query result caching
+- Batch operations
+- Async I/O everywhere
+- Background task offloading
+
+## Security Architecture
+
+### Authentication & Authorization
+
+**Current Implementation**:
+- Optional API key authentication
+- Environment-based configuration
+- No user management (designed for internal use)
+
+**Production Recommendations**:
+```yaml
+Authentication:
+  - API key per user/service
+  - JWT tokens for session management
+  - OAuth2 for third-party integration
+
+Authorization:
+  - Role-based access control (RBAC)
+  - Project-level permissions
+  - Rate limiting per API key
+```
+
+### Data Security
+
+**At Rest**:
+- Neo4j data encryption via disk/volume-level encryption (Neo4j has no built-in at-rest encryption setting)
+- Environment variable encryption
+- Secrets management (AWS Secrets Manager, Vault)
+
+**In Transit**:
+- TLS/HTTPS for all HTTP traffic
+- Neo4j Bolt encryption
+- Secure WebSocket (WSS)
+
+**Code Security**:
+```python
+# Input validation with Pydantic
+class DocumentAddRequest(BaseModel):
+    content: str = Field(..., max_length=10_000_000)
+    title: str = Field(..., max_length=200)
+
+# Cypher injection prevention (parameterized queries)
+await session.run(
+    "CREATE (d:Document {id: $id, title: $title})",
+    id=doc_id, title=title
+)
+
+# XSS prevention (API responses are JSON; escape user-supplied content before rendering it in the web UI)
+# CSRF protection for web UI
+```
+
+### Network Security
+
+**Recommended Deployment**:
+```yaml
+VPC Configuration:
+  - Private subnet for Neo4j
+  - Public subnet for API (behind ALB)
+  - Security groups for port control
+
+Firewall Rules:
+  - 8123: API access (restricted IPs)
+  - 7687: Neo4j Bolt (internal only)
+  - 7474: Neo4j Browser (VPN only)
+
+TLS Configuration:
+  - Minimum TLS 1.2
+  - Strong cipher suites
+  - Certificate pinning for MCP
+```
+
+### Secrets Management
+
+**Environment Variables**:
+```bash
+# Required secrets
+NEO4J_PASSWORD=<strong-password>
+OPENAI_API_KEY=<api-key>
+GOOGLE_API_KEY=<api-key>
+API_KEY=<system-api-key>
+
+# Use secrets manager
+AWS_SECRETS_MANAGER_SECRET_ID=code-graph-prod
+VAULT_ADDR=https://vault.company.com
+```
+
+**Best Practices**:
+- Never commit secrets to version control
+- Rotate API keys regularly
+- Use managed secrets services in production
+- Separate secrets per environment
+- Audit secret access
+
+### Threat Model
+
+**Potential Threats**:
+1. **Unauthorized Access**: API key leakage
+   - Mitigation: Strong keys, rotation, IP whitelisting
+
+2. **Data Injection**: Malicious document content
+   - Mitigation: Input validation, content sanitization
+
+3. **Resource Exhaustion**: Large document uploads
+   - Mitigation: Size limits, rate limiting, timeouts
+
+4. **Prompt Injection**: Malicious queries to LLM
+   - Mitigation: Input sanitization, output filtering
+
+5. **Data Leakage**: Sensitive information in graph
+   - Mitigation: Access controls, data classification
+
+**Security Checklist**:
+- [ ] Enable Neo4j authentication
+- [ ] Use HTTPS/TLS in production
+- [ ] Implement API key authentication
+- [ ] Set up rate limiting
+- [ ] Enable CORS restrictions
+- [ ] Configure file size limits
+- [ ] Set up logging and monitoring
+- [ ] Regular security updates
+- [ ] Backup encryption
+- [ ] Secrets rotation schedule
+
+## Monitoring & Observability
+
+### Logging Strategy
+
+**Structured Logging with Loguru**:
+```python
+logger.info("Document processed",
+    doc_id=doc_id,
+    size=len(content),
+    duration=elapsed_time
+)
+```
+
+**Log Levels**:
+- DEBUG: Detailed troubleshooting
+- INFO: General operational events
+- WARNING: Potential issues
+- ERROR: Error conditions
+- CRITICAL: System failures
+
+### Metrics Collection
+
+**Key Metrics**:
+```yaml
+Application Metrics:
+  - Request rate (req/sec)
+  - Response time (p50, p95, p99)
+  - Error rate (%)
+  - Task queue depth
+  - Active tasks count
+
+Database Metrics:
+  - Query execution time
+  - Connection pool usage
+  - Vector search latency
+  - Graph traversal depth
+
+LLM Metrics:
+  - API call duration
+  - Token usage
+  - Error rate per provider
+  - Cost tracking
+```
+
+### Health Checks
+
+**Endpoint**: `/api/v1/health`
+
+**Checks**:
+- Neo4j connectivity
+- LLM provider availability
+- Task queue status
+- Memory Store status
+
+**Example Response**:
+```json
+{
+  "status": "healthy",
+  "timestamp": "2025-11-06T12:00:00Z",
+  "services": {
+    "neo4j": true,
+    "knowledge_service": true,
+    "memory_store": true,
+    "task_queue": true,
+    "ollama": true
+  },
+  "version": "1.0.0"
+}
+```
+
+### Alerting
+
+**Critical Alerts**:
+- Service down (Neo4j, LLM provider)
+- High error rate (>5%)
+- Task queue backup (>100 pending)
+- Disk space low (<10%)
+- Memory usage high (>90%)
+
+**Warning Alerts**:
+- Slow queries (>5s)
+- High response time (>1s p95)
+- LLM API errors
+- Connection pool exhaustion
+
+## Disaster Recovery
+
+### Backup Strategy
+
+**Neo4j Backups**:
+```bash
+# Daily full backup
+neo4j-admin database dump neo4j --to-path=/backups/$(date +%Y%m%d)
+
+# Incremental backup (Enterprise)
+neo4j-admin database backup --to-path=/backups neo4j
+```
+
+**Task Queue Backups**:
+```bash
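+# SQLite database backup (online .backup gives a consistent snapshot; a plain cp of a live database may not)
+sqlite3 tasks.db ".backup '/backups/tasks_$(date +%Y%m%d_%H%M%S).db'"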
+```
+
+**Configuration Backups**:
+- `.env` file (encrypted)
+- Neo4j configuration
+- Application configuration
+
+### Recovery Procedures
+
+**Full System Recovery**:
+1. Restore Neo4j from backup
+2. Restore SQLite database
+3. Restore configuration files
+4. Verify service connectivity
+5. Resume task processing
+
+**Partial Recovery**:
+- Knowledge graph: Restore from Neo4j backup
+- Memory Store: Restore from Neo4j backup
+- Tasks: Re-queue failed tasks
+
+**RTO/RPO Targets**:
+- RTO (Recovery Time Objective): 4 hours
+- RPO (Recovery Point Objective): 24 hours (daily backups)
+
+### High Availability
+
+**Single Points of Failure**:
+- Neo4j database (can cluster in Enterprise)
+- Application server (can load balance)
+- LLM provider (multi-provider fallback)
+
+**Mitigation**:
+```yaml
+Neo4j Clustering:
+  - 3-node cluster minimum
+  - Automatic failover
+  - Read replicas for scaling
+
+Application:
+  - Multiple instances behind load balancer
+  - Stateless design for easy scaling
+  - Health check-based routing
+
+LLM Providers:
+  - Primary + fallback provider
+  - Automatic retry with exponential backoff
+  - Circuit breaker pattern
+```
+
+## Future Architecture Considerations
+
+### Potential Enhancements
+
+**1. Distributed Task Queue**:
+```python
+# Replace SQLite with Redis/RabbitMQ
+from celery import Celery
+app = Celery('tasks', broker='redis://localhost:6379')
+```
+
+**2. Caching Layer**:
+```python
+# Add Redis caching
+from redis import asyncio as aioredis
+cache = await aioredis.from_url("redis://localhost")
+```
+
+**3. API Gateway**:
+```yaml
+Kong/Tyk Configuration:
+  - Rate limiting
+  - Authentication
+  - Request transformation
+  - Analytics
+```
+
+**4. Microservices Split**:
+```
+Current: Monolith
+Future:
+  - knowledge-service
+  - memory-service
+  - code-graph-service
+  - task-worker-service
+```
+
+**5. Event-Driven Architecture**:
+```python
+# Event bus for service communication
+from aiokafka import AIOKafkaProducer
+
+producer = AIOKafkaProducer(bootstrap_servers='localhost:9092')
+await producer.send('document.processed', value=event_data)
+```
+
+### Technology Evolution
+
+**Short-term (3-6 months)**:
+- Add Redis caching
+- Implement comprehensive metrics
+- Enhanced error handling
+- Performance optimization
+
+**Mid-term (6-12 months)**:
+- Kubernetes deployment
+- Neo4j clustering
+- Distributed tracing (Jaeger)
+- Advanced monitoring (Prometheus + Grafana)
+
+**Long-term (12+ months)**:
+- Microservices architecture
+- Multi-region deployment
+- GraphQL API option
+- ML model serving infrastructure
+
+## Conclusion
+
+The Code Graph Knowledge System architecture is designed with these core principles:
+
+1. **Progressive Adoption**: Three-tier architecture allows gradual capability adoption
+2. **Flexibility**: Multi-provider support for LLM and embeddings
+3. **Scalability**: Clear path from single-server to distributed deployment
+4. **Maintainability**: Service-oriented design with clear boundaries
+5. **Performance**: Async-first design for optimal throughput
+6. **Security**: Built-in security considerations for production use
+
+The architecture balances simplicity for initial deployment with clear paths for scaling and enhancement as needs grow.
diff --git a/docs/assets/favicon.svg b/docs/assets/favicon.svg
new file mode 100644
index 0000000..19e8d96
--- /dev/null
+++ b/docs/assets/favicon.svg
@@ -0,0 +1,16 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 64 64" width="64" height="64">
+  <!-- Background circle -->
+  <circle cx="32" cy="32" r="30" fill="#3f51b5"/>
+
+  <!-- Simplified graph structure -->
+  <circle cx="32" cy="20" r="5" fill="white" opacity="0.9"/>
+  <circle cx="20" cy="38" r="5" fill="white" opacity="0.9"/>
+  <circle cx="44" cy="38" r="5" fill="white" opacity="0.9"/>
+
+  <line x1="32" y1="20" x2="20" y2="38" stroke="white" stroke-width="2" opacity="0.7"/>
+  <line x1="32" y1="20" x2="44" y2="38" stroke="white" stroke-width="2" opacity="0.7"/>
+  <line x1="20" y1="38" x2="44" y2="38" stroke="white" stroke-width="2" opacity="0.7"/>
+
+  <!-- Code symbol -->
+  <text x="32" y="52" font-family="monospace" font-size="16" font-weight="bold" fill="white" text-anchor="middle">&lt;/&gt;</text>
+</svg>
diff --git a/docs/assets/logo.svg b/docs/assets/logo.svg
new file mode 100644
index 0000000..985b2e2
--- /dev/null
+++ b/docs/assets/logo.svg
@@ -0,0 +1,23 @@
+<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 200 200" width="200" height="200">
+  <!-- Background -->
+  <rect width="200" height="200" fill="none"/>
+
+  <!-- Graph nodes (larger circles) -->
+  <circle cx="100" cy="60" r="18" fill="#5c6bc0" opacity="0.9"/>
+  <circle cx="60" cy="120" r="18" fill="#5c6bc0" opacity="0.9"/>
+  <circle cx="140" cy="120" r="18" fill="#5c6bc0" opacity="0.9"/>
+  <circle cx="100" cy="160" r="18" fill="#5c6bc0" opacity="0.9"/>
+
+  <!-- Connecting lines -->
+  <line x1="100" y1="60" x2="60" y2="120" stroke="#7986cb" stroke-width="3" opacity="0.7"/>
+  <line x1="100" y1="60" x2="140" y2="120" stroke="#7986cb" stroke-width="3" opacity="0.7"/>
+  <line x1="60" y1="120" x2="100" y2="160" stroke="#7986cb" stroke-width="3" opacity="0.7"/>
+  <line x1="140" y1="120" x2="100" y2="160" stroke="#7986cb" stroke-width="3" opacity="0.7"/>
+  <line x1="60" y1="120" x2="140" y2="120" stroke="#7986cb" stroke-width="3" opacity="0.7"/>
+
+  <!-- Center highlight -->
+  <circle cx="100" cy="100" r="24" fill="#3f51b5" opacity="0.95"/>
+
+  <!-- Code brackets in center -->
+  <text x="100" y="112" font-family="monospace" font-size="32" font-weight="bold" fill="white" text-anchor="middle">&lt;/&gt;</text>
+</svg>
diff --git a/docs/changelog.md b/docs/changelog.md
new file mode 100644
index 0000000..5611071
--- /dev/null
+++ b/docs/changelog.md
@@ -0,0 +1,475 @@
+# Changelog
+
+All notable changes to the Code Graph Knowledge System will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Planned Features
+- API authentication with JWT
+- Web-based configuration UI
+- Multi-provider LLM support (simultaneous)
+- Advanced code refactoring suggestions
+- Rust, C++, C# language support
+- Real-time collaboration features
+- Plugin system for custom extensions
+
+## [0.7.0] - 2025-01-15
+
+### Added - Automatic Memory Extraction
+- **Conversation Analysis**: Extract memories from AI conversation history
+  - LLM-powered decision and experience detection
+  - Confidence scoring for automatic saving
+  - Configurable auto-save threshold
+- **Git Commit Analysis**: Analyze git commits for architectural decisions
+  - Parse commit messages and changed files
+  - Extract decisions, experiences, and conventions
+  - Link memories to specific commits
+- **Code Comment Mining**: Extract TODO, FIXME, NOTE, DECISION markers
+  - Automatic scanning of code comments
+  - Convert markers to structured memories
+  - Track technical debt and action items
+- **Query-based Memory Suggestions**: Suggest important memories from Q&A
+  - Analyze knowledge base queries and answers
+  - Identify information worth remembering
+  - Suggest memory creation with auto-populated fields
+- **Batch Repository Extraction**: Comprehensive codebase analysis
+  - Extract from git history (configurable commit limit)
+  - Mine code comments across file patterns
+  - Bulk memory creation from repository insights
+
+### Added - New MCP Tools (5 tools)
+- `extract_from_conversation`: Extract memories from conversation history
+- `extract_from_git_commit`: Analyze git commits for memories
+- `extract_from_code_comments`: Mine code comments for action items
+- `suggest_memory_from_query`: Suggest memories from Q&A sessions
+- `batch_extract_from_repository`: Full repository analysis
+
+### Added - New API Endpoints (5 endpoints)
+- `POST /api/v1/memory/extract/conversation`: Extract from conversations
+- `POST /api/v1/memory/extract/commit`: Extract from git commits
+- `POST /api/v1/memory/extract/comments`: Extract from code comments
+- `POST /api/v1/memory/suggest`: Suggest memory from query/answer
+- `POST /api/v1/memory/extract/batch`: Batch repository extraction
+
+### Changed
+- Enhanced memory extraction service with LLM-powered analysis
+- Improved error messages for memory operations
+- Updated MCP handler architecture documentation
+- Enhanced memory search relevance scoring
+
+### Fixed
+- Neo4j connection timeout in Docker environments
+- Memory search not finding recently added memories
+- Environment variable handling in Docker deployment
+- Race condition in concurrent memory additions
+
+### Documentation
+- Added comprehensive memory extraction guide
+- Updated API documentation with extraction endpoints
+- New examples for automatic memory extraction
+- Enhanced troubleshooting guide
+
+## [0.6.0] - 2024-12-20
+
+### Added - Memory Store for AI Agents
+- **Memory Management System**: Long-term project knowledge persistence
+  - Decision memory type: Architecture and technical choices
+  - Preference memory type: Coding styles and conventions
+  - Experience memory type: Problems encountered and solutions
+  - Convention memory type: Team rules and standards
+  - Plan memory type: Future improvements and TODOs
+  - Note memory type: General project information
+- **Memory Operations**: Full CRUD operations for memories
+  - Add memory with importance scoring
+  - Search memories with semantic search
+  - Update existing memories
+  - Delete memories (soft delete)
+  - Supersede memories (version history)
+  - Project memory summaries
+- **Memory Relationships**: Graph-based memory connections
+  - `BELONGS_TO`: Memory to project relationships
+  - `RELATES_TO`: Inter-memory relationships
+  - `SUPERSEDES`: Memory version history
+
+### Added - Multi-Language Support (3 languages)
+- **Java Support**: Complete Java code analysis
+  - Import statement parsing (standard and static)
+  - Class inheritance and interface tracking
+  - Method visibility detection (public/protected/private)
+  - Package dependency mapping
+- **PHP Support**: PHP code analysis
+  - Use statement parsing (class, function, const)
+  - Require/include dependency tracking
+  - Class extends and implements relationships
+  - Function type hint extraction
+- **Go Support**: Golang code analysis
+  - Package import parsing (single and blocks)
+  - Struct and interface detection
+  - Function and method extraction (with receivers)
+  - Package alias tracking
+
+### Added - Docker Multi-Mode Deployment
+- **Three deployment modes**:
+  - Minimal: Code Graph only (~800MB)
+  - Standard: Code Graph + Memory (~1.2GB)
+  - Full: All features (~1.5GB)
+- **Docker Compose configurations**:
+  - `docker-compose.minimal.yml`
+  - `docker-compose.standard.yml`
+  - `docker-compose.full.yml`
+- **Multi-platform support**: amd64, arm64
+- **Helper scripts**: Simplified deployment commands
+
+### Added - MCP Tools (7 memory tools)
+- `add_memory`: Save new project knowledge
+- `search_memories`: Find relevant memories
+- `get_memory`: Retrieve specific memory
+- `update_memory`: Modify existing memory
+- `delete_memory`: Remove memory
+- `supersede_memory`: Create new memory that replaces old one
+- `get_project_summary`: Get project memory overview
+
+### Added - API Endpoints (7 memory endpoints)
+- `POST /api/v1/memory/add`: Add new memory
+- `POST /api/v1/memory/search`: Search memories
+- `GET /api/v1/memory/{memory_id}`: Get specific memory
+- `PUT /api/v1/memory/{memory_id}`: Update memory
+- `DELETE /api/v1/memory/{memory_id}`: Delete memory
+- `POST /api/v1/memory/supersede`: Supersede old memory
+- `GET /api/v1/memory/project/{project_id}/summary`: Project summary
+
+### Changed
+- Updated file patterns to include Java, PHP, Go files
+- Enhanced code graph to support new language relationships
+- Improved transformer architecture for multi-language support
+
+### Documentation
+- Added Memory Store user guide
+- Added memory API documentation
+- Updated examples with memory usage
+- Enhanced CLAUDE.md with memory workflows
+
+## [0.5.0] - 2024-11-15
+
+### Added - MCP Protocol Support
+- **Official MCP SDK Integration**: Model Context Protocol v1.1.0+
+- **Modular Architecture**: Handler-based design (310-line main server)
+  - Knowledge handlers: Query, search, document management
+  - Code graph handlers: Ingestion, analysis, statistics
+  - System handlers: Health checks, configuration
+  - Task handlers: Background processing, monitoring
+- **30 MCP Tools**: Comprehensive AI assistant integration
+  - 8 knowledge tools
+  - 10 code graph tools
+  - 4 system tools
+  - 8 task monitoring tools
+- **Advanced Features**:
+  - Session management framework
+  - Streaming support (SSE)
+  - Multi-transport capability (stdio, SSE, WebSocket)
+
+### Added - Prometheus Metrics
+- **15+ metrics** for monitoring:
+  - Request counters (total, by endpoint, by status)
+  - Request duration histograms
+  - Active request gauges
+  - Neo4j operation metrics
+  - Document processing metrics
+  - Error rate tracking
+- **Metrics endpoint**: `GET /api/v1/metrics`
+- **Grafana dashboard** configuration (optional)
+
+### Added - Neo4j Health Monitoring
+- Connection status tracking
+- Query performance metrics
+- Database size monitoring
+- Index usage statistics
+
+### Changed
+- Refactored MCP server from 1400 lines to 310 lines (78% reduction)
+- Extracted handlers into `mcp_tools/` package
+- Improved error handling and logging
+- Enhanced code organization and maintainability
+
+### Documentation
+- Added MCP v2 modularization guide
+- Updated MCP integration documentation
+- Added Prometheus metrics documentation
+- Enhanced deployment guides
+
+## [0.4.0] - 2024-10-20
+
+### Added - Real-time Task Monitoring
+- **Web UI Monitoring**: NiceGUI-based monitoring interface
+  - Real-time task status updates via WebSocket
+  - File upload functionality (50KB size limit)
+  - Directory batch processing
+  - Task progress visualization
+  - Accessible at `/ui/monitor` when `ENABLE_MONITORING=true`
+- **Server-Sent Events (SSE)**: Streaming APIs for real-time updates
+  - `/api/v1/sse/task/{task_id}`: Monitor single task
+  - `/api/v1/sse/tasks`: Monitor all tasks with filtering
+  - `/api/v1/sse/stats`: SSE connection statistics
+- **Task Queue System**: Background processing with monitoring
+  - Async task execution
+  - Progress tracking
+  - Error handling and retry logic
+  - Task history and logs
+
+### Added - Large File Handling
+- **Multi-strategy approach**:
+  - Direct processing: Files < 10KB
+  - Temporary file strategy: Files 10-50KB
+  - Directory processing prompt: Files > 50KB
+  - MCP automatic temp files: All sizes
+- **Configurable limits**: Size thresholds via environment variables
+
+### Added - Client Examples
+- `examples/pure_mcp_client.py`: Pure MCP client with watch tools
+- `examples/hybrid_http_sse_client.py`: HTTP + SSE hybrid approach
+- Real-time monitoring demonstrations
+
+### Changed
+- Enhanced file upload handling with size validation
+- Improved error messages for large file uploads
+- Better timeout handling for large documents
+
+### Fixed
+- Memory leaks in long-running tasks
+- SSE connection stability issues
+- File upload timeout for large files
+
+## [0.3.0] - 2024-09-15
+
+### Added - Universal SQL Schema Parser
+- **Multi-dialect support**: Oracle, MySQL, PostgreSQL, SQL Server
+- **Configurable business domain classification**: YAML/JSON configuration
+- **Pre-built industry templates**:
+  - Insurance: Policies, claims, underwriting
+  - E-commerce: Products, orders, customers
+  - Banking: Accounts, transactions, loans
+  - Healthcare: Patients, diagnoses, treatments
+- **Comprehensive parsing**:
+  - Table and column extraction
+  - Foreign key relationships
+  - Index definitions
+  - Business domain classification
+- **Professional documentation generation**: Markdown output
+- **Real-world tested**: 356-table Oracle database (4,511 columns)
+
+### Added - SQL API Endpoints
+- `POST /api/v1/sql/parse`: Parse SQL schema files
+- `POST /api/v1/sql/analyze`: Analyze database structure
+- `GET /api/v1/sql/templates`: List available templates
+
+### Changed
+- Enhanced schema parsing with configurable templates
+- Improved relationship detection
+- Better error handling for malformed SQL
+
+### Documentation
+- Added SQL parsing user guide
+- Industry template documentation
+- Configuration examples
+
+## [0.2.0] - 2024-08-01
+
+### Added - Multi-Provider LLM Support
+- **Ollama integration**: Local LLM hosting (default)
+- **OpenAI integration**: GPT models and embeddings
+- **Google Gemini integration**: Gemini models and embeddings
+- **OpenRouter integration**: Multi-provider access
+- **HuggingFace embeddings**: Local embedding models
+- **Provider configuration**: Via `.env` file with flexible switching
+
+### Added - Enhanced Configuration
+- Environment-based configuration system
+- Support for multiple embedding providers
+- Configurable timeouts and limits
+- Feature flags (monitoring, Prometheus)
+
+### Changed
+- Refactored service initialization for multi-provider support
+- Improved LLM provider abstraction layer
+- Enhanced error messages for provider issues
+
+### Fixed
+- OpenAI API compatibility issues
+- Gemini embedding dimension mismatches
+- Provider-specific timeout handling
+
+## [0.1.0] - 2024-07-01
+
+### Added - Initial Release
+- **Core Features**:
+  - Neo4j GraphRAG integration
+  - Vector search with LlamaIndex
+  - Document processing (text, markdown, code)
+  - Knowledge graph construction
+  - Intelligent query engine
+  - RESTful API
+- **Code Analysis**:
+  - Python code parsing
+  - TypeScript/JavaScript parsing
+  - Import relationship mapping
+  - Basic code graph visualization
+- **Document Management**:
+  - Multi-format support
+  - Asynchronous processing
+  - Chunk-based indexing
+  - Vector similarity search
+- **API Endpoints**:
+  - `/api/v1/health`: Health check
+  - `/api/v1/knowledge/query`: Query knowledge base
+  - `/api/v1/knowledge/search`: Vector search
+  - `/api/v1/documents/upload`: Upload documents
+  - `/api/v1/documents/list`: List documents
+- **Infrastructure**:
+  - FastAPI backend
+  - Neo4j database
+  - Docker support
+  - Basic logging and error handling
+
+### Documentation
+- Initial README
+- API documentation
+- Basic deployment guide
+- Example scripts
+
+---
+
+## Version History Summary
+
+| Version | Release Date | Key Features |
+|---------|--------------|--------------|
+| 0.7.0   | 2025-01-15   | Automatic memory extraction (5 tools) |
+| 0.6.0   | 2024-12-20   | Memory Store, Multi-language (Java/PHP/Go), Docker modes |
+| 0.5.0   | 2024-11-15   | MCP protocol, Prometheus metrics, Modular architecture |
+| 0.4.0   | 2024-10-20   | Real-time monitoring, SSE, Large file handling |
+| 0.3.0   | 2024-09-15   | Universal SQL parser, Business domain templates |
+| 0.2.0   | 2024-08-01   | Multi-provider LLM support (Ollama/OpenAI/Gemini) |
+| 0.1.0   | 2024-07-01   | Initial release with core features |
+
+## Upgrade Guides
+
+### Upgrading from 0.6.x to 0.7.0
+
+**No breaking changes**. Simply pull the new Docker image and recreate the containers:
+
+```bash
+docker pull royisme/codebase-rag:0.7.0-full
+docker-compose up -d
+```
+
+**New Features Available:**
+- Memory extraction endpoints and MCP tools
+- Automatic memory mining from git and code
+
+### Upgrading from 0.5.x to 0.6.0
+
+**Breaking Changes:**
+- None, fully backward compatible
+
+**New Configuration Options:**
+```env
+# Optional: Enable memory features (included in standard/full modes)
+ENABLE_MEMORY_STORE=true
+```
+
+**Data Migration:**
+- No migration needed
+- Memory Store creates new nodes in existing Neo4j database
+
+### Upgrading from 0.4.x to 0.5.0
+
+**Breaking Changes:**
+- MCP server entry point changed from `mcp_server.py` to `start_mcp.py`
+
+**Configuration Update:**
+```json
+// claude_desktop_config.json
+{
+  "mcpServers": {
+    "code-graph": {
+      "command": "python",
+      "args": ["/path/to/start_mcp.py"]  // Changed from mcp_server.py
+    }
+  }
+}
+```
+
+**Data Migration:**
+- No database changes
+- MCP protocol fully backward compatible
+
+### Upgrading from 0.3.x to 0.4.0
+
+**No breaking changes**. New features are opt-in:
+
+```env
+# Enable monitoring UI
+ENABLE_MONITORING=true
+
+# Enable Prometheus metrics
+ENABLE_PROMETHEUS=true
+```
+
+## Migration Notes
+
+### Python Version Upgrade
+As of v0.6.0, Python 3.13+ is required. If upgrading from older versions:
+
+```bash
+# Update Python
+python3.13 -m venv .venv
+source .venv/bin/activate
+
+# Reinstall dependencies
+pip install --upgrade pip
+pip install -e .
+```
+
+### Neo4j Version Compatibility
+All versions support Neo4j 5.0+. No database migration needed between versions.
+
+### Environment Variables
+Check `.env.example` for new configuration options in each version.
+
+## Deprecation Notices
+
+### Deprecated in 0.7.0
+- None
+
+### Deprecated in 0.6.0
+- None
+
+### Deprecated in 0.5.0
+- **Old MCP server entry point** (`mcp_server.py`): Use `start_mcp.py` instead
+- Will be removed in: v1.0.0
+
+### Removed in 0.5.0
+- None
+
+## Contributing
+
+See [CONTRIBUTING.md](./development/contributing.md) for guidelines on contributing to this project.
+
+## Support
+
+- **Documentation**: https://code-graph.vantagecraft.dev
+- **Issues**: https://github.com/royisme/codebase-rag/issues
+- **Discussions**: https://github.com/royisme/codebase-rag/discussions
+
+## Links
+
+- [Homepage](https://code-graph.vantagecraft.dev)
+- [GitHub Repository](https://github.com/royisme/codebase-rag)
+- [Docker Hub](https://hub.docker.com/r/royisme/codebase-rag)
+- [Issue Tracker](https://github.com/royisme/codebase-rag/issues)
+
+---
+
+**Note**: Dates in this changelog are illustrative. Check [GitHub Releases](https://github.com/royisme/codebase-rag/releases) for actual release dates.
diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md
new file mode 100644
index 0000000..adff713
--- /dev/null
+++ b/docs/deployment/docker.md
@@ -0,0 +1,504 @@
+# Docker Deployment Guide
+
+Comprehensive guide for deploying Code Graph Knowledge System using Docker and Docker Compose.
+
+## Overview
+
+The system provides three Docker images:
+- `royisme/codebase-rag:minimal` - Code Graph only (smallest)
+- `royisme/codebase-rag:standard` - Code Graph + Memory
+- `royisme/codebase-rag:full` - All features (largest)
+
+## Docker Compose Files
+
+### Location
+- `docker-compose.yml` - Default (points to minimal)
+- `docker/docker-compose.minimal.yml` - Minimal mode
+- `docker/docker-compose.standard.yml` - Standard mode
+- `docker/docker-compose.full.yml` - Full mode with optional Ollama
+
+### Common Structure
+
+All compose files include:
+```yaml
+services:
+  neo4j:
+    image: neo4j:5-enterprise  # or neo4j:5-community
+    environment:
+      - NEO4J_AUTH=neo4j/password
+      - NEO4J_PLUGINS=["apoc"]
+    volumes:
+      - neo4j-data:/data
+    ports:
+      - "7474:7474"  # HTTP
+      - "7687:7687"  # Bolt
+
+  mcp:
+    image: royisme/codebase-rag:MODE
+    environment:
+      - NEO4J_URI=bolt://neo4j:7687
+      - DEPLOYMENT_MODE=MODE
+    volumes:
+      - ./repos:/repos
+      - ./data:/data
+    depends_on:
+      - neo4j
+```
+
+## Building Custom Images
+
+### Build from Source
+
+```bash
+# Clone repository
+git clone https://github.com/royisme/codebase-rag.git
+cd codebase-rag
+
+# Build minimal
+docker build -f docker/Dockerfile.minimal -t my-codebase-rag:minimal .
+
+# Build standard
+docker build -f docker/Dockerfile.standard -t my-codebase-rag:standard .
+
+# Build full
+docker build -f docker/Dockerfile.full -t my-codebase-rag:full .
+```
+
+### Build with Buildx (Multi-Platform)
+
+```bash
+# Create builder
+docker buildx create --name mybuilder --use
+
+# Build for multiple platforms
+docker buildx build \
+  --platform linux/amd64,linux/arm64 \
+  -f docker/Dockerfile.minimal \
+  -t my-codebase-rag:minimal \
+  --push \
+  .
+```
+
+## Volume Management
+
+### Important Volumes
+
+**1. Neo4j Data** (`neo4j-data`)
+```yaml
+volumes:
+  neo4j-data:
+    driver: local
+```
+
+Contains all graph database data. **Must be backed up regularly.**
+
+**2. Repository Mount** (`./repos:/repos`)
+```yaml
+volumes:
+  - ./repos:/repos:ro  # Read-only recommended
+```
+
+Mount local repositories for ingestion.
+
+**3. Application Data** (`./data:/data`)
+```yaml
+volumes:
+  - ./data:/data
+```
+
+Temporary files, logs, and processing data.
+
+### Backup Volumes
+
+```bash
+# Backup Neo4j data
+docker run --rm \
+  -v codebase-rag_neo4j-data:/data \
+  -v $(pwd)/backup:/backup \
+  alpine \
+  tar czf /backup/neo4j-backup-$(date +%Y%m%d).tar.gz /data
+
+# Restore from backup
+docker run --rm \
+  -v codebase-rag_neo4j-data:/data \
+  -v $(pwd)/backup:/backup \
+  alpine \
+  tar xzf /backup/neo4j-backup-20241106.tar.gz -C /
+```
+
+## Network Configuration
+
+### Default Network
+
+```yaml
+networks:
+  default:
+    name: codebase-rag-network
+```
+
+### Custom Network
+
+```yaml
+networks:
+  codebase-rag:
+    driver: bridge
+    ipam:
+      config:
+        - subnet: 172.28.0.0/16
+
+services:
+  neo4j:
+    networks:
+      codebase-rag:
+        ipv4_address: 172.28.0.10
+```
+
+### External Services
+
+Connect to external Ollama:
+
+```yaml
+services:
+  mcp:
+    environment:
+      - OLLAMA_BASE_URL=http://host.docker.internal:11434
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+```
+
+## Environment Variables
+
+### Core Variables
+
+```bash
+# Neo4j Connection
+NEO4J_URI=bolt://neo4j:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=<secure_password>
+NEO4J_DATABASE=neo4j
+
+# Deployment Mode
+DEPLOYMENT_MODE=minimal|standard|full
+ENABLE_KNOWLEDGE_RAG=true|false
+ENABLE_AUTO_EXTRACTION=true|false
+```
+
+### LLM Configuration
+
+```bash
+# Provider Selection
+LLM_PROVIDER=ollama|openai|gemini|openrouter
+EMBEDDING_PROVIDER=ollama|openai|gemini|huggingface
+
+# Ollama
+OLLAMA_BASE_URL=http://host.docker.internal:11434
+OLLAMA_MODEL=llama3.2
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# OpenAI
+OPENAI_API_KEY=sk-...
+OPENAI_MODEL=gpt-4o
+OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+
+# Gemini
+GOOGLE_API_KEY=AIza...
+GEMINI_MODEL=gemini-1.5-flash
+GEMINI_EMBEDDING_MODEL=models/embedding-001
+```
+
+### Performance Tuning
+
+```bash
+# Timeouts (seconds)
+CONNECTION_TIMEOUT=30
+OPERATION_TIMEOUT=300
+LARGE_DOCUMENT_TIMEOUT=600
+
+# Neo4j Memory
+NEO4J_server_memory_heap_initial__size=2G
+NEO4J_server_memory_heap_max__size=4G
+NEO4J_server_memory_pagecache_size=2G
+```
+
+## Docker Profiles
+
+Use profiles to optionally include services:
+
+```yaml
+services:
+  ollama:
+    profiles:
+      - with-ollama
+    image: ollama/ollama:latest
+```
+
+```bash
+# Start without Ollama
+docker-compose up -d
+
+# Start with Ollama
+docker-compose --profile with-ollama up -d
+```
+
+## Health Checks
+
+All images include health checks:
+
+```yaml
+services:
+  mcp:
+    healthcheck:
+      test: ["CMD", "curl", "-f", "http://localhost:8000/api/v1/health"]
+      interval: 30s
+      timeout: 10s
+      start_period: 40s
+      retries: 3
+```
+
+Check health:
+
+```bash
+# View health status
+docker ps
+
+# Check specific container
+docker inspect --format='{{.State.Health.Status}}' codebase-rag-mcp
+
+# View health logs
+docker inspect --format='{{range .State.Health.Log}}{{.Output}}{{end}}' codebase-rag-mcp
+```
+
+## Resource Limits
+
+### Memory Limits
+
+```yaml
+services:
+  mcp:
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+        reservations:
+          memory: 2G
+```
+
+### CPU Limits
+
+```yaml
+services:
+  mcp:
+    deploy:
+      resources:
+        limits:
+          cpus: '2.0'
+        reservations:
+          cpus: '1.0'
+```
+
+## Logging
+
+### Configure Logging Driver
+
+```yaml
+services:
+  mcp:
+    logging:
+      driver: "json-file"
+      options:
+        max-size: "10m"
+        max-file: "3"
+```
+
+### View Logs
+
+```bash
+# Follow logs
+docker-compose logs -f mcp
+
+# Last 100 lines
+docker-compose logs --tail=100 mcp
+
+# Since timestamp
+docker-compose logs --since 2024-11-06T10:00:00 mcp
+```
+
+## Multi-Stage Deployment
+
+### Development
+
+```yaml
+# docker-compose.dev.yml
+services:
+  mcp:
+    build:
+      context: .
+      dockerfile: docker/Dockerfile.minimal
+    volumes:
+      - .:/app  # Mount source code
+    environment:
+      - DEBUG=true
+```
+
+### Production
+
+```yaml
+# docker-compose.prod.yml
+services:
+  mcp:
+    image: royisme/codebase-rag:minimal
+    restart: unless-stopped
+    logging:
+      driver: "syslog"
+    deploy:
+      resources:
+        limits:
+          memory: 4G
+```
+
+## Security Best Practices
+
+### 1. Use Secrets
+
+```yaml
+services:
+  mcp:
+    secrets:
+      - neo4j_password
+      - openai_api_key
+
+secrets:
+  neo4j_password:
+    file: ./secrets/neo4j_password.txt
+  openai_api_key:
+    file: ./secrets/openai_api_key.txt
+```
+
+### 2. Non-Root User
+
+All images run as non-root user `appuser` (UID 1000).
+
+### 3. Read-Only Filesystem
+
+```yaml
+services:
+  mcp:
+    read_only: true
+    tmpfs:
+      - /tmp
+      - /app/temp
+```
+
+### 4. Network Isolation
+
+```yaml
+networks:
+  frontend:
+    driver: bridge
+  backend:
+    driver: bridge
+    internal: true  # No external access
+
+services:
+  mcp:
+    # Joins both networks: reachable via frontend, talks to neo4j via backend
+    networks: [frontend, backend]
+  neo4j:
+    networks:
+      - backend
+```
+
+## Updating Images
+
+### Pull Latest
+
+```bash
+# Pull latest image
+docker pull royisme/codebase-rag:minimal
+
+# Recreate containers
+docker-compose up -d --force-recreate mcp
+```
+
+### Zero-Downtime Update
+
+```bash
+# Scale up new version
+docker-compose up -d --scale mcp=2 --no-recreate
+
+# Remove old container
+docker stop codebase-rag-mcp-1
+docker rm codebase-rag-mcp-1
+
+# Scale back to 1
+docker-compose up -d --scale mcp=1
+```
+
+## Troubleshooting
+
+### Container Won't Start
+
+```bash
+# Check logs
+docker logs codebase-rag-mcp
+
+# Check health
+docker inspect codebase-rag-mcp
+
+# Try recreating
+docker-compose down
+docker-compose up -d
+```
+
+### Network Issues
+
+```bash
+# Test connectivity
+docker exec -it codebase-rag-mcp ping neo4j
+
+# Check network
+docker network inspect codebase-rag-network
+
+# Recreate network
+docker-compose down
+docker network prune
+docker-compose up -d
+```
+
+### Performance Issues
+
+```bash
+# Check resource usage
+docker stats
+
+# Check Neo4j performance
+docker exec -it codebase-rag-neo4j cypher-shell -u neo4j -p password
+# Run: SHOW TRANSACTIONS;
+
+# Increase resources in docker-compose.yml
+```
+
+## Advanced Patterns
+
+### Using Docker Swarm
+
+```bash
+# Initialize swarm
+docker swarm init
+
+# Deploy stack
+docker stack deploy -c docker-compose.yml codebase-rag
+
+# Scale service
+docker service scale codebase-rag_mcp=3
+```
+
+### Using Kubernetes
+
+See separate Kubernetes deployment guide (coming soon).
+
+## Next Steps
+
+- [Minimal Mode Guide](minimal.md) - Deploy minimal mode
+- [Production Setup](production.md) - Production best practices
+- [Troubleshooting](../troubleshooting.md) - Common issues
diff --git a/docs/deployment/full.md b/docs/deployment/full.md
new file mode 100644
index 0000000..80c8034
--- /dev/null
+++ b/docs/deployment/full.md
@@ -0,0 +1,466 @@
+# Full Mode Deployment
+
+Full Mode provides **all features** including Code Graph, Memory Store, Knowledge RAG, and LLM-powered auto-extraction.
+
+## Complete Feature Set
+
+### All Features Enabled
+- ✅ **Code Graph**: Repository indexing, search, impact analysis
+- ✅ **Memory Store**: Project knowledge with vector search
+- ✅ **Knowledge RAG**: Document processing and intelligent Q&A
+- ✅ **Auto-Extraction**: LLM-powered memory extraction from:
+  - Git commits
+  - Code comments (TODO, FIXME, NOTE)
+  - AI conversations
+  - Knowledge base queries
+
+### Use Cases
+- Full-featured AI coding assistant
+- Intelligent documentation systems
+- Automated knowledge capture
+- Enterprise code intelligence platform
+
+## System Requirements
+
+### With Local LLM (Ollama)
+- **CPU**: 8+ cores (16+ recommended)
+- **RAM**: 16GB minimum (32GB recommended)
+- **GPU**: Optional but highly recommended (8GB+ VRAM)
+- **Disk**: 100GB SSD
+
+### With Cloud LLM
+- **CPU**: 4 cores
+- **RAM**: 8GB
+- **Disk**: 50GB SSD
+- **API Access**: OpenAI, Gemini, or OpenRouter
+
+## Quick Start
+
+### 1. Choose LLM Provider
+
+=== "Ollama (Local, Private)"
+
+    ```bash
+    # Install Ollama
+    curl -fsSL https://ollama.com/install.sh | sh
+
+    # Pull models
+    ollama pull llama3.2          # 3B parameter model (default tag)
+    ollama pull nomic-embed-text  # Embedding model
+
+    # For better quality (requires more RAM)
+    # ollama pull mistral:7b
+    # ollama pull qwen2.5:14b
+    ```
+
+=== "OpenAI (Cloud, Best Quality)"
+
+    ```bash
+    # Get API key
+    # Visit: https://platform.openai.com/api-keys
+    export OPENAI_API_KEY=sk-proj-...
+    ```
+
+=== "Google Gemini (Cloud, Cost-Effective)"
+
+    ```bash
+    # Get API key
+    # Visit: https://makersuite.google.com/app/apikey
+    export GOOGLE_API_KEY=AIza...
+    ```
+
+=== "OpenRouter (Multi-Provider)"
+
+    ```bash
+    # Get API key
+    # Visit: https://openrouter.ai/keys
+    export OPENROUTER_API_KEY=sk-or-v1-...
+    ```
+
+### 2. Configure Environment
+
+```bash
+# Copy full template
+cp docker/.env.template/.env.full .env
+
+# Edit configuration
+nano .env
+```
+
+Example with Ollama:
+
+```bash
+# Neo4j
+NEO4J_URI=bolt://neo4j:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_secure_password
+NEO4J_DATABASE=neo4j
+
+# Deployment Mode - Enable all features
+DEPLOYMENT_MODE=full
+ENABLE_KNOWLEDGE_RAG=true
+ENABLE_AUTO_EXTRACTION=true
+
+# LLM Configuration
+LLM_PROVIDER=ollama
+OLLAMA_BASE_URL=http://host.docker.internal:11434
+OLLAMA_MODEL=llama3.2
+
+# Embedding Configuration
+EMBEDDING_PROVIDER=ollama
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+```
+
+### 3. Start Services
+
+=== "With Bundled Ollama"
+
+    ```bash
+    # Start with Ollama container included
+    make docker-full-with-ollama
+
+    # Or
+    docker-compose -f docker/docker-compose.full.yml --profile with-ollama up -d
+    ```
+
+=== "With External Ollama"
+
+    ```bash
+    # Start without Ollama (use system Ollama)
+    make docker-full
+
+    # Or
+    docker-compose -f docker/docker-compose.full.yml up -d
+    ```
+
+### 4. Verify Deployment
+
+```bash
+# Check all containers
+docker ps
+# Should see: mcp, neo4j, (optionally ollama)
+
+# Test LLM
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama3.2",
+  "prompt": "Hello, how are you?",
+  "stream": false
+}'
+
+# Test embedding
+curl http://localhost:11434/api/embeddings -d '{
+  "model": "nomic-embed-text",
+  "prompt": "test embedding"
+}'
+
+# Check service health (if using FastAPI)
+curl http://localhost:8000/api/v1/health
+```
+
+## Available MCP Tools
+
+Full mode provides **30 tools** across 6 categories:
+
+### Code Graph Tools (4)
+- `code_graph_ingest_repo`
+- `code_graph_fulltext_search`
+- `code_graph_impact_analysis`
+- `code_graph_pack_context`
+
+### Memory Management Tools (7)
+- `add_memory`
+- `search_memories`
+- `get_memory`
+- `update_memory`
+- `delete_memory`
+- `supersede_memory`
+- `get_project_summary`
+
+### Auto-Extraction Tools (5) - New!
+- `extract_from_conversation`
+- `extract_from_git_commit`
+- `extract_from_code_comments`
+- `suggest_memory_from_query`
+- `batch_extract_from_repository`
+
+### Knowledge RAG Tools (8) - New!
+- `knowledge_add_document`
+- `knowledge_add_directory`
+- `knowledge_query`
+- `knowledge_search`
+- `knowledge_list_documents`
+- `knowledge_delete_document`
+- `knowledge_update_document`
+- `knowledge_get_stats`
+
+### Task Queue Tools (4)
+- `task_submit`
+- `task_status`
+- `task_cancel`
+- `list_tasks`
+
+### System Tools (2)
+- `health_check`
+- `system_info`
+
+## Advanced Features
+
+### Auto-Extraction from Git Commits
+
+Automatically extract decisions and learnings:
+
+```json
+{
+  "tool": "extract_from_git_commit",
+  "input": {
+    "project_id": "myapp",
+    "commit_sha": "abc123...",
+    "commit_message": "feat: implement JWT authentication\n\nAdded JWT middleware for API auth",
+    "changed_files": ["src/auth/jwt.py", "src/middleware/auth.py"],
+    "auto_save": true
+  }
+}
+```
+
+### Mine Code Comments
+
+Extract TODOs and decisions from code:
+
+```json
+{
+  "tool": "extract_from_code_comments",
+  "input": {
+    "project_id": "myapp",
+    "file_path": "src/api/routes.py"
+  }
+}
+```
+
+### Conversation Analysis
+
+Extract memories from AI conversations:
+
+```json
+{
+  "tool": "extract_from_conversation",
+  "input": {
+    "project_id": "myapp",
+    "conversation": [
+      {"role": "user", "content": "Should we use Redis or Memcached?"},
+      {"role": "assistant", "content": "Redis is better because..."}
+    ],
+    "auto_save": false
+  }
+}
+```
+
+### Knowledge RAG
+
+Process and query documents:
+
+```json
+{
+  "tool": "knowledge_add_document",
+  "input": {
+    "file_path": "/docs/architecture.md",
+    "metadata": {"type": "architecture", "version": "1.0"}
+  }
+}
+
+{
+  "tool": "knowledge_query",
+  "input": {
+    "query": "How does the authentication system work?",
+    "max_results": 5
+  }
+}
+```
+
+### Batch Repository Extraction
+
+Comprehensive analysis:
+
+```json
+{
+  "tool": "batch_extract_from_repository",
+  "input": {
+    "project_id": "myapp",
+    "repo_path": "/repos/myapp",
+    "max_commits": 100,
+    "file_patterns": ["*.py", "*.js", "*.go"]
+  }
+}
+```
+
+## LLM Provider Comparison
+
+### Ollama (Local)
+
+**Pros**:
+- Free and private
+- No API limits
+- Works offline
+- Full control
+
+**Cons**:
+- Requires powerful hardware
+- Slower than cloud
+- Manual model management
+
+**Recommended Models**:
+- `llama3.2` (3B) - Good balance
+- `mistral` (7B) - Fast
+- `qwen2.5` (14B) - Better quality (needs 16GB+ RAM)
+
+### OpenAI
+
+**Pros**:
+- Best quality
+- Fast responses
+- No infrastructure needed
+
+**Cons**:
+- Costs money
+- Requires internet
+- Data sent to OpenAI
+
+**Cost** (Nov 2024):
+- GPT-4o: $5/$15 per 1M tokens (in/out)
+- GPT-4o-mini: $0.15/$0.60 per 1M tokens
+- Embeddings: $0.02 per 1M tokens
+
+### Google Gemini
+
+**Pros**:
+- Cost-effective
+- Good quality
+- Fast
+
+**Cons**:
+- Requires internet
+- Data sent to Google
+
+**Cost**:
+- Gemini 1.5 Flash: Lower cost
+- Gemini 1.5 Pro: Higher quality
+- Free tier available
+
+## Performance Optimization
+
+### Ollama GPU Acceleration
+
+```yaml
+# Add to docker-compose.full.yml
+services:
+  ollama:
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
+```
+
+### Neo4j Performance for Large Scale
+
+```bash
+# In docker-compose.full.yml
+NEO4J_server_memory_heap_initial__size=4G
+NEO4J_server_memory_heap_max__size=8G
+NEO4J_server_memory_pagecache_size=4G
+NEO4J_dbms_memory_transaction_total_max=2G
+```
+
+### LLM Context Optimization
+
+```yaml
+# Use context packing to stay within token limits
+tool: code_graph_pack_context
+input: {
+  "entry_points": ["src/main.py"],
+  "task_type": "implement",
+  "token_budget": 8000  # Adjust based on model
+}
+```
+
+## Cost Estimation
+
+### Local Deployment (Ollama)
+- **VPS**: $40-80/month (32GB RAM, 8 cores)
+- **GPU VPS**: $100-200/month (with GPU)
+- **LLM**: $0
+- **Embeddings**: $0
+- **Total**: $40-200/month
+
+### Cloud Deployment (OpenAI)
+- **VPS**: $10-20/month (8GB RAM)
+- **LLM**: $20-100/month (depends on usage)
+- **Embeddings**: $1-5/month
+- **Total**: $31-125/month
+
+### Hybrid (Ollama Embeddings + OpenAI LLM)
+- **VPS**: $10-20/month
+- **LLM**: $20-100/month
+- **Embeddings**: $0 (local)
+- **Total**: $30-120/month
+
+## Production Deployment
+
+See [Production Setup Guide](production.md) for:
+- High availability configuration
+- Backup strategies
+- Monitoring setup
+- Security hardening
+- Scaling considerations
+
+## Troubleshooting
+
+### LLM Generation Fails
+
+```bash
+# Check Ollama
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama3.2",
+  "prompt": "test"
+}'
+
+# Check model is pulled
+ollama list
+
+# View Ollama logs
+docker logs codebase-rag-ollama
+```
+
+### Out of Memory Errors
+
+```bash
+# Check memory usage
+docker stats
+
+# Reduce model size
+ollama pull llama3.2:1b  # Smaller 1B model
+
+# Or increase Docker memory limit
+# Docker Desktop: Settings → Resources → Memory
+```
+
+### Slow Response Times
+
+```bash
+# Enable GPU acceleration (if available)
+# Check GPU is detected
+nvidia-smi
+
+# Or switch to smaller model
+OLLAMA_MODEL=mistral  # 7B instead of the 14B qwen2.5
+
+# Or use cloud LLM for faster responses
+LLM_PROVIDER=openai
+```
+
+## Next Steps
+
+- [Knowledge RAG Guide](../guide/knowledge/overview.md) - Document processing
+- [Auto-Extraction Guide](../guide/memory/extraction.md) - Automated memory capture
+- [Production Setup](production.md) - Deploy at scale
diff --git a/docs/deployment/minimal.md b/docs/deployment/minimal.md
new file mode 100644
index 0000000..27f04f2
--- /dev/null
+++ b/docs/deployment/minimal.md
@@ -0,0 +1,340 @@
+# Minimal Mode Deployment
+
+Minimal Mode provides **Code Graph functionality only** - no LLM or embedding model required. Perfect for:
+
+- Resource-constrained environments
+- Privacy-sensitive projects
+- Cost-conscious deployments
+- Pure code analysis without AI
+
+## Features Available
+
+### ✅ What's Included
+
+- **Repository Ingestion**: Parse and index code repositories
+- **Fulltext Search**: Fast code search using Neo4j native indexes
+- **Graph Traversal**: Navigate code relationships (calls, imports, inheritance)
+- **Impact Analysis**: Find what code depends on a given symbol
+- **Context Packing**: Intelligently select relevant code for LLM context
+
+### ❌ What's Not Included
+
+- Vector similarity search (no embeddings)
+- Memory Store for AI agents
+- LLM-powered auto-extraction
+- Knowledge RAG document Q&A
+
+## System Requirements
+
+### Minimum
+- **CPU**: 2 cores
+- **RAM**: 4GB
+- **Disk**: 10GB SSD
+- **Docker**: 20.10+
+- **Neo4j**: 5.0+ (included)
+
+### Recommended
+- **CPU**: 4 cores
+- **RAM**: 8GB
+- **Disk**: 50GB SSD
+
+## Quick Start
+
+### 1. Clone and Configure
+
+```bash
+# Clone repository
+git clone https://github.com/royisme/codebase-rag.git
+cd codebase-rag
+
+# Copy minimal environment template
+cp docker/.env.template/.env.minimal .env
+
+# Edit configuration
+nano .env
+```
+
+### 2. Configure Environment
+
+Edit `.env`:
+
+```bash
+# Neo4j Configuration (required)
+NEO4J_URI=bolt://neo4j:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=change_this_password  # ⚠️ Change this!
+NEO4J_DATABASE=neo4j
+
+# Deployment Mode
+DEPLOYMENT_MODE=minimal
+ENABLE_KNOWLEDGE_RAG=false
+ENABLE_AUTO_EXTRACTION=false
+```
+
+### 3. Start Services
+
+```bash
+# Using Makefile (recommended)
+make docker-minimal
+
+# Or using docker-compose directly
+docker-compose -f docker/docker-compose.minimal.yml up -d
+
+# Or using helper script
+./scripts/docker-deploy.sh
+# Choose option 1: Minimal
+```
+
+### 4. Verify Deployment
+
+```bash
+# Check containers
+docker ps
+# Should show: codebase-rag-mcp-minimal and codebase-rag-neo4j
+
+# Check Neo4j
+docker exec -it codebase-rag-neo4j cypher-shell -u neo4j -p your_password
+# Run: RETURN 'Connected' as status;
+
+# View logs
+docker logs codebase-rag-mcp-minimal
+```
+
+## MCP Client Configuration
+
+Configure Claude Desktop or VS Code to use the minimal MCP server:
+
+### Claude Desktop
+
+Edit `~/Library/Application Support/Claude/claude_desktop_config.json`:
+
+```json
+{
+  "mcpServers": {
+    "codebase-rag-minimal": {
+      "command": "docker",
+      "args": [
+        "exec",
+        "-i",
+        "codebase-rag-mcp-minimal",
+        "python",
+        "start_mcp.py",
+        "--mode=minimal"
+      ]
+    }
+  }
+}
+```
+
+### VS Code with MCP Extension
+
+Add to VS Code settings:
+
+```json
+{
+  "mcp.servers": {
+    "codebase-rag-minimal": {
+      "command": "docker",
+      "args": ["exec", "-i", "codebase-rag-mcp-minimal", "python", "start_mcp.py", "--mode=minimal"],
+      "type": "stdio"
+    }
+  }
+}
+```
+
+## Available MCP Tools
+
+Minimal mode provides 4 core Code Graph tools:
+
+### 1. code_graph_ingest_repo
+
+Index a code repository:
+
+```json
+{
+  "local_path": "/repos/myproject",
+  "mode": "full"
+}
+```
+
+### 2. code_graph_fulltext_search
+
+Search code by keywords:
+
+```json
+{
+  "query": "authentication middleware",
+  "language": "python",
+  "limit": 20
+}
+```
+
+### 3. code_graph_impact_analysis
+
+Find code dependencies:
+
+```json
+{
+  "symbol": "UserService.authenticate",
+  "direction": "reverse"
+}
+```
+
+### 4. code_graph_pack_context
+
+Build intelligent context for LLM:
+
+```json
+{
+  "entry_points": ["src/api/routes.py"],
+  "task_type": "implement",
+  "token_budget": 8000
+}
+```
+
+## Usage Examples
+
+### Example 1: Index and Search
+
+```bash
+# 1. Ingest repository
+# (Via Claude or MCP client)
+Tool: code_graph_ingest_repo
+Input: {"local_path": "/repos/myapp", "mode": "full"}
+
+# 2. Search for authentication code
+Tool: code_graph_fulltext_search
+Input: {"query": "JWT token validation", "language": "python"}
+
+# 3. Analyze impact of changing auth function
+Tool: code_graph_impact_analysis
+Input: {"symbol": "validate_token", "direction": "reverse"}
+```
+
+### Example 2: Prepare Context for Code Review
+
+```bash
+# Pack relevant context for reviewing auth changes
+Tool: code_graph_pack_context
+Input: {
+  "entry_points": ["src/auth/jwt.py", "src/middleware/auth.py"],
+  "task_type": "review",
+  "token_budget": 12000
+}
+```
+
+## Performance Optimization
+
+### Neo4j Tuning
+
+For large repositories, adjust Neo4j memory in `docker/docker-compose.minimal.yml`:
+
+```yaml
+services:
+  neo4j:
+    environment:
+      - NEO4J_server_memory_heap_initial__size=2G
+      - NEO4J_server_memory_heap_max__size=4G
+      - NEO4J_server_memory_pagecache_size=2G
+```
+
+### Ingestion Performance
+
+```bash
+# Incremental updates for large repos
+Tool: code_graph_ingest_repo
+Input: {"local_path": "/repos/myapp", "mode": "incremental"}
+
+# Full re-index when needed
+Input: {"local_path": "/repos/myapp", "mode": "full"}
+```
+
+## Monitoring
+
+Check system health:
+
+```bash
+# Container stats
+docker stats codebase-rag-mcp-minimal codebase-rag-neo4j
+
+# Neo4j query performance
+docker exec -it codebase-rag-neo4j cypher-shell -u neo4j -p password
+# Run: SHOW TRANSACTIONS;  (dbms.listQueries() was removed in Neo4j 5)
+
+# View ingestion logs
+docker logs -f codebase-rag-mcp-minimal
+```
+
+## Upgrading to Standard/Full Mode
+
+When you need more features:
+
+```bash
+# Stop minimal mode
+docker-compose -f docker/docker-compose.minimal.yml down
+
+# Copy and configure for standard mode
+cp docker/.env.template/.env.standard .env
+nano .env  # Add embedding configuration
+
+# Start standard mode
+docker-compose -f docker/docker-compose.standard.yml up -d
+```
+
+Your Neo4j data persists, so existing code graphs are preserved.
+
+## Troubleshooting
+
+### Neo4j Connection Failed
+
+```bash
+# Check Neo4j status
+docker logs codebase-rag-neo4j
+
+# Verify Neo4j is ready
+docker exec codebase-rag-neo4j neo4j status
+
+# Test connection
+docker exec -it codebase-rag-neo4j cypher-shell -u neo4j -p password
+```
+
+### Ingestion Stuck
+
+```bash
+# Check MCP server logs
+docker logs codebase-rag-mcp-minimal
+
+# Check disk space
+df -h
+
+# Restart if needed
+docker restart codebase-rag-mcp-minimal
+```
+
+### Poor Search Results
+
+```bash
+# Rebuild fulltext indexes
+docker exec -it codebase-rag-neo4j cypher-shell -u neo4j -p password
+
+# Run these queries:
+DROP INDEX code_search IF EXISTS;
+CREATE FULLTEXT INDEX code_search FOR (n:Function|Class) ON EACH [n.name, n.content];
+```
+
+## Cost Analysis
+
+Minimal mode is the most cost-effective option:
+
+- **Infrastructure**: ~$5-10/month (small VPS)
+- **LLM costs**: $0 (no LLM required)
+- **Embedding costs**: $0 (no embeddings)
+- **Total**: ~$5-10/month for hosting only
+
+Perfect for individual developers and small teams!
+
+## Next Steps
+
+- [Docker Guide](docker.md) - Advanced Docker configuration
+- [Code Graph User Guide](../guide/code-graph/overview.md) - Learn all features
+- [Production Setup](production.md) - Deploy to production
diff --git a/docs/deployment/overview.md b/docs/deployment/overview.md
new file mode 100644
index 0000000..0d78941
--- /dev/null
+++ b/docs/deployment/overview.md
@@ -0,0 +1,355 @@
+# Deployment Overview
+
+Choose the right deployment mode based on your needs and available infrastructure.
+
+## 🎯 Deployment Modes
+
+### Minimal - Code Graph Only
+
+**Perfect for**: Developers who want code intelligence without LLM overhead
+
+```yaml
+Requirements:
+  - Neo4j database
+  - Docker & docker-compose
+  - No LLM needed ✓
+  - No embedding model needed ✓
+
+Resources:
+  - Image size: ~500MB
+  - Memory: ~1GB RAM
+  - Startup time: ~5 seconds
+```
+
+**Available Features**:
+
+- ✅ Repository ingestion and code parsing
+- ✅ File relationship discovery (imports, dependencies)
+- ✅ Impact analysis (who depends on this file?)
+- ✅ Context packing for AI assistants
+- ✅ Full-text search on file paths and content
+- ❌ Memory Store
+- ❌ Knowledge RAG
+- ❌ Auto-extraction
+
+**Use When**:
+
+- You want code navigation and analysis only
+- You don't need LLM-powered features
+- You're working in air-gapped environments
+- You want minimal resource usage
+
+[→ Minimal Deployment Guide](minimal.md){ .md-button .md-button--primary }
+
+---
+
+### Standard - Code Graph + Memory
+
+**Perfect for**: Teams building project knowledge bases
+
+```yaml
+Requirements:
+  - Neo4j database
+  - Docker & docker-compose
+  - Embedding model (Ollama/OpenAI/Gemini) ✓
+  - No LLM needed ✓
+
+Resources:
+  - Image size: ~600MB
+  - Memory: ~2GB RAM
+  - Startup time: ~8 seconds
+```
+
+**Available Features**:
+
+- ✅ All Minimal features
+- ✅ Manual memory management (add/update/delete)
+- ✅ Vector-based memory search
+- ✅ Project memory summaries
+- ✅ Memory superseding (track decision changes)
+- ❌ Auto-extraction from git/conversations
+- ❌ Knowledge RAG
+
+**Use When**:
+
+- You want to maintain project decision logs
+- You need searchable team knowledge
+- You have access to an embedding service
+- You prefer manual curation over auto-extraction
+
+[→ Standard Deployment Guide](standard.md){ .md-button .md-button--primary }
+
+---
+
+### Full - All Features
+
+**Perfect for**: Teams wanting complete AI-powered capabilities
+
+```yaml
+Requirements:
+  - Neo4j database
+  - Docker & docker-compose
+  - LLM (Ollama/OpenAI/Gemini/OpenRouter) ✓
+  - Embedding model ✓
+
+Resources:
+  - Image size: ~800MB
+  - Memory: ~4GB RAM (+ LLM requirements)
+  - Startup time: ~15 seconds
+```
+
+**Available Features**:
+
+- ✅ All Standard features
+- ✅ Automatic memory extraction from:
+    - Git commits
+    - AI conversations
+    - Code comments (TODO/FIXME/NOTE)
+    - Q&A sessions
+- ✅ Knowledge base RAG:
+    - Document ingestion
+    - Intelligent Q&A
+    - Multi-format support
+- ✅ Batch repository analysis
+
+**Use When**:
+
+- You want fully automated knowledge extraction
+- You need document Q&A capabilities
+- You have LLM infrastructure available
+- You want maximum AI assistance
+
+[→ Full Deployment Guide](full.md){ .md-button .md-button--primary }
+
+---
+
+## 🔄 Mode Comparison Matrix
+
+| Feature Category | Minimal | Standard | Full |
+|------------------|---------|----------|------|
+| **Code Graph** |
+| Repository ingestion | ✅ | ✅ | ✅ |
+| Incremental updates | ✅ | ✅ | ✅ |
+| File search | ✅ | ✅ | ✅ |
+| Impact analysis | ✅ | ✅ | ✅ |
+| Context packing | ✅ | ✅ | ✅ |
+| **Memory Store** |
+| Add memory | ❌ | ✅ | ✅ |
+| Search memories | ❌ | ✅ (vector) | ✅ (vector) |
+| Update/delete | ❌ | ✅ | ✅ |
+| Supersede | ❌ | ✅ | ✅ |
+| Extract from git | ❌ | ❌ | ✅ (LLM) |
+| Extract from chat | ❌ | ❌ | ✅ (LLM) |
+| Extract from code | ❌ | ❌ | ✅ (LLM) |
+| **Knowledge RAG** |
+| Add documents | ❌ | ❌ | ✅ |
+| Query knowledge | ❌ | ❌ | ✅ (LLM) |
+| Vector search | ❌ | ❌ | ✅ |
+| **Infrastructure** |
+| Neo4j | Required | Required | Required |
+| Embedding | - | Required | Required |
+| LLM | - | - | Required |
+| **Performance** |
+| Image size | 500MB | 600MB | 800MB |
+| RAM usage | 1GB | 2GB | 4GB+ |
+| Startup time | 5s | 8s | 15s |
+
+## 🏗️ Architecture Diagrams
+
+### Minimal Mode Architecture
+
+```mermaid
+graph TB
+    subgraph "Client"
+        A[Claude Desktop / API Client]
+    end
+
+    subgraph "Docker Network"
+        B[MCP Server<br/>Minimal]
+        C[(Neo4j<br/>Graph DB)]
+    end
+
+    subgraph "Code Graph Services"
+        D[Code Ingestor]
+        E[Graph Service]
+        F[Ranker]
+        G[Pack Builder]
+    end
+
+    A -->|MCP/REST| B
+    B --> D
+    B --> E
+    B --> F
+    B --> G
+    D -->|Store| C
+    E -->|Query| C
+
+    style B fill:#90EE90
+    style C fill:#87CEEB
+```
+
+### Standard Mode Architecture
+
+```mermaid
+graph TB
+    subgraph "Client"
+        A[Claude Desktop / API Client]
+    end
+
+    subgraph "Docker Network"
+        B[MCP Server<br/>Standard]
+        C[(Neo4j<br/>Graph DB)]
+    end
+
+    subgraph "Code Graph Services"
+        D[Code Ingestor]
+        E[Graph Service]
+    end
+
+    subgraph "Memory Services"
+        F[Memory Store]
+    end
+
+    subgraph "External"
+        G[Embedding Service<br/>Ollama/OpenAI]
+    end
+
+    A -->|MCP/REST| B
+    B --> D
+    B --> E
+    B --> F
+    D -->|Store| C
+    E -->|Query| C
+    F -->|Store/Search| C
+    F -->|Vectorize| G
+
+    style B fill:#FFD700
+    style C fill:#87CEEB
+    style G fill:#FFA07A
+```
+
+### Full Mode Architecture
+
+```mermaid
+graph TB
+    subgraph "Client"
+        A[Claude Desktop / API Client]
+    end
+
+    subgraph "Docker Network"
+        B[MCP Server<br/>Full]
+        C[(Neo4j<br/>Graph DB)]
+        D[Ollama<br/>Optional]
+    end
+
+    subgraph "All Services"
+        E[Code Graph]
+        F[Memory Store]
+        G[Knowledge RAG]
+        H[Memory Extractor]
+    end
+
+    subgraph "External/Optional"
+        I[LLM Service<br/>OpenAI/Gemini]
+        J[Embedding Service]
+    end
+
+    A -->|MCP/REST| B
+    B --> E
+    B --> F
+    B --> G
+    B --> H
+    E -->|Store| C
+    F -->|Store/Search| C
+    G -->|Store/Query| C
+    F -->|Vectorize| J
+    G -->|Generate| I
+    H -->|Analyze| I
+
+    D -.->|Local LLM| I
+    D -.->|Local Embed| J
+
+    style B fill:#FF6347
+    style C fill:#87CEEB
+    style D fill:#DDA0DD
+```
+
+## 🚀 Quick Decision Guide
+
+Use this flowchart to choose your deployment mode:
+
+```mermaid
+graph TD
+    A[Start] --> B{Do you need<br/>LLM features?}
+    B -->|No| C{Do you need<br/>memory search?}
+    B -->|Yes| D[Full Mode]
+    C -->|No| E[Minimal Mode]
+    C -->|Yes| F{Can you provide<br/>embedding service?}
+    F -->|Yes| G[Standard Mode]
+    F -->|No| E
+
+    E --> H[✓ Code Graph only<br/>✓ No external deps<br/>✓ Fast & lightweight]
+    G --> I[✓ Code Graph<br/>✓ Memory Store<br/>⚠ Need embedding]
+    D --> J{Do you have<br/>local GPU?}
+    J -->|Yes| K[Use with-ollama profile]
+    J -->|No| L[Use cloud LLM]
+    K --> M[✓ All features<br/>✓ Self-hosted<br/>⚠ High resources]
+    L --> N[✓ All features<br/>✓ Lower resources<br/>⚠ API costs]
+
+    style E fill:#90EE90
+    style G fill:#FFD700
+    style K fill:#FF6347
+    style L fill:#FF6347
+```
+
+## 📋 Pre-Deployment Checklist
+
+### For All Modes
+
+- [ ] Docker installed (version 20.10+)
+- [ ] docker-compose installed (version 1.29+)
+- [ ] At least 4GB free disk space
+- [ ] Ports 7474, 7687, 8000 available
+- [ ] `.env` file configured
+
+### Additional for Standard Mode
+
+- [ ] Embedding service available:
+    - [ ] Local Ollama running, or
+    - [ ] OpenAI API key, or
+    - [ ] Google API key for Gemini
+
+### Additional for Full Mode
+
+- [ ] LLM service available:
+    - [ ] Local Ollama running, or
+    - [ ] OpenAI API key, or
+    - [ ] Google API key, or
+    - [ ] OpenRouter API key
+- [ ] Embedding service (same as Standard)
+- [ ] For local Ollama: GPU with 8GB+ VRAM (optional but recommended)
+
+## 🔄 Switching Between Modes
+
+You can switch deployment modes at any time. Data in Neo4j is preserved.
+
+```bash
+# Stop current deployment
+make docker-stop
+
+# Start different mode
+make docker-minimal    # or
+make docker-standard   # or
+make docker-full
+```
+
+!!! warning "Configuration Required"
+    When switching to Standard or Full mode, update your `.env` file with required API keys and service URLs.
+
+## 📚 Next Steps
+
+- [Minimal Deployment Guide](minimal.md)
+- [Standard Deployment Guide](standard.md)
+- [Full Deployment Guide](full.md)
+- [Production Setup](production.md)
+- [Docker Guide](docker.md)
diff --git a/docs/deployment/production.md b/docs/deployment/production.md
new file mode 100644
index 0000000..a60f22f
--- /dev/null
+++ b/docs/deployment/production.md
@@ -0,0 +1,515 @@
+# Production Deployment
+
+This guide covers deploying the Code Graph Knowledge System to production, including documentation hosting on vantagecraft.dev.
+
+## 📝 Documentation Deployment (vantagecraft.dev)
+
+### Option 1: GitHub Pages (Recommended)
+
+Deploy documentation automatically using GitHub Actions.
+
+#### Prerequisites
+
+- GitHub repository
+- Domain `vantagecraft.dev` with DNS access
+
+#### Step 1: Configure DNS
+
+Add a CNAME record for your documentation subdomain:
+
+```dns
+Type: CNAME
+Name: code-graph
+Value: royisme.github.io
+TTL: 3600
+```
+
+Or for root domain:
+
+```dns
+Type: A
+Name: @
+Value: 185.199.108.153
+Value: 185.199.109.153
+Value: 185.199.110.153
+Value: 185.199.111.153
+```
+
+#### Step 2: Configure GitHub Pages
+
+1. Create `docs/CNAME` file:
+
+```bash
+echo "code-graph.vantagecraft.dev" > docs/CNAME
+```
+
+2. Enable GitHub Pages in repository settings:
+   - Go to Settings → Pages
+   - Source: GitHub Actions
+
+#### Step 3: Deploy
+
+The GitHub Actions workflow will automatically deploy on push to main:
+
+```bash
+git add .
+git commit -m "Add documentation"
+git push origin main
+```
+
+Your documentation will be available at: **https://code-graph.vantagecraft.dev**
+
+### Option 2: Self-Hosted (Nginx)
+
+Host documentation on your own server.
+
+#### Prerequisites
+
+- Server with Nginx
+- Domain configured
+- SSL certificate (Let's Encrypt recommended)
+
+#### Step 1: Build Documentation
+
+```bash
+# Install dependencies
+pip install mkdocs-material mkdocs-minify-plugin mkdocs-git-revision-date-localized-plugin
+
+# Build
+mkdocs build
+
+# Output in site/ directory
+```
+
+#### Step 2: Configure Nginx
+
+```nginx
+# /etc/nginx/sites-available/code-graph.vantagecraft.dev
+
+server {
+    listen 80;
+    server_name code-graph.vantagecraft.dev;
+    return 301 https://$server_name$request_uri;
+}
+
+server {
+    listen 443 ssl http2;
+    server_name code-graph.vantagecraft.dev;
+
+    ssl_certificate /etc/letsencrypt/live/code-graph.vantagecraft.dev/fullchain.pem;
+    ssl_certificate_key /etc/letsencrypt/live/code-graph.vantagecraft.dev/privkey.pem;
+
+    root /var/www/code-graph.vantagecraft.dev;
+    index index.html;
+
+    location / {
+        try_files $uri $uri/ =404;
+    }
+
+    # Gzip compression
+    gzip on;
+    gzip_vary on;
+    gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
+
+    # Cache static assets
+    location ~* \.(jpg|jpeg|png|gif|ico|css|js|svg|woff|woff2)$ {
+        expires 1y;
+        add_header Cache-Control "public, immutable";
+    }
+}
+```
+
+#### Step 3: Deploy
+
+```bash
+# Copy built site to server
+rsync -avz site/ user@server:/var/www/code-graph.vantagecraft.dev/
+
+# Reload Nginx
+ssh user@server 'sudo nginx -t && sudo systemctl reload nginx'
+```
+
+#### Step 4: SSL Certificate (Let's Encrypt)
+
+```bash
+# On server
+sudo apt install certbot python3-certbot-nginx
+
+# Obtain certificate
+sudo certbot --nginx -d code-graph.vantagecraft.dev
+
+# Auto-renewal is configured automatically
+```
+
+### Option 3: Cloudflare Pages
+
+Deploy to Cloudflare Pages for global CDN.
+
+#### Step 1: Connect Repository
+
+1. Go to Cloudflare Pages dashboard
+2. Create new project from GitHub
+3. Select your repository
+
+#### Step 2: Configure Build
+
+```yaml
+Build command: mkdocs build
+Build output directory: site
+Root directory: /
+```
+
+#### Step 3: Custom Domain
+
+1. Add custom domain: `code-graph.vantagecraft.dev`
+2. Cloudflare will configure DNS automatically
+
+---
+
+## 🚀 Application Production Deployment
+
+### Docker Swarm Deployment
+
+For production workloads, use Docker Swarm or Kubernetes.
+
+#### Single Node Setup
+
+```bash
+# Initialize swarm
+docker swarm init
+
+# Deploy stack
+docker stack deploy -c docker-compose.prod.yml codebase-rag
+```
+
+#### Stack Configuration
+
+```yaml
+# docker-compose.prod.yml
+version: '3.8'
+
+services:
+  neo4j:
+    image: neo4j:5.15-enterprise
+    deploy:
+      replicas: 1
+      resources:
+        limits:
+          memory: 4G
+        reservations:
+          memory: 2G
+      restart_policy:
+        condition: on-failure
+        delay: 5s
+        max_attempts: 3
+    volumes:
+      - neo4j_data:/data
+    environment:
+      - NEO4J_ACCEPT_LICENSE_AGREEMENT=yes
+      # ... other production configs
+
+  mcp:
+    image: royisme/codebase-rag:full
+    deploy:
+      replicas: 2
+      update_config:
+        parallelism: 1
+        delay: 10s
+      resources:
+        limits:
+          memory: 2G
+        reservations:
+          memory: 1G
+    environment:
+      # Production environment variables
+```
+
+### Kubernetes Deployment
+
+#### Prerequisites
+
+- Kubernetes cluster (1.24+)
+- kubectl configured
+- Helm 3+
+
+#### Step 1: Create Namespace
+
+```bash
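+kubectl create namespace codebase-rag
+
+# Create the Neo4j credentials secret referenced by the manifests below
+kubectl create secret generic neo4j-auth \
+  --from-literal=auth=neo4j/your_secure_password \
+  --from-literal=password=your_secure_password \
+  -n codebase-rag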
+```
+
+#### Step 2: Deploy Neo4j
+
+```yaml
+# neo4j-deployment.yaml
+apiVersion: apps/v1
+kind: StatefulSet
+metadata:
+  name: neo4j
+  namespace: codebase-rag
+spec:
+  serviceName: neo4j
+  replicas: 1
+  selector:
+    matchLabels:
+      app: neo4j
+  template:
+    metadata:
+      labels:
+        app: neo4j
+    spec:
+      containers:
+      - name: neo4j
+        image: neo4j:5.15-community
+        ports:
+        - containerPort: 7474
+          name: http
+        - containerPort: 7687
+          name: bolt
+        env:
+        - name: NEO4J_AUTH
+          valueFrom:
+            secretKeyRef:
+              name: neo4j-auth
+              key: auth
+        volumeMounts:
+        - name: data
+          mountPath: /data
+  volumeClaimTemplates:
+  - metadata:
+      name: data
+    spec:
+      accessModes: [ "ReadWriteOnce" ]
+      resources:
+        requests:
+          storage: 10Gi
+```
+
+#### Step 3: Deploy Application
+
+```yaml
+# mcp-deployment.yaml
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: mcp-full
+  namespace: codebase-rag
+spec:
+  replicas: 3
+  selector:
+    matchLabels:
+      app: mcp-full
+  template:
+    metadata:
+      labels:
+        app: mcp-full
+    spec:
+      containers:
+      - name: mcp-full
+        image: royisme/codebase-rag:full
+        ports:
+        - containerPort: 8000
+        env:
+        - name: NEO4J_URI
+          value: "bolt://neo4j:7687"
+        - name: NEO4J_PASSWORD
+          valueFrom:
+            secretKeyRef:
+              name: neo4j-auth
+              key: password
+        resources:
+          requests:
+            memory: "1Gi"
+            cpu: "500m"
+          limits:
+            memory: "2Gi"
+            cpu: "1000m"
+        livenessProbe:
+          httpGet:
+            path: /api/v1/health
+            port: 8000
+          initialDelaySeconds: 30
+          periodSeconds: 10
+        readinessProbe:
+          httpGet:
+            path: /api/v1/health
+            port: 8000
+          initialDelaySeconds: 10
+          periodSeconds: 5
+```
+
+#### Step 4: Create Service
+
+```yaml
+# service.yaml
+apiVersion: v1
+kind: Service
+metadata:
+  name: mcp-full
+  namespace: codebase-rag
+spec:
+  type: LoadBalancer
+  ports:
+  - port: 80
+    targetPort: 8000
+    protocol: TCP
+  selector:
+    app: mcp-full
+```
+
+#### Step 5: Deploy
+
+```bash
+kubectl apply -f neo4j-deployment.yaml
+kubectl apply -f mcp-deployment.yaml
+kubectl apply -f service.yaml
+```
+
+---
+
+## 🔒 Security Best Practices
+
+### 1. Environment Variables
+
+Never commit secrets to git:
+
+```bash
+# Use Kubernetes secrets
+kubectl create secret generic app-secrets \
+  --from-literal=neo4j-password=xxx \
+  --from-literal=openai-api-key=xxx \
+  -n codebase-rag
+```
+
+### 2. Network Security
+
+```yaml
+# Restrict Neo4j access
+# Only allow from application pods
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: neo4j-policy
+spec:
+  podSelector:
+    matchLabels:
+      app: neo4j
+  ingress:
+  - from:
+    - podSelector:
+        matchLabels:
+          app: mcp-full
+```
+
+### 3. TLS/SSL
+
+Use cert-manager for automatic certificate management:
+
+```bash
+# Install cert-manager
+kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.13.0/cert-manager.yaml
+
+# Create issuer
+apiVersion: cert-manager.io/v1
+kind: ClusterIssuer
+metadata:
+  name: letsencrypt-prod
+spec:
+  acme:
+    server: https://acme-v02.api.letsencrypt.org/directory
+    email: your-email@vantagecraft.dev
+    privateKeySecretRef:
+      name: letsencrypt-prod
+    solvers:
+    - http01:
+        ingress:
+          class: nginx
+```
+
+### 4. Rate Limiting
+
+```yaml
+# Nginx ingress annotation
+nginx.ingress.kubernetes.io/limit-rps: "10"
+nginx.ingress.kubernetes.io/limit-connections: "5"
+```
+
+---
+
+## 📊 Monitoring
+
+### Prometheus Metrics
+
+Application exposes metrics at `/metrics`:
+
+```yaml
+# prometheus-servicemonitor.yaml
+apiVersion: monitoring.coreos.com/v1
+kind: ServiceMonitor
+metadata:
+  name: mcp-full
+spec:
+  selector:
+    matchLabels:
+      app: mcp-full
+  endpoints:
+  - port: http
+    path: /metrics
+```
+
+### Logging
+
+Use ELK stack or Loki for centralized logging:
+
+```ini
+# fluent-bit configmap
+[OUTPUT]
+    Name es
+    Match *
+    Host elasticsearch
+    Port 9200
+    Index codebase-rag
+```
+
+---
+
+## 🔄 Backup & Recovery
+
+### Neo4j Backup
+
+```bash
+# Manual backup
+docker exec codebase-rag-neo4j \
+  neo4j-admin database dump neo4j \
+  --to-path=/backups  # Neo4j 5 syntax; writes /backups/neo4j.dump (rename to keep dated copies)
+
+# Automated backup (cron)
+0 2 * * * /usr/local/bin/backup-neo4j.sh
+```
+
+### Restore
+
+```bash
+# Stop services
+docker-compose down
+
+# Restore backup
+docker run --rm \
+  -v neo4j_data:/data \
+  -v $(pwd)/backups:/backups \
+  neo4j:5.15 \
+  neo4j-admin database load neo4j \
+  --from-path=/backups --overwrite-destination=true  # Neo4j 5 syntax; reads /backups/neo4j.dump
+
+# Start services
+docker-compose up -d
+```
+
+---
+
+## 📚 Next Steps
+
+- [System Architecture](../architecture/design.md) - Scalability and disaster recovery
+- [Troubleshooting Guide](../troubleshooting.md) - Common production issues
+- [Docker Guide](docker.md) - Advanced Docker configuration
+- [FAQ](../faq.md) - Frequently asked questions
diff --git a/docs/deployment/standard.md b/docs/deployment/standard.md
new file mode 100644
index 0000000..296c73f
--- /dev/null
+++ b/docs/deployment/standard.md
@@ -0,0 +1,389 @@
+# Standard Mode Deployment
+
+Standard Mode adds **Memory Store with vector search** to Code Graph functionality. Requires an embedding model but no LLM.
+
+## What You Get
+
+### All Minimal Mode Features, Plus
+- **Memory Store**: Persistent project knowledge for AI agents
+- **Vector Search**: Semantic similarity search in memories
+- **Memory Management**: Add, search, update, delete memories
+- **Memory Evolution**: Supersede outdated decisions
+
+### Use Cases
+- AI agent long-term memory across sessions
+- Project decision tracking with semantic search
+- Team preference documentation
+- Problem-solution repository
+
+## System Requirements
+
+### Minimum
+- **CPU**: 4 cores
+- **RAM**: 8GB (for local embeddings)
+- **Disk**: 20GB SSD
+- **Docker**: 20.10+
+
+### With Cloud Embeddings
+- **CPU**: 2 cores
+- **RAM**: 4GB
+- **OpenAI/Gemini API key**
+
+## Quick Start
+
+### 1. Choose Embedding Provider
+
+=== "Ollama (Local, Free)"
+
+    ```bash
+    # Install Ollama
+    curl -fsSL https://ollama.com/install.sh | sh
+
+    # Pull embedding model
+    ollama pull nomic-embed-text
+
+    # Verify
+    curl http://localhost:11434/api/embeddings \
+      -d '{"model":"nomic-embed-text","prompt":"test"}'
+    ```
+
+=== "OpenAI (Cloud, Best Quality)"
+
+    ```bash
+    # Get API key from https://platform.openai.com/api-keys
+    export OPENAI_API_KEY=sk-proj-...
+    ```
+
+=== "Google Gemini (Cloud, Cost-Effective)"
+
+    ```bash
+    # Get API key from https://makersuite.google.com/app/apikey
+    export GOOGLE_API_KEY=AIza...
+    ```
+
+### 2. Configure Environment
+
+```bash
+# Copy standard template
+cp docker/.env.template/.env.standard .env
+
+# Edit configuration
+nano .env
+```
+
+Example configuration:
+
+```bash
+# Neo4j
+NEO4J_URI=bolt://neo4j:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_secure_password
+NEO4J_DATABASE=neo4j
+
+# Deployment Mode
+DEPLOYMENT_MODE=standard
+ENABLE_KNOWLEDGE_RAG=false
+ENABLE_AUTO_EXTRACTION=false
+
+# Embedding Provider (choose one)
+EMBEDDING_PROVIDER=ollama
+
+# Ollama Configuration
+OLLAMA_BASE_URL=http://host.docker.internal:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Or OpenAI
+# EMBEDDING_PROVIDER=openai
+# OPENAI_API_KEY=sk-...
+# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+```
+
+### 3. Start Services
+
+```bash
+make docker-standard
+
+# Or
+docker-compose -f docker/docker-compose.standard.yml up -d
+```
+
+### 4. Verify Deployment
+
+```bash
+# Check containers
+docker ps
+
+# Test embedding
+curl http://localhost:11434/api/embeddings \
+  -d '{"model":"nomic-embed-text","prompt":"test query"}'
+
+# Check Neo4j vector index
+docker exec -it codebase-rag-neo4j cypher-shell -u neo4j -p password
+# Run: SHOW INDEXES;
+```
+
+## Available MCP Tools
+
+Standard mode provides **11 tools** (4 Code Graph + 7 Memory):
+
+### Memory Management Tools
+
+**1. add_memory** - Save project knowledge
+```json
+{
+  "project_id": "myapp",
+  "memory_type": "decision",
+  "title": "Use PostgreSQL for main database",
+  "content": "Selected PostgreSQL over MySQL",
+  "reason": "Need advanced JSON support",
+  "importance": 0.9,
+  "tags": ["database", "architecture"]
+}
+```
+
+**2. search_memories** - Semantic search
+```json
+{
+  "project_id": "myapp",
+  "query": "database decisions",
+  "memory_type": "decision",
+  "min_importance": 0.7,
+  "limit": 10
+}
+```
+
+**3. get_memory** - Retrieve specific memory
+```json
+{
+  "memory_id": "mem_123456"
+}
+```
+
+**4. update_memory** - Modify existing memory
+```json
+{
+  "memory_id": "mem_123456",
+  "title": "Updated title",
+  "importance": 0.95
+}
+```
+
+**5. delete_memory** - Soft delete memory
+```json
+{
+  "memory_id": "mem_123456",
+  "reason": "No longer relevant"
+}
+```
+
+**6. supersede_memory** - Replace with new memory
+```json
+{
+  "old_memory_id": "mem_123456",
+  "new_title": "Migrate to PostgreSQL 16",
+  "new_content": "Upgrading from PostgreSQL 14",
+  "new_reason": "Performance improvements",
+  "new_importance": 0.9
+}
+```
+
+**7. get_project_summary** - Overview of all memories
+```json
+{
+  "project_id": "myapp"
+}
+```
+
+## Usage Examples
+
+### Example 1: AI Agent Workflow
+
+```bash
+# Agent starts working on authentication feature
+
+# 1. Search for related decisions
+Tool: search_memories
+Input: {
+  "project_id": "myapp",
+  "query": "authentication security",
+  "memory_type": "decision"
+}
+
+# 2. Implement feature following past decisions
+
+# 3. Save new decision
+Tool: add_memory
+Input: {
+  "project_id": "myapp",
+  "memory_type": "decision",
+  "title": "Use JWT with RS256",
+  "content": "Implemented JWT authentication with RS256 signing",
+  "reason": "More secure than HS256, supports key rotation",
+  "importance": 0.9,
+  "tags": ["auth", "security"]
+}
+```
+
+### Example 2: Track Problem Solutions
+
+```bash
+# Encountered Redis connection issue in Docker
+
+Tool: add_memory
+Input: {
+  "project_id": "myapp",
+  "memory_type": "experience",
+  "title": "Redis Docker networking issue",
+  "content": "Redis connection fails with localhost in Docker",
+  "reason": "Must use service name 'redis' instead of localhost",
+  "importance": 0.7,
+  "tags": ["docker", "redis", "networking"]
+}
+
+# Later, search for Redis issues
+Tool: search_memories
+Input: {
+  "project_id": "myapp",
+  "query": "Redis connection problems",
+  "memory_type": "experience"
+}
+```
+
+### Example 3: Update Outdated Decision
+
+```bash
+# Original decision to use MySQL
+Old Memory ID: mem_abc123
+
+# Decided to migrate to PostgreSQL
+Tool: supersede_memory
+Input: {
+  "old_memory_id": "mem_abc123",
+  "new_title": "Migrate to PostgreSQL",
+  "new_content": "Migrating from MySQL to PostgreSQL",
+  "new_reason": "Need advanced features and better performance",
+  "new_importance": 0.95,
+  "new_tags": ["database", "migration"]
+}
+```
+
+## Memory Best Practices
+
+### Importance Scoring
+- **0.9-1.0**: Critical architectural decisions, security findings
+- **0.7-0.8**: Important technical choices
+- **0.5-0.6**: Team preferences, conventions
+- **0.3-0.4**: Future plans, minor notes
+
+### Effective Tagging
+```bash
+# Domain tags
+"database", "api", "frontend", "auth"
+
+# Type tags
+"performance", "security", "bug", "optimization"
+
+# Status tags
+"critical", "deprecated", "planned"
+```
+
+### When to Save Memories
+- After making architecture decisions
+- When solving tricky bugs
+- When establishing team conventions
+- When discovering important limitations
+
+## Performance Considerations
+
+### Embedding Model Selection
+
+**Local (Ollama)**:
+- `nomic-embed-text`: Best quality, 768 dimensions
+- `mxbai-embed-large`: Faster, good quality
+- `all-minilm`: Lightweight, 384 dimensions
+
+**Cloud**:
+- OpenAI `text-embedding-3-small`: $0.02/1M tokens
+- OpenAI `text-embedding-3-large`: $0.13/1M tokens
+- Gemini `embedding-001`: Free tier available
+
+### Vector Index Tuning
+
+```cypher
+// Check vector index status
+SHOW INDEXES;
+
+// Rebuild if needed
+DROP INDEX memory_content_vector IF EXISTS;
+CREATE VECTOR INDEX memory_content_vector
+FOR (m:Memory) ON (m.embedding)
+OPTIONS {indexConfig: {
+  `vector.dimensions`: 768,
+  `vector.similarity_function`: 'cosine'
+}};
+```
+
+## Cost Analysis
+
+### With Local Ollama
+- **Infrastructure**: ~$10-20/month (VPS with 8GB RAM)
+- **Embedding**: $0 (local)
+- **Total**: ~$10-20/month
+
+### With OpenAI Embeddings
+- **Infrastructure**: ~$5-10/month (small VPS)
+- **Embeddings**: ~$0.02 per 1M tokens
+- **Typical usage**: ~$1-5/month for embeddings
+- **Total**: ~$6-15/month
+
+## Upgrading to Full Mode
+
+When you need LLM-powered features:
+
+```bash
+# Stop standard mode
+docker-compose -f docker/docker-compose.standard.yml down
+
+# Configure for full mode
+cp docker/.env.template/.env.full .env
+nano .env  # Add LLM configuration
+
+# Start full mode
+docker-compose -f docker/docker-compose.full.yml up -d
+```
+
+## Troubleshooting
+
+### Embedding Generation Fails
+
+```bash
+# Check Ollama logs
+docker logs codebase-rag-ollama
+
+# Test embedding locally
+curl http://localhost:11434/api/embeddings \
+  -d '{"model":"nomic-embed-text","prompt":"test"}'
+
+# Restart Ollama
+docker restart codebase-rag-ollama
+```
+
+### Vector Search Returns No Results
+
+```bash
+# Check if vector index exists
+docker exec -it codebase-rag-neo4j cypher-shell -u neo4j -p password
+# Run: SHOW INDEXES;
+
+# Check memory count
+# Run: MATCH (m:Memory) RETURN count(m);
+
+# Verify embeddings exist
+# Run: MATCH (m:Memory) WHERE m.embedding IS NOT NULL RETURN count(m);
+```
+
+## Next Steps
+
+- [Memory Store User Guide](../guide/memory/overview.md) - Detailed features
+- [Full Mode](full.md) - Upgrade for all features
+- [Production Setup](production.md) - Deploy to production
diff --git a/docs/development/changelog-automation.md b/docs/development/changelog-automation.md
new file mode 100644
index 0000000..f8702e7
--- /dev/null
+++ b/docs/development/changelog-automation.md
@@ -0,0 +1,508 @@
+# Automatic Changelog Generation
+
+Complete guide to automatically generating changelogs from git commits.
+
+## Overview
+
+Instead of manually writing changelog entries, we automatically generate them from git commit messages using the **Conventional Commits** format.
+
+**Benefits**:
+- ✅ Never forget to update changelog
+- ✅ Consistent formatting
+- ✅ Automatic categorization
+- ✅ Less manual work
+- ✅ Traceable to specific commits
+
+## Conventional Commits Format
+
+All commits should follow this format:
+
+```
+<type>(<scope>): <subject>
+
+<body>
+
+<footer>
+```
+
+### Commit Types
+
+| Type | Changelog Section | Description |
+|------|------------------|-------------|
+| `feat` | ### Added | New features |
+| `fix` | ### Fixed | Bug fixes |
+| `docs` | ### Documentation | Documentation changes |
+| `perf` | ### Performance | Performance improvements |
+| `refactor` | ### Changed | Code refactoring |
+| `test` | ### Testing | Test updates |
+| `build` | ### Build System | Build system changes |
+| `ci` | ### CI/CD | CI/CD changes |
+| `chore` | ### Maintenance | Maintenance tasks |
+| `style` | ### Changed | Code style changes |
+
+### Examples
+
+**Feature**:
+```bash
+git commit -m "feat(memory): add auto-extraction from git commits
+
+Implemented LLM-powered extraction of decisions and experiences
+from git commit messages and changed files.
+
+Closes #123"
+```
+
+**Bug Fix**:
+```bash
+git commit -m "fix(api): resolve timeout issue in document processing
+
+Increased timeout for large files from 60s to 300s.
+Added progress reporting for long-running operations."
+```
+
+**Breaking Change**:
+```bash
+git commit -m "feat(api)!: change memory search API response format
+
+BREAKING CHANGE: The search response now returns results in a
+different structure for better consistency with other endpoints.
+
+Before: { \"memories\": [...] }
+After: { \"data\": [...], \"total\": 10, \"page\": 1 }"
+```
+
+**Documentation**:
+```bash
+git commit -m "docs: add deployment guide for minimal mode"
+```
+
+**Chore**:
+```bash
+git commit -m "chore: update dependencies to latest versions"
+```
+
+## Automatic Generation
+
+### During Version Bump (Recommended)
+
+The `bump-version.sh` script automatically generates the changelog:
+
+```bash
+# This command will:
+# 1. Generate changelog from commits since last tag
+# 2. Update version numbers
+# 3. Create commit and tag
+./scripts/bump-version.sh minor
+```
+
+**What happens**:
+1. Script finds all commits since last tag (e.g., `v0.7.0`)
+2. Parses each commit message
+3. Groups by type (Added, Fixed, etc.)
+4. Generates formatted changelog section
+5. Inserts into `docs/changelog.md` after `[Unreleased]`
+6. Proceeds with version bump
+
+### Manual Generation
+
+Generate changelog without bumping version:
+
+```bash
+# Generate from latest tag to HEAD
+python3 scripts/generate-changelog.py --update --version 0.8.0
+
+# Generate from specific tag
+python3 scripts/generate-changelog.py --from-tag v0.7.0 --update --version 0.8.0
+
+# Preview without updating file
+python3 scripts/generate-changelog.py --version 0.8.0
+
+# Save to separate file
+python3 scripts/generate-changelog.py --version 0.8.0 --output CHANGELOG_DRAFT.md
+```
+
+## Generated Format
+
+The script generates entries in **Keep a Changelog** format:
+
+```markdown
+## [0.8.0] - 2025-01-20
+
+### Added
+- **memory**: Add auto-extraction from git commits
+- **api**: Add new endpoint for batch memory operations
+- Add support for Rust code parsing
+
+### Fixed
+- **api**: Resolve timeout issue in document processing
+- **docker**: Fix volume mounting in minimal mode
+
+### Changed
+- **memory**: Improve memory search performance
+- Update dependencies to latest versions
+
+### Documentation
+- Add deployment guide for minimal mode
+- Update API reference with new endpoints
+```
+
+## Workflow Integration
+
+### Standard Release Workflow
+
+```bash
+# 1. Development - Use conventional commits
+git commit -m "feat(api): add new feature X"
+git commit -m "fix(core): resolve bug Y"
+git push origin main
+
+# 2. Ready to release
+git checkout main
+git pull origin main
+
+# 3. Bump version (auto-generates changelog)
+./scripts/bump-version.sh minor
+
+# Output:
+# === Version Bump Tool ===
+# Current version: 0.7.0
+# Bump type:       minor
+# New version:     0.8.0
+#
+# This will:
+#   1. Generate changelog from git commits
+#   2. Update version in pyproject.toml, src/__version__.py
+#   3. Create a git commit
+#   4. Create a git tag v0.8.0
+#
+# Continue? (y/N) y
+#
+# Generating changelog from commits...
+# Processing 15 commits...
+# ✓ Changelog generated and updated
+# ✓ Version bumped successfully!
+
+# 4. Review and push
+git show  # Review the commit
+git push origin main
+git push origin v0.8.0
+```
+
+### Skip Changelog Generation
+
+If you want to manually edit the changelog:
+
+```bash
+# Bump version without auto-generating changelog
+./scripts/bump-version.sh minor --no-changelog
+
+# Then manually edit
+vim docs/changelog.md
+git add docs/changelog.md
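+git commit --amend --no-edit
+
+# The amend creates a new commit, so move the release tag onto it
+# (replace 0.8.0 with the version you just bumped to)
+git tag -f -a v0.8.0 -m "Release version 0.8.0"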
+```
+
+## Best Practices
+
+### 1. Write Good Commit Messages
+
+**Good**:
+```bash
+feat(memory): add conversation extraction
+
+Implemented LLM-powered analysis of AI conversations to automatically
+extract decisions, preferences, and experiences.
+
+- Supports multiple conversation formats
+- Configurable confidence threshold
+- Auto-save option for high-confidence memories
+```
+
+**Bad**:
+```bash
+added stuff
+```
+
+### 2. Use Conventional Format Consistently
+
+```bash
+# ✅ Good
+feat: add new feature
+fix: resolve bug
+docs: update guide
+
+# ❌ Bad
+Added new feature
+Fixed the bug
+Updated some docs
+```
+
+### 3. Group Related Changes
+
+```bash
+# ✅ Good - Separate commits for separate concerns
+git commit -m "feat(api): add memory export endpoint"
+git commit -m "docs(api): document memory export API"
+git commit -m "test(api): add tests for memory export"
+
+# ❌ Bad - Everything in one commit
+git commit -m "add memory export with docs and tests"
+```
+
+### 4. Use Scopes for Clarity
+
+```bash
+feat(api): ...       # API changes
+feat(memory): ...    # Memory store changes
+feat(docker): ...    # Docker configuration
+feat(docs): ...      # Documentation system
+```
+
+### 5. Mark Breaking Changes
+
+```bash
+# Method 1: Use ! after type
+feat(api)!: change response format
+
+# Method 2: Use footer
+feat(api): change response format
+
+BREAKING CHANGE: The API now returns data in a different structure.
+```
+
+## Customization
+
+### Add New Commit Types
+
+Edit `scripts/generate-changelog.py`:
+
+```python
+COMMIT_TYPE_MAP = {
+    "feat": "### Added",
+    "fix": "### Fixed",
+    "docs": "### Documentation",
+    # Add custom type
+    "security": "### Security",
+}
+```
+
+### Change Section Order
+
+Edit `scripts/generate-changelog.py`:
+
+```python
+preferred_order = [
+    "### ⚠️ Breaking Changes",
+    "### Security",  # Add this
+    "### Added",
+    "### Fixed",
+    # ...
+]
+```
+
+### Custom Formatting
+
+The script uses Python string formatting. Customize in `format_changelog_entry()`:
+
+```python
+# Current format:
+# - **scope**: Subject
+
+# Could change to:
+# - [scope] Subject (abc123)  # Include commit hash
+```
+
+## Troubleshooting
+
+### No commits found
+
+**Problem**: Script says "No commits found to process"
+
+**Solution**:
+```bash
+# Check if you have tags
+git tag
+
+# If no tags, specify manually
+python3 scripts/generate-changelog.py --from-tag "" --update --version 0.8.0
+
+# Or process all commits
+python3 scripts/generate-changelog.py --update --version 0.8.0
+```
+
+### Commits not following format
+
+**Problem**: Commits don't follow Conventional Commits
+
+**Solution**:
+- Old commits: Manually edit changelog
+- Future commits: Follow the format
+- Mixed approach: Auto-generate what you can, manually add the rest
+
+### Wrong version in changelog
+
+**Problem**: Generated with wrong version number
+
+**Solution**:
+```bash
+# Manually fix
+vim docs/changelog.md
+
+# Or regenerate
+python3 scripts/generate-changelog.py --update --version 0.8.1
+```
+
+### Want to edit generated changelog
+
+**Solution**:
+```bash
+# Generate first
+./scripts/bump-version.sh minor
+
+# Review generated changelog
+vim docs/changelog.md
+
+# Edit as needed
+
+# Amend the commit
+git add docs/changelog.md
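+git commit --amend --no-edit
+
+# The amend creates a new commit, so move the release tag onto it
+# (replace 0.8.0 with the version you just bumped to)
+git tag -f -a v0.8.0 -m "Release version 0.8.0"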
+```
+
+## Migration from Manual Changelog
+
+### If you have existing manual changelog
+
+The script will preserve existing content and insert new sections.
+
+**Steps**:
+1. Ensure `docs/changelog.md` has `## [Unreleased]` section
+2. Run generation - it will insert after Unreleased
+3. Review and adjust if needed
+
+### Converting old commits
+
+For retroactive changelog generation:
+
+```bash
+# Generate from beginning
+python3 scripts/generate-changelog.py --from-tag "" --version 0.7.0 > TEMP_CHANGELOG.md
+
+# Manually merge into docs/changelog.md
+# Clean up and adjust as needed
+```
+
+## Examples
+
+### Example 1: Feature Release
+
+```bash
+# Commits since v0.7.0:
+# - feat(memory): add conversation extraction
+# - feat(api): add export endpoint
+# - fix(docker): resolve volume issue
+# - docs: update deployment guide
+
+./scripts/bump-version.sh minor
+
+# Generated changelog:
+## [0.8.0] - 2025-01-20
+
+### Added
+- **memory**: Add conversation extraction
+- **api**: Add export endpoint
+
+### Fixed
+- **docker**: Resolve volume issue
+
+### Documentation
+- Update deployment guide
+```
+
+### Example 2: Hotfix Release
+
+```bash
+# Critical bug fix
+git commit -m "fix(api): resolve data corruption in memory export
+
+Critical fix for issue where export could truncate large memories.
+Added validation and error handling."
+
+# Bump patch version
+./scripts/bump-version.sh patch
+
+# Generated:
+## [0.7.1] - 2025-01-21
+
+### Fixed
+- **api**: Resolve data corruption in memory export
+```
+
+### Example 3: Breaking Change Release
+
+```bash
+# Breaking API change
+git commit -m "feat(api)!: standardize all response formats
+
+BREAKING CHANGE: All API endpoints now return consistent response
+structure with 'data', 'meta', and 'errors' fields.
+
+See migration guide in docs/api/migration.md"
+
+# Major version bump
+./scripts/bump-version.sh major
+
+# Generated:
+## [1.0.0] - 2025-01-25
+
+### ⚠️ Breaking Changes
+- **api**: Standardize all response formats
+
+### Added
+- **api**: Standardize all response formats
+```
+
+## Resources
+
+- [Conventional Commits](https://www.conventionalcommits.org/)
+- [Keep a Changelog](https://keepachangelog.com/)
+- [Semantic Versioning](https://semver.org/)
+- [Angular Commit Guidelines](https://github.com/angular/angular/blob/main/CONTRIBUTING.md#commit)
+
+## Quick Reference
+
+| Command | Description |
+|---------|-------------|
+| `./scripts/bump-version.sh minor` | Bump version + auto-generate changelog |
+| `./scripts/bump-version.sh minor --no-changelog` | Bump version without changelog |
+| `python3 scripts/generate-changelog.py --update` | Generate changelog only |
+| `python3 scripts/generate-changelog.py --from-tag v0.7.0` | From specific tag |
+| `python3 scripts/generate-changelog.py --output FILE` | Save to file |
+
+## Commit Message Template
+
+Save this as `.gitmessage`:
+
+```
+<type>(<scope>): <subject>
+
+# <body>
+
+# <footer>
+
+# Type: feat|fix|docs|style|refactor|perf|test|build|ci|chore
+# Scope: api|memory|docker|core|docs (optional)
+# Subject: imperative mood, lowercase, no period
+#
+# Body: explain what and why (optional)
+#
+# Footer: breaking changes, issues (optional)
+#   BREAKING CHANGE: description
+#   Closes #123
+```
+
+Configure git to use it:
+
+```bash
+git config commit.template .gitmessage
+```
diff --git a/docs/development/contributing.md b/docs/development/contributing.md
new file mode 100644
index 0000000..864b6dc
--- /dev/null
+++ b/docs/development/contributing.md
@@ -0,0 +1,584 @@
+# Contributing to Code Graph Knowledge System
+
+Thank you for your interest in contributing to the Code Graph Knowledge System! This guide will help you understand our development process and how to submit quality contributions.
+
+## Table of Contents
+
+- [Code of Conduct](#code-of-conduct)
+- [Getting Started](#getting-started)
+- [Development Workflow](#development-workflow)
+- [Code Style Guidelines](#code-style-guidelines)
+- [Commit Conventions](#commit-conventions)
+- [Pull Request Process](#pull-request-process)
+- [Testing Requirements](#testing-requirements)
+- [Documentation Requirements](#documentation-requirements)
+- [Community and Support](#community-and-support)
+
+## Code of Conduct
+
+We are committed to providing a welcoming and inclusive environment for all contributors. Please:
+
+- Be respectful and considerate in all interactions
+- Accept constructive criticism gracefully
+- Focus on what is best for the community
+- Show empathy towards other community members
+
+## Getting Started
+
+### Prerequisites
+
+Before contributing, ensure you have:
+
+1. **Python 3.13+** installed
+2. **uv** package manager ([installation guide](https://github.com/astral-sh/uv))
+3. **Neo4j 5.0+** running locally or via Docker
+4. **Git** for version control
+5. A **GitHub account**
+
+### Fork and Clone
+
+1. Fork the repository on GitHub
+2. Clone your fork locally:
+
+```bash
+git clone https://github.com/YOUR_USERNAME/codebase-rag.git
+cd codebase-rag
+```
+
+3. Add the upstream repository:
+
+```bash
+git remote add upstream https://github.com/royisme/codebase-rag.git
+```
+
+4. Create a feature branch:
+
+```bash
+git checkout -b feature/your-feature-name
+```
+
+### Development Environment Setup
+
+See the [Development Setup Guide](./setup.md) for detailed instructions on setting up your local development environment.
+
+## Development Workflow
+
+### 1. Sync with Upstream
+
+Before starting work, sync your fork with the upstream repository:
+
+```bash
+git fetch upstream
+git checkout main
+git merge upstream/main
+git push origin main
+```
+
+### 2. Create a Feature Branch
+
+Create a descriptive branch name:
+
+```bash
+git checkout -b feature/add-sql-parser-support
+git checkout -b fix/neo4j-connection-timeout
+git checkout -b docs/update-api-documentation
+```
+
+Branch naming conventions:
+- `feature/` - New features
+- `fix/` - Bug fixes
+- `docs/` - Documentation updates
+- `refactor/` - Code refactoring
+- `test/` - Test additions or modifications
+- `chore/` - Maintenance tasks
+
+### 3. Make Your Changes
+
+- Write clean, readable code
+- Follow the code style guidelines (see below)
+- Add tests for new functionality
+- Update documentation as needed
+- Keep commits focused and atomic
+
+### 4. Test Your Changes
+
+Run the test suite to ensure everything works:
+
+```bash
+# Run all tests
+pytest tests/
+
+# Run specific test types
+pytest tests/ -m unit
+pytest tests/ -m integration
+
+# Run with coverage
+pytest tests/ --cov=services --cov=api --cov=mcp_tools --cov-report=term
+```
+
+### 5. Commit Your Changes
+
+Follow our commit conventions (see below) and commit your changes:
+
+```bash
+git add .
+git commit -m "feat: add PostgreSQL schema parser support"
+```
+
+### 6. Push and Create Pull Request
+
+```bash
+git push origin feature/your-feature-name
+```
+
+Then create a pull request on GitHub following our PR template.
+
+## Code Style Guidelines
+
+We use automated tools to maintain consistent code style across the project.
+
+### Python Code Style
+
+#### Formatting Tools
+
+1. **Black** - Code formatter
+   - Line length: 100 characters
+   - Target Python versions: 3.11, 3.12, 3.13
+
+2. **isort** - Import sorter
+   - Profile: black (compatible with Black)
+   - Line length: 100 characters
+
+3. **Ruff** - Fast Python linter
+   - Line length: 100 characters
+   - Enabled rule sets: pycodestyle (E/W), pyflakes (F), isort (I), comprehensions (C), bugbear (B)
+
+#### Running Code Quality Tools
+
+Format your code before committing:
+
+```bash
+# Format with Black
+black .
+
+# Sort imports
+isort .
+
+# Lint with Ruff
+ruff check .
+
+# Fix auto-fixable Ruff issues
+ruff check . --fix
+
+# Run all together
+black . && isort . && ruff check .
+```
+
+#### Code Style Best Practices
+
+**Import Organization:**
+```python
+# Standard library imports
+import os
+import sys
+from pathlib import Path
+
+# Third-party imports
+from fastapi import FastAPI, HTTPException
+from neo4j import GraphDatabase
+
+# Local imports
+from services.neo4j_knowledge_service import Neo4jKnowledgeService
+from core.config import settings
+```
+
+**Type Hints:**
+Always use type hints for function parameters and return values:
+
+```python
+from typing import Optional, List, Dict, Any
+
+async def process_document(
+    document_path: str,
+    metadata: Optional[Dict[str, Any]] = None
+) -> Dict[str, Any]:
+    """Process a document and return results."""
+    pass
+```
+
+**Docstrings:**
+Use clear docstrings for all public functions and classes:
+
+```python
+def parse_sql_schema(sql_file: str, dialect: str = "oracle") -> Dict[str, Any]:
+    """
+    Parse SQL schema file and extract table definitions.
+
+    Args:
+        sql_file: Path to SQL file to parse
+        dialect: SQL dialect (oracle, mysql, postgresql, sqlserver)
+
+    Returns:
+        Dictionary containing parsed schema information including:
+        - tables: List of table definitions
+        - relationships: Foreign key relationships
+        - indexes: Index definitions
+
+    Raises:
+        FileNotFoundError: If SQL file doesn't exist
+        ValueError: If dialect is not supported
+    """
+    pass
+```
+
+**Async/Await Patterns:**
+Use async/await consistently for asynchronous operations:
+
+```python
+async def initialize_service(self) -> None:
+    """Initialize the service asynchronously."""
+    await self._connect_database()
+    await self._load_configuration()
+    self.initialized = True
+```
+
+**Error Handling:**
+Use structured error responses:
+
+```python
+try:
+    result = await process_data(data)
+    return {"success": True, "data": result}
+except ValueError as e:
+    return {"success": False, "error": str(e), "error_type": "validation"}
+except Exception as e:
+    logger.error(f"Unexpected error: {e}")
+    return {"success": False, "error": "Internal server error"}
+```
+
+### Configuration
+
+Our code style configuration is defined in `pyproject.toml`:
+
+```toml
+[tool.black]
+line-length = 100
+target-version = ['py311', 'py312', 'py313']
+
+[tool.isort]
+profile = "black"
+line_length = 100
+
+[tool.ruff]
+line-length = 100
+target-version = "py311"
+```
+
+## Commit Conventions
+
+We follow the [Conventional Commits](https://www.conventionalcommits.org/) specification for commit messages.
+
+### Commit Message Format
+
+```
+<type>(<scope>): <subject>
+
+<body>
+
+<footer>
+```
+
+### Commit Types
+
+- `feat`: New feature
+- `fix`: Bug fix
+- `docs`: Documentation changes
+- `style`: Code style changes (formatting, no logic change)
+- `refactor`: Code refactoring (no feature change or bug fix)
+- `perf`: Performance improvements
+- `test`: Adding or updating tests
+- `chore`: Maintenance tasks, dependency updates
+- `ci`: CI/CD configuration changes
+- `revert`: Reverting previous commits
+
+### Commit Message Examples
+
+**Feature Addition:**
+```
+feat(memory): add automatic memory extraction from conversations
+
+Implement LLM-powered analysis to extract decisions, preferences, and
+experiences from AI conversation history. Includes confidence scoring
+and auto-save capability for high-confidence memories.
+
+Closes #42
+```
+
+**Bug Fix:**
+```
+fix(neo4j): resolve connection timeout in Docker environment
+
+Fix Neo4j connection failures when running in Docker by using service
+name instead of localhost in connection string.
+
+Fixes #123
+```
+
+**Documentation:**
+```
+docs(api): update memory API endpoint documentation
+
+Add examples for all memory types and update request/response schemas.
+```
+
+**Refactoring:**
+```
+refactor(mcp): extract handlers into modular architecture
+
+Break down monolithic MCP server into smaller, focused handler modules.
+Reduces main server file from 1400 lines to 310 lines (78% reduction).
+
+Related to #56
+```
+
+### Commit Best Practices
+
+1. **Keep commits atomic** - One logical change per commit
+2. **Write clear subjects** - Imperative mood, max 50 characters
+3. **Add detailed body when needed** - Explain what and why, not how
+4. **Reference issues** - Use `Fixes #123`, `Closes #456`, `Related to #789`
+5. **Don't commit generated files** - Keep commits clean and focused
+
+## Pull Request Process
+
+### Before Submitting
+
+1. **Update your branch** with the latest upstream changes
+2. **Run all tests** and ensure they pass
+3. **Run code quality tools** (black, isort, ruff)
+4. **Update documentation** if you've changed functionality
+5. **Add tests** for new features or bug fixes
+6. **Review your own changes** - catch obvious issues before submission
+
+### PR Title
+
+Follow the same format as commit messages:
+
+```
+feat(memory): add batch repository memory extraction
+fix(api): handle large file uploads correctly
+docs(deployment): add troubleshooting guide
+```
+
+### PR Description Template
+
+```markdown
+## Description
+Brief description of what this PR does and why.
+
+## Changes
+- List of specific changes made
+- Another change
+- And another
+
+## Testing
+- [ ] Unit tests added/updated
+- [ ] Integration tests pass
+- [ ] Manual testing performed
+
+## Documentation
+- [ ] Code comments added/updated
+- [ ] API documentation updated
+- [ ] User guide updated (if needed)
+
+## Related Issues
+Fixes #123
+Related to #456
+
+## Screenshots (if applicable)
+Include screenshots for UI changes.
+
+## Checklist
+- [ ] Code follows style guidelines
+- [ ] Self-review completed
+- [ ] Tests added and passing
+- [ ] Documentation updated
+- [ ] No breaking changes (or clearly documented)
+```
+
+### PR Review Process
+
+1. **Automated Checks**: CI/CD pipeline runs automatically
+   - Unit tests
+   - Integration tests (if applicable)
+   - Code quality checks
+   - Security scans
+
+2. **Code Review**: At least one maintainer will review
+   - Code quality and style
+   - Test coverage
+   - Documentation completeness
+   - Design and architecture
+
+3. **Feedback**: Address review comments
+   - Make requested changes
+   - Push additional commits
+   - Respond to questions
+
+4. **Merge**: Once approved and all checks pass
+   - Squash and merge (default)
+   - Merge commit (for large features)
+   - Rebase and merge (for small fixes)
+
+### PR Best Practices
+
+- Keep PRs focused and reasonably sized (< 500 lines preferred)
+- Link to related issues
+- Add screenshots for UI changes
+- Update tests and documentation
+- Respond to feedback promptly
+- Don't force-push after review starts (unless requested)
+
+## Testing Requirements
+
+All contributions must include appropriate tests. See the [Testing Guide](./testing.md) for detailed information.
+
+### Test Coverage Expectations
+
+- **New Features**: 80%+ coverage required
+- **Bug Fixes**: Add test that reproduces the bug
+- **Refactoring**: Maintain or improve existing coverage
+
+### Test Types
+
+1. **Unit Tests** - Fast, isolated tests (no external dependencies)
+2. **Integration Tests** - Test with Neo4j and external services
+3. **End-to-End Tests** - Full workflow testing (where applicable)
+
+### Running Tests Locally
+
+```bash
+# Run all tests
+pytest tests/
+
+# Run only unit tests
+pytest tests/ -m unit
+
+# Run with coverage
+pytest tests/ --cov=services --cov=api --cov=mcp_tools --cov-report=html
+
+# View coverage report
+open htmlcov/index.html  # macOS; on Linux use: xdg-open htmlcov/index.html
+```
+
+## Documentation Requirements
+
+Good documentation is as important as good code.
+
+### What to Document
+
+1. **Code Comments**
+   - Complex algorithms or business logic
+   - Non-obvious design decisions
+   - Workarounds and known limitations
+
+2. **Docstrings**
+   - All public functions and classes
+   - Include parameters, return values, and exceptions
+
+3. **API Documentation**
+   - New endpoints or changes to existing ones
+   - Request/response examples
+   - Error codes and messages
+
+4. **User Documentation**
+   - New features visible to end users
+   - Configuration changes
+   - Migration guides for breaking changes
+
+5. **Architecture Documentation**
+   - Significant architectural changes
+   - New design patterns introduced
+   - System integration points
+
+### Documentation Format
+
+- Use **Markdown** for all documentation
+- Follow the existing structure in `docs/`
+- Include code examples where helpful
+- Add diagrams for complex concepts (use Mermaid)
+
+### Example Documentation Update
+
+If you add a new API endpoint:
+
+1. Update `docs/api/endpoints.md`
+2. Add code examples in `examples/`
+3. Update the relevant user guide
+4. Add entry to changelog (if user-facing)
+
+## Community and Support
+
+### Getting Help
+
+- **Documentation**: Check [docs](https://code-graph.vantagecraft.dev)
+- **GitHub Issues**: Search existing issues or create a new one
+- **Discussions**: Use GitHub Discussions for questions
+- **Email**: Contact maintainers for security issues
+
+### Reporting Bugs
+
+When reporting bugs, include:
+
+1. **Environment Information**
+   - Python version
+   - Operating system
+   - Neo4j version
+   - Deployment mode (local/Docker)
+
+2. **Steps to Reproduce**
+   - Minimal reproducible example
+   - Expected vs actual behavior
+   - Error messages or logs
+
+3. **Additional Context**
+   - Configuration files (redact sensitive data)
+   - Screenshots or recordings
+   - Related issues or PRs
+
+### Suggesting Features
+
+Before suggesting a feature:
+
+1. **Search existing issues** - Avoid duplicates
+2. **Discuss in GitHub Discussions** - Get feedback first
+3. **Create a detailed proposal** - Use the feature request template
+
+### Security Issues
+
+**Do NOT** create public issues for security vulnerabilities.
+
+Instead:
+- Email maintainers directly
+- Provide detailed information privately
+- Allow time for patching before disclosure
+
+## Recognition
+
+Contributors are recognized in several ways:
+
+- **Contributors list** in README.md
+- **Changelog entries** for significant contributions
+- **Social media mentions** for major features
+- **Maintainer status** for consistent, quality contributions
+
+## Questions?
+
+If you have questions about contributing:
+
+1. Check the [FAQ](../faq.md)
+2. Read the [Development Setup Guide](./setup.md)
+3. Search GitHub Issues and Discussions
+4. Create a new discussion topic
+
+Thank you for contributing to Code Graph Knowledge System!
diff --git a/docs/development/release.md b/docs/development/release.md
new file mode 100644
index 0000000..aa75e76
--- /dev/null
+++ b/docs/development/release.md
@@ -0,0 +1,697 @@
+# Release Process
+
+This guide covers the complete release process for the Code Graph Knowledge System, including version management, Docker builds, and deployment.
+
+## Overview
+
+### Release Types
+
+1. **Major Release** (x.0.0) - Breaking changes, major new features
+2. **Minor Release** (0.x.0) - New features, backward compatible
+3. **Patch Release** (0.0.x) - Bug fixes, minor improvements
+4. **Hotfix Release** (0.0.x) - Critical bug fixes
+
+### Release Artifacts
+
+Each release produces:
+- **Git tag** (`vX.Y.Z`)
+- **GitHub release** with release notes
+- **Docker images** (minimal, standard, full)
+- **PyPI package** (future)
+- **Updated documentation**
+
+### Release Schedule
+
+- **Major releases**: As needed (quarterly target)
+- **Minor releases**: Monthly
+- **Patch releases**: As needed
+- **Hotfixes**: Immediate (for critical issues)
+
+## Version Strategy
+
+### Semantic Versioning
+
+We follow [Semantic Versioning 2.0.0](https://semver.org/):
+
+```
+MAJOR.MINOR.PATCH
+  |     |     |
+  |     |     +-- Bug fixes, minor improvements
+  |     +-------- New features, backward compatible
+  +-------------- Breaking changes, major features
+```
+
+### Version Examples
+
+**Patch Release (0.6.1 → 0.6.2):**
+- Fix Neo4j connection timeout
+- Update dependency versions
+- Fix documentation typos
+
+**Minor Release (0.6.2 → 0.7.0):**
+- Add automatic memory extraction
+- New MCP tools
+- Performance improvements
+
+**Major Release (0.7.0 → 1.0.0):**
+- Complete API redesign
+- New authentication system
+- Breaking configuration changes
+
+### Current Version
+
+Version is managed in:
+- `pyproject.toml`: `version = "0.7.0"`
+- Git tags: `v0.7.0`
+- Docker images: `royisme/codebase-rag:0.7.0-*`
+
+## Pre-Release Checklist
+
+Before starting a release, ensure:
+
+### Code Quality
+
+- [ ] All tests pass locally
+- [ ] CI/CD pipeline is green
+- [ ] Code coverage meets requirements (70%+)
+- [ ] No known critical bugs
+- [ ] Security scan passes (Trivy)
+- [ ] Code review completed for all changes
+
+### Documentation
+
+- [ ] CHANGELOG.md updated
+- [ ] API documentation current
+- [ ] User guides updated
+- [ ] Breaking changes documented
+- [ ] Migration guide created (if needed)
+- [ ] README.md updated
+
+### Dependencies
+
+- [ ] Dependencies up to date
+- [ ] Security vulnerabilities addressed
+- [ ] Compatibility tested
+- [ ] Requirements files updated
+
+### Testing
+
+```bash
+# Run full test suite
+pytest tests/ -v --cov=services --cov=api --cov=mcp_tools
+
+# Run integration tests
+pytest tests/ -m integration
+
+# Test Docker builds locally
+docker build -f docker/Dockerfile.minimal -t test:minimal .
+docker build -f docker/Dockerfile.standard -t test:standard .
+docker build -f docker/Dockerfile.full -t test:full .
+```
+
+### Communication
+
+- [ ] Release notes drafted
+- [ ] Breaking changes communicated
+- [ ] Known issues documented
+- [ ] User announcements prepared
+
+## Release Process
+
+### Step 1: Prepare Release Branch
+
+```bash
+# Ensure main branch is up to date
+git checkout main
+git pull origin main
+
+# Create release branch
+git checkout -b release/v0.7.0
+```
+
+### Step 2: Update Version Numbers
+
+Update version in `pyproject.toml`:
+
+```toml
+[project]
+name = "code-graph"
+version = "0.7.0"  # Update this
+description = "Add your description here"
+```
+
+**Optional**: Update version in other files if needed:
+```bash
+# If version is defined elsewhere
+find . -name "*.py" -exec grep -l "__version__" {} \;
+```
+
+### Step 3: Update CHANGELOG.md
+
+```markdown
+# Changelog
+
+## [0.7.0] - 2025-01-15
+
+### Added
+- Automatic memory extraction from conversations
+- Git commit analysis for memory extraction
+- Code comment mining for TODO/FIXME markers
+- Query-based memory suggestions
+- Batch repository extraction
+
+### Changed
+- Improved MCP handler architecture
+- Enhanced error messages for memory operations
+
+### Fixed
+- Neo4j connection timeout in Docker
+- Memory search relevance scoring
+
+### Security
+- Updated dependencies with security patches
+
+## [0.6.0] - 2024-12-20
+...
+```
+
+### Step 4: Update Documentation
+
+```bash
+# Update user documentation
+vim docs/guide/memory-extraction.md
+
+# Update API documentation
+vim docs/api/memory-endpoints.md
+
+# Update README if needed
+vim README.md
+```
+
+### Step 5: Commit Changes
+
+```bash
+# Commit version and changelog updates
+git add pyproject.toml docs/changelog.md README.md
+git commit -m "chore: prepare release v0.7.0"
+
+# Push release branch
+git push origin release/v0.7.0
+```
+
+### Step 6: Create Pull Request
+
+1. Create PR from `release/v0.7.0` to `main`
+2. Title: "Release v0.7.0"
+3. Description: Include release notes
+4. Request review from maintainers
+5. Wait for CI/CD to pass
+6. Merge when approved
+
+### Step 7: Tag the Release
+
+```bash
+# After PR is merged, checkout main
+git checkout main
+git pull origin main
+
+# Create annotated tag
+git tag -a v0.7.0 -m "Release version 0.7.0
+
+### Added
+- Automatic memory extraction features
+- Enhanced MCP tools
+
+### Changed
+- Improved error handling
+
+### Fixed
+- Neo4j connection issues"
+
+# Push tag to trigger release workflow
+git push origin v0.7.0
+```
+
+### Step 8: Monitor Automated Builds
+
+The tag push triggers GitHub Actions workflows:
+
+1. **Docker builds** (`docker-build.yml`)
+   - Builds minimal, standard, and full images
+   - Tags with version number and latest
+   - Pushes to Docker Hub
+
+2. **GitHub release** (`docker-build.yml`)
+   - Creates GitHub release
+   - Generates release notes
+   - Attaches artifacts
+
+Monitor at: `https://github.com/royisme/codebase-rag/actions`
+
+## Docker Image Builds
+
+### Build Configuration
+
+Three Docker images are built for each release:
+
+#### 1. Minimal Image
+**Tag**: `royisme/codebase-rag:0.7.0-minimal`
+**Contents**: Code Graph only, no memory features
+**Size**: ~800MB
+**Use case**: Lightweight code analysis
+
+```dockerfile
+# docker/Dockerfile.minimal
+FROM python:3.13-slim
+# ... minimal dependencies
+```
+
+#### 2. Standard Image
+**Tag**: `royisme/codebase-rag:0.7.0-standard`
+**Contents**: Code Graph + Memory Store
+**Size**: ~1.2GB
+**Use case**: Memory-enhanced development
+
+```dockerfile
+# docker/Dockerfile.standard
+FROM python:3.13-slim
+# ... includes memory features
+```
+
+#### 3. Full Image
+**Tag**: `royisme/codebase-rag:0.7.0-full` (also `latest`)
+**Contents**: All features + UI + monitoring
+**Size**: ~1.5GB
+**Use case**: Complete development environment
+
+```dockerfile
+# docker/Dockerfile.full
+FROM python:3.13-slim
+# ... all features included
+```
+
+### Automated Build Process
+
+When a version tag is pushed:
+
+1. **GitHub Actions** triggered by tag push
+2. **Build images** for all three variants
+3. **Multi-platform build** (amd64, arm64)
+4. **Push to Docker Hub** with multiple tags:
+   - Version tag: `0.7.0-minimal`
+   - Major.minor tag: `0.7-minimal`
+   - Variant tag: `minimal`
+   - Latest tag: `latest` (full image only)
+
+### Manual Docker Build
+
+For testing or emergency releases:
+
+```bash
+# Build minimal
+docker build -f docker/Dockerfile.minimal -t royisme/codebase-rag:0.7.0-minimal .
+
+# Build standard
+docker build -f docker/Dockerfile.standard -t royisme/codebase-rag:0.7.0-standard .
+
+# Build full
+docker build -f docker/Dockerfile.full -t royisme/codebase-rag:0.7.0-full .
+
+# Test image
+docker run -d --name test-release \
+  -p 8000:8000 \
+  -e NEO4J_URI=bolt://neo4j:7687 \
+  royisme/codebase-rag:0.7.0-full
+
+# Verify
+curl http://localhost:8000/api/v1/health
+
+# Cleanup
+docker stop test-release
+docker rm test-release
+
+# Push manually (if needed)
+docker push royisme/codebase-rag:0.7.0-minimal
+docker push royisme/codebase-rag:0.7.0-standard
+docker push royisme/codebase-rag:0.7.0-full
+```
+
+### Docker Image Testing
+
+```bash
+# Test minimal image
+docker-compose -f docker/docker-compose.minimal.yml up -d
+# Run smoke tests
+curl http://localhost:8000/api/v1/health
+docker-compose -f docker/docker-compose.minimal.yml down
+
+# Test standard image
+docker-compose -f docker/docker-compose.standard.yml up -d
+# Test memory endpoints
+curl -X POST http://localhost:8000/api/v1/memory/add -H "Content-Type: application/json" -d '{"project_id":"test","memory_type":"decision","title":"Test","content":"Test"}'
+docker-compose -f docker/docker-compose.standard.yml down
+
+# Test full image
+docker-compose -f docker/docker-compose.full.yml up -d
+# Access monitoring UI
+open http://localhost:8000/ui/monitor
+docker-compose -f docker/docker-compose.full.yml down
+```
+
+## GitHub Release
+
+### Automated Release Creation
+
+The `docker-build.yml` workflow automatically creates a GitHub release when a tag is pushed:
+
+```yaml
+- name: Create Release
+  uses: softprops/action-gh-release@v1
+  with:
+    generate_release_notes: true
+    body: |
+      ## Docker Images
+
+      ### Minimal (Code Graph only)
+      ```bash
+      docker pull royisme/codebase-rag:0.7.0-minimal
+      ```
+
+      ### Standard (Code Graph + Memory)
+      ```bash
+      docker pull royisme/codebase-rag:0.7.0-standard
+      ```
+
+      ### Full (All Features)
+      ```bash
+      docker pull royisme/codebase-rag:0.7.0-full
+      ```
+```
+
+### Manual Release Creation
+
+If automated release fails:
+
+1. Go to: `https://github.com/royisme/codebase-rag/releases/new`
+2. **Tag**: Select `v0.7.0`
+3. **Title**: "Release v0.7.0 - Automatic Memory Extraction"
+4. **Description**: Copy from CHANGELOG.md and add Docker pull commands
+5. **Attachments**: Add any additional files
+6. Click "Publish release"
+
+### Release Notes Template
+
+```markdown
+## What's New in v0.7.0
+
+This release introduces automatic memory extraction capabilities, enabling the system to learn from conversations, code comments, and git commits.
+
+### 🚀 New Features
+
+- **Automatic Memory Extraction**: Extract memories from AI conversations
+- **Git Commit Analysis**: Analyze commits for decisions and experiences
+- **Code Comment Mining**: Extract TODO, FIXME, NOTE markers
+- **Batch Repository Extraction**: Comprehensive codebase analysis
+- **5 new MCP tools** for memory extraction
+
+### 🔧 Improvements
+
+- Enhanced error messages for memory operations
+- Improved MCP handler architecture (78% code reduction)
+- Better timeout handling for large documents
+
+### 🐛 Bug Fixes
+
+- Fixed Neo4j connection timeout in Docker environments
+- Resolved memory search relevance scoring issues
+- Fixed environment variable handling in Docker
+
+### 📚 Documentation
+
+- Complete memory extraction guide
+- Updated API documentation
+- New troubleshooting guide
+
+### 🐳 Docker Images
+
+#### Minimal (Code Graph only)
+```bash
+docker pull royisme/codebase-rag:0.7.0-minimal
+```
+
+#### Standard (Code Graph + Memory)
+```bash
+docker pull royisme/codebase-rag:0.7.0-standard
+```
+
+#### Full (All Features)
+```bash
+docker pull royisme/codebase-rag:0.7.0-full
+docker pull royisme/codebase-rag:latest
+```
+
+### 📖 Documentation
+
+Full documentation: https://code-graph.vantagecraft.dev
+
+### ⚠️ Breaking Changes
+
+None in this release.
+
+### 🙏 Contributors
+
+Thanks to all contributors who made this release possible!
+
+**Full Changelog**: https://github.com/royisme/codebase-rag/compare/v0.6.0...v0.7.0
+```
+
+## Documentation Updates
+
+### Update Documentation Site
+
+```bash
+# Update MkDocs documentation
+cd docs/
+
+# Build documentation locally
+mkdocs build
+
+# Test locally
+mkdocs serve
+# Open http://localhost:8000
+
+# Documentation auto-deploys via GitHub Actions
+# Verify at: https://code-graph.vantagecraft.dev
+```
+
+### Update Docker Hub
+
+1. Go to: `https://hub.docker.com/r/royisme/codebase-rag`
+2. Update description with latest version info
+3. Update README with new features
+4. Add release notes
+
+### Update README Badges
+
+Update version badges in `README.md`:
+
+```markdown
+[![Version](https://img.shields.io/badge/version-0.7.0-blue.svg)](https://github.com/royisme/codebase-rag/releases/tag/v0.7.0)
+[![Docker](https://img.shields.io/docker/v/royisme/codebase-rag?label=docker)](https://hub.docker.com/r/royisme/codebase-rag)
+```
+
+## Post-Release Tasks
+
+### Immediate Tasks (Within 24 Hours)
+
+1. **Monitor Docker Hub** for successful image pushes
+2. **Test deployed images** with quick smoke tests
+3. **Check documentation** site updated correctly
+4. **Monitor error reports** from new release
+5. **Respond to GitHub issues** related to release
+
+### Week 1 Tasks
+
+1. **Monitor metrics** for performance regressions
+2. **Track user feedback** on new features
+3. **Address critical bugs** with hotfix if needed
+4. **Update project board** with next milestone
+5. **Write blog post** announcing release (optional)
+
+### Ongoing Tasks
+
+1. **Close resolved issues** that were fixed in release
+2. **Update roadmap** with completed features
+3. **Plan next release** with new features
+4. **Review and merge** pending PRs
+5. **Engage with community** feedback
+
+## Hotfix Process
+
+For critical bugs that need immediate release:
+
+### Step 1: Create Hotfix Branch
+
+```bash
+# Branch from latest release tag
+git checkout v0.7.0
+git checkout -b hotfix/v0.7.1
+```
+
+### Step 2: Fix the Bug
+
+```bash
+# Make minimal changes to fix critical bug
+vim services/memory_store.py
+
+# Add tests
+vim tests/test_memory_store.py
+
+# Commit fix
+git add .
+git commit -m "fix: resolve critical memory corruption issue"
+```
+
+### Step 3: Update Version
+
+```bash
+# Update to patch version
+vim pyproject.toml  # 0.7.0 → 0.7.1
+
+# Update changelog
+vim docs/changelog.md
+```
+
+### Step 4: Fast-Track Release
+
+```bash
+# Push hotfix branch
+git push origin hotfix/v0.7.1
+
+# Create PR to main (expedited review)
+# After merge, tag immediately
+git checkout main
+git pull origin main
+git tag -a v0.7.1 -m "Hotfix: Critical memory corruption"
+git push origin v0.7.1
+```
+
+### Step 5: Communicate
+
+- Post GitHub release immediately
+- Notify users in discussions
+- Update documentation
+- Consider backporting to older versions if needed
+
+## Rollback Procedures
+
+If a release has critical issues:
+
+### Docker Rollback
+
+Users can revert to previous version:
+
+```bash
+# Pull previous version
+docker pull royisme/codebase-rag:0.6.0-full
+
+# Update the image tag in docker-compose.yml to:
+#   image: royisme/codebase-rag:0.6.0-full
+
+# Restart
+docker-compose down
+docker-compose up -d
+```
+
+### Git Rollback
+
+For repository issues:
+
+```bash
+# Check out the previous release tag (detached HEAD) to inspect or redeploy it
+git checkout v0.6.0
+
+# Or create revert commit
+git revert <commit-hash>
+git push origin main
+```
+
+### Communication
+
+1. **Create GitHub issue** explaining the problem
+2. **Update release notes** with warning
+3. **Publish hotfix** as soon as possible
+4. **Document root cause** and prevention measures
+
+## Release Checklist Summary
+
+```markdown
+## Pre-Release
+- [ ] All tests passing
+- [ ] Documentation updated
+- [ ] CHANGELOG.md updated
+- [ ] Version bumped in pyproject.toml
+- [ ] Dependencies updated
+- [ ] Security scan passed
+
+## Release
+- [ ] Create release branch
+- [ ] Update version numbers
+- [ ] Commit and push changes
+- [ ] Create and merge PR
+- [ ] Create and push git tag
+- [ ] Monitor Docker builds
+- [ ] Verify GitHub release created
+
+## Post-Release
+- [ ] Test Docker images
+- [ ] Verify documentation updated
+- [ ] Announce release
+- [ ] Monitor for issues
+- [ ] Close related issues
+- [ ] Plan next release
+```
+
+## Questions?
+
+For questions about the release process:
+
+1. Check [Contributing Guide](./contributing.md)
+2. Review previous releases
+3. Ask in GitHub Discussions
+4. Contact maintainers
+
+## Useful Commands Reference
+
+```bash
+# Show the current version recorded in pyproject.toml
+grep -r "version" pyproject.toml
+
+# List all tags
+git tag -l
+
+# Show tag details
+git show v0.7.0
+
+# Delete local tag
+git tag -d v0.7.0
+
+# Delete remote tag (careful!)
+git push origin :refs/tags/v0.7.0
+
+# Compare releases
+git log v0.6.0..v0.7.0 --oneline
+
+# Build all Docker images
+for variant in minimal standard full; do
+  docker build -f docker/Dockerfile.$variant -t royisme/codebase-rag:0.7.0-$variant .
+done
+```
+
+## Resources
+
+- [Semantic Versioning](https://semver.org/)
+- [Conventional Commits](https://www.conventionalcommits.org/)
+- [Keep a Changelog](https://keepachangelog.com/)
+- [GitHub Releases](https://docs.github.com/en/repositories/releasing-projects-on-github)
+- [Docker Hub](https://hub.docker.com/r/royisme/codebase-rag)
diff --git a/docs/development/setup.md b/docs/development/setup.md
new file mode 100644
index 0000000..bae3e3e
--- /dev/null
+++ b/docs/development/setup.md
@@ -0,0 +1,714 @@
+# Development Environment Setup
+
+This guide will help you set up a complete development environment for the Code Graph Knowledge System.
+
+## Table of Contents
+
+- [Prerequisites](#prerequisites)
+- [System Requirements](#system-requirements)
+- [Python Environment Setup](#python-environment-setup)
+- [Neo4j Database Setup](#neo4j-database-setup)
+- [LLM Provider Setup](#llm-provider-setup)
+- [Project Installation](#project-installation)
+- [Environment Configuration](#environment-configuration)
+- [IDE Setup](#ide-setup)
+- [Verification](#verification)
+- [Troubleshooting](#troubleshooting)
+
+## Prerequisites
+
+Before starting, ensure you have the following installed on your system:
+
+### Required
+
+- **Python 3.13 or higher**
+- **Git** (for version control)
+- **Docker and Docker Compose** (for Neo4j and optional services)
+
+### Recommended
+
+- **uv** (fast Python package manager)
+- **Visual Studio Code** or **PyCharm** (recommended IDEs)
+
+## System Requirements
+
+### Minimum Requirements
+
+- **OS**: Linux, macOS, or Windows (with WSL2)
+- **RAM**: 8GB (16GB recommended)
+- **Disk Space**: 10GB free space
+- **CPU**: 4 cores (8 cores recommended for Ollama)
+
+### For Production Development
+
+- **RAM**: 16GB minimum (32GB for Ollama with large models)
+- **GPU**: NVIDIA GPU with CUDA support (optional, for faster Ollama inference)
+
+## Python Environment Setup
+
+### Install Python 3.13
+
+#### Linux (Ubuntu/Debian)
+
+```bash
+# Add deadsnakes PPA for latest Python versions
+sudo add-apt-repository ppa:deadsnakes/ppa
+sudo apt update
+sudo apt install python3.13 python3.13-venv python3.13-dev
+```
+
+#### macOS
+
+```bash
+# Using Homebrew
+brew install python@3.13
+```
+
+#### Windows (WSL2)
+
+Follow the Linux instructions after installing WSL2.
+
+### Install uv (Recommended)
+
+uv is a fast Python package manager written in Rust. It's significantly faster than pip.
+
+```bash
+# Install uv
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Verify installation
+uv --version
+```
+
+**Alternative**: Use pip if you prefer traditional Python package management:
+
+```bash
+pip install --upgrade pip
+```
+
+## Neo4j Database Setup
+
+Neo4j is required for the knowledge graph functionality. You can run it via Docker or install it natively.
+
+### Option 1: Neo4j with Docker (Recommended for Development)
+
+This is the easiest method for development:
+
+```bash
+# Create docker-compose.dev.yml for Neo4j
+cat > docker-compose.dev.yml << 'EOF'
+version: '3.8'
+
+services:
+  neo4j:
+    image: neo4j:5.14
+    container_name: code-graph-neo4j
+    ports:
+      - "7474:7474"  # HTTP
+      - "7687:7687"  # Bolt
+    environment:
+      - NEO4J_AUTH=neo4j/devpassword
+      - NEO4J_PLUGINS=["apoc"]
+      - NEO4J_dbms_security_procedures_unrestricted=apoc.*
+      - NEO4J_dbms_memory_heap_max__size=2G
+    volumes:
+      - neo4j_data:/data
+      - neo4j_logs:/logs
+    healthcheck:
+      test: ["CMD", "cypher-shell", "-u", "neo4j", "-p", "devpassword", "RETURN 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+volumes:
+  neo4j_data:
+  neo4j_logs:
+EOF
+
+# Start Neo4j
+docker-compose -f docker-compose.dev.yml up -d
+
+# Check logs
+docker logs -f code-graph-neo4j
+
+# Wait for Neo4j to be ready (look for "Started.")
+```
+
+**Verify Neo4j is running:**
+
+```bash
+# Check container status
+docker ps | grep neo4j
+
+# Access Neo4j Browser
+# Open http://localhost:7474 in your browser
+# Login: neo4j / devpassword
+```
+
+### Option 2: Native Neo4j Installation
+
+#### Linux
+
+```bash
+# Add Neo4j repository
+wget -O - https://debian.neo4j.com/neotechnology.gpg.key | sudo apt-key add -
+echo 'deb https://debian.neo4j.com stable latest' | sudo tee /etc/apt/sources.list.d/neo4j.list
+
+# Install Neo4j
+sudo apt update
+sudo apt install neo4j
+
+# Install APOC plugin
+wget https://github.com/neo4j/apoc/releases/download/5.14.0/apoc-5.14.0-core.jar -P /var/lib/neo4j/plugins/
+
+# Configure Neo4j
+sudo nano /etc/neo4j/neo4j.conf
+# Add: dbms.security.procedures.unrestricted=apoc.*
+
+# Start Neo4j
+sudo systemctl start neo4j
+sudo systemctl enable neo4j
+```
+
+#### macOS
+
+```bash
+# Using Homebrew
+brew install neo4j
+
+# Start Neo4j
+neo4j start
+```
+
+### Neo4j Initial Configuration
+
+1. **Access Neo4j Browser**: http://localhost:7474
+2. **Initial Login**:
+   - Username: `neo4j`
+   - Password: `neo4j` (or `devpassword` if using Docker)
+3. **Change Password**: Follow the prompt (or keep the Docker password)
+4. **Verify APOC**: Run `RETURN apoc.version()` in the browser
+
+## LLM Provider Setup
+
+The system supports multiple LLM providers. Choose at least one:
+
+### Option 1: Ollama (Recommended for Development)
+
+Ollama provides local LLM hosting, which is free and doesn't require API keys.
+
+#### Install Ollama
+
+```bash
+# Linux
+curl -fsSL https://ollama.com/install.sh | sh
+
+# macOS
+brew install ollama
+
+# Windows (WSL2)
+curl -fsSL https://ollama.com/install.sh | sh
+```
+
+#### Start Ollama and Pull Models
+
+```bash
+# Start Ollama service
+ollama serve  # Keep this running in a terminal
+
+# In another terminal, pull models
+ollama pull llama3.2:3b     # Small, fast model
+ollama pull mistral:7b      # Good balance
+ollama pull nomic-embed-text  # Embedding model
+
+# Verify models are available
+ollama list
+```
+
+**Note**: Larger models require more RAM:
+- 3B parameters: ~4GB RAM
+- 7B parameters: ~8GB RAM
+- 13B parameters: ~16GB RAM
+- 70B parameters: ~48GB RAM
+
+### Option 2: OpenAI
+
+```bash
+# Get API key from https://platform.openai.com/api-keys
+# No installation needed, just add to .env file
+```
+
+### Option 3: Google Gemini
+
+```bash
+# Get API key from https://ai.google.dev/
+# No installation needed, just add to .env file
+```
+
+### Option 4: OpenRouter
+
+```bash
+# Get API key from https://openrouter.ai/
+# Provides access to multiple LLM providers
+# No installation needed, just add to .env file
+```
+
+## Project Installation
+
+### Clone the Repository
+
+```bash
+# Clone your fork (replace YOUR_USERNAME with your GitHub username)
+git clone https://github.com/YOUR_USERNAME/codebase-rag.git
+cd codebase-rag
+
+# Add upstream remote
+git remote add upstream https://github.com/royisme/codebase-rag.git
+```
+
+### Install Dependencies
+
+#### Using uv (Recommended)
+
+```bash
+# Create and activate the virtual environment
+uv venv
+source .venv/bin/activate  # On Windows: .venv\Scripts\activate
+
+# Install project in editable mode
+uv pip install -e .
+
+# Install development dependencies
+uv pip install pytest pytest-asyncio pytest-cov pytest-mock black isort ruff
+```
+
+#### Using pip
+
+```bash
+# Create virtual environment
+python3.13 -m venv .venv
+source .venv/bin/activate  # On Windows: .venv\Scripts\activate
+
+# Upgrade pip
+pip install --upgrade pip
+
+# Install project in editable mode
+pip install -e .
+
+# Install development dependencies
+pip install pytest pytest-asyncio pytest-cov pytest-mock black isort ruff
+```
+
+### Verify Installation
+
+```bash
+# Check installed packages
+uv pip list  # or: pip list
+
+# Verify key packages
+python -c "import fastapi; print(f'FastAPI: {fastapi.__version__}')"
+python -c "import neo4j; print(f'Neo4j: {neo4j.__version__}')"
+python -c "import llama_index; print(f'LlamaIndex: {llama_index.__version__}')"
+```
+
+## Environment Configuration
+
+### Create .env File
+
+```bash
+# Copy example environment file
+cp env.example .env
+
+# Edit with your settings
+nano .env  # or use your preferred editor
+```
+
+### Essential Environment Variables
+
+#### Neo4j Configuration
+
+```bash
+# Neo4j connection
+NEO4J_URI=bolt://localhost:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=devpassword  # Change to your password
+NEO4J_DATABASE=neo4j
+```
+
+#### LLM Provider Configuration
+
+**For Ollama (Local):**
+```bash
+LLM_PROVIDER=ollama
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_MODEL=llama3.2:3b
+
+EMBEDDING_PROVIDER=ollama
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+```
+
+**For OpenAI:**
+```bash
+LLM_PROVIDER=openai
+OPENAI_API_KEY=sk-your-api-key-here
+OPENAI_MODEL=gpt-4
+
+EMBEDDING_PROVIDER=openai
+OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+```
+
+**For Google Gemini:**
+```bash
+LLM_PROVIDER=gemini
+GOOGLE_API_KEY=your-api-key-here
+GEMINI_MODEL=gemini-1.5-flash
+
+EMBEDDING_PROVIDER=gemini
+GEMINI_EMBEDDING_MODEL=models/text-embedding-004
+```
+
+#### Application Configuration
+
+```bash
+# Server settings
+HOST=0.0.0.0
+PORT=8000
+LOG_LEVEL=DEBUG  # Use DEBUG for development
+
+# Features
+ENABLE_MONITORING=true  # Enable NiceGUI monitoring UI
+ENABLE_PROMETHEUS=true  # Enable Prometheus metrics
+
+# Timeouts (in seconds)
+CONNECTION_TIMEOUT=30
+OPERATION_TIMEOUT=300
+LARGE_DOCUMENT_TIMEOUT=600
+```
+
+### Example Complete .env File
+
+```bash
+# Neo4j Configuration
+NEO4J_URI=bolt://localhost:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=devpassword
+NEO4J_DATABASE=neo4j
+
+# LLM Provider (Ollama for development)
+LLM_PROVIDER=ollama
+OLLAMA_BASE_URL=http://localhost:11434
+OLLAMA_MODEL=llama3.2:3b
+
+# Embedding Provider
+EMBEDDING_PROVIDER=ollama
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Server Configuration
+HOST=0.0.0.0
+PORT=8000
+LOG_LEVEL=DEBUG
+
+# Features
+ENABLE_MONITORING=true
+ENABLE_PROMETHEUS=true
+
+# Timeouts
+CONNECTION_TIMEOUT=30
+OPERATION_TIMEOUT=300
+LARGE_DOCUMENT_TIMEOUT=600
+```
+
+## IDE Setup
+
+### Visual Studio Code
+
+#### Recommended Extensions
+
+```json
+{
+  "recommendations": [
+    "ms-python.python",
+    "ms-python.vscode-pylance",
+    "ms-python.black-formatter",
+    "charliermarsh.ruff",
+    "ms-python.isort",
+    "redhat.vscode-yaml",
+    "neo4j.cypher",
+    "tamasfe.even-better-toml"
+  ]
+}
+```
+
+Save this as `.vscode/extensions.json` in your project root.
+
+#### VS Code Settings
+
+```json
+{
+  "python.defaultInterpreterPath": ".venv/bin/python",
+  "python.linting.enabled": true,
+  "python.linting.ruffEnabled": true,
+  "python.formatting.provider": "black",
+  "editor.formatOnSave": true,
+  "editor.codeActionsOnSave": {
+    "source.organizeImports": true
+  },
+  "[python]": {
+    "editor.defaultFormatter": "ms-python.black-formatter",
+    "editor.formatOnSave": true,
+    "editor.rulers": [100]
+  },
+  "files.exclude": {
+    "**/__pycache__": true,
+    "**/*.pyc": true,
+    ".pytest_cache": true,
+    ".ruff_cache": true
+  }
+}
+```
+
+Save this as `.vscode/settings.json`.
+
+#### Launch Configuration
+
+```json
+{
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Python: FastAPI",
+      "type": "python",
+      "request": "launch",
+      "module": "uvicorn",
+      "args": [
+        "main:app",
+        "--reload",
+        "--host",
+        "0.0.0.0",
+        "--port",
+        "8000"
+      ],
+      "jinja": true,
+      "justMyCode": false,
+      "env": {
+        "PYTHONPATH": "${workspaceFolder}"
+      }
+    },
+    {
+      "name": "Python: MCP Server",
+      "type": "python",
+      "request": "launch",
+      "program": "start_mcp.py",
+      "console": "integratedTerminal",
+      "justMyCode": false
+    },
+    {
+      "name": "Python: Current File",
+      "type": "python",
+      "request": "launch",
+      "program": "${file}",
+      "console": "integratedTerminal"
+    },
+    {
+      "name": "Python: Pytest Current File",
+      "type": "python",
+      "request": "launch",
+      "module": "pytest",
+      "args": [
+        "${file}",
+        "-v",
+        "--tb=short"
+      ],
+      "console": "integratedTerminal"
+    }
+  ]
+}
+```
+
+Save this as `.vscode/launch.json`.
+
+### PyCharm
+
+#### Setup Steps
+
+1. **Open Project**: File > Open > Select `codebase-rag` directory
+2. **Configure Interpreter**:
+   - Settings > Project > Python Interpreter
+   - Add Interpreter > Existing Environment
+   - Select `.venv/bin/python`
+3. **Configure Black**:
+   - Settings > Tools > Black
+   - Enable "On save"
+   - Line length: 100
+4. **Configure Ruff**:
+   - Install Ruff plugin
+   - Enable in Settings > Tools > Ruff
+5. **Run Configurations**:
+   - Create configurations for `start.py` and `start_mcp.py`
+
+## Verification
+
+### Test Development Environment
+
+Run through this checklist to verify everything is set up correctly:
+
+#### 1. Python Environment
+
+```bash
+# Activate virtual environment
+source .venv/bin/activate
+
+# Check Python version
+python --version  # Should be 3.13 or higher
+
+# Check installed packages
+uv pip list | grep -E "(fastapi|neo4j|llama-index)"
+```
+
+#### 2. Neo4j Connection
+
+```bash
+# Test Neo4j connection
+python -c "
+from neo4j import GraphDatabase
+driver = GraphDatabase.driver('bolt://localhost:7687', auth=('neo4j', 'devpassword'))
+with driver.session() as session:
+    result = session.run('RETURN 1 as num')
+    print(f'Neo4j connection successful: {result.single()[0]}')
+driver.close()
+"
+```
+
+#### 3. LLM Provider (Ollama)
+
+```bash
+# Test Ollama connection
+curl http://localhost:11434/api/tags
+
+# Test model availability
+ollama list
+```
+
+#### 4. Start Application
+
+```bash
+# Start the application
+python start.py
+
+# You should see:
+# ✓ All service health checks passed
+# Application starting...
+# INFO:     Uvicorn running on http://0.0.0.0:8000
+```
+
+#### 5. Test API Endpoints
+
+In another terminal:
+
+```bash
+# Health check
+curl http://localhost:8000/api/v1/health
+
+# Expected response:
+# {"status":"healthy","neo4j":"connected","llm_provider":"ollama"}
+
+# Test knowledge query
+curl -X POST http://localhost:8000/api/v1/knowledge/query \
+  -H "Content-Type: application/json" \
+  -d '{"query": "test"}'
+```
+
+#### 6. Run Tests
+
+```bash
+# Run unit tests
+pytest tests/ -m unit -v
+
+# Should see: All tests passed
+```
+
+### Common Verification Issues
+
+**Neo4j not connecting:**
+```bash
+# Check if Neo4j is running
+docker ps | grep neo4j  # For Docker
+sudo systemctl status neo4j  # For native installation
+
+# Check logs
+docker logs code-graph-neo4j  # For Docker
+sudo journalctl -u neo4j -f  # For native installation
+```
+
+**Ollama not responding:**
+```bash
+# Check if Ollama is running
+ps aux | grep ollama
+
+# Start Ollama if not running
+ollama serve
+```
+
+**Import errors:**
+```bash
+# Reinstall dependencies
+uv pip install -e .
+
+# Verify PYTHONPATH
+echo $PYTHONPATH
+export PYTHONPATH="${PWD}:${PYTHONPATH}"
+```
+
+## Troubleshooting
+
+For common development environment issues, see the [Troubleshooting Guide](../troubleshooting.md).
+
+### Quick Fixes
+
+**Virtual environment not activating:**
+```bash
+# Recreate virtual environment
+rm -rf .venv
+uv venv
+source .venv/bin/activate
+uv pip install -e .
+```
+
+**Port already in use:**
+```bash
+# Find process using port 8000
+lsof -i :8000  # On Linux/macOS
+netstat -ano | findstr :8000  # On Windows
+
+# Kill process or change port in .env
+PORT=8001
+```
+
+**Neo4j memory issues:**
+```bash
+# Adjust Neo4j heap size in docker-compose.dev.yml
+NEO4J_dbms_memory_heap_max__size=4G  # Increase if needed
+```
+
+## Next Steps
+
+Now that your development environment is set up:
+
+1. Read the [Contributing Guide](./contributing.md) for code standards
+2. Review the [Testing Guide](./testing.md) to learn about writing tests
+3. Explore the codebase starting with `main.py` and `services/`
+4. Try running the examples in `examples/`
+5. Make your first contribution!
+
+## Getting Help
+
+If you encounter issues during setup:
+
+1. Check the [Troubleshooting Guide](../troubleshooting.md)
+2. Search [GitHub Issues](https://github.com/royisme/codebase-rag/issues)
+3. Create a new issue with:
+   - Your OS and Python version
+   - Complete error messages
+   - Steps you've already tried
+
+Happy developing!
diff --git a/docs/development/testing.md b/docs/development/testing.md
new file mode 100644
index 0000000..a2aab34
--- /dev/null
+++ b/docs/development/testing.md
@@ -0,0 +1,794 @@
+# Testing Guide
+
+This guide covers testing practices, conventions, and procedures for the Code Graph Knowledge System.
+
+## Table of Contents
+
+- [Overview](#overview)
+- [Test Organization](#test-organization)
+- [Test Types](#test-types)
+- [Running Tests](#running-tests)
+- [Writing Tests](#writing-tests)
+- [Test Fixtures](#test-fixtures)
+- [Mocking Strategies](#mocking-strategies)
+- [Coverage Requirements](#coverage-requirements)
+- [CI/CD Testing](#cicd-testing)
+- [Best Practices](#best-practices)
+
+## Overview
+
+We use **pytest** as our testing framework with support for:
+
+- **Async/await** testing with `pytest-asyncio`
+- **Code coverage** tracking with `pytest-cov`
+- **Mocking** capabilities with `pytest-mock`
+- **Test markers** for categorizing tests
+
+### Testing Philosophy
+
+- **Test early, test often** - Write tests as you develop features
+- **Fast unit tests** - Keep unit tests fast and isolated
+- **Meaningful integration tests** - Test real interactions with Neo4j
+- **High coverage** - Aim for 80%+ coverage on new code
+- **Clear test names** - Test names should describe what is being tested
+
+## Test Organization
+
+### Directory Structure
+
+```
+tests/
+├── __init__.py
+├── conftest.py              # Shared fixtures and configuration
+├── test_mcp_handlers.py     # MCP handler unit tests
+├── test_mcp_integration.py  # MCP integration tests
+├── test_mcp_utils.py        # MCP utility tests
+├── test_memory_store.py     # Memory store tests
+├── test_context_pack.py     # Context packing tests
+├── test_ingest.py           # Document ingestion tests
+├── test_related.py          # Related code tests
+└── README.md                # Test documentation
+```
+
+### Test File Naming
+
+- Test files: `test_*.py`
+- Test classes: `Test*`
+- Test functions: `test_*`
+
+Example:
+```python
+# test_memory_store.py
+class TestMemoryStore:
+    def test_add_memory_success(self):
+        pass
+
+    async def test_search_memories_async(self):
+        pass
+```
+
+## Test Types
+
+We use pytest markers to categorize tests:
+
+### Unit Tests
+
+Fast tests with no external dependencies (mocked services).
+
+```python
+import pytest
+
+@pytest.mark.unit
+async def test_parse_memory_type():
+    """Test memory type parsing logic."""
+    from services.memory_store import parse_memory_type
+
+    result = parse_memory_type("decision")
+    assert result == "decision"
+
+    with pytest.raises(ValueError):
+        parse_memory_type("invalid_type")
+```
+
+**Characteristics:**
+- No database connections
+- No external API calls
+- No file I/O (use mocks)
+- Run in milliseconds
+- Can run in parallel
+
+### Integration Tests
+
+Tests that interact with real services (Neo4j, etc.).
+
+```python
+import pytest
+
+@pytest.mark.integration
+async def test_neo4j_connection(neo4j_service):
+    """Test actual Neo4j connection and query."""
+    result = await neo4j_service.execute_query("RETURN 1 as num")
+    assert result["success"] is True
+    assert result["data"][0]["num"] == 1
+```
+
+**Characteristics:**
+- Require Neo4j running
+- May require LLM provider
+- Slower execution
+- May need setup/teardown
+- Test real integrations
+
+### Slow Tests
+
+Tests that take significant time (> 1 second).
+
+```python
+import pytest
+
+@pytest.mark.slow
+@pytest.mark.integration
+async def test_large_document_processing(knowledge_service):
+    """Test processing of large document."""
+    large_doc = "x" * 100000  # 100KB document
+    result = await knowledge_service.ingest_document(large_doc)
+    assert result["success"] is True
+```
+
+## Running Tests
+
+### Basic Test Execution
+
+```bash
+# Run all tests
+pytest tests/
+
+# Run with verbose output
+pytest tests/ -v
+
+# Run with extra verbosity (show test names)
+pytest tests/ -vv
+```
+
+### Running Specific Tests
+
+```bash
+# Run a specific test file
+pytest tests/test_memory_store.py
+
+# Run a specific test class
+pytest tests/test_memory_store.py::TestMemoryStore
+
+# Run a specific test function
+pytest tests/test_memory_store.py::TestMemoryStore::test_add_memory_success
+
+# Run tests matching a pattern
+pytest tests/ -k "memory"
+pytest tests/ -k "test_add or test_search"
+```
+
+### Running by Markers
+
+```bash
+# Run only unit tests (fast)
+pytest tests/ -m unit
+
+# Run only integration tests
+pytest tests/ -m integration
+
+# Run integration tests but not slow ones
+pytest tests/ -m "integration and not slow"
+
+# Run all except slow tests
+pytest tests/ -m "not slow"
+```
+
+### Coverage Reports
+
+```bash
+# Run with coverage for specific modules
+pytest tests/ --cov=services --cov=api --cov=mcp_tools
+
+# Generate HTML coverage report
+pytest tests/ --cov=services --cov=api --cov-report=html
+
+# View HTML report
+open htmlcov/index.html  # macOS
+xdg-open htmlcov/index.html  # Linux
+start htmlcov/index.html  # Windows
+
+# Generate terminal coverage report
+pytest tests/ --cov=services --cov-report=term
+
+# Show missing lines
+pytest tests/ --cov=services --cov-report=term-missing
+```
+
+### Debugging Tests
+
+```bash
+# Stop on first failure
+pytest tests/ -x
+
+# Show local variables on failure
+pytest tests/ -l
+
+# Enter debugger on failure
+pytest tests/ --pdb
+
+# Show print statements
+pytest tests/ -s
+
+# Increase verbosity for debugging
+pytest tests/ -vv --tb=long
+```
+
+### Parallel Execution
+
+```bash
+# Install pytest-xdist
+pip install pytest-xdist
+
+# Run tests in parallel (4 workers)
+pytest tests/ -n 4
+
+# Run unit tests in parallel
+pytest tests/ -m unit -n auto
+```
+
+## Writing Tests
+
+### Test Function Structure
+
+Follow the **Arrange-Act-Assert** pattern:
+
+```python
+import pytest
+
+async def test_add_memory_success():
+    """Test adding a memory successfully."""
+    # Arrange - Set up test data and dependencies
+    project_id = "test-project"
+    memory_data = {
+        "type": "decision",
+        "title": "Use PostgreSQL",
+        "content": "Selected PostgreSQL for database",
+        "importance": 0.8
+    }
+
+    # Act - Execute the code being tested
+    result = await memory_store.add_memory(
+        project_id=project_id,
+        **memory_data
+    )
+
+    # Assert - Verify the results
+    assert result["success"] is True
+    assert "memory_id" in result
+    assert result["memory"]["title"] == memory_data["title"]
+```
+
+### Testing Async Functions
+
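+Async test functions must be marked with `pytest.mark.asyncio` (the marker can be omitted only when pytest-asyncio is configured with `asyncio_mode = "auto"`):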
+
+```python
+import pytest
+
+@pytest.mark.asyncio
+async def test_async_operation():
+    """Test asynchronous operation."""
+    result = await some_async_function()
+    assert result is not None
+```
+
+### Testing Exceptions
+
+```python
+import pytest
+
+def test_invalid_input_raises_error():
+    """Test that invalid input raises ValueError."""
+    with pytest.raises(ValueError) as exc_info:
+        parse_invalid_input("bad data")
+
+    assert "invalid format" in str(exc_info.value)
+
+async def test_async_exception():
+    """Test async function raises exception."""
+    with pytest.raises(ConnectionError):
+        await connect_to_invalid_service()
+```
+
+### Parametrized Tests
+
+Test multiple scenarios with one test function:
+
+```python
+import pytest
+
+@pytest.mark.parametrize("memory_type,expected", [
+    ("decision", "decision"),
+    ("preference", "preference"),
+    ("experience", "experience"),
+    ("convention", "convention"),
+    ("plan", "plan"),
+])
+def test_memory_type_validation(memory_type, expected):
+    """Test memory type validation for various types."""
+    result = validate_memory_type(memory_type)
+    assert result == expected
+
+@pytest.mark.parametrize("invalid_type", [
+    "invalid",
+    "unknown",
+    "",
+    None,
+    123,
+])
+def test_invalid_memory_type(invalid_type):
+    """Test that invalid memory types raise errors."""
+    with pytest.raises(ValueError):
+        validate_memory_type(invalid_type)
+```
+
+### Testing with Fixtures
+
+```python
+import pytest
+
+@pytest.fixture
+def sample_memory_data():
+    """Provide sample memory data for tests."""
+    return {
+        "project_id": "test-project",
+        "memory_type": "decision",
+        "title": "Test Decision",
+        "content": "Test content",
+        "importance": 0.7,
+        "tags": ["test", "example"]
+    }
+
+def test_with_fixture(sample_memory_data):
+    """Test using fixture data."""
+    assert sample_memory_data["memory_type"] == "decision"
+    assert sample_memory_data["importance"] == 0.7
+```
+
+## Test Fixtures
+
+### Available Fixtures
+
+Fixtures are defined in `tests/conftest.py`:
+
+#### mock_neo4j_driver
+```python
+@pytest.fixture
+def mock_neo4j_driver(mocker):
+    """Mock Neo4j driver for unit tests."""
+    # Returns a mock Neo4j driver
+```
+
+#### mock_llm_service
+```python
+@pytest.fixture
+def mock_llm_service(mocker):
+    """Mock LLM service for unit tests."""
+    # Returns a mock LLM service
+```
+
+#### mock_knowledge_service
+```python
+@pytest.fixture
+async def mock_knowledge_service(mocker):
+    """Mock knowledge service for unit tests."""
+    # Returns a mock knowledge service
+```
+
+#### neo4j_test_driver
+```python
+@pytest.fixture(scope="session")
+def neo4j_test_driver():
+    """Real Neo4j driver for integration tests."""
+    # Returns actual Neo4j driver connected to test database
+```
+
+### Creating Custom Fixtures
+
+```python
+import pytest
+from pathlib import Path
+from typing import Generator
+
+@pytest.fixture
+def temp_directory(tmp_path) -> Generator[Path, None, None]:
+    """Create a temporary directory for tests."""
+    test_dir = tmp_path / "test_data"
+    test_dir.mkdir()
+    yield test_dir
+    # Cleanup happens automatically with tmp_path
+
+@pytest.fixture
+async def initialized_memory_store():
+    """Provide an initialized memory store."""
+    store = MemoryStore()
+    await store.initialize()
+    yield store
+    await store.cleanup()
+```
+
+### Fixture Scopes
+
+```python
+# Function scope (default) - New instance per test
+@pytest.fixture
+def per_test_fixture():
+    return "new instance"
+
+# Class scope - New instance per test class
+@pytest.fixture(scope="class")
+def per_class_fixture():
+    return "shared in class"
+
+# Module scope - New instance per test file
+@pytest.fixture(scope="module")
+def per_module_fixture():
+    return "shared in module"
+
+# Session scope - One instance for entire test session
+@pytest.fixture(scope="session")
+def per_session_fixture():
+    return "shared across all tests"
+```
+
+## Mocking Strategies
+
+### Mocking with pytest-mock
+
+```python
+def test_with_mock(mocker):
+    """Test using mocker fixture."""
+    # Mock a function
+    mock_func = mocker.patch('services.memory_store.some_function')
+    mock_func.return_value = "mocked result"
+
+    # Call code that uses the function
+    result = call_code_using_function()
+
+    # Assert mock was called
+    mock_func.assert_called_once()
+    assert result == "mocked result"
+```
+
+### Mocking Neo4j Queries
+
+```python
+def test_neo4j_query(mocker):
+    """Test code that queries Neo4j."""
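+    # MagicMock (not Mock) is required so the `with driver.session()` context-manager protocol is supported
+    mock_driver = mocker.MagicMock()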
+    mock_session = mocker.Mock()
+    mock_result = mocker.Mock()
+
+    # Setup mock chain
+    mock_driver.session.return_value.__enter__.return_value = mock_session
+    mock_session.run.return_value = mock_result
+    mock_result.data.return_value = [{"id": "123", "title": "Test"}]
+
+    # Test your code
+    result = query_neo4j(mock_driver, "MATCH (n) RETURN n")
+
+    assert result[0]["id"] == "123"
+    mock_session.run.assert_called_once()
+```
+
+### Mocking Async Functions
+
+```python
+import pytest
+
+@pytest.mark.asyncio
+async def test_async_mock(mocker):
+    """Test with async mock."""
+    # Create async mock
+    mock_async_func = mocker.AsyncMock(return_value={"success": True})
+
+    # Patch the async function
+    mocker.patch('services.memory_store.async_function', mock_async_func)
+
+    # Call code that uses it
+    result = await some_code_using_async_function()
+
+    # Assert
+    assert result["success"] is True
+    mock_async_func.assert_awaited_once()
+```
+
+### Mocking Environment Variables
+
+```python
+def test_with_env_vars(mocker):
+    """Test with environment variables."""
+    mocker.patch.dict('os.environ', {
+        'NEO4J_URI': 'bolt://test:7687',
+        'NEO4J_USER': 'test',
+        'NEO4J_PASSWORD': 'testpass'
+    })
+
+    from core.config import settings
+    assert settings.neo4j_uri == 'bolt://test:7687'
+```
+
+## Coverage Requirements
+
+### Coverage Goals
+
+- **New Features**: 80%+ coverage
+- **Bug Fixes**: 100% coverage of fixed code path
+- **Critical Paths**: 90%+ coverage
+- **Overall Project**: 70%+ coverage
+
+### Checking Coverage
+
+```bash
+# Generate coverage report
+pytest tests/ --cov=services --cov=api --cov=mcp_tools --cov-report=term-missing
+
+# Coverage output shows:
+# - Lines covered
+# - Lines missed
+# - Coverage percentage
+# - Missing line numbers
+```
+
+### Improving Coverage
+
+```bash
+# Find uncovered code (hide files that are already fully covered)
+pytest tests/ --cov=services --cov-report=term-missing:skip-covered
+
+# Focus on specific module
+pytest tests/ --cov=services.memory_store --cov-report=term-missing
+
+# Generate HTML report for detailed view
+pytest tests/ --cov=services --cov-report=html
+open htmlcov/index.html
+```
+
+### Coverage Configuration
+
+Coverage settings in `pyproject.toml`:
+
+```toml
+[tool.coverage.run]
+source = ["mcp_tools", "services", "api", "core"]
+omit = [
+    "*/tests/*",
+    "*/test_*.py",
+    "*/__pycache__/*",
+]
+
+[tool.coverage.report]
+exclude_lines = [
+    "pragma: no cover",
+    "def __repr__",
+    "raise AssertionError",
+    "raise NotImplementedError",
+    "if __name__ == .__main__.:",
+    "if TYPE_CHECKING:",
+    "@abstractmethod",
+]
+```
+
+## CI/CD Testing
+
+### GitHub Actions
+
+Tests run automatically on:
+- **Push to main/develop** - Full test suite
+- **Pull requests** - Full test suite with coverage
+- **Scheduled** - Nightly integration tests
+
+### CI Test Configuration
+
+See `.github/workflows/ci.yml` and `.github/workflows/pr-tests.yml`.
+
+#### Test Matrix
+
+Tests run on:
+- **Python versions**: 3.13
+- **OS**: Ubuntu latest
+- **Neo4j**: 5.14
+
+#### Running Tests Locally Like CI
+
+```bash
+# Start Neo4j with test configuration
+docker run -d \
+  --name neo4j-test \
+  -p 7687:7687 \
+  -e NEO4J_AUTH=neo4j/testpassword \
+  -e NEO4J_PLUGINS='["apoc"]' \
+  neo4j:5.14
+
+# Set environment variables
+export NEO4J_URI=bolt://localhost:7687
+export NEO4J_USER=neo4j
+export NEO4J_PASSWORD=testpassword
+
+# Run tests like CI
+pytest tests/ -v --tb=short --cov=services --cov=api --cov=mcp_tools
+
+# Cleanup
+docker stop neo4j-test
+docker rm neo4j-test
+```
+
+## Best Practices
+
+### Do's
+
+✅ **Write tests first** (TDD) when fixing bugs
+✅ **Use descriptive test names** that explain what is tested
+✅ **Keep tests independent** - No shared state between tests
+✅ **Use fixtures** for common setup
+✅ **Mock external dependencies** in unit tests
+✅ **Test edge cases** and error conditions
+✅ **Use parametrized tests** for multiple similar scenarios
+✅ **Add docstrings** to complex tests
+✅ **Clean up resources** (use fixtures with yield)
+✅ **Test async code** with pytest-asyncio
+
+### Don'ts
+
+❌ **Don't test framework code** (FastAPI, Neo4j internals)
+❌ **Don't write flaky tests** (random failures)
+❌ **Don't use time.sleep()** in tests (use proper async)
+❌ **Don't leave debug code** (print statements, breakpoints)
+❌ **Don't skip tests** without good reason and documentation
+❌ **Don't test implementation details** - Test behavior
+❌ **Don't share state** between tests
+❌ **Don't commit commented-out tests**
+
+### Test Naming Conventions
+
+Good test names:
+```python
+def test_add_memory_with_valid_data_returns_success()
+def test_search_memories_with_no_results_returns_empty_list()
+def test_invalid_memory_type_raises_value_error()
+async def test_concurrent_memory_additions_are_thread_safe()
+```
+
+Poor test names:
+```python
+def test_memory()  # Too vague
+def test_1()  # No description
+def test_it_works()  # What works?
+def test_memory_store_add_memory_function_test()  # Redundant
+```
+
+### Example Test File
+
+```python
+"""
+Tests for memory store service.
+
+This module tests memory CRUD operations, search functionality,
+and memory relationships.
+"""
+import pytest
+from typing import Dict, Any
+
+from services.memory_store import MemoryStore
+
+
+class TestMemoryStore:
+    """Test suite for MemoryStore service."""
+
+    @pytest.fixture
+    async def memory_store(self):
+        """Provide initialized memory store."""
+        store = MemoryStore()
+        await store.initialize()
+        yield store
+        await store.cleanup()
+
+    @pytest.fixture
+    def sample_memory(self) -> Dict[str, Any]:
+        """Provide sample memory data."""
+        return {
+            "project_id": "test-project",
+            "memory_type": "decision",
+            "title": "Use PostgreSQL",
+            "content": "Selected PostgreSQL for main database",
+            "reason": "Need advanced JSON support",
+            "importance": 0.8,
+            "tags": ["database", "architecture"]
+        }
+
+    @pytest.mark.unit
+    async def test_add_memory_success(self, memory_store, sample_memory):
+        """Test adding a memory successfully."""
+        result = await memory_store.add_memory(**sample_memory)
+
+        assert result["success"] is True
+        assert "memory_id" in result
+        assert result["memory"]["title"] == sample_memory["title"]
+
+    @pytest.mark.unit
+    @pytest.mark.parametrize("invalid_type", ["invalid", "", None])
+    async def test_add_memory_invalid_type_fails(
+        self, memory_store, sample_memory, invalid_type
+    ):
+        """Test adding memory with invalid type fails."""
+        sample_memory["memory_type"] = invalid_type
+
+        with pytest.raises(ValueError):
+            await memory_store.add_memory(**sample_memory)
+
+    @pytest.mark.integration
+    async def test_search_memories_with_neo4j(self, memory_store, sample_memory):
+        """Test searching memories with real Neo4j."""
+        # Add memory
+        await memory_store.add_memory(**sample_memory)
+
+        # Search
+        results = await memory_store.search_memories(
+            project_id=sample_memory["project_id"],
+            query="PostgreSQL"
+        )
+
+        assert len(results) > 0
+        assert results[0]["title"] == sample_memory["title"]
+```
+
+## Troubleshooting Tests
+
+### Common Issues
+
+**Tests fail with "fixture not found":**
+```bash
+# Check fixture is defined in conftest.py or test file
+# Check fixture name spelling
+# Check fixture scope
+```
+
+**Async tests fail:**
+```bash
+# Ensure @pytest.mark.asyncio is present
+# Check pytest-asyncio is installed
+pip install pytest-asyncio
+```
+
+**Neo4j connection failures:**
+```bash
+# Check Neo4j is running
+docker ps | grep neo4j
+
+# Check connection details in .env
+NEO4J_URI=bolt://localhost:7687
+```
+
+**Import errors in tests:**
+```bash
+# Ensure package is installed in editable mode
+pip install -e .
+
+# Check PYTHONPATH
+export PYTHONPATH="${PWD}:${PYTHONPATH}"
+```
+
+### Getting Help
+
+- Check test logs: `pytest tests/ -vv`
+- Review CI test results on GitHub
+- Search issues: [GitHub Issues](https://github.com/royisme/codebase-rag/issues)
+- Ask in discussions
+
+## Next Steps
+
+- Read [Contributing Guide](./contributing.md) for overall workflow
+- Review [Development Setup](./setup.md) for environment configuration
+- Explore existing tests in `tests/` directory
+- Write tests for your features!
+
+Happy testing!
diff --git a/docs/development/version-management.md b/docs/development/version-management.md
new file mode 100644
index 0000000..e9540df
--- /dev/null
+++ b/docs/development/version-management.md
@@ -0,0 +1,391 @@
+# Version Management
+
+Complete guide to version management in the Code Graph Knowledge System.
+
+## Overview
+
+We use **Semantic Versioning** (SemVer) with automated tooling to ensure consistency across all version references.
+
+**Version Format**: `MAJOR.MINOR.PATCH` (e.g., `0.7.0`, `1.2.3`)
+
+**Version Rules**:
+- **MAJOR** (x.0.0): Breaking changes, major architecture updates
+- **MINOR** (0.x.0): New features, backward compatible
+- **PATCH** (0.0.x): Bug fixes, small improvements
+
+## Tools
+
+### bump-my-version
+
+We use [bump-my-version](https://github.com/callowayproject/bump-my-version) (the maintained successor to `bump2version`) to automate version updates.
+
+**Installation**:
+```bash
+pip install bump-my-version
+```
+
+**Configuration**: `.bumpversion.toml`
+
+## Version Sources
+
+### Single Source of Truth: `pyproject.toml`
+
+The authoritative version is stored in `pyproject.toml`:
+
+```toml
+[project]
+version = "0.7.0"
+```
+
+### Synchronized Files
+
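+These are kept in sync automatically by the release tooling:
+
+1. **`pyproject.toml`** - Package version (updated by bump-my-version)
+2. **`src/__version__.py`** - Runtime version access (updated by bump-my-version)
+3. **`docs/changelog.md`** - Version history (generated by `scripts/generate-changelog.py`, which `scripts/bump-version.sh` runs before bump-my-version)
+4. **Git tag** - `v0.7.0` (created by bump-my-version)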
+
+## Bumping Versions
+
+### Method 1: Automated Script (Recommended)
+
+```bash
+# Patch version (0.7.0 → 0.7.1)
+./scripts/bump-version.sh patch
+
+# Minor version (0.7.1 → 0.8.0)
+./scripts/bump-version.sh minor
+
+# Major version (0.8.0 → 1.0.0)
+./scripts/bump-version.sh major
+
+# Dry run to preview changes
+./scripts/bump-version.sh minor --dry-run
+```
+
+**What the script does**:
+1. ✅ Validates no uncommitted changes
+2. ✅ Shows current and new version
+3. ✅ Asks for confirmation
+4. ✅ Updates all version files
+5. ✅ Creates git commit
+6. ✅ Creates git tag
+7. ✅ Shows next steps
+
+### Method 2: Manual bump-my-version
+
+```bash
+# Bump patch version
+bump-my-version bump patch
+
+# Bump minor version
+bump-my-version bump minor
+
+# Bump major version
+bump-my-version bump major
+
+# Dry run
+bump-my-version bump minor --dry-run
+
+# Show the versions each bump type would produce
+bump-my-version show-bump
+```
+
+### Method 3: Manual (Not Recommended)
+
+If you need to bump manually:
+
+```bash
+# 1. Update version in all files
+vim pyproject.toml          # version = "0.8.0"
+vim src/__version__.py      # __version__ = "0.8.0"
+vim docs/changelog.md       # Add new version entry
+
+# 2. Commit changes
+git add pyproject.toml src/__version__.py docs/changelog.md
+git commit -m "chore: bump version to 0.8.0"
+
+# 3. Create tag
+git tag -a v0.8.0 -m "Release v0.8.0"
+
+# 4. Push
+git push origin main
+git push origin v0.8.0
+```
+
+## Release Workflow
+
+### Standard Release
+
+```bash
+# 1. Ensure you're on main and up to date
+git checkout main
+git pull origin main
+
+# 2. Update changelog with release notes
+vim docs/changelog.md
+# Add release notes under [Unreleased] section
+
+# 3. Commit changelog updates
+git add docs/changelog.md
+git commit -m "docs: update changelog for v0.8.0"
+git push origin main
+
+# 4. Bump version (creates commit + tag)
+./scripts/bump-version.sh minor
+
+# 5. Push changes and tag
+git push origin main
+git push origin v0.8.0
+
+# 6. GitHub Actions automatically:
+#    - Builds Docker images (minimal, standard, full)
+#    - Pushes to Docker Hub with version tags
+#    - Creates GitHub Release
+```
+
+### Hotfix Release
+
+For urgent bug fixes on production:
+
+```bash
+# 1. Create hotfix branch from tag
+git checkout -b hotfix/v0.7.1 v0.7.0
+
+# 2. Fix the bug
+git add <files>
+git commit -m "fix: critical bug description"
+
+# 3. Bump patch version
+./scripts/bump-version.sh patch
+
+# 4. Push and create PR
+git push origin hotfix/v0.7.1
+
+# 5. After the PR is merged, push the tag that the bump in step 3 already created
+git checkout main
+git pull origin main
+git push origin v0.7.1
+```
+
+## Docker Image Versioning
+
+When you push a tag `v0.7.0`, GitHub Actions creates these Docker images:
+
+### Full Version Tags
+```bash
+royisme/codebase-rag:0.7.0-minimal
+royisme/codebase-rag:0.7.0-standard
+royisme/codebase-rag:0.7.0-full
+```
+
+### Minor Version Tags (auto-updated for patches)
+```bash
+royisme/codebase-rag:0.7-minimal
+royisme/codebase-rag:0.7-standard
+royisme/codebase-rag:0.7-full
+```
+
+### Latest Tags (from main branch)
+```bash
+royisme/codebase-rag:minimal
+royisme/codebase-rag:standard
+royisme/codebase-rag:full
+royisme/codebase-rag:latest  # Points to full
+```
+
+### Development Tags (main branch, no tag)
+```bash
+royisme/codebase-rag:dev-minimal
+royisme/codebase-rag:dev-standard
+royisme/codebase-rag:dev-full
+```
+
+## Runtime Version Access
+
+### Python Code
+
+```python
+from src.__version__ import __version__, get_features
+
+# Get version string
+print(f"Version: {__version__}")  # "0.7.0"
+
+# Get version tuple
+from src.__version__ import __version_info__
+print(__version_info__)  # (0, 7, 0)
+
+# Check features
+features = get_features()
+if features["auto_extraction"]:
+    print("Auto-extraction available")
+```
+
+### API Endpoint
+
+```bash
+# Health endpoint includes version
+curl http://localhost:8000/api/v1/health
+
+{
+  "status": "healthy",
+  "version": "0.7.0",
+  "deployment_mode": "full"
+}
+```
+
+### MCP Tool
+
+```json
+{
+  "tool": "system_info",
+  "response": {
+    "version": "0.7.0",
+    "features": ["code_graph", "memory_store", "auto_extraction"]
+  }
+}
+```
+
+## Version Validation in CI/CD
+
+GitHub Actions validates version consistency:
+
+```yaml
+- name: Validate Version Consistency
+  run: |
+    # Get version from pyproject.toml
+    PROJECT_VERSION=$(grep '^version = ' pyproject.toml | cut -d'"' -f2)
+
+    # Get version from __version__.py
+    VERSION_PY=$(grep '__version__ = ' src/__version__.py | cut -d'"' -f2)
+
+    # Get tag version (if tagged)
+    if [[ $GITHUB_REF == refs/tags/* ]]; then
+      TAG_VERSION=${GITHUB_REF#refs/tags/v}
+      if [[ "$PROJECT_VERSION" != "$TAG_VERSION" ]]; then
+        echo "Error: Version mismatch!"
+        echo "pyproject.toml: $PROJECT_VERSION"
+        echo "Git tag: $TAG_VERSION"
+        exit 1
+      fi
+    fi
+
+    # Validate Python version file
+    if [[ "$PROJECT_VERSION" != "$VERSION_PY" ]]; then
+      echo "Error: Version mismatch!"
+      echo "pyproject.toml: $PROJECT_VERSION"
+      echo "__version__.py: $VERSION_PY"
+      exit 1
+    fi
+
+    echo "✓ All versions consistent: $PROJECT_VERSION"
+```
+
+## Changelog Format
+
+Follow this format in `docs/changelog.md`:
+
+```markdown
+# Changelog
+
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added
+- New features go here
+
+### Changed
+- Changes in existing functionality
+
+### Fixed
+- Bug fixes
+
+## [0.8.0] - 2025-01-20
+
+### Added
+- Feature A
+- Feature B
+
+### Changed
+- Updated dependency X
+
+### Fixed
+- Bug #123
+
+## [0.7.0] - 2025-01-15
+
+...
+```
+
+## Troubleshooting
+
+### Version mismatch error
+
+```bash
+# If versions are out of sync, manually fix:
+vim pyproject.toml src/__version__.py
+
+# Then commit
+git add pyproject.toml src/__version__.py
+git commit -m "fix: synchronize version numbers"
+```
+
+### Tag already exists
+
+```bash
+# Delete local tag
+git tag -d v0.7.0
+
+# Delete remote tag
+git push origin :refs/tags/v0.7.0
+
+# Recreate tag
+git tag -a v0.7.0 -m "Release v0.7.0"
+git push origin v0.7.0
+```
+
+### Uncommitted changes
+
+```bash
+# Stash changes
+git stash
+
+# Bump version
+./scripts/bump-version.sh patch
+
+# Restore changes
+git stash pop
+```
+
+## Best Practices
+
+1. **Always bump on main branch** - Never bump version on feature branches (the only exception is a hotfix branch, as described under Hotfix Release)
+2. **Update changelog first** - Write release notes before bumping
+3. **Use dry-run** - Preview changes with `--dry-run` flag
+4. **Test before release** - Ensure all tests pass before creating tag
+5. **Semantic meaning** - Follow SemVer strictly for predictability
+6. **Document breaking changes** - Clearly mark breaking changes in changelog
+
+## Quick Reference
+
+| Task | Command |
+|------|---------|
+| Bump patch | `./scripts/bump-version.sh patch` |
+| Bump minor | `./scripts/bump-version.sh minor` |
+| Bump major | `./scripts/bump-version.sh major` |
+| Dry run | `./scripts/bump-version.sh minor --dry-run` |
+| Show possible bumps | `bump-my-version show-bump` |
+| Manual bump | `bump-my-version bump patch` |
+| Push release | `git push origin main && git push origin v0.x.0` |
+
+## Resources
+
+- [Semantic Versioning](https://semver.org/)
+- [Keep a Changelog](https://keepachangelog.com/)
+- [bump-my-version Docs](https://callowayproject.github.io/bump-my-version/)
+- [Release Process Guide](release.md)
diff --git a/docs/faq.md b/docs/faq.md
new file mode 100644
index 0000000..e983278
--- /dev/null
+++ b/docs/faq.md
@@ -0,0 +1,1057 @@
+# Frequently Asked Questions (FAQ)
+
+Common questions and answers about the Code Graph Knowledge System.
+
+## Table of Contents
+
+- [General Questions](#general-questions)
+- [Deployment and Installation](#deployment-and-installation)
+- [Features and Capabilities](#features-and-capabilities)
+- [LLM Providers and Models](#llm-providers-and-models)
+- [Memory Store](#memory-store)
+- [Performance and Scaling](#performance-and-scaling)
+- [Cost and Resources](#cost-and-resources)
+- [Security and Privacy](#security-and-privacy)
+- [Integration and APIs](#integration-and-apis)
+- [Troubleshooting](#troubleshooting)
+
+## General Questions
+
+### What is Code Graph Knowledge System?
+
+Code Graph Knowledge System is an intelligent knowledge management system designed specifically for software development. It combines:
+
+- **Neo4j Graph Database**: Stores relationships between code, documents, and memories
+- **Vector Search**: Semantic search across your codebase and documentation
+- **LLM Integration**: AI-powered code analysis and question answering
+- **Memory Store**: Long-term knowledge persistence for AI agents
+- **MCP Protocol**: Integration with AI assistants like Claude
+
+**Use Cases:**
+- Intelligent code navigation and search
+- Automated documentation generation
+- AI-assisted development with persistent memory
+- Code relationship mapping
+- Database schema analysis
+
+### What's new in version 0.7?
+
+Version 0.7 introduces **Automatic Memory Extraction**:
+
+- Extract memories from AI conversations
+- Analyze git commits for decisions and experiences
+- Mine code comments (TODO, FIXME, NOTE markers)
+- Suggest important memories from Q&A sessions
+- Batch extract from entire repositories
+
+See the [Changelog](./changelog.md) for complete details.
+
+### Is this project open source?
+
+Yes! Code Graph Knowledge System is open source; see the license file in the repository for the exact terms. You can:
+
+- View source code on [GitHub](https://github.com/royisme/codebase-rag)
+- Contribute improvements
+- Fork for custom needs
+- Use commercially (check license terms)
+
+### Who maintains this project?
+
+The project is maintained by a team of contributors led by [@royisme](https://github.com/royisme). See [Contributing Guide](./development/contributing.md) to join the community.
+
+## Deployment and Installation
+
+### What are the different deployment modes?
+
+Three Docker deployment modes are available:
+
+**1. Minimal Mode** (Code Graph only)
+- **Size**: ~800MB
+- **Features**: Code graph, vector search, basic RAG
+- **Best for**: Lightweight code analysis, resource-constrained environments
+- **RAM**: 2-4GB
+- **Pull**: `docker pull royisme/codebase-rag:minimal`
+
+**2. Standard Mode** (Code Graph + Memory)
+- **Size**: ~1.2GB
+- **Features**: Everything in Minimal + Memory Store for AI agents
+- **Best for**: AI-assisted development with memory
+- **RAM**: 4-8GB
+- **Pull**: `docker pull royisme/codebase-rag:standard`
+
+**3. Full Mode** (All Features)
+- **Size**: ~1.5GB
+- **Features**: Everything + Web UI, monitoring, Prometheus metrics
+- **Best for**: Production deployment, team environments
+- **RAM**: 8-16GB
+- **Pull**: `docker pull royisme/codebase-rag:full`
+
+### Which deployment mode should I choose?
+
+**Choose Minimal if:**
+- You only need code analysis
+- Running on limited resources (Raspberry Pi, small VPS)
+- Don't need memory features
+- Want smallest footprint
+
+**Choose Standard if:**
+- Using with AI assistants (Claude Desktop, VSCode)
+- Need memory persistence
+- Want balanced features and size
+- Typical development environment
+
+**Choose Full if:**
+- Need web UI for teams
+- Want monitoring and metrics
+- Production deployment
+- Multiple users
+- Have resources available
+
+### Can I switch between deployment modes later?
+
+Yes! Data is stored in Neo4j volumes, which are shared across modes:
+
+```bash
+# Start with minimal
+docker-compose -f docker/docker-compose.minimal.yml up -d
+
+# Later, switch to standard (data preserved)
+docker-compose -f docker/docker-compose.minimal.yml down
+docker-compose -f docker/docker-compose.standard.yml up -d
+```
+
+**Note**: Switching modes doesn't delete your data, but some features may not be available in smaller modes.
+
+### What are the minimum system requirements?
+
+**Development (Minimal Mode):**
+- CPU: 2 cores
+- RAM: 4GB
+- Disk: 10GB
+- OS: Linux, macOS, Windows with WSL2
+
+**Development (Standard/Full Mode):**
+- CPU: 4 cores
+- RAM: 8GB
+- Disk: 20GB
+- OS: Linux, macOS, Windows with WSL2
+
+**Production:**
+- CPU: 8 cores
+- RAM: 16GB (32GB with Ollama)
+- Disk: 50GB+ (depends on data size)
+- OS: Linux (Ubuntu 22.04+ recommended)
+
+### Can I run this without Docker?
+
+Yes, you can run natively:
+
+```bash
+# Install Python 3.13+
+python3.13 -m venv .venv
+source .venv/bin/activate
+
+# Install dependencies
+pip install -e .
+
+# Install Neo4j separately
+# Follow: https://neo4j.com/docs/operations-manual/current/installation/
+
+# Configure and run
+python start.py
+```
+
+**Note**: Docker is recommended for easier setup and isolation.
+
+### How do I upgrade to a new version?
+
+```bash
+# Pull new images
+docker pull royisme/codebase-rag:latest
+
+# Or specific version
+docker pull royisme/codebase-rag:0.7.0-full
+
+# Restart with new version
+docker-compose down
+docker-compose up -d
+
+# Check version
+curl http://localhost:8000/api/v1/health | jq .version
+```
+
+Your data is preserved in volumes across upgrades.
+
+## Features and Capabilities
+
+### What programming languages are supported?
+
+**Fully Supported** (with import/relationship analysis):
+- Python (`.py`)
+- TypeScript/JavaScript (`.ts`, `.tsx`, `.js`, `.jsx`)
+- Java (`.java`)
+- PHP (`.php`)
+- Go (`.go`)
+
+**Document Processing** (any file type):
+- Markdown (`.md`)
+- Text files (`.txt`)
+- Code files (analyzed as text if not in supported list)
+- SQL files (`.sql`) with schema parsing
+
+**Future Support** (planned):
+- Rust, C++, C#, Ruby, Kotlin
+
+### What's the difference between Memory Store and Knowledge Graph?
+
+**Knowledge Graph:**
+- **Purpose**: Store documents, code, and their relationships
+- **Content**: Code files, documentation, SQL schemas
+- **Search**: Vector similarity + graph traversal
+- **Updates**: Add/remove documents
+- **Use Case**: "Show me all files that import module X"
+
+**Memory Store:**
+- **Purpose**: Long-term knowledge for AI agents
+- **Content**: Decisions, preferences, experiences, conventions
+- **Search**: Semantic search + importance filtering
+- **Updates**: Add, update, supersede memories
+- **Use Case**: "Remember we decided to use PostgreSQL because..."
+
+**Analogy:**
+- Knowledge Graph = Your codebase and documentation
+- Memory Store = Your project's institutional memory
+
+### Can I use this with GitHub Copilot or other AI assistants?
+
+**GitHub Copilot**: No direct integration (closed API)
+
+**Claude Desktop**: ✅ Yes, via MCP protocol
+```json
+// claude_desktop_config.json
+{
+  "mcpServers": {
+    "code-graph": {
+      "command": "python",
+      "args": ["/path/to/start_mcp.py"]
+    }
+  }
+}
+```
+
+**VS Code with MCP**: ✅ Yes (requires MCP extension)
+
+**Any Tool with HTTP API**: ✅ Yes, use REST API
+```bash
+curl -X POST http://localhost:8000/api/v1/knowledge/query \
+  -H "Content-Type: application/json" -d '{"query": "how does authentication work?"}'
+```
+
+### Can I analyze private repositories?
+
+Yes! All processing is local:
+
+1. **Clone your private repo** to your machine
+2. **Ingest with MCP**:
+   ```
+   Use ingest_directory tool with your repo path
+   ```
+3. **Or via API**:
+   ```bash
+   curl -X POST http://localhost:8000/api/v1/ingest/directory \
+     -d '{"path": "/path/to/private/repo"}'
+   ```
+
+**Privacy**: No code leaves your machine (unless you use cloud LLM providers).
+
+### Does it work with monorepos?
+
+Yes, monorepos are fully supported:
+
+```bash
+# Ingest entire monorepo
+ingest_directory(/path/to/monorepo)
+
+# Or specific workspaces
+ingest_directory(/path/to/monorepo/packages/frontend)
+ingest_directory(/path/to/monorepo/packages/backend)
+
+# Query across entire monorepo
+query_knowledge("How does the API communicate with frontend?")
+```
+
+**Tip**: Use project-specific tags to organize large monorepos.
+
+### Can I customize the code analysis?
+
+Yes, several customization options:
+
+**1. File Patterns:**
+```python
+# Include/exclude specific patterns
+include_globs = ["**/*.py", "**/*.ts"]
+exclude_globs = ["**/test_*.py", "**/node_modules/**"]
+```
+
+**2. Chunk Size:**
+```env
+CHUNK_SIZE=512
+CHUNK_OVERLAP=50
+```
+
+**3. Embedding Model:**
+```env
+EMBEDDING_PROVIDER=ollama
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text  # Fast
+# or
+OLLAMA_EMBEDDING_MODEL=mxbai-embed-large  # More accurate
+```
+
+**4. Custom Parsers:**
+Extend `services/pipeline/transformers.py` to add new language support.
+
+## LLM Providers and Models
+
+### Which LLM provider should I use?
+
+**Ollama (Recommended for Development):**
+- ✅ Free
+- ✅ No API keys needed
+- ✅ Unlimited requests
+- ✅ Privacy (100% local)
+- ❌ Requires powerful hardware
+- ❌ Slower than cloud APIs
+
+**OpenAI (Recommended for Production):**
+- ✅ Fast and accurate
+- ✅ No local resources needed
+- ✅ Best quality results
+- ❌ Costs money per request
+- ❌ Requires API key
+- ❌ Data sent to OpenAI
+
+**Google Gemini:**
+- ✅ Good free tier
+- ✅ Fast response times
+- ✅ Large context window
+- ❌ Requires API key
+- ❌ Data sent to Google
+
+**OpenRouter:**
+- ✅ Access to many models
+- ✅ Pay-as-you-go pricing
+- ✅ Model flexibility
+- ❌ Requires API key
+- ❌ Variable quality
+
+### Can I use multiple LLM providers?
+
+Currently, one provider at a time is supported:
+
+```env
+# Choose one
+LLM_PROVIDER=ollama  # or openai, gemini, openrouter
+```
+
+**Workaround**: Run multiple instances with different configurations on different ports.
+
+**Future**: Multi-provider support is planned for v0.8.
+
+### What Ollama models do you recommend?
+
+**For LLM (text generation):**
+
+- **llama3.2:3b** - Fast, good for development (4GB RAM)
+- **mistral:7b** - Balanced quality and speed (8GB RAM)
+- **llama3.1:8b** - High quality (8GB RAM)
+- **codellama:13b** - Best for code (16GB RAM)
+
+**For Embeddings:**
+
+- **nomic-embed-text** - Fast, good quality (recommended)
+- **mxbai-embed-large** - Better quality, slower
+- **all-minilm** - Smallest, fastest
+
+**Install:**
+```bash
+ollama pull llama3.2:3b
+ollama pull nomic-embed-text
+```
+
+### How do I switch LLM providers?
+
+Simply update `.env` and restart:
+
+```env
+# From Ollama to OpenAI
+LLM_PROVIDER=openai
+OPENAI_API_KEY=sk-your-key
+OPENAI_MODEL=gpt-4
+
+EMBEDDING_PROVIDER=openai
+OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+```
+
+```bash
+# Restart
+docker-compose restart api
+# or
+pkill -f start.py && python start.py
+```
+
+No data migration needed - embeddings are recalculated automatically.
+
+## Memory Store
+
+### What is the Memory Store for?
+
+Memory Store provides **long-term project knowledge** for AI agents:
+
+**Without Memory:**
+```
+Session 1: "Use PostgreSQL" → AI learns
+Session 2: AI forgot, suggests MySQL again ❌
+```
+
+**With Memory:**
+```
+Session 1: "Use PostgreSQL" → Saved to memory
+Session 2: AI remembers: "You decided on PostgreSQL" ✅
+```
+
+**Types of Memories:**
+- **Decisions**: "Chose JWT for auth"
+- **Preferences**: "Use raw SQL over ORM"
+- **Experiences**: "Redis fails with localhost in Docker"
+- **Conventions**: "API endpoints use kebab-case"
+- **Plans**: "Migrate to PostgreSQL 16"
+
+### How is Memory Store different from conversation history?
+
+**Conversation History** (Short-term):
+- Temporary (session only)
+- All messages (including noise)
+- Lost when session ends
+- Not searchable
+- No importance ranking
+
+**Memory Store** (Long-term):
+- Permanent (persisted in Neo4j)
+- Curated knowledge only
+- Survives restarts/sessions
+- Searchable by topic/tag
+- Importance-ranked
+
+Think of Memory Store as your project's **institutional memory**.
+
+### Do I need Memory Store?
+
+**You need Memory Store if:**
+- Using AI assistants (Claude, Copilot, etc.)
+- Working on long-term projects
+- Multiple people/agents on same project
+- Want AI to remember past decisions
+- Need consistent AI behavior
+
+**You don't need Memory Store if:**
+- Just analyzing code (use Minimal mode)
+- One-off queries
+- Don't use AI assistants
+- Limited to Knowledge Graph features
+
+### How do I add memories manually?
+
+**Via MCP (in Claude Desktop):**
+```
+Add a memory:
+- Type: decision
+- Title: Use PostgreSQL
+- Content: Selected PostgreSQL for main database
+- Reason: Need advanced JSON support
+- Importance: 0.9
+```
+
+**Via HTTP API:**
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/add \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "myapp",
+    "memory_type": "decision",
+    "title": "Use PostgreSQL",
+    "content": "Selected PostgreSQL for main database",
+    "reason": "Need advanced JSON support",
+    "importance": 0.9,
+    "tags": ["database", "architecture"]
+  }'
+```
+
+### Can memories be automatically extracted?
+
+Yes! Version 0.7 added automatic extraction:
+
+**1. From Conversations:**
+```python
+# Extract from AI chat history
+extract_from_conversation(project_id, conversation_history)
+```
+
+**2. From Git Commits:**
+```python
+# Analyze git commits
+extract_from_git_commit(project_id, commit_sha, commit_message)
+```
+
+**3. From Code Comments:**
+```python
+# Mine TODO, FIXME, NOTE markers
+extract_from_code_comments(project_id, file_path)
+```
+
+**4. From Repository:**
+```python
+# Full repo analysis
+batch_extract_from_repository(project_id, repo_path)
+```
+
+See [Memory Extraction Guide](./guide/memory/extraction.md) for details.
+
+### How do I search memories?
+
+**MCP Tool:**
+```
+search_memories(
+  project_id="myapp",
+  query="database decisions",
+  memory_type="decision",
+  min_importance=0.7
+)
+```
+
+**HTTP API:**
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/search -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "myapp",
+    "query": "database",
+    "memory_type": "decision",
+    "min_importance": 0.7
+  }'
+```
+
+**Results ranked by:**
+- Relevance to query
+- Importance score
+- Recency
+
+## Performance and Scaling
+
+### How fast is document processing?
+
+**Speed depends on:**
+- File size
+- LLM provider
+- Hardware specs
+- Chunk size
+
+**Typical Performance:**
+
+| File Size | Ollama (Local) | OpenAI (Cloud) |
+|-----------|----------------|----------------|
+| 10KB      | 2-5 seconds    | 1-2 seconds    |
+| 100KB     | 20-30 seconds  | 5-10 seconds   |
+| 1MB       | 3-5 minutes    | 30-60 seconds  |
+
+**Tips for faster processing:**
+- Use smaller embedding models
+- Reduce chunk size
+- Use OpenAI (fastest)
+- Process in background/batch
+
+### Can it handle large codebases?
+
+Yes! Tested with:
+
+- **Large PHP Project**: 25,000+ files, 5GB code
+- **Oracle Database Schema**: 356 tables, 4,511 columns
+- **Monorepo**: Multiple packages, 100,000+ LOC
+
+**Performance Tips:**
+1. **Use batch ingestion**: Process directories in background
+2. **Filter files**: Use `.gitignore` patterns to skip unnecessary files
+3. **Increase resources**: Allocate more RAM to Neo4j
+4. **Add indexes**: Create Neo4j indexes on frequently queried fields
+
+### What's the maximum document size?
+
+**Recommended Limits:**
+- **API upload**: 50KB (configurable)
+- **Directory processing**: No limit (batch mode)
+- **Single file processing**: 1MB recommended
+
+**For Large Files:**
+```bash
+# Use MCP client with automatic temp file handling
+# Or process directory in batch mode
+ingest_directory(/path/to/large/files)
+```
+
+### How do I improve query performance?
+
+**1. Add Neo4j Indexes:**
+```cypher
+CREATE INDEX document_content IF NOT EXISTS
+FOR (d:Document) ON (d.content);
+
+CREATE INDEX memory_tags IF NOT EXISTS
+FOR (m:Memory) ON (m.tags);
+```
+
+**2. Optimize Chunk Size:**
+```env
+CHUNK_SIZE=512  # Smaller = faster search
+```
+
+**3. Use Faster Embeddings:**
+```env
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text  # Fast
+OPENAI_EMBEDDING_MODEL=text-embedding-3-small  # Fastest
+```
+
+**4. Increase Neo4j Memory:**
+```yaml
+environment:
+  - NEO4J_server_memory_heap_max__size=4G
+```
+
+**5. Limit Results:**
+```python
+# Request fewer results
+query_knowledge("...", limit=10)  # Instead of 100
+```
+
+## Cost and Resources
+
+### How much does it cost to run?
+
+**Infrastructure Costs:**
+
+**Local Deployment (Ollama):**
+- Hardware: One-time cost ($1000-$3000 for good GPU)
+- Electricity: ~$20-50/month (GPU running 24/7)
+- Internet: Standard connection sufficient
+- **Total**: ~$20-50/month (electricity only) after the initial hardware investment
+
+**Cloud Deployment (Minimal, OpenAI):**
+- VPS: $20-40/month (4GB RAM, 2 CPU)
+- Neo4j Cloud: $65/month (Aura free tier available)
+- OpenAI API: Variable (see below)
+- **Total**: $20-105/month + API costs
+
+**Cloud Deployment (Full, OpenAI):**
+- VPS: $80-160/month (16GB RAM, 8 CPU)
+- Neo4j Cloud: $65/month
+- OpenAI API: Variable
+- **Total**: $145-225/month + API costs
+
+**LLM API Costs:**
+
+**OpenAI (GPT-4):**
+- Embeddings: $0.00002/1K tokens (~$0.02 per 1MB document)
+- Queries: $0.03/1K tokens (~$0.30 per complex query)
+- **Estimate**: $10-50/month for moderate use
+
+**OpenAI (GPT-3.5):**
+- 10x cheaper than GPT-4
+- **Estimate**: $1-5/month for moderate use
+
+**Google Gemini:**
+- Free tier: 15 RPM, 1M tokens/day
+- Paid: $0.35/1M input tokens
+- **Estimate**: Free to $5/month
+
+**OpenRouter:**
+- Variable per model
+- Usually cheaper than direct APIs
+- **Estimate**: $5-20/month
+
+### Can I use the free tiers of LLM providers?
+
+**Ollama**: ✅ Completely free, no limits
+
+**Google Gemini**: ✅ Generous free tier (15 RPM, 1M tokens/day)
+
+**OpenAI**: ❌ No free tier, but trial credits available
+
+**OpenRouter**: ⚠️ Small free tier, then pay-as-you-go
+
+**Recommendation**: Start with Ollama or Gemini free tier.
+
+### What are the hosting costs?
+
+**Self-Hosted (Recommended for Development):**
+- Your own machine: $0/month
+- Electricity cost: ~$10-30/month (if running 24/7)
+
+**VPS Hosting:**
+- DigitalOcean Droplet: $24/month (4GB RAM)
+- Linode: $24/month (4GB RAM)
+- AWS EC2: $30-50/month (t3.medium)
+- Google Cloud: $25-45/month (e2-standard-2)
+
+**Platform-as-a-Service:**
+- Railway.app: ~$20/month (with free trial)
+- Render.com: ~$25/month
+- Heroku: Not recommended (disk limitations)
+
+**Managed Neo4j:**
+- Neo4j Aura Free: $0/month (limited)
+- Neo4j Aura Pro: $65/month (production)
+
+### How can I reduce costs?
+
+**1. Use Ollama Locally:**
+- Zero API costs
+- One-time hardware investment
+
+**2. Use Smaller Models:**
+```env
+OLLAMA_MODEL=llama3.2:3b  # Instead of 13b
+OPENAI_MODEL=gpt-3.5-turbo  # Instead of gpt-4
+```
+
+**3. Batch Operations:**
+- Process multiple files at once
+- Reduce API calls with caching
+
+**4. Optimize Chunk Size:**
+```env
+CHUNK_SIZE=1024  # Larger chunks = fewer embeddings = lower cost
+```
+
+**5. Use Minimal Mode:**
+- Smaller Docker image
+- Lower resource requirements
+
+**6. Self-Host Neo4j:**
+- Avoid managed database costs
+- Use Docker Neo4j
+
+## Security and Privacy
+
+### Is my code sent to external services?
+
+**Depends on LLM provider:**
+
+**Ollama (Local):**
+- ✅ 100% local processing
+- ✅ No data leaves your machine
+- ✅ Complete privacy
+
+**OpenAI/Gemini/OpenRouter:**
+- ⚠️ Code sent to provider for processing
+- ⚠️ Subject to provider's terms of service
+- ⚠️ Check provider's data retention policies
+
+**Recommendation**: Use Ollama for sensitive/proprietary code.
+
+### How is data stored?
+
+**Neo4j Database:**
+- Stored in Docker volumes (encrypted at rest if configured)
+- Local machine or private VPS
+- Not shared with external services
+
+**File System:**
+- Temporary files during processing (deleted after)
+- Logs (can contain query text, check before sharing)
+
+**No external storage** unless you explicitly configure cloud backups.
+
+### Can I use this in an enterprise environment?
+
+Yes, with considerations:
+
+**✅ Suitable for Enterprise:**
+- Self-hosted (complete control)
+- Local Ollama (no data leakage)
+- Isolated networks
+- Compliance with data residency requirements
+
+**⚠️ Considerations:**
+- Review LLM provider terms (if using cloud APIs)
+- Implement access controls
+- Secure Neo4j with authentication
+- Use HTTPS for API endpoints
+- Regular security updates
+
+**Enterprise Checklist:**
+- [ ] Use Ollama or enterprise LLM provider
+- [ ] Enable Neo4j authentication
+- [ ] Configure firewall rules
+- [ ] Set up SSL/TLS
+- [ ] Implement audit logging
+- [ ] Regular backups
+- [ ] Security scanning
+
+### Are there any security best practices?
+
+**1. Secure Neo4j:**
+```env
+NEO4J_AUTH=neo4j/strong-password-here
+NEO4J_dbms_security_auth__enabled=true
+```
+
+**2. Use Environment Variables:**
+```bash
+# Never commit .env to git
+echo ".env" >> .gitignore
+```
+
+**3. API Authentication** (not implemented yet, planned for v0.8):
+```python
+# Coming soon: JWT authentication for API
+```
+
+**4. Network Isolation:**
+```yaml
+# docker-compose.yml
+services:
+  api:
+    networks:
+      - internal  # Not exposed to internet
+```
+
+**5. Regular Updates:**
+```bash
+# Pull latest security patches
+docker pull royisme/codebase-rag:latest
+```
+
+**6. Audit Logging:**
+```env
+LOG_LEVEL=INFO  # Log all API access
+```
+
+## Integration and APIs
+
+### What APIs are available?
+
+**REST API** (HTTP):
+- `/api/v1/knowledge/query` - Query knowledge base
+- `/api/v1/knowledge/search` - Vector search
+- `/api/v1/documents/*` - Document management
+- `/api/v1/memory/*` - Memory operations
+- `/api/v1/sql/*` - SQL schema analysis
+
+**MCP Protocol** (AI Assistants):
+- 30 tools across 6 categories
+- Knowledge, Code Graph, Memory, Memory Extraction, Tasks, System
+- Compatible with Claude Desktop, VSCode
+
+**Real-time APIs**:
+- Server-Sent Events (SSE) for task monitoring
+- WebSocket (via NiceGUI monitoring UI)
+
+See [REST API Documentation](./api/rest.md) for details.
+
+### Can I integrate this with my CI/CD pipeline?
+
+Yes! Several integration options:
+
+**1. Post-commit Hook:**
+```bash
+#!/bin/bash
+# .git/hooks/post-commit (runs after the commit exists, so HEAD is the new commit)
+# Extract memories from the commit that was just created
+curl -X POST http://localhost:8000/api/v1/memory/extract/commit \
+  -d "{\"commit_sha\": \"$(git rev-parse HEAD)\"}"
+```
+
+**2. GitHub Actions:**
+```yaml
+# .github/workflows/code-analysis.yml
+name: Code Analysis
+on: [push]
+jobs:
+  analyze:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Analyze code
+        run: |
+          curl -X POST http://code-graph-server/api/v1/ingest/directory \
+            -d '{"path": "${{ github.workspace }}"}'
+```
+
+**3. Build Script:**
+```bash
+# In your build.sh
+python -c "
+from services.memory_store import MemoryStore
+# Auto-extract memories after build
+"
+```
+
+### How do I backup my data?
+
+**Neo4j Backup:**
+```bash
+# Export Neo4j data
+docker exec code-graph-neo4j neo4j-admin dump \
+  --database=neo4j --to=/backups/neo4j-backup.dump
+
+# Copy from container
+docker cp code-graph-neo4j:/backups/neo4j-backup.dump ./backups/
+
+# Or backup volume
+docker run --rm \
+  -v code-graph_neo4j_data:/data \
+  -v $(pwd)/backups:/backup \
+  alpine tar czf /backup/neo4j-data.tar.gz /data
+```
+
+**Restore Backup:**
+```bash
+# Stop Neo4j
+docker-compose stop neo4j
+
+# Restore
+docker cp ./backups/neo4j-backup.dump code-graph-neo4j:/backups/
+docker exec code-graph-neo4j neo4j-admin load \
+  --from=/backups/neo4j-backup.dump --database=neo4j --force
+
+# Restart
+docker-compose start neo4j
+```
+
+### Can I export my data?
+
+**Export Memories as JSON:**
+```bash
+curl http://localhost:8000/api/v1/memory/project/myapp/export > memories.json
+```
+
+**Export Knowledge Graph:**
+```cypher
+// In Neo4j Browser
+CALL apoc.export.json.all("/export/graph.json", {})
+```
+
+**Export to CSV:**
+```cypher
+MATCH (m:Memory)
+RETURN m.title, m.content, m.importance
+// Click "Export" in Neo4j Browser
+```
+
+## Troubleshooting
+
+### Where can I find logs?
+
+**Docker Logs:**
+```bash
+# Application logs
+docker logs code-graph-api
+
+# Neo4j logs
+docker logs code-graph-neo4j
+
+# Follow logs
+docker logs -f code-graph-api
+```
+
+**Local Logs:**
+```bash
+# Application logs
+tail -f logs/application.log
+
+# Debug logs
+tail -f logs/debug.log
+```
+
+### The application won't start. What should I check?
+
+**Quick Checklist:**
+
+1. **Neo4j Running:**
+   ```bash
+   docker ps | grep neo4j
+   ```
+
+2. **Environment Variables:**
+   ```bash
+   cat .env | grep NEO4J
+   ```
+
+3. **Dependencies Installed:**
+   ```bash
+   pip list | grep llama-index
+   ```
+
+4. **Port Available:**
+   ```bash
+   lsof -i :8000
+   ```
+
+5. **Logs for Errors:**
+   ```bash
+   docker logs code-graph-api | grep ERROR
+   ```
+
+See [Troubleshooting Guide](./troubleshooting.md) for detailed solutions.
+
+### Where can I get help?
+
+**Documentation:**
+- Main docs: https://code-graph.vantagecraft.dev
+- This FAQ
+- Troubleshooting guide
+- API documentation
+
+**Community:**
+- GitHub Issues: Bug reports and feature requests
+- GitHub Discussions: Questions and community help
+- Discord/Slack: Real-time chat (if available)
+
+**Support:**
+- Email maintainers for critical issues
+- Check existing issues before posting
+- Include logs and error messages
+
+### How do I report a bug?
+
+**Good Bug Report:**
+
+1. **Search existing issues** first
+2. **Use issue template** if available
+3. **Include**:
+   - System info (OS, Python version, Docker version)
+   - Steps to reproduce
+   - Expected vs actual behavior
+   - Error messages and logs
+   - Configuration (sanitized .env)
+4. **Create minimal reproduction** if possible
+
+See [Contributing Guide](./development/contributing.md) for details.
+
+### Is there a community forum or chat?
+
+Check the [GitHub Discussions](https://github.com/royisme/codebase-rag/discussions) for:
+- Questions and answers
+- Feature discussions
+- Show and tell
+- Community support
+
+**Coming soon**: Discord/Slack community (watch for announcements).
+
+## Still Have Questions?
+
+Can't find your answer here?
+
+1. Search the [full documentation](https://code-graph.vantagecraft.dev)
+2. Check [Troubleshooting Guide](./troubleshooting.md)
+3. Search [GitHub Issues](https://github.com/royisme/codebase-rag/issues)
+4. Ask in [GitHub Discussions](https://github.com/royisme/codebase-rag/discussions)
+5. Review the [source code](https://github.com/royisme/codebase-rag)
+
+**Found an error in this FAQ?** Please [open an issue](https://github.com/royisme/codebase-rag/issues/new) or submit a PR!
diff --git a/docs/getting-started/configuration.md b/docs/getting-started/configuration.md
new file mode 100644
index 0000000..9b7ac02
--- /dev/null
+++ b/docs/getting-started/configuration.md
@@ -0,0 +1,263 @@
+# Configuration
+
+This guide explains how to configure Code Graph Knowledge System for different deployment modes and providers.
+
+## Configuration Files
+
+### Environment Variables (.env)
+
+The primary configuration method uses `.env` file. Templates are provided for each deployment mode:
+
+- `docker/.env.template/.env.minimal` - Code Graph only
+- `docker/.env.template/.env.standard` - Code Graph + Memory
+- `docker/.env.template/.env.full` - All features
+
+## Deployment Mode Configuration
+
+### Minimal Mode (Code Graph Only)
+
+No LLM or embedding model required. Only Neo4j configuration needed:
+
+```bash
+# Neo4j Configuration
+NEO4J_URI=bolt://neo4j:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_secure_password_here
+NEO4J_DATABASE=neo4j
+
+# Deployment Mode
+DEPLOYMENT_MODE=minimal
+ENABLE_KNOWLEDGE_RAG=false
+ENABLE_AUTO_EXTRACTION=false
+```
+
+### Standard Mode (Code Graph + Memory)
+
+Requires embedding model for vector search:
+
+```bash
+# Neo4j Configuration
+NEO4J_URI=bolt://neo4j:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_secure_password_here
+NEO4J_DATABASE=neo4j
+
+# Deployment Mode
+DEPLOYMENT_MODE=standard
+ENABLE_KNOWLEDGE_RAG=false
+ENABLE_AUTO_EXTRACTION=false
+
+# Embedding Provider (choose one)
+EMBEDDING_PROVIDER=ollama  # or openai, gemini, huggingface
+
+# Ollama Configuration (if using Ollama)
+OLLAMA_BASE_URL=http://host.docker.internal:11434
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Or OpenAI Configuration
+# OPENAI_API_KEY=sk-...
+# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+
+# Or Google Gemini Configuration
+# GOOGLE_API_KEY=...
+# GEMINI_EMBEDDING_MODEL=models/embedding-001
+```
+
+### Full Mode (All Features)
+
+Requires both LLM and embedding model:
+
+```bash
+# Neo4j Configuration
+NEO4J_URI=bolt://neo4j:7687
+NEO4J_USER=neo4j
+NEO4J_PASSWORD=your_secure_password_here
+NEO4J_DATABASE=neo4j
+
+# Deployment Mode
+DEPLOYMENT_MODE=full
+ENABLE_KNOWLEDGE_RAG=true
+ENABLE_AUTO_EXTRACTION=true
+
+# LLM Provider (choose one)
+LLM_PROVIDER=ollama  # or openai, gemini, openrouter
+
+# Ollama Configuration
+OLLAMA_BASE_URL=http://host.docker.internal:11434
+OLLAMA_MODEL=llama3.2
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+EMBEDDING_PROVIDER=ollama
+
+# Or OpenAI Configuration
+# LLM_PROVIDER=openai
+# OPENAI_API_KEY=sk-...
+# OPENAI_MODEL=gpt-4o
+# OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+# EMBEDDING_PROVIDER=openai
+
+# Or Google Gemini Configuration
+# LLM_PROVIDER=gemini
+# GOOGLE_API_KEY=...
+# GEMINI_MODEL=gemini-1.5-pro
+# GEMINI_EMBEDDING_MODEL=models/embedding-001
+# EMBEDDING_PROVIDER=gemini
+```
+
+## Provider-Specific Configuration
+
+### Ollama
+
+Run locally for privacy and cost savings:
+
+```bash
+# Install Ollama
+curl -fsSL https://ollama.com/install.sh | sh
+
+# Pull models
+ollama pull llama3.2
+ollama pull nomic-embed-text
+
+# Configuration
+OLLAMA_BASE_URL=http://localhost:11434  # or host.docker.internal in Docker
+OLLAMA_MODEL=llama3.2
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+```
+
+**Recommended Models**:
+- LLM: `llama3.2` (3B), `mistral` (7B), `qwen2.5` (7B)
+- Embedding: `nomic-embed-text`, `mxbai-embed-large`
+
+### OpenAI
+
+Best performance, requires API key:
+
+```bash
+OPENAI_API_KEY=sk-proj-...
+OPENAI_MODEL=gpt-4o  # or gpt-4o-mini for lower cost
+OPENAI_EMBEDDING_MODEL=text-embedding-3-small
+```
+
+**Cost Considerations**:
+- GPT-4o: $5/$15 per 1M tokens (input/output)
+- GPT-4o-mini: $0.15/$0.60 per 1M tokens
+- Embeddings: $0.02 per 1M tokens
+
+### Google Gemini
+
+Good balance of performance and cost:
+
+```bash
+GOOGLE_API_KEY=AIza...
+GEMINI_MODEL=gemini-1.5-flash  # or gemini-1.5-pro
+GEMINI_EMBEDDING_MODEL=models/embedding-001
+```
+
+**Model Options**:
+- `gemini-1.5-flash`: Fast, lower cost
+- `gemini-1.5-pro`: Higher quality, more expensive
+
+### OpenRouter
+
+Access multiple providers through one API:
+
+```bash
+OPENROUTER_API_KEY=sk-or-v1-...
+OPENROUTER_MODEL=anthropic/claude-3.5-sonnet
+OPENAI_API_BASE=https://openrouter.ai/api/v1
+```
+
+### HuggingFace (Local Embeddings)
+
+Free local embeddings without API:
+
+```bash
+EMBEDDING_PROVIDER=huggingface
+HF_EMBEDDING_MODEL=sentence-transformers/all-MiniLM-L6-v2
+```
+
+## Advanced Configuration
+
+### Timeout Settings
+
+Adjust timeouts for large operations:
+
+```bash
+# Connection timeout (seconds)
+CONNECTION_TIMEOUT=30
+
+# Standard operation timeout (seconds)
+OPERATION_TIMEOUT=300
+
+# Large document processing timeout (seconds)
+LARGE_DOCUMENT_TIMEOUT=600
+```
+
+### Neo4j Performance Tuning
+
+For large repositories:
+
+```bash
+# Neo4j memory configuration (add to docker-compose.yml)
+NEO4J_server_memory_heap_initial__size=2G
+NEO4J_server_memory_heap_max__size=4G
+NEO4J_server_memory_pagecache_size=2G
+```
+
+### Monitoring and Logging
+
+Enable detailed logging:
+
+```bash
+# Enable monitoring UI
+ENABLE_MONITORING=true
+
+# Log level
+LOG_LEVEL=INFO  # or DEBUG for detailed logs
+
+# Enable SSE for real-time updates
+ENABLE_SSE=true
+```
+
+## Validation
+
+After configuration, validate settings:
+
+```bash
+# Test Neo4j connection
+docker exec -it codebase-rag-neo4j cypher-shell -u neo4j -p your_password
+
+# Test LLM provider (if Full mode)
+curl http://localhost:11434/api/generate -d '{"model":"llama3.2","prompt":"test"}'
+
+# Check service health
+curl http://localhost:8000/api/v1/health
+```
+
+## Configuration Best Practices
+
+1. **Use Strong Passwords**: Generate secure Neo4j passwords
+   ```bash
+   openssl rand -base64 32
+   ```
+
+2. **API Key Security**: Never commit `.env` to git
+   ```bash
+   echo ".env" >> .gitignore
+   ```
+
+3. **Resource Allocation**: Allocate sufficient memory for Neo4j based on repository size
+   - Small (<1000 files): 2GB heap
+   - Medium (<10000 files): 4GB heap
+   - Large (>10000 files): 8GB+ heap
+
+4. **Provider Selection**:
+   - **Privacy-sensitive**: Use Ollama (local)
+   - **Best quality**: Use OpenAI GPT-4o
+   - **Cost-effective**: Use Gemini Flash or Ollama
+   - **Minimal mode**: No LLM needed!
+
+## Next Steps
+
+- [Quick Start Guide](quickstart.md) - Start using the system
+- [Deployment Guides](../deployment/overview.md) - Detailed deployment instructions
+- [Troubleshooting](../troubleshooting.md) - Common configuration issues
diff --git a/docs/getting-started/installation.md b/docs/getting-started/installation.md
new file mode 100644
index 0000000..a3f42b5
--- /dev/null
+++ b/docs/getting-started/installation.md
@@ -0,0 +1,191 @@
+# Installation
+
+This guide covers different installation methods for Code Graph Knowledge System.
+
+## Prerequisites
+
+### Required
+- **Docker** (20.10+) and **Docker Compose** (2.0+)
+- **Neo4j** 5.0+ (included in Docker setup)
+
+### Optional (depending on deployment mode)
+- **Ollama** (for local LLM/embedding) - Full and Standard modes
+- **OpenAI API key** (for cloud LLM) - Full mode
+- **Google Gemini API key** (for cloud LLM) - Full mode
+
+## Installation Methods
+
+### Method 1: Docker Compose (Recommended)
+
+The easiest way to get started. Choose your deployment mode:
+
+=== "Minimal (Code Graph Only)"
+
+    ```bash
+    # Clone repository
+    git clone https://github.com/royisme/codebase-rag.git
+    cd codebase-rag
+
+    # Copy environment template
+    cp docker/.env.template/.env.minimal .env
+
+    # Edit .env and set Neo4j password
+    nano .env
+
+    # Start services
+    docker-compose up -d
+    ```
+
+    **Requirements**: Only Neo4j (included)
+
+=== "Standard (Code Graph + Memory)"
+
+    ```bash
+    # Clone repository
+    git clone https://github.com/royisme/codebase-rag.git
+    cd codebase-rag
+
+    # Copy environment template
+    cp docker/.env.template/.env.standard .env
+
+    # Edit .env and configure embedding provider
+    nano .env
+
+    # Start services
+    docker-compose -f docker/docker-compose.standard.yml up -d
+    ```
+
+    **Requirements**: Neo4j + Embedding model (Ollama or cloud)
+
+=== "Full (All Features)"
+
+    ```bash
+    # Clone repository
+    git clone https://github.com/royisme/codebase-rag.git
+    cd codebase-rag
+
+    # Copy environment template
+    cp docker/.env.template/.env.full .env
+
+    # Edit .env and configure LLM and embedding
+    nano .env
+
+    # Start with bundled Ollama
+    docker-compose -f docker/docker-compose.full.yml --profile with-ollama up -d
+
+    # Or use external LLM
+    docker-compose -f docker/docker-compose.full.yml up -d
+    ```
+
+    **Requirements**: Neo4j + LLM + Embedding model
+
+### Method 2: Docker Hub (Pull Pre-built Images)
+
+Pull official images from Docker Hub:
+
+```bash
+# Minimal
+docker pull royisme/codebase-rag:minimal
+
+# Standard
+docker pull royisme/codebase-rag:standard
+
+# Full
+docker pull royisme/codebase-rag:full
+```
+
+Then use them with the docker-compose files, or run them directly with `docker run`.
+
+### Method 3: Local Development
+
+For development or local testing without Docker:
+
+```bash
+# Clone repository
+git clone https://github.com/royisme/codebase-rag.git
+cd codebase-rag
+
+# Install uv (if not already installed)
+curl -LsSf https://astral.sh/uv/install.sh | sh
+
+# Install dependencies
+uv pip install -e .
+
+# Start Neo4j separately (required)
+# ... see Neo4j installation docs
+
+# Copy and configure environment
+cp env.example .env
+nano .env
+
+# Start MCP server
+python start_mcp.py
+
+# Or start FastAPI server
+python start.py
+```
+
+## Verify Installation
+
+After installation, verify services are running:
+
+```bash
+# Check Docker containers
+docker ps
+
+# Test health endpoint (if using FastAPI)
+curl http://localhost:8000/api/v1/health
+
+# Check Neo4j
+docker exec -it codebase-rag-neo4j cypher-shell -u neo4j -p your_password "RETURN 'Connected' as status;"
+```
+
+## Next Steps
+
+- [Configuration Guide](configuration.md) - Configure your deployment
+- [Quick Start](quickstart.md) - Get started with basic operations
+- [Deployment Overview](../deployment/overview.md) - Choose the right deployment mode
+
+## Troubleshooting Installation
+
+### Docker Issues
+
+**Problem**: Port already in use
+```bash
+# Check what's using the port
+sudo lsof -i :7687  # Neo4j
+sudo lsof -i :8000  # MCP server
+
+# Change port in docker-compose.yml if needed
+```
+
+**Problem**: Permission denied
+```bash
+# Add user to docker group
+sudo usermod -aG docker $USER
+newgrp docker
+```
+
+### Neo4j Connection Issues
+
+**Problem**: Cannot connect to Neo4j
+```bash
+# Check Neo4j logs
+docker logs codebase-rag-neo4j
+
+# Verify Neo4j is ready
+docker exec codebase-rag-neo4j neo4j status
+```
+
+### Ollama Issues
+
+**Problem**: Cannot connect to Ollama
+```bash
+# Check Ollama is running
+docker logs codebase-rag-ollama
+
+# Test Ollama connection
+curl http://localhost:11434/api/version
+```
+
+For more issues, see the [Troubleshooting Guide](../troubleshooting.md).
diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md
new file mode 100644
index 0000000..5a96189
--- /dev/null
+++ b/docs/getting-started/quickstart.md
@@ -0,0 +1,268 @@
+# Quick Start Guide
+
+Get Code Graph Knowledge System up and running in 5 minutes!
+
+## 🎯 Choose Your Path
+
+=== "Minimal (Recommended)"
+    **Code Graph only** - No LLM required
+
+    Perfect for getting started and trying out the system.
+
+    ```bash
+    # Clone repository
+    git clone https://github.com/royisme/codebase-rag.git
+    cd codebase-rag
+
+    # Initialize environment
+    make init-env
+    # Choose: minimal
+
+    # Start services
+    make docker-minimal
+    ```
+
+=== "Standard"
+    **Code Graph + Memory** - Embedding required
+
+    ```bash
+    git clone https://github.com/royisme/codebase-rag.git
+    cd codebase-rag
+
+    # Initialize environment
+    make init-env
+    # Choose: standard
+
+    # Edit .env and add your embedding provider
+    # e.g., EMBEDDING_PROVIDER=ollama
+
+    make docker-standard
+    ```
+
+=== "Full"
+    **All Features** - LLM + Embedding required
+
+    ```bash
+    git clone https://github.com/royisme/codebase-rag.git
+    cd codebase-rag
+
+    # Initialize environment
+    make init-env
+    # Choose: full
+
+    # Edit .env and add your LLM provider
+    # e.g., LLM_PROVIDER=ollama
+
+    make docker-full-with-ollama
+    ```
+
+## ✅ Verify Installation
+
+After starting the services, verify everything is running:
+
+```bash
+# Check service health
+make health-check
+
+# View logs
+make docker-logs
+```
+
+You should see:
+
+- ✅ Neo4j running at http://localhost:7474
+- ✅ API running at http://localhost:8000
+- ✅ API docs at http://localhost:8000/docs
+
+## 🚀 First Steps
+
+### 1. Access Neo4j Browser
+
+1. Open http://localhost:7474 in your browser
+2. Connect with:
+   - **URL**: `bolt://localhost:7687`
+   - **User**: `neo4j`
+   - **Password**: (from your `.env` file)
+
+### 2. Test the API
+
+```bash
+# Check health
+curl http://localhost:8000/api/v1/health
+
+# Get statistics
+curl http://localhost:8000/api/v1/statistics
+```
+
+### 3. Ingest Your First Repository
+
+#### Option A: Using REST API
+
+```bash
+curl -X POST http://localhost:8000/api/v1/code-graph/ingest \
+  -H "Content-Type: application/json" \
+  -d '{
+    "local_path": "/path/to/your/repo",
+    "repo_url": "https://github.com/user/repo",
+    "mode": "incremental"
+  }'
+```
+
+#### Option B: Using MCP (Claude Desktop)
+
+1. Configure Claude Desktop to connect to the MCP server
+2. Use the tool:
+
+```
+code_graph_ingest_repo({
+  "local_path": "/path/to/your/repo",
+  "mode": "incremental"
+})
+```
+
+### 4. Search Your Code
+
+```bash
+# Find files related to "authentication"
+curl -X POST http://localhost:8000/api/v1/code-graph/related \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "authentication",
+    "repo_id": "your-repo-name",
+    "limit": 10
+  }'
+```
+
+### 5. Analyze Impact
+
+```bash
+# See what depends on a specific file
+curl -X POST http://localhost:8000/api/v1/code-graph/impact \
+  -H "Content-Type: application/json" \
+  -d '{
+    "repo_id": "your-repo-name",
+    "file_path": "src/auth/login.py",
+    "depth": 2
+  }'
+```
+
+## 🎓 Next Steps
+
+### Learn Code Graph Features
+
+- [Repository Ingestion](../guide/code-graph/ingestion.md) - Index your codebase
+- [Search & Discovery](../guide/code-graph/search.md) - Find related files
+- [Impact Analysis](../guide/code-graph/impact.md) - Understand dependencies
+- [Context Packing](../guide/code-graph/context.md) - Generate AI context
+
+### Explore Advanced Features
+
+!!! info "Available in Standard/Full modes only"
+
+- [Memory Store](../guide/memory/overview.md) - Project knowledge management
+- [Knowledge RAG](../guide/knowledge/overview.md) - Document Q&A
+- [Auto Extraction](../guide/memory/extraction.md) - Automated memory curation
+
+### Integrate with Your Workflow
+
+- [Claude Desktop Setup](../guide/mcp/claude-desktop.md) - Use with Claude
+- [VS Code Integration](../guide/mcp/vscode.md) - Editor integration
+- [API Reference](../api/mcp-tools.md) - Complete tool documentation
+
+## 🔧 Common Issues
+
+### Port Already in Use
+
+If ports 7474, 7687, or 8000 are already in use:
+
+```bash
+# Edit .env file
+NEO4J_HTTP_PORT=17474
+NEO4J_BOLT_PORT=17687
+APP_PORT=18000
+
+# Restart
+make docker-stop
+make docker-minimal
+```
+
+### Neo4j Connection Failed
+
+1. Check Neo4j is healthy:
+   ```bash
+   docker ps | grep neo4j
+   docker logs codebase-rag-neo4j
+   ```
+
+2. Verify credentials in `.env` match
+
+3. Wait for Neo4j to fully start (can take 30s)
+
+### Ollama Not Found (Full mode)
+
+If using local Ollama on your host:
+
+```env
+# In .env file
+OLLAMA_BASE_URL=http://host.docker.internal:11434
+```
+
+If Ollama is not installed:
+
+```bash
+# Install Ollama
+curl -fsSL https://ollama.com/install.sh | sh
+
+# Pull a model
+ollama pull llama3.2
+ollama pull nomic-embed-text
+```
+
+## 📚 Documentation Links
+
+- [Deployment Overview](../deployment/overview.md) - Choose the right mode
+- [Configuration Guide](configuration.md) - Detailed configuration options
+- [Docker Guide](../deployment/docker.md) - Docker-specific information
+- [Troubleshooting](../troubleshooting.md) - Common problems and solutions
+
+## 💡 Tips & Tricks
+
+### Use Incremental Mode
+
+Always use `"mode": "incremental"` for repository ingestion. It's 60x faster than a full (non-incremental) ingest.
+
+### Start Small
+
+Test with a small repository first (< 1000 files) before ingesting large monorepos.
+
+### Monitor Resources
+
+```bash
+# Watch Docker resource usage
+docker stats
+
+# Check Neo4j memory
+docker logs codebase-rag-neo4j | grep memory
+```
+
+### Batch Operations
+
+For multiple repositories, use the batch ingestion API or write a simple script:
+
+```bash
+for repo in repo1 repo2 repo3; do
+  curl -X POST http://localhost:8000/api/v1/code-graph/ingest \
+    -H "Content-Type: application/json" \
+    -d "{\"local_path\": \"/repos/$repo\", \"mode\": \"incremental\"}"
+done
+```
+
+## 🎉 You're Ready!
+
+Congratulations! You now have Code Graph Knowledge System running.
+
+Try exploring your codebase with the MCP tools or REST API. Check out the [User Guide](../guide/code-graph/overview.md) for detailed feature documentation.
+
+---
+
+**Need help?** Join our [GitHub Discussions](https://github.com/royisme/codebase-rag/discussions) or [report an issue](https://github.com/royisme/codebase-rag/issues).
diff --git a/docs/guide/code-graph/context.md b/docs/guide/code-graph/context.md
new file mode 100644
index 0000000..bfebd82
--- /dev/null
+++ b/docs/guide/code-graph/context.md
@@ -0,0 +1,1378 @@
+# Context Packing Guide
+
+## Introduction
+
+Context packing is the art of generating curated, token-budget-aware context bundles for AI assistants. It solves one of the most common problems in AI-assisted development: **"What code should I show the LLM?"**
+
+Instead of manually copying files or overwhelming the AI with too much context, context packing automatically selects the most relevant code within your specified token budget.
+
+## The Context Problem
+
+### Without Context Packing
+
+**The manual approach:**
+
+1. Search for relevant files
+2. Copy-paste into chat
+3. Realize you exceeded token limit
+4. Remove some files
+5. Wonder if you removed the wrong ones
+6. Repeat process
+
+**Problems:**
+- ❌ Time-consuming
+- ❌ Inconsistent results
+- ❌ Easy to exceed token limits
+- ❌ Hard to know what's most relevant
+- ❌ Manual deduplication needed
+
+### With Context Packing
+
+**The automated approach:**
+
+1. Specify repo_id, stage, and budget
+2. Get curated context bundle
+3. Use directly with AI
+
+**Benefits:**
+- ✅ Automatic relevance ranking
+- ✅ Budget-aware selection
+- ✅ Stage-optimized content
+- ✅ Deduplication included
+- ✅ Consistent, reproducible
+
+## How It Works
+
+Context packing follows a multi-step process:
+
+1. **Query the graph**: Search for relevant files/symbols
+2. **Rank by relevance**: Score each item based on multiple factors
+3. **Apply filters**: Remove duplicates and low-quality results
+4. **Budget management**: Select items within token budget
+5. **Category balancing**: Balance files vs symbols vs guidelines
+6. **Format output**: Generate ref:// handles for AI tools
+
+### The Pack Builder Algorithm
+
+```python
+def build_context_pack(nodes, budget, stage):
+    # 1. Deduplicate nodes by ref handle
+    nodes = deduplicate(nodes)
+
+    # 2. Sort by relevance score
+    nodes = sort_by_score(nodes, descending=True)
+
+    # 3. Apply stage-specific prioritization
+    nodes = prioritize_by_stage(nodes, stage)
+
+    # 4. Pack within budget and category limits
+    pack = []
+    budget_used = 0
+    file_count = 0
+    symbol_count = 0
+
+    for node in nodes:
+        # Check category limits
+        if node.type == "file" and file_count >= FILE_LIMIT:
+            continue
+        if node.type == "symbol" and symbol_count >= SYMBOL_LIMIT:
+            continue
+
+        # Estimate token cost
+        tokens = estimate_tokens(node)
+
+        # Check budget
+        if budget_used + tokens > budget:
+            break
+
+        # Add to pack
+        pack.append(node)
+        budget_used += tokens
+
+        if node.type == "file":
+            file_count += 1
+        elif node.type == "symbol":
+            symbol_count += 1
+
+    return pack, budget_used
+```
+
+## Using MCP Tools
+
+### Tool: context_pack
+
+Build a context pack within specified token budget.
+
+#### Input Schema
+
+```json
+{
+  "repo_id": "myapp",
+  "stage": "implement",
+  "budget": 1500,
+  "keywords": "authentication user",
+  "focus": "src/auth/service.py"
+}
+```
+
+#### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `repo_id` | string | Yes | - | Repository identifier |
+| `stage` | string | No | `implement` | Development stage (`plan`, `review`, `implement`) |
+| `budget` | integer | No | 1500 | Token budget (500-10000) |
+| `keywords` | string | No | - | Focus keywords (optional) |
+| `focus` | string | No | - | Focus file paths (optional) |
+
+#### Example: Basic Context Pack
+
+```json
+{
+  "repo_id": "myapp",
+  "stage": "implement",
+  "budget": 2000
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "items": [
+    {
+      "kind": "file",
+      "title": "auth/service.py",
+      "summary": "Python file service.py in auth/ directory",
+      "ref": "ref://file/src/auth/service.py#L1-L1000",
+      "extra": {
+        "lang": "python",
+        "score": 2.85
+      }
+    },
+    {
+      "kind": "file",
+      "title": "api/routes.py",
+      "summary": "Python file routes.py in api/ directory",
+      "ref": "ref://file/src/api/routes.py#L1-L1000",
+      "extra": {
+        "lang": "python",
+        "score": 2.41
+      }
+    }
+  ],
+  "budget_used": 1847,
+  "budget_limit": 2000,
+  "stage": "implement",
+  "repo_id": "myapp",
+  "category_counts": {
+    "file": 2,
+    "symbol": 0
+  }
+}
+```
+
+#### Example: Planning Stage
+
+For high-level project overview:
+
+```json
+{
+  "repo_id": "myapp",
+  "stage": "plan",
+  "budget": 1000
+}
+```
+
+**Optimized for:**
+- Project structure
+- Entry points
+- Key modules
+- Architecture overview
+- High-level organization
+
+#### Example: Review Stage
+
+For code review focus:
+
+```json
+{
+  "repo_id": "myapp",
+  "stage": "review",
+  "budget": 2000,
+  "focus": "src/api/routes/users.py"
+}
+```
+
+**Optimized for:**
+- Code quality
+- Patterns and conventions
+- Related files
+- Test coverage
+- Documentation
+
+#### Example: Implementation Stage
+
+For detailed coding work:
+
+```json
+{
+  "repo_id": "myapp",
+  "stage": "implement",
+  "budget": 3000,
+  "keywords": "authentication jwt token",
+  "focus": "src/auth/"
+}
+```
+
+**Optimized for:**
+- Implementation details
+- Function signatures
+- Class definitions
+- Detailed logic
+- Dependencies
+
+#### Example: Large Context
+
+For comprehensive analysis:
+
+```json
+{
+  "repo_id": "myapp",
+  "stage": "implement",
+  "budget": 8000,
+  "keywords": "user authentication authorization"
+}
+```
+
+**Use when:**
+- Working with large context window LLMs (Claude Opus, GPT-4)
+- Need comprehensive understanding
+- Multiple related features
+- Complex refactoring
+
+#### Example: Claude Desktop Usage
+
+In Claude Desktop:
+
+```
+I need to implement JWT authentication. Can you give me relevant context?
+```
+
+Claude calls the MCP tool:
+
+```json
+{
+  "name": "context_pack",
+  "arguments": {
+    "repo_id": "myapp",
+    "stage": "implement",
+    "budget": 2000,
+    "keywords": "jwt authentication"
+  }
+}
+```
+
+Claude then uses the ref:// handles to fetch file contents and provide informed assistance.
+
+### Understanding Response Fields
+
+#### Context Item Structure
+
+Each item in the pack:
+
+```json
+{
+  "kind": "file",                           // Item type (file/symbol/guideline)
+  "title": "auth/service.py",               // Short display title
+  "summary": "Python file service.py...",   // Human-readable description
+  "ref": "ref://file/...",                  // Reference handle
+  "extra": {
+    "lang": "python",                       // Additional metadata
+    "score": 2.85                           // Relevance score
+  }
+}
+```
+
+#### Budget Fields
+
+```json
+{
+  "budget_used": 1847,        // Tokens used in pack
+  "budget_limit": 2000,       // Requested budget
+  "category_counts": {
+    "file": 2,                // Number of file items
+    "symbol": 0               // Number of symbol items
+  }
+}
+```
+
+#### Reference Handles
+
+The `ref` field provides standardized file references:
+
+```
+ref://file/{path}#L{start}-L{end}
+```
+
+**Usage:**
+- MCP clients can fetch content
+- AI tools can request specific lines
+- Deduplication by ref
+- Future symbol references
+
+## Using REST API
+
+### Endpoint: POST /api/v1/code-graph/context-pack
+
+#### Request Body
+
+```json
+{
+  "repo_id": "myapp",
+  "stage": "implement",
+  "budget": 2000,
+  "keywords": "authentication",
+  "focus": "src/auth/"
+}
+```
+
+#### Response
+
+```json
+{
+  "success": true,
+  "items": [
+    {
+      "kind": "file",
+      "title": "auth/service.py",
+      "summary": "Python file service.py in auth/ directory",
+      "ref": "ref://file/src/auth/service.py#L1-L1000",
+      "extra": {
+        "lang": "python",
+        "score": 2.85
+      }
+    }
+  ],
+  "budget_used": 1847,
+  "budget_limit": 2000,
+  "stage": "implement",
+  "repo_id": "myapp"
+}
+```
+
+#### Example: cURL
+
+```bash
+curl -X POST http://localhost:8000/api/v1/code-graph/context-pack \
+  -H "Content-Type: application/json" \
+  -d '{
+    "repo_id": "myapp",
+    "stage": "implement",
+    "budget": 2000,
+    "keywords": "payment processing"
+  }'
+```
+
+#### Example: Python
+
+```python
+import requests
+
+def build_context(repo_id, stage, budget, keywords=None, focus=None):
+    """Build context pack for AI assistant"""
+    response = requests.post(
+        "http://localhost:8000/api/v1/code-graph/context-pack",
+        json={
+            "repo_id": repo_id,
+            "stage": stage,
+            "budget": budget,
+            "keywords": keywords,
+            "focus": focus
+        }
+    )
+
+    result = response.json()
+
+    if result["success"]:
+        print(f"Context Pack ({result['budget_used']}/{result['budget_limit']} tokens):")
+        print(f"  {result['category_counts']['file']} files")
+        print(f"  {result['category_counts']['symbol']} symbols")
+        print("\nItems:")
+
+        for item in result["items"]:
+            print(f"  - {item['title']} (score: {item['extra']['score']:.2f})")
+            print(f"    {item['ref']}")
+
+        return result["items"]
+
+# Usage
+context = build_context(
+    repo_id="myapp",
+    stage="implement",
+    budget=2000,
+    keywords="user authentication"
+)
+```
+
+#### Example: JavaScript
+
+```javascript
+async function buildContextPack(repoId, stage, budget, keywords = null, focus = null) {
+  const response = await fetch('http://localhost:8000/api/v1/code-graph/context-pack', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      repo_id: repoId,
+      stage: stage,
+      budget: budget,
+      keywords: keywords,
+      focus: focus
+    })
+  });
+
+  const data = await response.json();
+
+  if (data.success) {
+    console.log(`Context Pack (${data.budget_used}/${data.budget_limit} tokens):`);
+    console.log(`  ${data.category_counts.file} files`);
+    console.log(`  ${data.category_counts.symbol} symbols`);
+    console.log('\nItems:');
+
+    data.items.forEach(item => {
+      console.log(`  - ${item.title} (score: ${item.extra.score.toFixed(2)})`);
+      console.log(`    ${item.ref}`);
+    });
+
+    return data.items;
+  }
+}
+
+// Usage
+const context = await buildContextPack(
+  'myapp',
+  'implement',
+  2000,
+  'payment processing'
+);
+```
+
+## Stages Explained
+
+### Plan Stage
+
+**Purpose:** High-level project understanding and planning
+
+**Optimized for:**
+- Project structure overview
+- Key entry points
+- Main modules and their purposes
+- Architectural patterns
+- Technology stack
+
+**Typical budget:** 500-1000 tokens
+
+**Example use cases:**
+- Starting new feature
+- Understanding codebase structure
+- Planning architecture changes
+- Creating project roadmap
+- Onboarding new developers
+
+**What gets prioritized:**
+- README and documentation files
+- Main entry points (main.py, index.ts)
+- Configuration files
+- Package manifests
+- Top-level directories
+
+### Review Stage
+
+**Purpose:** Code review and quality analysis
+
+**Optimized for:**
+- Code quality patterns
+- Testing coverage
+- Documentation completeness
+- Convention adherence
+- Related changes
+
+**Typical budget:** 1000-2000 tokens
+
+**Example use cases:**
+- Code review preparation
+- Quality audits
+- Finding similar patterns
+- Checking consistency
+- Test coverage analysis
+
+**What gets prioritized:**
+- Files modified in recent commits
+- Related test files
+- Similar implementation patterns
+- Documentation files
+- Style and lint configs
+
+### Implement Stage (Default)
+
+**Purpose:** Detailed implementation work
+
+**Optimized for:**
+- Function and class definitions
+- Implementation details
+- Dependencies and imports
+- Type definitions
+- Helper utilities
+
+**Typical budget:** 1500-3000 tokens
+
+**Example use cases:**
+- Writing new features
+- Refactoring existing code
+- Fixing bugs
+- Understanding implementation
+- API integration
+
+**What gets prioritized:**
+- Focused area files (via keywords/focus)
+- Related utility functions
+- Type definitions
+- Interface definitions
+- Helper modules
+
+## Budget Guidelines
+
+### Token Estimation
+
+Context packing estimates tokens using:
+
+```
+estimated_tokens = (title_length + summary_length + ref_length + 50) / 4
+```
+
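+**Character-to-token ratio:** ~4 characters per token (an approximate heuristic). Note that the estimate is computed from each item's title, summary, and ref, not from the contents of the referenced file.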
+
+### Budget Recommendations
+
+#### Small Context (500-1000 tokens)
+
+**Best for:**
+- Quick questions
+- Focused tasks
+- Planning stage
+- Limited context LLMs
+
+**Example:**
+```json
+{
+  "stage": "plan",
+  "budget": 800
+}
+```
+
+**Typical result:** 3-5 files, high-level overview
+
+#### Medium Context (1000-2000 tokens)
+
+**Best for:**
+- Regular development
+- Code reviews
+- Bug fixes
+- Standard tasks
+
+**Example:**
+```json
+{
+  "stage": "implement",
+  "budget": 1500
+}
+```
+
+**Typical result:** 5-8 files, detailed content
+
+#### Large Context (2000-5000 tokens)
+
+**Best for:**
+- Complex features
+- Major refactoring
+- Architecture changes
+- Comprehensive analysis
+
+**Example:**
+```json
+{
+  "stage": "implement",
+  "budget": 3000
+}
+```
+
+**Typical result:** 10-15 files, comprehensive coverage
+
+#### Extra Large Context (5000-10000 tokens)
+
+**Best for:**
+- Large context window LLMs (Claude Opus, GPT-4 Turbo)
+- System-wide refactoring
+- Complete feature implementation
+- Deep analysis
+
+**Example:**
+```json
+{
+  "stage": "implement",
+  "budget": 8000
+}
+```
+
+**Typical result:** 20-30 files, exhaustive coverage
+
+### LLM Context Windows
+
+Match budget to your LLM's capabilities:
+
+| LLM | Context Window | Recommended Budget | Use Case |
+|-----|----------------|-------------------|----------|
+| GPT-3.5 | 4K tokens | 500-1000 | Quick tasks |
+| GPT-4 (8K) | 8K tokens | 1000-2000 | Regular dev |
+| GPT-4 (32K) | 32K tokens | 2000-5000 | Complex tasks |
+| GPT-4 Turbo | 128K tokens | 5000-10000 | Large refactoring |
+| Claude 2 | 100K tokens | 5000-10000 | Comprehensive |
+| Claude 3 | 200K tokens | 8000-10000 | Full system |
+
+**Rule of thumb:** Keep Code Graph context to at most 20-30% of the context window, leaving room for the conversation and the model's response.
+
+## Keywords and Focus
+
+### Using Keywords
+
+Keywords filter and rank results based on relevance.
+
+**Syntax:**
+```
+"keyword1 keyword2 keyword3"
+```
+
+**Examples:**
+
+```json
+// Authentication-related code
+{"keywords": "authentication login jwt"}
+
+// Payment processing
+{"keywords": "payment stripe checkout"}
+
+// Database operations
+{"keywords": "database postgres migration"}
+
+// API endpoints
+{"keywords": "api routes endpoints"}
+```
+
+**Effect:**
+- Files matching keywords get higher scores
+- Non-matching files may be excluded if budget is tight
+- Multiple keywords are combined with AND logic (all should match)
+
+### Using Focus
+
+Focus prioritizes specific files or directories.
+
+**Syntax:**
+```
+"path/to/file.py"           // Single file
+"path/to/directory/"        // Directory
+"file1.py,file2.py"         // Multiple files
+```
+
+**Examples:**
+
+```json
+// Focus on specific file
+{"focus": "src/auth/service.py"}
+
+// Focus on directory
+{"focus": "src/api/routes/"}
+
+// Focus on multiple files
+{"focus": "src/auth/service.py,src/models/user.py"}
+```
+
+**Effect:**
+- Focused files/directories appear first in results
+- Focused files/directories get priority in budget allocation
+- Ensures important context is included
+
+### Combining Keywords and Focus
+
+For maximum precision:
+
+```json
+{
+  "repo_id": "myapp",
+  "stage": "implement",
+  "budget": 2000,
+  "keywords": "payment processing stripe",
+  "focus": "src/api/routes/payment.py"
+}
+```
+
+**Result:**
+1. `src/api/routes/payment.py` (focused)
+2. Other files matching "payment processing stripe"
+3. Related files by dependency
+4. Within 2000 token budget
+
+## Category Limits
+
+Context packing enforces category limits to ensure balanced context.
+
+### Default Limits
+
+```python
+FILE_LIMIT = 8          # Maximum 8 file items
+SYMBOL_LIMIT = 12       # Maximum 12 symbol items
+```
+
+**Why limits?**
+- Prevent the pack from being all files or all symbols
+- Ensure variety in context
+- Maintain readability
+- Respect the token budget
+
+### File Items
+
+**What counts as a file item:**
+- Complete source files
+- Configuration files
+- Documentation files
+
+**Current state (v0.7):**
+- Only files are indexed
+- Symbol extraction in progress
+- FILE_LIMIT is primary constraint
+
+### Symbol Items
+
+**What will count as symbol items (v0.8+):**
+- Functions
+- Classes
+- Methods
+- Constants
+- Type definitions
+
+**Coming soon:**
+- Function-level context
+- Class-level context
+- Mixed file + symbol packs
+
+### Customizing Limits
+
+Custom limits are not currently exposed via the API, but are planned for v0.8:
+
+```json
+{
+  "repo_id": "myapp",
+  "stage": "implement",
+  "budget": 2000,
+  "file_limit": 12,       // Override default
+  "symbol_limit": 8       // Override default
+}
+```
+
+## Deduplication
+
+Context packing automatically removes duplicates based on ref:// handles.
+
+### How Deduplication Works
+
+```python
+def deduplicate(nodes):
+    seen_refs = {}
+
+    for node in nodes:
+        ref = node.ref
+
+        if ref in seen_refs:
+            # Keep node with higher score
+            if node.score > seen_refs[ref].score:
+                seen_refs[ref] = node
+        else:
+            seen_refs[ref] = node
+
+    return list(seen_refs.values())
+```
+
+### Why Deduplication Matters
+
+**Without deduplication:**
+```
+ref://file/src/auth/service.py#L1-L1000  (score: 2.5)
+ref://file/src/auth/service.py#L1-L1000  (score: 2.3)
+ref://file/src/auth/service.py#L1-L1000  (score: 1.9)
+```
+
+**With deduplication:**
+```
+ref://file/src/auth/service.py#L1-L1000  (score: 2.5)  ← Highest score kept
+```
+
+**Benefits:**
+- Avoids redundant context
+- Saves token budget
+- Cleaner output
+- Better LLM performance
+
+### Duplicate Sources
+
+Files may appear multiple times from:
+
+1. **Multiple searches**: Same file matches different keywords
+2. **Dependency chains**: File appears at different depths
+3. **Different rankings**: Different scoring methods
+4. **Impact analysis**: File appears in multiple dependency paths
+
+Deduplication resolves all of these automatically.
+
+## Advanced Usage
+
+### Pattern 1: Progressive Context Building
+
+Start small, expand as needed:
+
+```python
+# 1. Start with small context
+context = build_context_pack(
+    repo_id="myapp",
+    stage="plan",
+    budget=500
+)
+
+# 2. User needs more detail
+context = build_context_pack(
+    repo_id="myapp",
+    stage="implement",
+    budget=1500,
+    focus=context[0]["ref"]  # Focus on most relevant file
+)
+
+# 3. Deep dive into specific area
+context = build_context_pack(
+    repo_id="myapp",
+    stage="implement",
+    budget=3000,
+    keywords="authentication security",
+    focus="src/auth/"
+)
+```
+
+### Pattern 2: Multi-Feature Context
+
+Build context for multiple related features:
+
+```python
+features = [
+    {"keywords": "user authentication", "budget": 1000},
+    {"keywords": "payment processing", "budget": 1000},
+    {"keywords": "email notifications", "budget": 1000}
+]
+
+all_context = []
+for feature in features:
+    pack = build_context_pack(
+        repo_id="myapp",
+        stage="implement",
+        **feature
+    )
+    all_context.extend(pack["items"])
+
+# Deduplicate across features
+unique_refs = {}
+for item in all_context:
+    if item["ref"] not in unique_refs:
+        unique_refs[item["ref"]] = item
+
+print(f"Total unique files: {len(unique_refs)}")
+```
+
+### Pattern 3: Dependency-Aware Context
+
+Combine search, impact analysis, and context packing:
+
+```python
+# 1. Find relevant files
+search_results = search("authentication", "myapp", limit=10)
+
+# 2. Analyze impact of top result
+top_file = search_results[0]["path"]
+impact = analyze_impact("myapp", top_file, depth=2)
+
+# 3. Build context including dependencies
+all_files = [top_file] + [i["path"] for i in impact["impact"][:5]]
+focus_paths = ",".join(all_files)
+
+context = build_context_pack(
+    repo_id="myapp",
+    stage="implement",
+    budget=3000,
+    focus=focus_paths
+)
+```
+
+### Pattern 4: Stage-Specific Workflow
+
+Use different stages for different tasks:
+
+```python
+# Planning phase
+plan_context = build_context_pack(
+    repo_id="myapp",
+    stage="plan",
+    budget=800,
+    keywords="new feature"
+)
+# → Get overview for planning
+
+# Review phase
+review_context = build_context_pack(
+    repo_id="myapp",
+    stage="review",
+    budget=1500,
+    focus="src/new_feature/"
+)
+# → Review code quality
+
+# Implementation phase
+impl_context = build_context_pack(
+    repo_id="myapp",
+    stage="implement",
+    budget=2500,
+    keywords="new feature",
+    focus="src/new_feature/"
+)
+# → Write implementation
+```
+
+### Pattern 5: Budget Optimization
+
+Find optimal budget for your use case:
+
+```python
+budgets = [500, 1000, 1500, 2000, 3000, 5000]
+
+for budget in budgets:
+    pack = build_context_pack(
+        repo_id="myapp",
+        stage="implement",
+        budget=budget,
+        keywords="authentication"
+    )
+
+    print(f"Budget {budget}: "
+          f"{pack['budget_used']} tokens used, "
+          f"{pack['category_counts']['file']} files")
+
+# Output:
+# Budget 500: 487 tokens used, 2 files
+# Budget 1000: 945 tokens used, 4 files
+# Budget 1500: 1423 tokens used, 6 files
+# Budget 2000: 1897 tokens used, 8 files  ← Hits file limit
+# Budget 3000: 1897 tokens used, 8 files  ← Same (limit reached)
+# Budget 5000: 1897 tokens used, 8 files  ← Same (limit reached)
+```
+
+## Integration Patterns
+
+### Claude Desktop Integration
+
+Claude Desktop automatically uses context packs via MCP:
+
+**User:**
+```
+I need to add JWT authentication to the API
+```
+
+**Claude (internal):**
+1. Calls `code_graph_related` to find relevant files
+2. Calls `context_pack` with appropriate budget
+3. Fetches file contents via ref:// handles
+4. Provides informed response
+
+**User sees:**
+```
+Based on your codebase, here's how to add JWT authentication...
+
+[Response includes relevant code context]
+```
+
+### VS Code Extension
+
+A custom VS Code extension can use context packing:
+
+```typescript
+// vscode-extension/context-provider.ts
+async function getContextForCursor(document, position) {
+  // Get current file
+  const currentFile = document.fileName;
+
+  // Build context pack
+  const response = await fetch('http://localhost:8000/api/v1/code-graph/context-pack', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      repo_id: workspace.name,
+      stage: 'implement',
+      budget: 2000,
+      focus: currentFile
+    })
+  });
+
+  const pack = await response.json();
+
+  // Show in sidebar
+  showContextPanel(pack.items);
+
+  return pack;
+}
+```
+
+### Custom AI Agent
+
+Build custom AI agents with context packing:
+
+```python
+class CodeAssistant:
+    def __init__(self, repo_id, llm_client):
+        self.repo_id = repo_id
+        self.llm = llm_client
+
+    async def answer_question(self, question, budget=2000):
+        """Answer question with relevant code context"""
+
+        # 1. Extract keywords from question
+        keywords = self.extract_keywords(question)
+
+        # 2. Build context pack
+        pack = await build_context_pack(
+            repo_id=self.repo_id,
+            stage="implement",
+            budget=budget,
+            keywords=" ".join(keywords)
+        )
+
+        # 3. Fetch file contents
+        context_text = ""
+        for item in pack["items"]:
+            content = await self.fetch_ref(item["ref"])
+            context_text += f"\n\n=== {item['title']} ===\n{content}"
+
+        # 4. Query LLM with context
+        prompt = f"""
+        Based on this code context:
+        {context_text}
+
+        Answer this question:
+        {question}
+        """
+
+        response = await self.llm.complete(prompt)
+        return response
+
+# Usage
+assistant = CodeAssistant("myapp", llm_client)
+answer = await assistant.answer_question(
+    "How does the authentication system work?"
+)
+```
+
+### Continuous Integration
+
+Use context packing in CI/CD for automated analysis:
+
+```yaml
+# .github/workflows/code-analysis.yml
+name: Code Analysis
+
+on: [pull_request]
+
+jobs:
+  analyze:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Get changed files
+        id: changes
+        run: |
+          FILES=$(git diff --name-only ${{ github.event.pull_request.base.sha }} ${{ github.sha }} | tr '\n' ',')
+          echo "files=$FILES" >> $GITHUB_OUTPUT
+
+      - name: Build context pack
+        run: |
+          curl -X POST http://code-graph:8000/api/v1/code-graph/context-pack \
+            -H "Content-Type: application/json" \
+            -d "{
+              \"repo_id\": \"${{ github.repository }}\",
+              \"stage\": \"review\",
+              \"budget\": 3000,
+              \"focus\": \"${{ steps.changes.outputs.files }}\"
+            }" > context.json
+
+      - name: AI Code Review
+        run: |
+          # Use context to perform AI-powered code review
+          python scripts/ai_review.py context.json
+```
+
+## Troubleshooting
+
+### Budget Not Fully Used
+
+**Symptoms:**
+```json
+{
+  "budget_used": 500,
+  "budget_limit": 2000
+}
+```
+
+**Possible causes:**
+1. Hit category limit (FILE_LIMIT=8)
+2. Not enough relevant files
+3. Keywords too specific
+4. Small repository
+
+**Solutions:**
+1. Check category counts
+2. Remove or broaden keywords
+3. Increase file limit (coming in v0.8)
+4. Use focus parameter less restrictively
+
+### Empty Context Pack
+
+**Symptoms:**
+```json
+{
+  "items": [],
+  "budget_used": 0
+}
+```
+
+**Possible causes:**
+1. Repository not ingested
+2. Keywords don't match anything
+3. Focus path doesn't exist
+4. Repository empty
+
+**Solutions:**
+1. Verify ingestion: `MATCH (f:File {repoId: 'myapp'}) RETURN count(f)`
+2. Try without keywords
+3. Check focus path spelling
+4. Ingest repository
+
+### Irrelevant Results
+
+**Symptoms:**
+- Files don't match expected content
+- Low relevance scores
+- Wrong files prioritized
+
+**Solutions:**
+1. Add more specific keywords
+2. Use focus parameter
+3. Try different stage
+4. Adjust budget (smaller may be more focused)
+
+### Inconsistent Results
+
+**Symptoms:**
+- Different results each time
+- Unpredictable ordering
+
+**Possible causes:**
+1. Non-deterministic scoring
+2. Database state changes
+3. Recent ingestion
+
+**Solutions:**
+1. Use focus parameter for consistency
+2. Wait after ingestion completes
+3. Use specific keywords
+4. Clear cache if implemented
+
+## Best Practices
+
+### 1. Match Budget to LLM
+
+Use appropriate budgets for your LLM:
+
+```python
+# GPT-3.5 (4K context)
+budget = 800
+
+# GPT-4 (8K context)
+budget = 1500
+
+# GPT-4 (32K context)
+budget = 3000
+
+# Claude Opus (200K context)
+budget = 8000
+```
+
+### 2. Use Appropriate Stage
+
+```python
+# High-level planning
+stage = "plan"
+
+# Code review
+stage = "review"
+
+# Implementation work
+stage = "implement"
+```
+
+### 3. Leverage Keywords
+
+Be specific:
+
+```python
+# ❌ Too generic
+keywords = "code"
+
+# ✅ Specific and relevant
+keywords = "user authentication jwt token"
+```
+
+### 4. Focus When Needed
+
+Use focus for targeted context:
+
+```python
+# Working on specific feature
+focus = "src/features/payment/"
+
+# Refactoring specific file
+focus = "src/services/user_service.py"
+```
+
+### 5. Iterate and Refine
+
+Start small, expand as needed:
+
+```python
+# 1. Quick overview
+pack = build_context_pack(repo_id="myapp", budget=500, stage="plan")
+
+# 2. More detail
+pack = build_context_pack(repo_id="myapp", budget=1500, stage="implement", keywords="auth")
+
+# 3. Comprehensive
+pack = build_context_pack(
+    repo_id="myapp", budget=3000, stage="implement",
+    keywords="auth jwt", focus="src/auth/"
+)
+```
+
+## Next Steps
+
+You've now learned all four Code Graph features:
+
+1. ✅ [Repository Ingestion](ingestion.md)
+2. ✅ [Search and Discovery](search.md)
+3. ✅ [Impact Analysis](impact.md)
+4. ✅ [Context Packing](context.md)
+
+**Ready to use Code Graph?**
+
+- **[Installation](../../getting-started/installation.md)**: Set up Code Graph
+- **[Quick Start](../../getting-started/quickstart.md)**: Get started in 5 minutes
+- **[MCP Setup](../mcp/claude-desktop.md)**: Configure Claude Desktop integration
+
+## Reference
+
+### MCP Tool Definition
+
+```json
+{
+  "name": "context_pack",
+  "description": "Build a context pack for AI agents within token budget",
+  "inputSchema": {
+    "type": "object",
+    "properties": {
+      "repo_id": {
+        "type": "string",
+        "description": "Repository identifier"
+      },
+      "stage": {
+        "type": "string",
+        "enum": ["plan", "review", "implement"],
+        "default": "implement",
+        "description": "Development stage"
+      },
+      "budget": {
+        "type": "integer",
+        "minimum": 500,
+        "maximum": 10000,
+        "default": 1500,
+        "description": "Token budget"
+      },
+      "keywords": {
+        "type": "string",
+        "description": "Focus keywords (optional)"
+      },
+      "focus": {
+        "type": "string",
+        "description": "Focus file paths (optional)"
+      }
+    },
+    "required": ["repo_id"]
+  }
+}
+```
+
+### REST API Specification
+
+**Endpoint:** `POST /api/v1/code-graph/context-pack`
+
+**Request:**
+```typescript
+interface ContextPackRequest {
+  repo_id: string;                          // Required
+  stage?: 'plan' | 'review' | 'implement'; // Default: 'implement'
+  budget?: number;                          // Default: 1500, range: 500-10000
+  keywords?: string;                        // Optional: space-separated
+  focus?: string;                           // Optional: comma-separated paths
+}
+```
+
+**Response:**
+```typescript
+interface ContextPackResponse {
+  success: boolean;
+  items: Array<{
+    kind: 'file' | 'symbol' | 'guideline';  // Item type
+    title: string;                           // Display title
+    summary: string;                         // Description
+    ref: string;                             // ref:// handle
+    extra: {
+      lang?: string;                         // Language
+      score?: number;                        // Relevance score
+    };
+  }>;
+  budget_used: number;                       // Tokens used
+  budget_limit: number;                      // Requested budget
+  stage: string;                             // Stage used
+  repo_id: string;                           // Repository ID
+  category_counts: {
+    file: number;                            // File item count
+    symbol: number;                          // Symbol item count
+  };
+}
+```
+
+### Category Limits
+
+```python
+FILE_LIMIT = 8          # Maximum file items
+SYMBOL_LIMIT = 12       # Maximum symbol items
+```
+
+**Customization coming in v0.8**
diff --git a/docs/guide/code-graph/impact.md b/docs/guide/code-graph/impact.md
new file mode 100644
index 0000000..fd31fbc
--- /dev/null
+++ b/docs/guide/code-graph/impact.md
@@ -0,0 +1,1219 @@
+# Impact Analysis Guide
+
+## Introduction
+
+Impact analysis is one of Code Graph's most powerful features. It answers the critical question: **"If I change this file, what else might break?"**
+
+By traversing the dependency graph, Code Graph identifies all files that depend on your target file, helping you understand the blast radius of code changes before making them.
+
+## What is Impact Analysis?
+
+Impact analysis finds **reverse dependencies** - files and symbols that depend on or call the code you're planning to modify.
+
+### The Problem It Solves
+
+**Without impact analysis:**
+- Make changes to a file
+- Hope nothing breaks
+- Discover issues in production
+- Spend hours debugging
+- Emergency rollback
+
+**With impact analysis:**
+- See what depends on the file
+- Identify all affected components
+- Update dependent code proactively
+- Run targeted tests
+- Deploy with confidence
+
+### How It Works
+
+Code Graph traverses the dependency graph in reverse:
+
+1. **Start node**: The file you want to analyze
+2. **Find symbols**: Functions and classes defined in that file
+3. **Traverse backwards**: Find who CALLS or IMPORTS these
+4. **Follow chains**: Continue for N levels (depth)
+5. **Score results**: Rank by importance and directness
+6. **Return impact**: List of affected files with metadata
+
+### Relationship Types
+
+Impact analysis considers two types of relationships:
+
+**IMPORTS relationships:**
+```
+(FileA)-[:IMPORTS]->(FileB)
+```
+FileA imports FileB. If you change FileB, FileA is affected.
+
+**CALLS relationships:**
+```
+(SymbolA)-[:CALLS]->(SymbolB)
+```
+SymbolA calls SymbolB. If you change SymbolB's behavior, SymbolA is affected.
+
+## Using MCP Tools
+
+### Tool: code_graph_impact
+
+Analyze the impact of changing a specific file.
+
+#### Input Schema
+
+```json
+{
+  "repo_id": "myapp",
+  "file_path": "src/auth/service.py",
+  "depth": 2
+}
+```
+
+#### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `repo_id` | string | Yes | - | Repository identifier |
+| `file_path` | string | Yes | - | Path to file to analyze |
+| `depth` | integer | No | 2 | Traversal depth (1-5) |
+
+#### Example: Basic Impact Analysis
+
+```json
+{
+  "repo_id": "myapp",
+  "file_path": "src/services/user_service.py",
+  "depth": 2
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "target": {
+    "path": "src/services/user_service.py",
+    "lang": "python",
+    "repo_id": "myapp"
+  },
+  "impact": [
+    {
+      "type": "file",
+      "path": "src/api/routes/users.py",
+      "lang": "python",
+      "repoId": "myapp",
+      "relationship": "CALLS",
+      "depth": 1,
+      "score": 1.0
+    },
+    {
+      "type": "file",
+      "path": "src/api/routes/auth.py",
+      "lang": "python",
+      "repoId": "myapp",
+      "relationship": "CALLS",
+      "depth": 1,
+      "score": 1.0
+    },
+    {
+      "type": "file",
+      "path": "src/controllers/user_controller.py",
+      "lang": "python",
+      "repoId": "myapp",
+      "relationship": "IMPORTS",
+      "depth": 1,
+      "score": 0.9
+    },
+    {
+      "type": "file",
+      "path": "src/api/routes/admin.py",
+      "lang": "python",
+      "repoId": "myapp",
+      "relationship": "CALLS",
+      "depth": 2,
+      "score": 0.7
+    }
+  ],
+  "total_count": 4,
+  "depth": 2
+}
+```
+
+#### Example: Shallow Analysis (Depth 1)
+
+For quick checks, use depth=1 to see only direct dependencies:
+
+```json
+{
+  "repo_id": "myapp",
+  "file_path": "src/models/user.py",
+  "depth": 1
+}
+```
+
+This shows only files that **directly** import or call the target file.
+
+#### Example: Deep Analysis (Depth 3)
+
+For comprehensive impact assessment:
+
+```json
+{
+  "repo_id": "myapp",
+  "file_path": "src/database/connection.py",
+  "depth": 3
+}
+```
+
+This traces dependencies through 3 levels, showing the full chain of affected files.
+
+#### Example: Claude Desktop Usage
+
+In Claude Desktop:
+
+```
+What would break if I modify src/auth/service.py?
+```
+
+Claude calls the MCP tool:
+
+```json
+{
+  "name": "code_graph_impact",
+  "arguments": {
+    "repo_id": "myapp",
+    "file_path": "src/auth/service.py",
+    "depth": 2
+  }
+}
+```
+
+Claude then presents the results in a readable format:
+
+```
+If you modify src/auth/service.py, these files would be affected:
+
+Direct Dependencies (depth 1):
+- src/api/routes/auth.py (CALLS)
+- src/middleware/auth_middleware.py (IMPORTS)
+
+Indirect Dependencies (depth 2):
+- src/api/routes/admin.py (CALLS through auth.py)
+- tests/integration/test_auth.py (CALLS through middleware)
+```
+
+### Understanding Response Fields
+
+#### Impact Node Structure
+
+Each impact node contains:
+
+```json
+{
+  "type": "file",                          // Always "file" (symbols coming in v0.8)
+  "path": "src/api/routes/users.py",      // Dependent file path
+  "lang": "python",                        // Programming language
+  "repoId": "myapp",                       // Repository ID
+  "relationship": "CALLS",                 // Relationship type (CALLS or IMPORTS)
+  "depth": 1,                              // Dependency distance
+  "score": 1.0                             // Impact score
+}
+```
+
+#### Relationship Field
+
+- **CALLS**: A symbol in the dependent file calls a symbol in your file
+- **IMPORTS**: The dependent file imports your file directly
+
+#### Depth Field
+
+Distance from the target file:
+
+- **depth=1**: Direct dependency (file directly imports/calls target)
+- **depth=2**: Transitive dependency (depends through one intermediate)
+- **depth=3**: Second-level transitive dependency
+- **depth>3**: Deep indirect dependency
+
+#### Score Field
+
+Impact score ranges from 0.5 to 1.0:
+
+| Score | Meaning | Description |
+|-------|---------|-------------|
+| 1.0 | **Critical** | Direct CALLS at depth 1 |
+| 0.9 | **High** | Direct IMPORTS at depth 1 |
+| 0.7 | **Medium** | Transitive CALLS at depth 2 |
+| 0.6 | **Medium-Low** | Transitive IMPORTS at depth 2 |
+| 0.5 | **Low** | Deep dependencies (depth 3+) |
+
+**Score formula:**
+
+```python
+if depth == 1 and relationship == "CALLS":
+    score = 1.0
+elif depth == 1 and relationship == "IMPORTS":
+    score = 0.9
+elif depth == 2 and relationship == "CALLS":
+    score = 0.7
+elif depth == 2 and relationship == "IMPORTS":
+    score = 0.6
+else:
+    score = 0.5  # depth 3 and deeper
+```
+
+## Using REST API
+
+### Endpoint: POST /api/v1/code-graph/impact
+
+#### Request Body
+
+```json
+{
+  "repo_id": "myapp",
+  "file_path": "src/auth/service.py",
+  "depth": 2
+}
+```
+
+#### Response
+
+```json
+{
+  "success": true,
+  "target": {
+    "path": "src/auth/service.py",
+    "lang": "python"
+  },
+  "impact": [
+    {
+      "path": "src/api/routes/auth.py",
+      "relationship": "CALLS",
+      "depth": 1,
+      "score": 1.0
+    }
+  ],
+  "total_count": 1,
+  "depth": 2
+}
+```
+
+#### Example: cURL
+
+```bash
+curl -X POST http://localhost:8000/api/v1/code-graph/impact \
+  -H "Content-Type: application/json" \
+  -d '{
+    "repo_id": "myapp",
+    "file_path": "src/services/payment.py",
+    "depth": 2
+  }'
+```
+
+#### Example: Python
+
+```python
+import requests
+
+def analyze_impact(repo_id, file_path, depth=2):
+    response = requests.post(
+        "http://localhost:8000/api/v1/code-graph/impact",
+        json={
+            "repo_id": repo_id,
+            "file_path": file_path,
+            "depth": depth
+        }
+    )
+
+    result = response.json()
+    if result["success"]:
+        print(f"Impact analysis for {file_path}:")
+        print(f"Found {result['total_count']} dependent files\n")
+
+        # Group by depth
+        by_depth = {}
+        for item in result["impact"]:
+            d = item["depth"]
+            if d not in by_depth:
+                by_depth[d] = []
+            by_depth[d].append(item)
+
+        # Print by depth
+        for depth in sorted(by_depth.keys()):
+            print(f"Depth {depth} ({len(by_depth[depth])} files):")
+            for item in by_depth[depth]:
+                print(f"  - {item['path']} ({item['relationship']})")
+            print()
+
+    return result
+
+# Usage
+analyze_impact("myapp", "src/auth/service.py", depth=2)
+```
+
+#### Example: JavaScript
+
+```javascript
+async function analyzeImpact(repoId, filePath, depth = 2) {
+  const response = await fetch('http://localhost:8000/api/v1/code-graph/impact', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      repo_id: repoId,
+      file_path: filePath,
+      depth: depth
+    })
+  });
+
+  const data = await response.json();
+
+  if (data.success) {
+    console.log(`Impact analysis for ${filePath}:`);
+    console.log(`Found ${data.total_count} dependent files\n`);
+
+    // Group by score
+    const critical = data.impact.filter(i => i.score >= 0.9);
+    const medium = data.impact.filter(i => i.score >= 0.6 && i.score < 0.9);
+    const low = data.impact.filter(i => i.score < 0.6);
+
+    if (critical.length > 0) {
+      console.log('Critical Impact:');
+      critical.forEach(i => console.log(`  - ${i.path}`));
+    }
+
+    if (medium.length > 0) {
+      console.log('\nMedium Impact:');
+      medium.forEach(i => console.log(`  - ${i.path}`));
+    }
+
+    if (low.length > 0) {
+      console.log('\nLow Impact:');
+      low.forEach(i => console.log(`  - ${i.path}`));
+    }
+  }
+}
+
+// Usage
+analyzeImpact('myapp', 'src/auth/service.py', 2);
+```
+
+## Depth Selection
+
+Choosing the right depth is critical for useful impact analysis.
+
+### Depth 1: Direct Dependencies Only
+
+**When to use:**
+- Quick sanity check
+- Verifying expectations
+- Small, focused changes
+- Well-understood components
+
+**Pros:**
+- Fast (< 100ms)
+- Clear, actionable results
+- No noise
+
+**Cons:**
+- May miss important indirect impacts
+- Incomplete picture
+
+**Example use case:**
+```
+You're renaming a function in a utility file.
+Use depth=1 to see direct callers.
+```
+
+### Depth 2: Standard (Recommended)
+
+**When to use:**
+- Regular refactoring
+- API changes
+- Most scenarios
+- Default choice
+
+**Pros:**
+- Comprehensive coverage
+- Reasonable performance (< 200ms)
+- Catches most real impacts
+- Good signal-to-noise ratio
+
+**Cons:**
+- May include some indirect effects you don't care about
+
+**Example use case:**
+```
+You're changing a service layer API.
+Use depth=2 to see routes and controllers that depend on it.
+```
+
+### Depth 3: Deep Analysis
+
+**When to use:**
+- Core infrastructure changes
+- Database schema modifications
+- Major refactoring
+- Architecture analysis
+
+**Pros:**
+- Very comprehensive
+- Finds deep dependencies
+- Good for planning
+
+**Cons:**
+- Slower (200-500ms)
+- More noise
+- Many low-importance results
+
+**Example use case:**
+```
+You're changing the database connection pool.
+Use depth=3 to see everything that might be affected.
+```
+
+### Depth 4-5: Exhaustive Search
+
+**When to use:**
+- Rarely needed
+- Understanding system architecture
+- Finding all possible paths
+- Academic interest
+
+**Pros:**
+- Complete dependency graph
+- No missing links
+
+**Cons:**
+- Slow (> 500ms)
+- Lots of noise
+- Diminishing returns
+- Hard to interpret
+
+**Example use case:**
+```
+You're analyzing the full dependency tree of a core module.
+Use depth=5 to see the complete picture.
+```
+
+## Interpreting Results
+
+### Critical Impact (Score ≥ 0.9)
+
+**These files will definitely break if you change the target.**
+
+```json
+{
+  "path": "src/api/routes/users.py",
+  "relationship": "CALLS",
+  "depth": 1,
+  "score": 1.0
+}
+```
+
+**Action items:**
+- ✅ Update these files in the same PR
+- ✅ Add tests covering these interactions
+- ✅ Review these files carefully
+- ✅ Communicate changes to owners
+
+### Medium Impact (Score 0.6-0.8)
+
+**These files might break, depending on the nature of your change.**
+
+```json
+{
+  "path": "src/controllers/admin.py",
+  "relationship": "CALLS",
+  "depth": 2,
+  "score": 0.7
+}
+```
+
+**Action items:**
+- ⚠️ Review if your changes affect them
+- ⚠️ Consider updating if interface changes
+- ⚠️ Run integration tests
+- ⚠️ Document breaking changes
+
+### Low Impact (Score < 0.6)
+
+**These files are unlikely to break, but worth noting.**
+
+```json
+{
+  "path": "src/utils/logging.py",
+  "relationship": "IMPORTS",
+  "depth": 3,
+  "score": 0.5
+}
+```
+
+**Action items:**
+- ℹ️ Good to know about
+- ℹ️ No immediate action needed
+- ℹ️ Monitor in case of issues
+- ℹ️ Update documentation
+
+### No Impact
+
+**If impact analysis returns 0 results:**
+
+```json
+{
+  "success": true,
+  "impact": [],
+  "total_count": 0
+}
+```
+
+**Possible meanings:**
+1. ✅ File is truly isolated (rare)
+2. ⚠️ File is new (nothing depends on it yet)
+3. ⚠️ Symbol extraction not complete
+4. ❌ File doesn't exist in graph
+
+**Verification:**
+```cypher
+// Check if file exists
+MATCH (f:File {path: 'your/file.py'})
+RETURN f
+
+// Check relationships in either direction
+MATCH (f:File {path: 'your/file.py'})-[r]-()
+RETURN type(r), count(*)
+```
+
+## Use Cases
+
+### Use Case 1: Refactoring
+
+**Scenario:** You need to refactor a service class.
+
+**Workflow:**
+
+1. **Analyze impact:**
+   ```json
+   {
+     "repo_id": "myapp",
+     "file_path": "src/services/user_service.py",
+     "depth": 2
+   }
+   ```
+
+2. **Review results:**
+   - 12 files directly depend on this service
+   - 34 files indirectly depend on it
+
+3. **Plan changes:**
+   - Update 12 direct dependents
+   - Add deprecation warnings
+   - Plan migration strategy
+
+4. **Execute:**
+   - Refactor service
+   - Update dependents
+   - Run tests
+   - Deploy
+
+**Benefits:**
+- Know exactly what to update
+- No surprises in production
+- Confidence in changes
+
+### Use Case 2: Breaking Changes
+
+**Scenario:** You need to make a breaking change to an API.
+
+**Workflow:**
+
+1. **Analyze impact (depth=3):**
+   ```json
+   {
+     "repo_id": "myapp",
+     "file_path": "src/api/v1/users.py",
+     "depth": 3
+   }
+   ```
+
+2. **Categorize results:**
+   - Critical: 8 files (must update)
+   - Medium: 15 files (might update)
+   - Low: 23 files (monitor)
+
+3. **Communication:**
+   - Notify owners of critical files
+   - Document breaking changes
+   - Provide migration guide
+
+4. **Migration:**
+   - Create v2 API alongside v1
+   - Deprecate v1
+   - Monitor usage
+   - Sunset v1 after migration
+
+### Use Case 3: Code Review
+
+**Scenario:** Reviewing a PR that modifies shared utilities.
+
+**Workflow:**
+
+1. **For each modified file, run impact analysis:**
+   ```python
+   changed_files = [
+       "src/utils/string.py",
+       "src/utils/date.py"
+   ]
+
+   for file in changed_files:
+       impact = analyze_impact("myapp", file, depth=2)
+       print(f"{file}: {impact['total_count']} dependents")
+   ```
+
+2. **Check if tests cover impact:**
+   - List all dependent files
+   - Check if they have tests
+   - Verify tests run in CI
+
+3. **Request additional tests:**
+   - If high-impact files lack tests
+   - If new functionality added
+   - If breaking changes made
+
+### Use Case 4: Test Planning
+
+**Scenario:** Determining which tests to run after changes.
+
+**Workflow:**
+
+1. **Get list of changed files (from git):**
+   ```bash
+   git diff --name-only main
+   ```
+
+2. **For each file, get impact:**
+   ```python
+   changed = ["src/auth/service.py", "src/models/user.py"]
+   all_impacted = set()
+
+   for file in changed:
+       result = analyze_impact("myapp", file, depth=2)
+       for item in result["impact"]:
+           all_impacted.add(item["path"])
+   ```
+
+3. **Find associated tests:**
+   ```python
+   test_files = [f for f in all_impacted if "test_" in f or "/tests/" in f]
+   ```
+
+4. **Run targeted tests:**
+   ```python
+   import subprocess
+
+   subprocess.run(["pytest", *test_files], check=True)
+   ```
+
+**Benefits:**
+- Run only relevant tests
+- Faster CI/CD
+- Better coverage
+
+### Use Case 5: Architecture Analysis
+
+**Scenario:** Understanding system coupling and architecture.
+
+**Workflow:**
+
+1. **Identify core modules:**
+   ```python
+   core_modules = [
+       "src/database/connection.py",
+       "src/auth/service.py",
+       "src/api/main.py"
+   ]
+   ```
+
+2. **Analyze each module:**
+   ```python
+   for module in core_modules:
+       result = analyze_impact("myapp", module, depth=3)
+       print(f"{module}: {result['total_count']} dependents")
+   ```
+
+3. **Identify high-coupling modules:**
+   - Modules with > 50 dependents: High coupling
+   - Modules with < 5 dependents: Low coupling
+
+4. **Plan improvements:**
+   - Reduce coupling in high-coupling modules
+   - Add abstraction layers
+   - Improve module boundaries
+
+### Use Case 6: Onboarding
+
+**Scenario:** New developer learning codebase structure.
+
+**Workflow:**
+
+1. **Start with entry point:**
+   ```json
+   {
+     "repo_id": "myapp",
+     "file_path": "src/main.py",
+     "depth": 2
+   }
+   ```
+
+2. **Understand dependencies:**
+   - What does main.py depend on?
+   - What are the key services?
+   - How are layers organized?
+
+3. **Explore key modules:**
+   ```python
+   key_files = [
+       "src/api/routes/users.py",
+       "src/services/auth.py",
+       "src/models/user.py"
+   ]
+
+   for file in key_files:
+       result = analyze_impact("myapp", file, depth=1)
+       print(f"\n{file} is used by:")
+       for item in result["impact"][:5]:
+           print(f"  - {item['path']}")
+   ```
+
+4. **Build mental model:**
+   - Understand system architecture
+   - Identify key dependencies
+   - Learn module responsibilities
+
+## Advanced Techniques
+
+### Comparing Blast Radius
+
+Compare impact of different implementation choices:
+
+```python
+# Option 1: Modify service layer
+impact_service = analyze_impact("myapp", "src/services/user.py", depth=2)
+
+# Option 2: Modify model layer
+impact_model = analyze_impact("myapp", "src/models/user.py", depth=2)
+
+print(f"Service change: {len(impact_service['impact'])} files affected")
+print(f"Model change: {len(impact_model['impact'])} files affected")
+
+# Choose option with smaller blast radius
+if len(impact_service['impact']) < len(impact_model['impact']):
+    print("Recommendation: Modify service layer")
+else:
+    print("Recommendation: Modify model layer")
+```
+
+### Finding Critical Files
+
+Identify files that many others depend on:
+
+```python
+import asyncio
+
+async def find_critical_files(repo_id, files):
+    """Find files with highest dependency count"""
+    results = []
+
+    for file in files:
+        impact = await analyze_impact_async(repo_id, file, depth=1)
+        results.append({
+            "file": file,
+            "dependents": len(impact["impact"])
+        })
+
+    # Sort by dependent count
+    results.sort(key=lambda x: x["dependents"], reverse=True)
+
+    print("Most critical files:")
+    for item in results[:10]:
+        print(f"{item['dependents']:3d} dependents: {item['file']}")
+
+# Usage
+all_files = [
+    "src/database/connection.py",
+    "src/auth/service.py",
+    "src/config/settings.py",
+    # ... more files
+]
+
+asyncio.run(find_critical_files("myapp", all_files))
+```
+
+### Dependency Visualization
+
+Generate dependency graph for visualization:
+
+```python
+def build_dependency_graph(repo_id, root_file, depth=2):
+    """Build graph structure for visualization"""
+    result = analyze_impact(repo_id, root_file, depth)
+
+    nodes = [{"id": root_file, "label": root_file, "type": "target"}]
+    edges = []
+
+    for item in result["impact"]:
+        nodes.append({
+            "id": item["path"],
+            "label": item["path"],
+            "type": "dependent"
+        })
+        edges.append({
+            "from": item["path"],
+            "to": root_file,
+            "label": item["relationship"],
+            "depth": item["depth"]
+        })
+
+    return {"nodes": nodes, "edges": edges}
+
+# Export for visualization tools (D3.js, Cytoscape, etc.)
+graph = build_dependency_graph("myapp", "src/auth/service.py", depth=2)
+
+import json
+with open("dependency_graph.json", "w") as f:
+    json.dump(graph, f, indent=2)
+```
+
+### Impact Trending
+
+Track how impact changes over time:
+
+```python
+import datetime
+import json
+
+def log_impact(repo_id, file_path):
+    """Log impact analysis for trending"""
+    result = analyze_impact(repo_id, file_path, depth=2)
+
+    log_entry = {
+        "timestamp": datetime.datetime.now().isoformat(),
+        "file": file_path,
+        "total_dependents": len(result["impact"]),
+        "critical": len([i for i in result["impact"] if i["score"] >= 0.9]),
+        "medium": len([i for i in result["impact"] if 0.6 <= i["score"] < 0.9]),
+        "low": len([i for i in result["impact"] if i["score"] < 0.6])
+    }
+
+    # Append to log file
+    with open("impact_log.jsonl", "a") as f:
+        f.write(json.dumps(log_entry) + "\n")
+
+# Run weekly
+log_impact("myapp", "src/database/connection.py")
+log_impact("myapp", "src/auth/service.py")
+```
+
+## Performance Optimization
+
+### Caching Results
+
+Cache impact analysis results for frequently checked files:
+
+```python
+from functools import lru_cache
+import hashlib
+
+@lru_cache(maxsize=100)
+def cached_impact_analysis(repo_id, file_path, depth):
+    """Cache impact analysis results"""
+    return analyze_impact(repo_id, file_path, depth)
+
+# Use cached version
+result = cached_impact_analysis("myapp", "src/auth/service.py", 2)
+```
+
+### Batch Analysis
+
+Analyze multiple files efficiently:
+
+```python
+async def batch_analyze_impact(repo_id, file_paths, depth=2):
+    """Analyze impact for multiple files"""
+    import asyncio
+
+    tasks = [
+        analyze_impact_async(repo_id, path, depth)
+        for path in file_paths
+    ]
+
+    results = await asyncio.gather(*tasks)
+
+    return dict(zip(file_paths, results))
+
+# Usage
+files_to_check = [
+    "src/auth/service.py",
+    "src/models/user.py",
+    "src/api/routes.py"
+]
+
+results = await batch_analyze_impact("myapp", files_to_check)
+```
+
+### Limiting Results
+
+For very connected files, limit results:
+
+```python
+def analyze_impact_limited(repo_id, file_path, depth=2, max_results=50):
+    """Limit impact analysis results"""
+    result = analyze_impact(repo_id, file_path, depth)
+
+    # Keep only highest-scored results
+    impact = sorted(result["impact"], key=lambda x: x["score"], reverse=True)
+    result["impact"] = impact[:max_results]
+    result["total_count"] = len(impact)
+    result["limited"] = len(impact) > max_results
+
+    return result
+```
+
+## Troubleshooting
+
+### No Dependencies Found
+
+**Symptoms:**
+```json
+{
+  "success": true,
+  "impact": [],
+  "total_count": 0
+}
+```
+
+**Possible causes:**
+1. File has no dependents, i.e. nothing imports or calls it (rare but possible)
+2. File not yet ingested
+3. Symbol extraction incomplete
+4. Relationships not created
+
+**Solutions:**
+
+1. **Check file exists:**
+   ```cypher
+   MATCH (f:File {path: 'your/file.py'})
+   RETURN f
+   ```
+
+2. **Check relationships:**
+   ```cypher
+   MATCH (f:File {path: 'your/file.py'})<-[r]-()
+   RETURN type(r), count(*)
+   ```
+
+3. **Re-ingest repository:**
+   ```json
+   {
+     "local_path": "/path/to/repo",
+     "mode": "full"
+   }
+   ```
+
+### Too Many Results
+
+**Symptoms:**
+- 100+ dependent files
+- Analysis takes > 1 second
+- Hard to interpret results
+
+**Solutions:**
+
+1. **Reduce depth:**
+   ```python
+   # Instead of depth=3
+   result = analyze_impact(repo_id, file_path, depth=2)
+   ```
+
+2. **Filter by score:**
+   ```python
+   high_impact = [i for i in result["impact"] if i["score"] >= 0.7]
+   ```
+
+3. **Focus on direct dependencies:**
+   ```python
+   direct = [i for i in result["impact"] if i["depth"] == 1]
+   ```
+
+### Unexpected Dependencies
+
+**Symptoms:**
+- Files listed that shouldn't be affected
+- Missing expected dependencies
+
+**Solutions:**
+
+1. **Verify relationships in Neo4j:**
+   ```cypher
+   MATCH (f1:File {path: 'your/file.py'})<-[r]-(f2:File)
+   RETURN f1.path, type(r), f2.path
+   LIMIT 20
+   ```
+
+2. **Check for indirect paths:**
+   ```cypher
+   MATCH path = (f1:File {path: 'unexpected/file.py'})-[*..3]->
+                (f2:File {path: 'your/file.py'})
+   RETURN [n in nodes(path) | n.path] as dependency_chain
+   LIMIT 5
+   ```
+
+3. **Re-ingest with full mode:**
+   - May fix stale relationships
+
+## Best Practices
+
+### 1. Use Appropriate Depth
+
+- **depth=1**: Quick checks, direct dependencies
+- **depth=2**: Standard refactoring, most use cases (recommended)
+- **depth=3**: Major changes, core modules
+- **depth>3**: Rarely needed, architectural analysis only
+
+### 2. Interpret Scores
+
+- **Focus on score ≥ 0.9**: These files WILL be affected
+- **Review 0.6 ≤ score < 0.9**: These files MIGHT be affected
+- **Note score < 0.6**: Good to know, but low priority
+
+### 3. Combine with Tests
+
+Always run tests for affected files:
+
+```python
+impact_files = [i["path"] for i in result["impact"]]
+test_files = [f for f in impact_files if "test" in f]
+print(f"Run these tests: {test_files}")
+```
+
+### 4. Document Breaking Changes
+
+For high-impact changes:
+
+1. Document all affected files
+2. Notify file owners
+3. Provide migration guide
+4. Add deprecation warnings
+5. Plan gradual rollout
+
+### 5. Regular Analysis
+
+Run impact analysis regularly:
+
+- Before major refactoring
+- During architecture reviews
+- When planning breaking changes
+- During onboarding sessions
+
+## Next Steps
+
+Now that you understand impact analysis, learn about:
+
+- **[Context Packing](context.md)**: Generate AI-friendly context bundles from impact analysis results
+- **[Search](search.md)**: Find files to analyze with fulltext search
+
+## Reference
+
+### MCP Tool Definition
+
+```json
+{
+  "name": "code_graph_impact",
+  "description": "Analyze impact of changes to a file",
+  "inputSchema": {
+    "type": "object",
+    "properties": {
+      "repo_id": {
+        "type": "string",
+        "description": "Repository identifier"
+      },
+      "file_path": {
+        "type": "string",
+        "description": "File path to analyze"
+      },
+      "depth": {
+        "type": "integer",
+        "minimum": 1,
+        "maximum": 5,
+        "default": 2,
+        "description": "Dependency traversal depth"
+      }
+    },
+    "required": ["repo_id", "file_path"]
+  }
+}
+```
+
+### REST API Specification
+
+**Endpoint:** `POST /api/v1/code-graph/impact`
+
+**Request:**
+```typescript
+interface ImpactRequest {
+  repo_id: string;     // Required: Repository ID
+  file_path: string;   // Required: File to analyze
+  depth?: number;      // Optional: Depth (default: 2, max: 5)
+}
+```
+
+**Response:**
+```typescript
+interface ImpactResponse {
+  success: boolean;
+  target: {
+    path: string;      // Target file path
+    lang: string;      // Programming language
+    repo_id: string;   // Repository ID
+  };
+  impact: Array<{
+    type: string;      // Always "file"
+    path: string;      // Dependent file path
+    lang: string;      // Programming language
+    repoId: string;    // Repository ID
+    relationship: 'CALLS' | 'IMPORTS';  // Dependency type
+    depth: number;     // Distance from target
+    score: number;     // Impact score (0.5-1.0)
+  }>;
+  total_count: number; // Number of affected files
+  depth: number;       // Requested depth
+}
+```
+
+### Cypher Query
+
+The underlying Cypher query (simplified):
+
+```cypher
+// Find target file
+MATCH (target:File {repoId: $repo_id, path: $file_path})
+
+// Find symbols in target file
+OPTIONAL MATCH (target)<-[:DEFINED_IN]-(targetSymbol:Symbol)
+
+// Find reverse CALLS
+OPTIONAL MATCH (targetSymbol)<-[:CALLS*1..$depth]-(callerSymbol:Symbol)
+OPTIONAL MATCH (callerSymbol)-[:DEFINED_IN]->(callerFile:File)
+
+// Find reverse IMPORTS
+OPTIONAL MATCH (target)<-[:IMPORTS*1..$depth]-(importerFile:File)
+
+// Aggregate and score
+WITH target,
+     collect(DISTINCT callerFile) as callers,
+     collect(DISTINCT importerFile) as importers
+
+UNWIND (callers + importers) as impactedFile
+
+RETURN DISTINCT
+       impactedFile.path as path,
+       impactedFile.lang as lang,
+       // ... relationship type and score calculation
+ORDER BY score DESC
+LIMIT $limit
+```
diff --git a/docs/guide/code-graph/ingestion.md b/docs/guide/code-graph/ingestion.md
new file mode 100644
index 0000000..6f49482
--- /dev/null
+++ b/docs/guide/code-graph/ingestion.md
@@ -0,0 +1,973 @@
+# Repository Ingestion Guide
+
+## Introduction
+
+Repository ingestion is the process of transforming your source code into a queryable graph database. This guide covers everything you need to know about ingesting repositories into Code Graph, from basic usage to advanced optimization techniques.
+
+## Overview
+
+When you ingest a repository, Code Graph:
+
+1. **Scans** all source files matching configured patterns
+2. **Detects** programming languages based on file extensions
+3. **Reads** file content (for files < 100KB)
+4. **Calculates** SHA hashes for change detection
+5. **Creates** Neo4j nodes for repositories and files
+6. **Establishes** relationships between files and repos
+7. **Indexes** content for fulltext search
+
+The entire process is automated and typically completes in seconds for most repositories.
+
+## Ingestion Modes
+
+Code Graph supports two ingestion modes, each optimized for different scenarios.
+
+### Incremental Mode (Recommended)
+
+**What it does:**
+- Uses `git diff` to identify changed files
+- Only processes files that have been added, modified, or deleted
+- Updates existing nodes instead of recreating everything
+- Preserves historical data and relationships
+
+**Performance:**
+- **60x faster** than full mode for typical changes
+- Processes 10-100 files per second
+- Sub-second updates for small commits
+- Scales to very large repositories
+
+**Requirements:**
+- Repository must be a git repository
+- `.git` directory must be present
+- Git binary must be accessible
+
+**When to use:**
+- Regular updates during development
+- CI/CD pipeline integration
+- Daily/hourly sync operations
+- After pulling new commits
+
+**Example timing:**
+- 10 changed files: < 1 second
+- 100 changed files: 1-3 seconds
+- 1,000 changed files: 10-30 seconds
+
+### Full Mode
+
+**What it does:**
+- Scans all files in the repository
+- Deletes existing data for the repository
+- Recreates all nodes and relationships from scratch
+- Complete re-ingestion
+
+**Performance:**
+- Slower than incremental mode
+- Processes roughly 15-30 files per second (see example timing below)
+- Time scales linearly with repository size
+
+**Requirements:**
+- None (works with any directory)
+- Does not require git
+
+**When to use:**
+- First-time ingestion
+- Non-git repositories
+- After major refactoring
+- Monthly full refresh (optional)
+- When incremental mode produces errors
+
+**Example timing:**
+- 100 files: 5-10 seconds
+- 1,000 files: 30-60 seconds
+- 10,000 files: 5-10 minutes
+- 50,000 files: 30-60 minutes
+
+## Using MCP Tools
+
+MCP (Model Context Protocol) is the recommended way to use Code Graph from AI assistants like Claude Desktop.
+
+### Tool: code_graph_ingest_repo
+
+#### Input Schema
+
+```json
+{
+  "local_path": "/absolute/path/to/repository",
+  "repo_url": "https://github.com/user/repo.git",  // optional
+  "mode": "incremental"  // or "full"
+}
+```
+
+#### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `local_path` | string | Yes | - | Absolute path to local repository |
+| `repo_url` | string | No | `file://{local_path}` | Repository URL for identification |
+| `mode` | string | No | `incremental` | Ingestion mode (`incremental` or `full`) |
+
+#### Example: Basic Ingestion
+
+```json
+{
+  "local_path": "/Users/developer/projects/myapp",
+  "mode": "incremental"
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "repo_id": "myapp",
+  "files_processed": 45,
+  "files_added": 3,
+  "files_updated": 42,
+  "files_deleted": 0,
+  "duration_ms": 1247,
+  "mode": "incremental"
+}
+```
+
+#### Example: Full Ingestion
+
+```json
+{
+  "local_path": "/Users/developer/projects/myapp",
+  "mode": "full"
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "repo_id": "myapp",
+  "files_processed": 847,
+  "total_files": 847,
+  "duration_ms": 34521,
+  "mode": "full"
+}
+```
+
+#### Example: With Repository URL
+
+```json
+{
+  "local_path": "/Users/developer/projects/myapp",
+  "repo_url": "https://github.com/company/myapp.git",
+  "mode": "incremental"
+}
+```
+
+**Benefits of providing repo_url:**
+- Better repository identification
+- Useful for tracking multiple clones
+- Enables future multi-repo features
+
+#### Example: Claude Desktop Usage
+
+In Claude Desktop, you can ingest a repository by saying:
+
+```
+Please ingest my repository at /Users/developer/projects/myapp
+```
+
+Claude will automatically call the MCP tool:
+
+```json
+{
+  "name": "code_graph_ingest_repo",
+  "arguments": {
+    "local_path": "/Users/developer/projects/myapp",
+    "mode": "incremental"
+  }
+}
+```
+
+### Error Handling
+
+The tool returns structured errors when ingestion fails:
+
+```json
+{
+  "success": false,
+  "error": "Repository not found: /invalid/path",
+  "error_type": "FileNotFoundError"
+}
+```
+
+**Common errors:**
+
+- `FileNotFoundError`: Path doesn't exist
+- `PermissionError`: No read access to directory
+- `GitError`: Not a git repository (when using incremental mode)
+- `Neo4jConnectionError`: Database connection failed
+
+## Using REST API
+
+For HTTP clients and custom integrations, use the REST API.
+
+### Endpoint: POST /api/v1/code-graph/ingest
+
+#### Request Body
+
+```json
+{
+  "local_path": "/path/to/repository",
+  "repo_url": "https://github.com/user/repo.git",
+  "mode": "incremental",
+  "include_patterns": ["**/*.py", "**/*.js"],  // optional
+  "exclude_patterns": ["**/node_modules/**", "**/.git/**"]  // optional
+}
+```
+
+#### Response
+
+```json
+{
+  "success": true,
+  "task_id": "ing-20231215-143022-a3f8c2d1",
+  "message": "Ingestion started",
+  "repo_id": "myapp"
+}
+```
+
+#### Example: cURL
+
+```bash
+curl -X POST http://localhost:8000/api/v1/code-graph/ingest \
+  -H "Content-Type: application/json" \
+  -d '{
+    "local_path": "/Users/developer/projects/myapp",
+    "mode": "incremental"
+  }'
+```
+
+#### Example: Python requests
+
+```python
+import requests
+
+response = requests.post(
+    "http://localhost:8000/api/v1/code-graph/ingest",
+    json={
+        "local_path": "/Users/developer/projects/myapp",
+        "mode": "incremental"
+    }
+)
+
+result = response.json()
+print(f"Task ID: {result['task_id']}")
+```
+
+#### Example: JavaScript fetch
+
+```javascript
+const response = await fetch('http://localhost:8000/api/v1/code-graph/ingest', {
+  method: 'POST',
+  headers: { 'Content-Type': 'application/json' },
+  body: JSON.stringify({
+    local_path: '/Users/developer/projects/myapp',
+    mode: 'incremental'
+  })
+});
+
+const result = await response.json();
+console.log('Task ID:', result.task_id);
+```
+
+### Monitoring Progress
+
+Large repositories return a task ID for async processing. Monitor progress using:
+
+#### GET /api/v1/tasks/{task_id}
+
+```bash
+curl http://localhost:8000/api/v1/tasks/ing-20231215-143022-a3f8c2d1
+```
+
+**Response:**
+
+```json
+{
+  "task_id": "ing-20231215-143022-a3f8c2d1",
+  "status": "running",
+  "progress": 45,
+  "total": 100,
+  "message": "Processing file 45/100: src/services/auth.py",
+  "started_at": "2023-12-15T14:30:22Z",
+  "updated_at": "2023-12-15T14:30:45Z"
+}
+```
+
+**Status values:**
+- `pending`: Queued, not started yet
+- `running`: Currently processing
+- `completed`: Successfully finished
+- `failed`: Error occurred
+
+#### Server-Sent Events (SSE)
+
+For real-time updates:
+
+```bash
+curl -N http://localhost:8000/api/v1/sse/task/ing-20231215-143022-a3f8c2d1
+```
+
+**Stream output:**
+
+```
+data: {"status": "running", "progress": 10, "message": "Scanning files..."}
+
+data: {"status": "running", "progress": 50, "message": "Processing file 50/100"}
+
+data: {"status": "completed", "progress": 100, "message": "Ingestion complete"}
+```
+
+## File Patterns
+
+Control which files are ingested using include and exclude patterns.
+
+### Default Patterns
+
+**Include patterns (default):**
+
+```python
+[
+    "**/*.py",      # Python
+    "**/*.ts",      # TypeScript
+    "**/*.tsx",     # TypeScript React
+    "**/*.js",      # JavaScript
+    "**/*.jsx",     # JavaScript React
+    "**/*.go",      # Go
+    "**/*.rs",      # Rust
+    "**/*.java",    # Java
+    "**/*.cpp",     # C++
+    "**/*.c",       # C
+    "**/*.h",       # C/C++ headers
+    "**/*.cs",      # C#
+    "**/*.rb",      # Ruby
+    "**/*.php",     # PHP
+    "**/*.swift",   # Swift
+    "**/*.kt",      # Kotlin
+    "**/*.scala"    # Scala
+]
+```
+
+**Exclude patterns (default):**
+
+```python
+[
+    "**/.git/**",
+    "**/node_modules/**",
+    "**/venv/**",
+    "**/env/**",
+    "**/__pycache__/**",
+    "**/build/**",
+    "**/dist/**",
+    "**/.next/**",
+    "**/target/**",
+    "**/*.min.js",
+    "**/*.bundle.js"
+]
+```
+
+### Custom Patterns
+
+Override default patterns with custom ones:
+
+```json
+{
+  "local_path": "/path/to/repo",
+  "include_patterns": [
+    "**/*.py",
+    "**/*.yaml",
+    "**/*.json"
+  ],
+  "exclude_patterns": [
+    "**/tests/**",
+    "**/docs/**"
+  ]
+}
+```
+
+### Pattern Syntax
+
+Patterns use glob syntax:
+
+- `*`: Matches any characters except `/`
+- `**`: Matches any characters including `/`
+- `?`: Matches single character
+- `[abc]`: Matches any character in brackets
+- `{a,b}`: Matches either `a` or `b`
+
+**Examples:**
+
+```python
+"src/**/*.py"           # All Python files in src/ and subdirectories
+"**/test_*.py"          # All test files
+"**/{models,views}/**"  # Files in models or views directories
+"**/api/*.ts"           # TypeScript files directly in api/
+"**/*.{js,ts}"          # JavaScript or TypeScript files
+```
+
+## Language Detection
+
+Files are automatically categorized by language based on extension.
+
+### Supported Languages
+
+| Extension | Language | Symbol Extraction |
+|-----------|----------|-------------------|
+| `.py` | Python | ✅ Functions, Classes |
+| `.ts`, `.tsx` | TypeScript | ⚠️ Basic |
+| `.js`, `.jsx` | JavaScript | ⚠️ Basic |
+| `.go` | Go | ⚠️ Basic |
+| `.rs` | Rust | ⚠️ Basic |
+| `.java` | Java | ⚠️ Basic |
+| `.cpp`, `.c`, `.h` | C/C++ | ⚠️ Basic |
+| `.cs` | C# | ⚠️ Basic |
+| `.rb` | Ruby | ⚠️ Basic |
+| `.php` | PHP | ⚠️ Basic |
+| `.swift` | Swift | ⚠️ Basic |
+| `.kt` | Kotlin | ⚠️ Basic |
+| `.scala` | Scala | ⚠️ Basic |
+
+**Symbol extraction status:**
+- ✅ Full support: Complete AST parsing
+- ⚠️ Basic: File-level indexing only
+- ❌ Not supported: Treated as unknown
+
+**Note:** In v0.7, only file-level indexing is implemented, regardless of the status shown in the table above. Symbol extraction is planned for v0.8.
+
+### Unknown Files
+
+Files with unsupported extensions are still indexed:
+
+- Path is indexed for search
+- File size is recorded
+- Language is marked as `unknown`
+- Content is not indexed
+- No symbol extraction
+
+**Example:** A `.xyz` file is still searchable by filename but not by content.
+
+## Repository Identification
+
+Code Graph needs a unique identifier for each repository.
+
+### Auto-generated repo_id
+
+If you don't provide `repo_url`, the system generates `repo_id` from:
+
+1. **Last directory name**: `/path/to/myapp` → `myapp`
+2. **Git remote URL**: If available, extracts from `git remote -v`
+3. **Fallback**: Uses directory name
+
+### Explicit repo_id
+
+For better control, provide `repo_url`:
+
+```json
+{
+  "local_path": "/Users/dev/work/project",
+  "repo_url": "https://github.com/company/project.git"
+}
+```
+
+Extracted `repo_id`: `project`
+
+### Multiple Clones
+
+You can ingest multiple clones of the same repository:
+
+```json
+// Clone 1: Main branch
+{
+  "local_path": "/repos/myapp-main",
+  "repo_url": "https://github.com/company/myapp.git"
+}
+
+// Clone 2: Feature branch
+{
+  "local_path": "/repos/myapp-feature",
+  "repo_url": "https://github.com/company/myapp.git#feature-branch"
+}
+```
+
+**Note:** Currently, both clones will share the same `repo_id`. Multi-branch support is planned for v0.9.
+
+## Performance Optimization
+
+### Small Repositories (<1,000 files)
+
+**Recommended settings:**
+- Mode: Either `full` or `incremental`
+- Frequency: After every commit
+- Integration: Git hooks or CI/CD
+
+**Performance:**
+- Full mode: 5-10 seconds
+- Incremental: <1 second
+
+**Strategy:**
+- Simple workflow, any mode works fine
+- Run after every `git pull`
+- Automate with Git hooks (e.g. `post-merge`, see [Git Hooks](#git-hooks))
+
+### Medium Repositories (1,000-10,000 files)
+
+**Recommended settings:**
+- Mode: `incremental` (always)
+- Frequency: After major changes
+- Integration: CI/CD on push
+
+**Performance:**
+- Full mode: 30-60 seconds
+- Incremental: 1-5 seconds
+
+**Strategy:**
+- Always use incremental mode
+- Run on every push to main branch
+- Full re-ingestion weekly (optional)
+- Exclude large generated files
+
+### Large Repositories (>10,000 files)
+
+**Recommended settings:**
+- Mode: `incremental` (mandatory)
+- Frequency: Scheduled updates
+- Integration: Background jobs
+
+**Performance:**
+- Full mode: 5-30 minutes
+- Incremental: 5-15 seconds
+
+**Strategy:**
+- Never use full mode in regular workflow
+- Schedule incremental every hour/day
+- Full re-ingestion monthly
+- Aggressive exclusion patterns
+- Monitor Neo4j memory usage
+
+### Optimization Checklist
+
+- ✅ Use incremental mode for regular updates
+- ✅ Exclude build directories (node_modules, dist, build)
+- ✅ Exclude generated files (*.min.js, *.bundle.js)
+- ✅ Keep files under 100KB for content indexing
+- ✅ Run ingestion during off-peak hours
+- ✅ Monitor Neo4j disk usage
+- ✅ Schedule full re-ingestion monthly
+
+## CI/CD Integration
+
+Automate ingestion in your CI/CD pipeline.
+
+### GitHub Actions
+
+```yaml
+name: Update Code Graph
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+jobs:
+  ingest:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+
+      - name: Ingest to Code Graph
+        run: |
+          curl -X POST ${{ secrets.CODE_GRAPH_URL }}/api/v1/code-graph/ingest \
+            -H "Content-Type: application/json" \
+            -d '{
+              "local_path": "${{ github.workspace }}",
+              "repo_url": "${{ github.server_url }}/${{ github.repository }}.git",
+              "mode": "incremental"
+            }'
+```
+
+### GitLab CI
+
+```yaml
+update-code-graph:
+  stage: deploy
+  script:
+    - |
+      curl -X POST ${CODE_GRAPH_URL}/api/v1/code-graph/ingest \
+        -H "Content-Type: application/json" \
+        -d "{
+          \"local_path\": \"${CI_PROJECT_DIR}\",
+          \"repo_url\": \"${CI_REPOSITORY_URL}\",
+          \"mode\": \"incremental\"
+        }"
+  only:
+    - main
+```
+
+### Git Hooks
+
+**Post-merge hook** (`.git/hooks/post-merge`):
+
+```bash
+#!/bin/bash
+# Update Code Graph after pulling changes
+
+curl -X POST http://localhost:8000/api/v1/code-graph/ingest \
+  -H "Content-Type: application/json" \
+  -d "{
+    \"local_path\": \"$(pwd)\",
+    \"mode\": \"incremental\"
+  }" &
+```
+
+Make executable:
+
+```bash
+chmod +x .git/hooks/post-merge
+```
+
+### Docker Compose
+
+Add ingestion to your Docker Compose setup:
+
+```yaml
+services:
+  code-graph:
+    image: royisme/codebase-rag:minimal
+    volumes:
+      - ./:/workspace
+    environment:
+      - AUTO_INGEST=true
+      - INGEST_PATH=/workspace
+      - INGEST_MODE=incremental
+```
+
+## Troubleshooting
+
+### Common Issues
+
+#### Issue: "Not a git repository"
+
+**Error:**
+```json
+{
+  "success": false,
+  "error": "Not a git repository: /path/to/repo"
+}
+```
+
+**Solutions:**
+1. Check `.git` directory exists
+2. Use `mode: "full"` instead of `incremental`
+3. Initialize git: `git init`
+
+#### Issue: "Permission denied"
+
+**Error:**
+```json
+{
+  "success": false,
+  "error": "PermissionError: Permission denied: '/path/to/repo'"
+}
+```
+
+**Solutions:**
+1. Check directory permissions: `ls -ld /path/to/repo`
+2. Make directory readable: `chmod -R 755 /path/to/repo`
+3. Run with appropriate user permissions
+
+#### Issue: Slow ingestion
+
+**Symptoms:**
+- Full mode takes > 5 minutes
+- Incremental mode takes > 30 seconds
+- High CPU usage
+
+**Solutions:**
+1. Add more exclude patterns
+2. Check Neo4j memory settings
+3. Reduce include patterns
+4. Skip large binary files
+5. Increase Neo4j heap size
+
+#### Issue: Files not appearing in search
+
+**Symptoms:**
+- Ingestion succeeds
+- File count looks correct
+- But search returns no results
+
+**Solutions:**
+1. Check fulltext index exists:
+   ```cypher
+   SHOW INDEXES
+   ```
+2. Rebuild fulltext index:
+   ```cypher
+   DROP INDEX file_text IF EXISTS;
+   CREATE FULLTEXT INDEX file_text FOR (f:File) ON EACH [f.path, f.lang];
+   ```
+3. Wait for index to build (may take 1-2 minutes)
+4. Verify files have content: `MATCH (f:File) RETURN f.path, f.content LIMIT 10`
+
+#### Issue: Duplicate files
+
+**Symptoms:**
+- Same file appears multiple times
+- Ingestion reports more files than exist
+
+**Possible causes:**
+1. Multiple ingestions with different `repo_id`
+2. Case-sensitive path issues (Windows/Mac)
+3. Symbolic links creating duplicates
+
+**Solutions:**
+1. Delete and re-ingest:
+   ```cypher
+   MATCH (r:Repo {id: 'myapp'}) OPTIONAL MATCH (r)<-[:IN_REPO]-(f:File)
+   DETACH DELETE f, r
+   ```
+2. Use consistent `repo_url` parameter
+3. Add symlink exclusions to patterns
+
+#### Issue: Out of memory
+
+**Symptoms:**
+- Neo4j crashes during ingestion
+- Java heap space errors
+- System becomes unresponsive
+
+**Solutions:**
+1. Increase Neo4j heap size in `docker-compose.yml`:
+   ```yaml
+   environment:
+     - NEO4J_dbms_memory_heap_initial__size=1G
+     - NEO4J_dbms_memory_heap_max__size=2G
+   ```
+2. Use incremental mode instead of full
+3. Process repository in batches
+4. Exclude large files/directories
+
+### Debug Mode
+
+Enable debug logging for detailed ingestion information:
+
+**Environment variable:**
+```bash
+export LOG_LEVEL=DEBUG
+```
+
+**Docker Compose:**
+```yaml
+environment:
+  - LOG_LEVEL=DEBUG
+```
+
+**Output example:**
+```
+DEBUG - Scanning directory: /path/to/repo
+DEBUG - Found 1247 files matching patterns
+DEBUG - Processing file 1/1247: src/main.py
+DEBUG - File size: 4523 bytes, language: python
+DEBUG - Creating node: File {path: src/main.py, lang: python}
+DEBUG - Created relationship: (File)-[:IN_REPO]->(Repo)
+```
+
+## Best Practices
+
+### 1. Choose the Right Mode
+
+- **Incremental for:**
+  - Active development
+  - Frequent updates
+  - CI/CD pipelines
+  - Large repositories
+
+- **Full for:**
+  - First-time setup
+  - Non-git repositories
+  - Major refactoring
+  - Monthly refreshes
+
+### 2. Optimize Patterns
+
+```json
+{
+  "include_patterns": [
+    "src/**/*.{py,ts,js}",     // Source code only
+    "lib/**/*.{py,ts,js}"      // Library code
+  ],
+  "exclude_patterns": [
+    "**/node_modules/**",      // Dependencies
+    "**/venv/**",              // Virtual env
+    "**/__pycache__/**",       // Python cache
+    "**/build/**",             // Build output
+    "**/dist/**",              // Distribution
+    "**/*.test.{ts,js}",       // Test files (optional)
+    "**/*.min.js"              // Minified files
+  ]
+}
+```
+
+### 3. Schedule Regular Updates
+
+```bash
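+# Cron job: Update every hour
+# NOTE: a crontab entry must be a single line; backslash line continuations
+# are not supported, so the whole command is kept on one line.
+0 * * * * curl -X POST http://localhost:8000/api/v1/code-graph/ingest -H "Content-Type: application/json" -d '{"local_path": "/path/to/repo", "mode": "incremental"}'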
+```
+
+### 4. Monitor Ingestion
+
+Track ingestion metrics:
+
+```cypher
+// Check repository stats
+MATCH (r:Repo {id: 'myapp'})
+OPTIONAL MATCH (r)<-[:IN_REPO]-(f:File)
+RETURN r.id as repo_id,
+       r.file_count as expected,
+       count(f) as actual,
+       r.created as created
+```
+
+### 5. Verify Data Quality
+
+```cypher
+// Check for files without content
+MATCH (f:File)
+WHERE f.content IS NULL AND f.size < 100000
+RETURN f.path, f.size, f.lang
+LIMIT 10
+
+// Check language distribution
+MATCH (f:File)
+RETURN f.lang as language, count(*) as count
+ORDER BY count DESC
+```
+
+## Advanced Topics
+
+### Custom Ingestion Script
+
+For complex workflows, use the Python API directly:
+
+```python
+from services.graph_service import graph_service
+from services.code_ingestor import CodeIngestor
+
+# Initialize
+await graph_service.connect()
+ingestor = CodeIngestor(graph_service)
+
+# Scan files
+files = ingestor.scan_files(
+    repo_path="/path/to/repo",
+    include_globs=["**/*.py", "**/*.js"],
+    exclude_globs=["**/node_modules/**"]
+)
+
+# Ingest
+result = ingestor.ingest_files(
+    repo_id="myapp",
+    files=files
+)
+
+print(f"Ingested {result['files_processed']} files")
+```
+
+### Batch Ingestion
+
+Ingest multiple repositories:
+
+```python
+repositories = [
+    {"path": "/repos/app1", "url": "https://github.com/org/app1"},
+    {"path": "/repos/app2", "url": "https://github.com/org/app2"},
+    {"path": "/repos/app3", "url": "https://github.com/org/app3"},
+]
+
+for repo in repositories:
+    result = await ingest_repo(
+        local_path=repo["path"],
+        repo_url=repo["url"],
+        mode="incremental"
+    )
+    print(f"Ingested {repo['url']}: {result['files_processed']} files")
+```
+
+## Next Steps
+
+Now that your repository is ingested, learn how to search and analyze it:
+
+- **[Search and Discovery](search.md)**: Find relevant files using fulltext search
+- **[Impact Analysis](impact.md)**: Understand code dependencies and blast radius
+- **[Context Packing](context.md)**: Generate AI-friendly context bundles
+
+## Reference
+
+### MCP Tool Definition
+
+```json
+{
+  "name": "code_graph_ingest_repo",
+  "description": "Ingest a code repository into the graph database",
+  "inputSchema": {
+    "type": "object",
+    "properties": {
+      "local_path": {
+        "type": "string",
+        "description": "Local repository path"
+      },
+      "repo_url": {
+        "type": "string",
+        "description": "Repository URL (optional)"
+      },
+      "mode": {
+        "type": "string",
+        "enum": ["full", "incremental"],
+        "default": "incremental",
+        "description": "Ingestion mode"
+      }
+    },
+    "required": ["local_path"]
+  }
+}
+```
+
+### REST API Specification
+
+**Endpoint:** `POST /api/v1/code-graph/ingest`
+
+**Request:**
+```typescript
+interface IngestRequest {
+  local_path: string;           // Required
+  repo_url?: string;            // Optional
+  mode?: 'full' | 'incremental'; // Default: 'incremental'
+  include_patterns?: string[];  // Optional
+  exclude_patterns?: string[];  // Optional
+}
+```
+
+**Response:**
+```typescript
+interface IngestResponse {
+  success: boolean;
+  task_id?: string;
+  repo_id?: string;
+  files_processed?: number;
+  duration_ms?: number;
+  error?: string;
+}
+```
diff --git a/docs/guide/code-graph/overview.md b/docs/guide/code-graph/overview.md
new file mode 100644
index 0000000..1ace079
--- /dev/null
+++ b/docs/guide/code-graph/overview.md
@@ -0,0 +1,568 @@
+# Code Graph Overview
+
+## Introduction
+
+Code Graph is the foundational feature of the Code Graph Knowledge System, providing code intelligence capabilities **without requiring vector embeddings or large language models**. It works in all deployment modes (minimal, standard, and full), making it the most accessible and performant feature for code analysis.
+
+Unlike traditional code search tools that rely on simple text matching, Code Graph uses Neo4j's graph database and native fulltext indexing to understand code structure, file relationships, and dependency chains. This enables powerful capabilities like impact analysis, smart search, and context generation for AI assistants.
+
+## What is Code Graph?
+
+Code Graph is a graph-based representation of your codebase stored in Neo4j. When you ingest a repository, the system:
+
+1. **Scans code files** across your repository (Python, TypeScript, JavaScript, Go, Rust, Java, etc.)
+2. **Creates graph nodes** for repositories, files, and symbols (functions, classes)
+3. **Establishes relationships** like IMPORTS, CALLS, DEFINED_IN, IN_REPO
+4. **Indexes content** using Neo4j's native fulltext search for fast retrieval
+5. **Calculates metrics** like file size, language, and change frequency
+
+The result is a queryable graph that understands:
+
+- Which files import other files
+- Which functions call which other functions
+- What would break if you modify a specific file
+- Which files are most central to your codebase
+
+## Key Features
+
+### 1. Repository Ingestion
+
+Transform your codebase into a searchable graph database.
+
+**Modes:**
+
+- **Incremental** (60x faster): Only process changed files using git diff
+- **Full**: Complete re-ingestion for non-git projects or first-time setup
+
+**Supported Languages:**
+
+- Python (`.py`)
+- TypeScript/JavaScript (`.ts`, `.tsx`, `.js`, `.jsx`)
+- Go (`.go`)
+- Rust (`.rs`)
+- Java (`.java`)
+- C/C++ (`.c`, `.cpp`, `.h`, `.hpp`)
+- C# (`.cs`)
+- Ruby (`.rb`)
+- PHP (`.php`)
+- Swift (`.swift`)
+- Kotlin (`.kt`)
+- Scala (`.scala`)
+
+**What gets indexed:**
+
+- File paths (for pattern matching)
+- Programming language
+- File size
+- File content (for files < 100KB)
+- SHA hash (for change detection)
+- Git commit information (in incremental mode)
+
+### 2. Fulltext Search
+
+Find relevant files using Neo4j's native fulltext search engine. Unlike vector-based semantic search, fulltext search:
+
+- Works **without embeddings or LLM**
+- Provides **instant results** (< 100ms)
+- Supports **fuzzy matching** and relevance scoring
+- Scales to **large repositories** (10,000+ files)
+
+**Search capabilities:**
+
+- Keyword matching in file paths
+- Language filtering
+- Relevance ranking
+- Multi-term queries
+- Path pattern matching
+
+### 3. Impact Analysis
+
+Understand the blast radius of code changes before making them. Impact analysis traverses the dependency graph to find:
+
+- **Direct dependents**: Files that directly import your file
+- **Transitive dependents**: Files that indirectly depend on your file
+- **Function callers**: Code that calls functions you're modifying
+- **Import chains**: Complete dependency paths
+
+This is critical for:
+
+- **Refactoring**: Know what you'll break
+- **Code review**: Understand change implications
+- **Testing strategy**: Identify affected test suites
+- **Architecture analysis**: Map system boundaries
+
+### 4. Context Packing
+
+Generate curated, token-budget-aware context bundles for AI assistants. Context packing solves the problem of "what code should I show the LLM?"
+
+**Features:**
+
+- **Budget-aware**: Respects token limits (500-10,000 tokens)
+- **Stage-specific**: Different content for plan/review/implement stages
+- **Smart ranking**: Prioritizes most relevant files
+- **Deduplication**: Removes redundant references
+- **Category limits**: Balances files vs symbols vs guidelines
+
+**Use cases:**
+
+- Claude Desktop integration via MCP
+- VS Code Copilot context
+- Custom AI agents
+- Automated code review
+- Documentation generation
+
+## Architecture
+
+### Graph Schema
+
+Code Graph uses the following Neo4j schema:
+
+```
+Nodes:
+  - Repo: Repository root
+    - Properties: id, created, file_count
+
+  - File: Source code file
+    - Properties: repoId, path, lang, size, content, sha, updated
+    - Constraints: Composite key (repoId, path)
+
+  - Symbol: Function or class
+    - Properties: id, name, type, lang
+    - Constraints: Unique id
+
+Relationships:
+  - (File)-[:IN_REPO]->(Repo): File belongs to repository
+  - (File)-[:IMPORTS]->(File): File imports another file
+  - (Symbol)-[:DEFINED_IN]->(File): Symbol defined in file
+  - (Symbol)-[:CALLS]->(Symbol): Symbol calls another symbol
+```
+
+### Indexes
+
+Code Graph creates several indexes for optimal performance:
+
+1. **Fulltext Index** (`file_text`):
+   - Indexes: File path, language
+   - Used for: Fast fulltext search
+   - Type: Neo4j native fulltext
+
+2. **Property Indexes**:
+   - `file_path`: Exact path lookups
+   - `file_repo`: Filter by repository
+   - `symbol_name`: Symbol name lookups
+
+3. **Composite Keys**:
+   - `(repoId, path)`: Unique file identification
+   - Allows same filename in different repos
+
+### Performance Characteristics
+
+| Operation | Small Repo (<1K files) | Medium Repo (1K-10K files) | Large Repo (>10K files) |
+|-----------|----------------------|---------------------------|------------------------|
+| **Full Ingestion** | 5-10s | 30-60s | 2-5min |
+| **Incremental Update** | <1s | 1-3s | 3-10s |
+| **Fulltext Search** | <50ms | <100ms | <200ms |
+| **Impact Analysis** | <100ms | <200ms | <500ms |
+| **Context Pack** | <200ms | <300ms | <500ms |
+
+**Scalability:**
+
+- Tested with repositories up to 50,000 files
+- Neo4j graph database scales horizontally
+- Fulltext index automatically optimized
+- Memory usage: ~50MB per 1,000 files
+
+## Integration Points
+
+### 1. MCP Server (Model Context Protocol)
+
+Code Graph provides 4 MCP tools for AI assistants:
+
+- `code_graph_ingest_repo`: Ingest repository
+- `code_graph_related`: Find related files
+- `code_graph_impact`: Analyze impact
+- `context_pack`: Build context bundle
+
+**Compatible with:**
+
+- Claude Desktop
+- VS Code with MCP extension
+- Any MCP-compatible client
+
+### 2. REST API
+
+All Code Graph features are available via HTTP REST API:
+
+```
+POST /api/v1/code-graph/ingest       - Ingest repository
+POST /api/v1/code-graph/search       - Fulltext search
+POST /api/v1/code-graph/impact       - Impact analysis
+POST /api/v1/code-graph/context-pack - Build context pack
+```
+
+### 3. Direct Service Access
+
+For custom integrations, use Python services directly:
+
+```python
+from services.graph_service import graph_service
+from services.code_ingestor import code_ingestor
+from services.ranker import ranker
+from services.pack_builder import pack_builder
+```
+
+## Deployment Modes
+
+Code Graph works identically across all deployment modes:
+
+### Minimal Mode
+
+**What's included:**
+- Neo4j database only
+- Code Graph (all features)
+- No embeddings or LLM required
+
+**Resource requirements:**
+- Docker image: ~500MB
+- Memory: 512MB minimum
+- CPU: 1 core minimum
+- Startup time: ~5 seconds
+
+**Best for:**
+- Individual developers
+- Learning the system
+- CI/CD integration
+- Budget-conscious deployments
+
+### Standard Mode
+
+**What's included:**
+- Neo4j database
+- Code Graph (all features)
+- Memory Store (manual management)
+- Embedding model (for memory search)
+
+**Additional capabilities:**
+- Memory Store for project knowledge
+- Vector search for memories
+- Still no LLM required for Code Graph
+
+### Full Mode
+
+**What's included:**
+- Everything from Standard
+- LLM integration
+- Memory auto-extraction
+- Knowledge RAG
+
+**Additional capabilities:**
+- Memory extraction from git commits
+- Knowledge base Q&A
+- Advanced AI features
+
+**Note:** Code Graph features work identically in all modes. Only additional features change.
+
+## Use Cases
+
+### 1. Understanding Unfamiliar Codebases
+
+**Scenario:** You've joined a new team and need to understand a large codebase quickly.
+
+**Workflow:**
+1. Ingest the repository
+2. Search for key terms (e.g., "authentication", "database")
+3. Use impact analysis to understand dependencies
+4. Generate context packs for specific areas
+
+**Benefits:**
+- No need to read thousands of files
+- Quickly identify entry points
+- Understand system architecture
+- Find related code automatically
+
+### 2. Refactoring with Confidence
+
+**Scenario:** You need to refactor a core module but don't know what depends on it.
+
+**Workflow:**
+1. Run impact analysis on the file you want to change
+2. Review all dependent files (direct and transitive)
+3. Assess the blast radius
+4. Plan your refactoring strategy
+
+**Benefits:**
+- Know exactly what you'll break
+- Identify all test files to update
+- Plan migration strategy
+- Avoid surprise breakages
+
+### 3. AI-Assisted Development
+
+**Scenario:** You want to use Claude Desktop to help with development but need relevant context.
+
+**Workflow:**
+1. Ingest your repository
+2. Use MCP tools in Claude Desktop
+3. Generate context packs for specific tasks
+4. Ask Claude questions with full context
+
+**Benefits:**
+- AI has relevant code context
+- Token budget automatically managed
+- No manual copy-pasting
+- Stay within LLM context limits
+
+### 4. Code Review Assistance
+
+**Scenario:** Reviewing a pull request that touches multiple files.
+
+**Workflow:**
+1. Run impact analysis on changed files
+2. Identify all affected components
+3. Search for related test files
+4. Generate review context pack
+
+**Benefits:**
+- Complete picture of PR impact
+- Don't miss hidden dependencies
+- Find affected tests automatically
+- Better review quality
+
+### 5. Architectural Analysis
+
+**Scenario:** You need to understand the system architecture and identify tightly coupled components.
+
+**Workflow:**
+1. Ingest the repository
+2. Query the graph for high-degree nodes (many connections)
+3. Analyze import/call patterns
+4. Identify architectural boundaries
+
+**Benefits:**
+- Discover hidden dependencies
+- Identify refactoring opportunities
+- Understand layer violations
+- Plan architecture improvements
+
+## Comparison with Alternatives
+
+### vs. grep/ripgrep
+
+| Feature | grep/ripgrep | Code Graph |
+|---------|-------------|------------|
+| Text search | ✅ Excellent | ✅ Good |
+| Relationship analysis | ❌ None | ✅ Full support |
+| Impact analysis | ❌ Manual | ✅ Automatic |
+| Ranking | ❌ None | ✅ Relevance-based |
+| Scalability | ⚠️ Slows on large repos | ✅ Index-backed (sub-second on large repos) |
+
+**When to use grep:** Quick one-off searches, simple text matching
+
+**When to use Code Graph:** Understanding relationships, impact analysis, repeated searches
+
+### vs. ctags/universal-ctags
+
+| Feature | ctags | Code Graph |
+|---------|-------|------------|
+| Symbol indexing | ✅ Excellent | ✅ Good |
+| Cross-file analysis | ❌ Limited | ✅ Full support |
+| Dependency tracking | ❌ None | ✅ Complete |
+| Search capabilities | ⚠️ Basic | ✅ Advanced |
+| Graph traversal | ❌ None | ✅ Full support |
+
+**When to use ctags:** Editor integration, local navigation
+
+**When to use Code Graph:** Cross-file analysis, dependency tracking, impact analysis
+
+### vs. Vector-based semantic search
+
+| Feature | Vector Search | Code Graph |
+|---------|--------------|------------|
+| Semantic understanding | ✅ Excellent | ⚠️ Limited |
+| Relationship analysis | ❌ None | ✅ Full support |
+| Setup complexity | ⚠️ High (embeddings) | ✅ Low (no LLM) |
+| Performance | ⚠️ Slower | ✅ Fast |
+| Resource requirements | ⚠️ High | ✅ Low |
+
+**When to use Vector Search:** Natural language queries, semantic similarity
+
+**When to use Code Graph:** Structural analysis, fast searches, resource-constrained environments
+
+### vs. Language Server Protocol (LSP)
+
+| Feature | LSP | Code Graph |
+|---------|-----|------------|
+| Real-time analysis | ✅ Excellent | ⚠️ Batch |
+| Cross-file features | ✅ Good | ✅ Excellent |
+| Language support | ⚠️ Per-language | ✅ Universal |
+| Historical analysis | ❌ None | ✅ Git integration |
+| AI integration | ❌ Limited | ✅ Native |
+
+**When to use LSP:** Editor integration, real-time feedback, language-specific features
+
+**When to use Code Graph:** Cross-language analysis, historical changes, AI assistance
+
+## Best Practices
+
+### 1. Ingestion Strategy
+
+**For active development:**
+- Use **incremental mode** for fast updates
+- Run ingestion on every pull request
+- Automate with CI/CD hooks
+
+**For initial setup:**
+- Use **full mode** first time
+- Verify ingestion completed successfully
+- Check Neo4j for expected file count
+
+**For large repositories (>10K files):**
+- Use incremental mode exclusively
+- Schedule full re-ingestion monthly
+- Monitor ingestion performance
+
+### 2. Search Optimization
+
+**For best search results:**
+- Use specific terms (not single letters)
+- Include the full language name (e.g. `python`, `typescript`) for language filtering
+- Combine multiple keywords
+- Use path segments for targeted search
+
+**Examples:**
+- Good: `authentication service typescript`
+- Bad: `auth ts`
+- Good: `api/routes payment`
+- Bad: `pay`
+
+### 3. Impact Analysis
+
+**When running impact analysis:**
+- Start with `depth=1` for direct dependencies
+- Increase to `depth=2` for transitive dependencies
+- Rarely go beyond `depth=3` (too much noise)
+- Focus on high-score results first
+
+**Interpreting results:**
+- `score=1.0`: Direct CALLS relationship, depth 1
+- `score=0.9`: Direct IMPORTS relationship, depth 1
+- `score=0.7`: Transitive CALLS, depth 2
+- `score<0.5`: Indirect dependencies, lower priority
+
+### 4. Context Packing
+
+**Budget recommendations:**
+- **Planning**: 500-1000 tokens (high-level overview)
+- **Review**: 1000-2000 tokens (focused analysis)
+- **Implementation**: 1500-3000 tokens (detailed context)
+- **Large context**: 3000-10000 tokens (comprehensive)
+
+**Stage selection:**
+- `plan`: Project structure, entry points, key files
+- `review`: Code quality, patterns, conventions
+- `implement`: Detailed implementation, symbols, logic
+
+### 5. Performance Tuning
+
+**For optimal performance:**
+- Keep files under 100KB (for content indexing)
+- Exclude generated files (node_modules, build/)
+- Run incremental updates frequently
+- Monitor Neo4j memory usage
+
+**Troubleshooting slow queries:**
+- Check Neo4j indexes are created
+- Verify fulltext index exists
+- Reduce search result limit
+- Add more specific search terms
+
+## Limitations
+
+### Current Limitations
+
+1. **No semantic understanding**: Code Graph uses fulltext search, not embeddings
+   - Can't find synonyms or related concepts
+   - Requires keyword matching
+   - No natural language queries
+
+2. **Limited symbol analysis**: Basic function/class detection only
+   - No deep AST parsing
+   - No type inference
+   - No cross-language call graphs (yet)
+
+3. **File size limits**: Files > 100KB are not content-indexed
+   - Path and metadata still indexed
+   - Impact analysis still works
+   - Just no fulltext search of content
+
+4. **No real-time updates**: Ingestion is batch-based
+   - Not suitable for editor integration
+   - Run manually or via CI/CD
+   - Use incremental mode for faster updates
+
+### Planned Improvements
+
+**v0.8 (Next Release):**
+- Enhanced AST parsing for better symbol detection
+- Cross-language call graph analysis
+- Real-time file watching (optional)
+
+**v0.9 (Future):**
+- Hybrid vector + fulltext search
+- AI-powered code summarization
+- Natural language query support
+
+**v1.0 (Long-term):**
+- Multi-repo dependency tracking
+- Language-specific analyzers
+- Performance profiling integration
+
+## Getting Started
+
+Ready to use Code Graph? Check out these guides:
+
+1. [**Repository Ingestion**](ingestion.md) - Learn how to ingest your codebase
+2. [**Search and Discovery**](search.md) - Master fulltext search and ranking
+3. [**Impact Analysis**](impact.md) - Understand dependencies and blast radius
+4. [**Context Packing**](context.md) - Generate AI-friendly context bundles
+
+## FAQ
+
+### Does Code Graph require an LLM or embeddings?
+
+**No.** Code Graph works with Neo4j alone. It uses native fulltext indexing, not vector embeddings or LLMs.
+
+### What deployment mode do I need?
+
+**Any mode.** Code Graph works identically in minimal, standard, and full deployment modes.
+
+### How is this different from GitHub Copilot?
+
+Code Graph is a **knowledge management system**, not a code completion tool. It helps you understand your codebase structure, dependencies, and relationships. It can feed context to Copilot, but doesn't replace it.
+
+### Can I use this with private/confidential code?
+
+**Yes.** Code Graph runs entirely on-premise or in your infrastructure. No code is sent to external services. It's completely self-hosted.
+
+### How much disk space does it need?
+
+**Approximately 10-20% of your source code size.** A 1GB repository typically requires 100-200MB of Neo4j storage.
+
+### Does it work with monorepos?
+
+**Yes.** Code Graph handles monorepos well. You can ingest the entire monorepo and search across all projects, or ingest individual projects separately.
+
+### Can I query the graph directly?
+
+**Yes.** You can access Neo4j Browser at `http://localhost:7474` and run Cypher queries directly. See the Neo4j documentation for query syntax.
+
+### What if my language isn't supported?
+
+Files are still indexed by path and metadata, just without language-specific symbol extraction. Fulltext search and impact analysis still work. Language support will expand in future releases.
+
+## Next Steps
+
+- **[Ingestion Guide](ingestion.md)**: Learn how to ingest your first repository
+- **[Search Guide](search.md)**: Master search and discovery techniques
+- **[Impact Analysis](impact.md)**: Understand code dependencies
+- **[Context Packing](context.md)**: Generate AI context bundles
diff --git a/docs/guide/code-graph/search.md b/docs/guide/code-graph/search.md
new file mode 100644
index 0000000..540789c
--- /dev/null
+++ b/docs/guide/code-graph/search.md
@@ -0,0 +1,1022 @@
+# Search and Discovery Guide
+
+## Introduction
+
+Code Graph provides powerful fulltext search capabilities powered by Neo4j's native search engine. Unlike simple grep or text matching, Code Graph search understands code structure, ranks results by relevance, and works at graph database speed.
+
+This guide covers everything from basic searches to advanced ranking techniques.
+
+## Search Architecture
+
+### How Search Works
+
+When you search Code Graph:
+
+1. **Query parsing**: Your search terms are analyzed and prepared
+2. **Fulltext index lookup**: Neo4j's native fulltext index is queried
+3. **Result scoring**: Files are ranked by relevance score
+4. **Re-ranking**: Additional ranking factors are applied
+5. **Result formatting**: Files are enriched with metadata and ref:// handles
+
+### Fulltext vs Vector Search
+
+Code Graph uses **fulltext search**, not vector embeddings:
+
+| Feature | Fulltext Search | Vector Search |
+|---------|----------------|---------------|
+| **Setup** | No LLM/embeddings | Requires embeddings |
+| **Speed** | < 100ms | 200-500ms |
+| **Accuracy** | Keyword-based | Semantic |
+| **Resources** | Minimal | High |
+| **Queries** | Keywords | Natural language |
+| **Deployment** | All modes | Full mode only |
+
+**When to use fulltext search:**
+- You know specific terms (function names, file paths)
+- Need instant results
+- Working in minimal/standard mode
+- Want minimal resource usage
+
+**When to use vector search:**
+- Need semantic understanding
+- Natural language queries
+- Working in full mode
+- Have embedding model available
+
+## Using MCP Tools
+
+### Tool: code_graph_related
+
+Find files related to a search query with intelligent ranking.
+
+#### Input Schema
+
+```json
+{
+  "query": "authentication service",
+  "repo_id": "myapp",
+  "limit": 30
+}
+```
+
+#### Parameters
+
+| Parameter | Type | Required | Default | Description |
+|-----------|------|----------|---------|-------------|
+| `query` | string | Yes | - | Search query text |
+| `repo_id` | string | Yes | - | Repository identifier |
+| `limit` | integer | No | 30 | Maximum results (1-100) |
+
+#### Example: Basic Search
+
+```json
+{
+  "query": "user authentication",
+  "repo_id": "myapp",
+  "limit": 10
+}
+```
+
+**Response:**
+
+```json
+{
+  "success": true,
+  "nodes": [
+    {
+      "type": "file",
+      "path": "src/auth/user_auth.py",
+      "lang": "python",
+      "size": 4523,
+      "score": 2.85,
+      "summary": "Python file user_auth.py in auth/ directory",
+      "ref": "ref://file/src/auth/user_auth.py#L1-L1000"
+    },
+    {
+      "type": "file",
+      "path": "src/services/authentication.ts",
+      "lang": "typescript",
+      "size": 6102,
+      "score": 2.41,
+      "summary": "TypeScript file authentication.ts in services/ directory",
+      "ref": "ref://file/src/services/authentication.ts#L1-L1000"
+    }
+  ],
+  "total_count": 2
+}
+```
+
+#### Example: Language-Specific Search
+
+```json
+{
+  "query": "database python",
+  "repo_id": "myapp",
+  "limit": 20
+}
+```
+
+This will find Python files related to database functionality.
+
+#### Example: Path-Based Search
+
+```json
+{
+  "query": "api routes payment",
+  "repo_id": "myapp",
+  "limit": 15
+}
+```
+
+This searches for files in API routes related to payments.
+
+#### Example: Claude Desktop Usage
+
+In Claude Desktop, simply ask:
+
+```
+Find files related to user authentication in myapp
+```
+
+Claude will call the MCP tool:
+
+```json
+{
+  "name": "code_graph_related",
+  "arguments": {
+    "query": "user authentication",
+    "repo_id": "myapp",
+    "limit": 30
+  }
+}
+```
+
+### Understanding Response Fields
+
+#### Node Structure
+
+Each result node contains:
+
+```json
+{
+  "type": "file",                    // Node type (always "file" currently)
+  "path": "src/auth/service.py",    // Relative file path
+  "lang": "python",                  // Programming language
+  "size": 4523,                      // File size in bytes
+  "score": 2.85,                     // Relevance score (higher = more relevant)
+  "summary": "Python file ...",      // Human-readable summary
+  "ref": "ref://file/...",           // Reference handle for AI tools
+  "repoId": "myapp"                  // Repository identifier
+}
+```
+
+#### Score Interpretation
+
+**Score ranges:**
+
+- **3.0+**: Exact match in path, highly relevant
+- **2.0-3.0**: Strong match, multiple keywords matched
+- **1.0-2.0**: Good match, partial keyword matching
+- **0.5-1.0**: Weak match, single keyword or language match
+- **< 0.5**: Very weak match, consider refining query
+
+**Score factors:**
+
+1. **Fulltext score** (base): Neo4j relevance score
+2. **Exact path match** (×2.0): Query appears in path
+3. **Term matching** (×1.0-1.9): Multiple terms matched
+4. **Language match** (×1.5): Query matches file language
+5. **Directory boost** (×1.2): File in src/, lib/, core/, app/
+6. **Test penalty** (×0.5): Test files (unless searching for tests)
+
+#### Reference Handles
+
+The `ref` field provides a standardized way to reference files:
+
+```
+ref://file/{path}#L{start}-L{end}
+```
+
+**Example:**
+```
+ref://file/src/auth/service.py#L1-L1000
+```
+
+**Usage:**
+- AI tools can fetch file content
+- MCP clients can load specific line ranges
+- Context packing uses refs for deduplication
+- Future versions will support symbol refs
+
+## Using REST API
+
+### Endpoint: POST /api/v1/code-graph/search
+
+#### Request Body
+
+```json
+{
+  "query": "authentication",
+  "repo_id": "myapp",
+  "limit": 30
+}
+```
+
+#### Response
+
+```json
+{
+  "success": true,
+  "results": [
+    {
+      "path": "src/auth/service.py",
+      "lang": "python",
+      "size": 4523,
+      "score": 2.85,
+      "ref": "ref://file/src/auth/service.py#L1-L1000"
+    }
+  ],
+  "query": "authentication",
+  "repo_id": "myapp",
+  "total_count": 15,
+  "limit": 30
+}
+```
+
+#### Example: cURL
+
+```bash
+curl -X POST http://localhost:8000/api/v1/code-graph/search \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "authentication service",
+    "repo_id": "myapp",
+    "limit": 10
+  }'
+```
+
+#### Example: Python
+
+```python
+import requests
+
+response = requests.post(
+    "http://localhost:8000/api/v1/code-graph/search",
+    json={
+        "query": "database connection",
+        "repo_id": "myapp",
+        "limit": 20
+    }
+)
+
+results = response.json()
+for file in results["results"]:
+    print(f"{file['score']:.2f}: {file['path']}")
+```
+
+#### Example: JavaScript
+
+```javascript
+const response = await fetch('http://localhost:8000/api/v1/code-graph/search', {
+  method: 'POST',
+  headers: { 'Content-Type': 'application/json' },
+  body: JSON.stringify({
+    query: 'api routes',
+    repo_id: 'myapp',
+    limit: 15
+  })
+});
+
+const data = await response.json();
+data.results.forEach(file => {
+  console.log(`${file.score.toFixed(2)}: ${file.path}`);
+});
+```
+
+## Search Strategies
+
+### 1. Keyword Search
+
+Search for specific terms in file paths and content.
+
+**Example queries:**
+```
+"authentication"
+"database connection"
+"user service"
+"payment processing"
+```
+
+**Best practices:**
+- Use specific terms (not generic words like "code" or "file")
+- Include 1-3 keywords per query
+- Use domain terminology
+- Avoid stop words (the, a, is, etc.)
+
+**When to use:**
+- Looking for specific functionality
+- Know what you're searching for
+- Need precise results
+
+### 2. Multi-term Search
+
+Combine multiple keywords to narrow results.
+
+**Example queries:**
+```
+"auth service typescript"     # Auth service in TypeScript
+"payment api routes"          # Payment API route files
+"database models python"      # Database models in Python
+"user profile component"      # User profile UI components
+```
+
+**Best practices:**
+- Start broad, add terms to narrow
+- Include language for language-specific search
+- Use directory names for path filtering
+- Combine feature + component type
+
+**When to use:**
+- Initial search too broad
+- Need language/path filtering
+- Looking for specific combinations
+
+### 3. Path-Based Search
+
+Search for files in specific directories or matching path patterns.
+
+**Example queries:**
+```
+"src/auth"                    # Files in auth directory
+"api/routes payment"          # Payment routes in API
+"services/user"               # User service files
+"components/profile"          # Profile components
+```
+
+**Best practices:**
+- Use directory names from your project
+- Include path segments for precision
+- Combine with feature keywords
+- Use consistent naming conventions
+
+**When to use:**
+- Know the directory structure
+- Searching within specific module
+- Finding related files in same area
+
+### 4. Language-Specific Search
+
+Filter results by programming language.
+
+**Example queries:**
+```
+"database python"             # Python database files
+"api typescript"              # TypeScript API files
+"utils javascript"            # JavaScript utility files
+"models go"                   # Go model files
+```
+
+**Best practices:**
+- Add language as last term
+- Use full language name (not extensions)
+- Combine with feature keywords
+- Useful for polyglot projects
+
+**When to use:**
+- Multi-language projects
+- Looking for language-specific implementation
+- Comparing implementations across languages
+
+### 5. Component Search
+
+Search for specific types of components.
+
+**Example queries:**
+```
+"service"                     # Service layer files
+"controller"                  # Controller files
+"model"                       # Data models
+"repository"                  # Repository pattern files
+"middleware"                  # Middleware files
+"utils"                       # Utility files
+"helpers"                     # Helper files
+```
+
+**Best practices:**
+- Use architectural terminology
+- Combine with domain keywords
+- Match your project's naming conventions
+- Include common suffixes
+
+**When to use:**
+- Following architectural patterns
+- Finding similar components
+- Understanding layer structure
+
+## Advanced Techniques
+
+### Query Refinement
+
+Start broad, then narrow results iteratively.
+
+**Example workflow:**
+
+1. **Initial query**: `"payment"`
+   - Results: 47 files (too many)
+
+2. **Add context**: `"payment service"`
+   - Results: 12 files (better)
+
+3. **Add language**: `"payment service typescript"`
+   - Results: 4 files (perfect)
+
+4. **Add path**: `"api/services payment typescript"`
+   - Results: 2 files (exact match)
+
+### Fuzzy Matching
+
+Neo4j fulltext search automatically handles:
+
+- **Typos**: "autentication" → "authentication"
+- **Partial words**: "auth" → "authentication"
+- **Case insensitivity**: "USER" → "user"
+- **Stemming**: "payments" → "payment"
+
+**No special syntax required** - just type naturally.
+
+### Boolean Logic
+
+Combine terms with implicit AND logic:
+
+```
+"user auth service"
+```
+
+This finds files matching ALL terms (user AND auth AND service).
+
+**Note:** OR and NOT operators are not currently supported. Use multiple queries instead.
+
+### Wildcards
+
+Not explicitly supported, but partial matching works naturally:
+
+```
+"auth"  →  matches "authentication", "authorize", "auth_service"
+```
+
+### Result Filtering
+
+After getting results, filter programmatically:
+
+```python
+results = search(query="service", repo_id="myapp", limit=100)
+
+# Filter by language
+python_files = [r for r in results if r["lang"] == "python"]
+
+# Filter by path
+api_files = [r for r in results if "api/" in r["path"]]
+
+# Filter by score
+high_score = [r for r in results if r["score"] > 2.0]
+
+# Filter by size
+small_files = [r for r in results if r["size"] < 10000]
+```
+
+## Search Examples
+
+### Example 1: Finding Authentication Code
+
+**Goal:** Find all authentication-related files
+
+**Query:**
+```json
+{
+  "query": "authentication",
+  "repo_id": "myapp",
+  "limit": 20
+}
+```
+
+**Expected results:**
+- `src/auth/authentication.py`
+- `src/services/auth_service.ts`
+- `src/middleware/authenticate.js`
+- `tests/auth/test_authentication.py`
+
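+**Refinement:** To narrow the results toward service code (queries cannot strictly exclude tests; filter test paths programmatically if needed):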
+```json
+{
+  "query": "authentication service",
+  "repo_id": "myapp",
+  "limit": 20
+}
+```
+
+### Example 2: Finding API Routes
+
+**Goal:** Find all API route handlers
+
+**Query:**
+```json
+{
+  "query": "api routes",
+  "repo_id": "myapp",
+  "limit": 30
+}
+```
+
+**Expected results:**
+- `src/api/routes/users.py`
+- `src/api/routes/payments.ts`
+- `src/api/routes/products.js`
+
+**Refinement:** For specific routes:
+```json
+{
+  "query": "api routes payment",
+  "repo_id": "myapp",
+  "limit": 10
+}
+```
+
+### Example 3: Finding Database Models
+
+**Goal:** Find all database model definitions
+
+**Query:**
+```json
+{
+  "query": "models database",
+  "repo_id": "myapp",
+  "limit": 25
+}
+```
+
+**Expected results:**
+- `src/models/user.py`
+- `src/models/payment.py`
+- `src/database/models/order.ts`
+
+**Refinement:** For specific model:
+```json
+{
+  "query": "models user python",
+  "repo_id": "myapp",
+  "limit": 5
+}
+```
+
+### Example 4: Finding Utility Functions
+
+**Goal:** Find utility and helper files
+
+**Query:**
+```json
+{
+  "query": "utils helpers",
+  "repo_id": "myapp",
+  "limit": 20
+}
+```
+
+**Expected results:**
+- `src/utils/string_helpers.py`
+- `src/helpers/date_utils.ts`
+- `lib/utils/format.js`
+
+**Refinement:** For specific utility:
+```json
+{
+  "query": "utils date format",
+  "repo_id": "myapp",
+  "limit": 10
+}
+```
+
+### Example 5: Finding Configuration Files
+
+**Goal:** Find configuration and settings files
+
+**Query:**
+```json
+{
+  "query": "config settings",
+  "repo_id": "myapp",
+  "limit": 15
+}
+```
+
+**Expected results:**
+- `src/config/database.py`
+- `src/config/app_settings.ts`
+- `config/production.js`
+
+## Ranking and Relevance
+
+### How Files Are Ranked
+
+Code Graph uses a multi-factor ranking algorithm:
+
+```python
+final_score = (
+    fulltext_score *          # Base Neo4j score
+    path_match_boost *        # 2.0 if query in path
+    term_match_boost *        # 1.0-1.9 based on terms
+    language_boost *          # 1.5 if language matches
+    directory_boost *         # 1.2 for src/lib/core/app
+    test_penalty              # 0.5 for test files (unless searching tests)
+)
+```
+
+### Improving Search Relevance
+
+#### 1. Use Specific Terms
+
+❌ Bad: `"api"`
+✅ Good: `"api routes payment"`
+
+❌ Bad: `"service"`
+✅ Good: `"user authentication service"`
+
+#### 2. Include Context
+
+❌ Bad: `"utils"`
+✅ Good: `"utils date formatting"`
+
+❌ Bad: `"model"`
+✅ Good: `"database models user"`
+
+#### 3. Match Project Terminology
+
+Use terms from your project:
+
+```
+# If your project uses "handler"
+"payment handler"
+
+# If your project uses "controller"
+"payment controller"
+```
+
+#### 4. Use Directory Structure
+
+```
+"api/routes payment"          # Better than just "payment"
+"services/auth user"          # Better than just "auth"
+```
+
+#### 5. Specify Language
+
+```
+"database python"             # For Python DB files
+"api typescript"              # For TypeScript API files
+```
+
+### Understanding Low Scores
+
+If results have low scores (< 1.0), try:
+
+1. **More specific terms**: Add keywords
+2. **Different terms**: Use synonyms or alternative names
+3. **Path hints**: Include directory names
+4. **Language filter**: Add language name
+5. **Check ingestion**: Verify files were ingested
+
+## Integration Patterns
+
+### Pattern 1: Explore Then Analyze
+
+1. Search for relevant files
+2. Use impact analysis on interesting files
+3. Build context pack for detailed analysis
+
+```javascript
+// 1. Search
+const search_result = await search("authentication", "myapp", 10);
+
+// 2. Pick most relevant
+const top_file = search_result[0].path;
+
+// 3. Impact analysis
+const impact = await analyze_impact("myapp", top_file);
+
+// 4. Context pack
+const context = await build_context_pack("myapp", {
+  focus: top_file,
+  stage: "implement"
+});
+```
+
+### Pattern 2: Multi-Query Discovery
+
+Search multiple related terms to build a comprehensive view:
+
+```python
+queries = [
+    "authentication service",
+    "auth middleware",
+    "user login",
+    "session management"
+]
+
+all_files = set()
+for query in queries:
+    result = search(query, "myapp", 20)
+    for file in result:
+        all_files.add(file["path"])
+
+print(f"Found {len(all_files)} unique files")
+```
+
+### Pattern 3: Language-Specific Analysis
+
+Compare implementations across languages:
+
+```python
+languages = ["python", "typescript", "go"]
+implementations = {}
+
+for lang in languages:
+    result = search(f"payment service {lang}", "myapp", 5)
+    implementations[lang] = [f["path"] for f in result]
+
+# Compare implementations
+for lang, files in implementations.items():
+    print(f"{lang}: {files}")
+```
+
+### Pattern 4: Progressive Refinement
+
+Iteratively narrow results:
+
+```python
+query = "payment"
+limit = 50
+
+while True:
+    result = search(query, "myapp", limit)
+    print(f"Query: '{query}' → {len(result)} results")
+
+    if len(result) <= 10:
+        break  # Good number of results
+
+    # Add more terms
+    refinement = input("Add term to narrow: ")
+    query = f"{query} {refinement}"
+```
+
+## Performance Tips
+
+### Optimize Query Speed
+
+1. **Use reasonable limits**: The default of 30 works well; values near the maximum of 100 are slower
+2. **Be specific**: More terms = faster, more accurate results
+3. **Cache results**: Reuse results when possible
+4. **Batch queries**: Group related searches
+
+### Monitor Performance
+
+```python
+import time
+
+start = time.time()
+result = search("authentication", "myapp", 30)
+duration = time.time() - start
+
+print(f"Search took {duration*1000:.0f}ms")
+print(f"Found {len(result)} files")
+print(f"Throughput: {len(result)/duration:.0f} files/sec")
+```
+
+**Expected performance:**
+- Small repos (<1K files): < 50ms
+- Medium repos (1-10K files): < 100ms
+- Large repos (>10K files): < 200ms
+
+### Troubleshooting Slow Searches
+
+If searches take > 500ms:
+
+1. **Check fulltext index**:
+   ```cypher
+   SHOW INDEXES
+   ```
+
+2. **Rebuild index**:
+   ```cypher
+   DROP INDEX file_text IF EXISTS;
+   CREATE FULLTEXT INDEX file_text FOR (f:File) ON EACH [f.path, f.lang];
+   ```
+
+3. **Reduce limit**: Use limit=20 instead of limit=100
+
+4. **Check Neo4j memory**: Ensure adequate heap size
+
+5. **Optimize patterns**: Exclude more files during ingestion
+
+## Best Practices
+
+### 1. Start Simple
+
+Begin with 1-2 keywords, add more if needed:
+
+```
+"auth" → "auth service" → "auth service python"
+```
+
+### 2. Use Domain Terms
+
+Match terminology used in your codebase:
+
+```
+# If your code uses "repository pattern"
+"user repository"
+
+# If your code uses "data access layer"
+"user data access"
+```
+
+### 3. Leverage Path Structure
+
+Include directory names for precision:
+
+```
+"api/routes payment"
+"services/auth user"
+"models/database order"
+```
+
+### 4. Filter by Language
+
+For multi-language projects:
+
+```
+"database connection python"
+"api client typescript"
+```
+
+### 5. Iterate Quickly
+
+Don't overthink - search, review, refine:
+
+1. Quick search
+2. Scan results
+3. Add/change terms
+4. Repeat
+
+## Troubleshooting
+
+### No Results Found
+
+**Possible causes:**
+1. Files not ingested
+2. Query too specific
+3. Typo in query
+4. Wrong repo_id
+
+**Solutions:**
+1. Verify ingestion: `MATCH (f:File {repoId: 'myapp'}) RETURN count(f)`
+2. Simplify query: Try single keyword
+3. Check spelling
+4. List available repos: `MATCH (r:Repo) RETURN r.id`
+
+### Irrelevant Results
+
+**Possible causes:**
+1. Query too generic
+2. Test files included
+3. Low-quality matches
+
+**Solutions:**
+1. Add more specific terms
+2. Add "service" or "api" to favor non-test files (test files are already down-ranked unless the query contains "test")
+3. Filter results by score > 1.0
+
+### Missing Expected Files
+
+**Possible causes:**
+1. File not ingested
+2. File too large (>100KB)
+3. File excluded by patterns
+
+**Solutions:**
+1. Check if file exists in Neo4j
+2. Check file size
+3. Review ingestion patterns
+
+### Duplicate Results
+
+**Possible causes:**
+1. Same repo ingested multiple times
+2. File copied in multiple locations
+
+**Solutions:**
+1. Re-ingest with full mode
+2. Check for actual duplicates in codebase
+
+## Next Steps
+
+Now that you can search effectively, learn about:
+
+- **[Impact Analysis](impact.md)**: Understand code dependencies and blast radius
+- **[Context Packing](context.md)**: Generate AI-friendly context bundles from search results
+
+## Reference
+
+### MCP Tool Definition
+
+```json
+{
+  "name": "code_graph_related",
+  "description": "Find files related to a query using fulltext search",
+  "inputSchema": {
+    "type": "object",
+    "properties": {
+      "query": {
+        "type": "string",
+        "description": "Search query"
+      },
+      "repo_id": {
+        "type": "string",
+        "description": "Repository identifier"
+      },
+      "limit": {
+        "type": "integer",
+        "minimum": 1,
+        "maximum": 100,
+        "default": 30,
+        "description": "Max results"
+      }
+    },
+    "required": ["query", "repo_id"]
+  }
+}
+```
+
+### REST API Specification
+
+**Endpoint:** `POST /api/v1/code-graph/search`
+
+**Request:**
+```typescript
+interface SearchRequest {
+  query: string;           // Required: Search query
+  repo_id: string;         // Required: Repository ID
+  limit?: number;          // Optional: Max results (default: 30, max: 100)
+}
+```
+
+**Response:**
+```typescript
+interface SearchResponse {
+  success: boolean;
+  results: Array<{
+    type: string;          // Always "file"
+    path: string;          // File path
+    lang: string;          // Programming language
+    size: number;          // File size in bytes
+    score: number;         // Relevance score
+    summary: string;       // Human-readable description
+    ref: string;           // ref:// handle
+    repoId: string;        // Repository ID
+  }>;
+  query: string;           // Original query
+  repo_id: string;         // Repository ID
+  total_count: number;     // Number of results returned
+  limit: number;           // Applied limit
+}
+```
+
+### Ranking Algorithm
+
+```python
+def rank_file(file, query):
+    """Calculate relevance score for a file"""
+    score = file.fulltext_score  # Base Neo4j score
+
+    # Path match boost
+    if query.lower() in file.path.lower():
+        score *= 2.0
+
+    # Term matching boost
+    query_terms = set(query.lower().split())
+    path_terms = set(file.path.lower().split('/'))
+    matching_terms = query_terms & path_terms
+    if matching_terms:
+        score *= (1.0 + len(matching_terms) * 0.3)
+
+    # Language boost
+    if query.lower() in file.lang.lower():
+        score *= 1.5
+
+    # Directory boost
+    if any(prefix in file.path for prefix in ['src/', 'lib/', 'core/', 'app/']):
+        score *= 1.2
+
+    # Test penalty
+    if 'test' not in query.lower() and ('test' in file.path or 'spec' in file.path):
+        score *= 0.5
+
+    return score
+```
diff --git a/docs/guide/knowledge/documents.md b/docs/guide/knowledge/documents.md
new file mode 100644
index 0000000..f1bf7a2
--- /dev/null
+++ b/docs/guide/knowledge/documents.md
@@ -0,0 +1,702 @@
+# Document Processing Guide
+
+Learn how to add, manage, and optimize documents in the Knowledge RAG system.
+
+## Overview
+
+The document processing pipeline transforms your documents into an intelligent knowledge graph:
+
+1. **Ingestion**: Read document content
+2. **Chunking**: Split into semantic chunks
+3. **Embedding**: Convert to vector representations
+4. **Storage**: Save to Neo4j with vector index
+5. **Indexing**: Create search-optimized structures
+
+## Document Processing Methods
+
+### 1. Direct Content (add_document)
+
+Add document content directly as a string.
+
+#### MCP Tool Usage
+
+```json
+{
+  "tool": "add_document",
+  "input": {
+    "content": "Your document content here...",
+    "title": "Document Title",
+    "metadata": {
+      "author": "John Doe",
+      "category": "tutorial",
+      "tags": ["python", "tutorial"]
+    }
+  }
+}
+```
+
+#### HTTP API Usage
+
+```bash
+curl -X POST http://localhost:8000/api/v1/knowledge/add \
+  -H "Content-Type: application/json" \
+  -d '{
+    "content": "Your document content...",
+    "title": "Document Title",
+    "metadata": {"category": "tutorial"}
+  }'
+```
+
+#### Python Client Usage
+
+```python
+import httpx
+
+async with httpx.AsyncClient() as client:
+    response = await client.post(
+        "http://localhost:8000/api/v1/knowledge/add",
+        json={
+            "content": "Document content...",
+            "title": "My Document",
+            "metadata": {"type": "article"}
+        }
+    )
+    result = response.json()
+    print(f"Added: {result}")
+```
+
+#### Size-Based Behavior
+
+- **Small documents** (<10KB): Processed synchronously
+  ```json
+  {
+    "success": true,
+    "message": "Document added successfully",
+    "node_id": "abc123..."
+  }
+  ```
+
+- **Large documents** (≥10KB): Queued for async processing
+  ```json
+  {
+    "success": true,
+    "async": true,
+    "task_id": "task_xyz789",
+    "message": "Large document queued (size: 25600 bytes)"
+  }
+  ```
+
+### 2. File Upload (add_file)
+
+Process files from the filesystem.
+
+#### Supported File Types
+
+- **Text files**: .txt, .md, .rst, .log
+- **Code files**: .py, .js, .java, .go, .rs, .cpp, .c, .h
+- **Documentation**: .pdf, .html, .xml
+- **Data files**: .json, .yaml, .yml, .toml, .csv
+
+#### MCP Tool Usage
+
+```json
+{
+  "tool": "add_file",
+  "input": {
+    "file_path": "/path/to/document.md"
+  }
+}
+```
+
+#### HTTP API Usage
+
+```bash
+curl -X POST http://localhost:8000/api/v1/knowledge/add-file \
+  -H "Content-Type: application/json" \
+  -d '{"file_path": "/path/to/document.md"}'
+```
+
+#### Example Response
+
+```json
+{
+  "success": true,
+  "message": "File processed successfully",
+  "file_path": "/path/to/document.md",
+  "chunks_created": 12,
+  "node_id": "file_node_123"
+}
+```
+
+### 3. Directory Batch Processing (add_directory)
+
+Process multiple files from a directory.
+
+#### MCP Tool Usage
+
+```json
+{
+  "tool": "add_directory",
+  "input": {
+    "directory_path": "/path/to/docs",
+    "recursive": true
+  }
+}
+```
+
+#### HTTP API Usage
+
+```bash
+curl -X POST http://localhost:8000/api/v1/knowledge/add-directory \
+  -H "Content-Type: application/json" \
+  -d '{
+    "directory_path": "/path/to/docs",
+    "recursive": true
+  }'
+```
+
+#### Features
+
+- **Recursive scanning**: Process all subdirectories
+- **File filtering**: Automatic filtering by extension
+- **Async processing**: Always queued as background task
+- **Progress tracking**: Monitor via task queue
+
+#### Example Response
+
+```json
+{
+  "success": true,
+  "async": true,
+  "task_id": "dir_task_456",
+  "message": "Directory processing queued: /path/to/docs"
+}
+```
+
+## Task Monitoring
+
+Large documents and directory processing are handled asynchronously. Monitor progress using task queue tools.
+
+### Get Task Status
+
+```json
+{
+  "tool": "get_task_status",
+  "input": {
+    "task_id": "task_xyz789"
+  }
+}
+```
+
+### Watch Task Progress
+
+```json
+{
+  "tool": "watch_task",
+  "input": {
+    "task_id": "task_xyz789",
+    "timeout": 300
+  }
+}
+```
+
+### Task Lifecycle
+
+```
+PENDING → PROCESSING → COMPLETED
+    ↓          ↓
+  FAILED    FAILED
+```
+
+## Document Metadata
+
+Metadata enriches documents and enables advanced filtering.
+
+### Standard Metadata Fields
+
+```python
+{
+  "title": "Document Title",           # Required
+  "author": "Author Name",             # Optional
+  "created_at": "2024-01-15",          # Auto-generated if not provided
+  "updated_at": "2024-01-16",          # Auto-updated
+  "category": "tutorial",              # Custom category
+  "tags": ["python", "async"],         # List of tags
+  "source": "https://example.com",     # Source URL
+  "language": "en",                    # Content language
+  "version": "1.0",                    # Document version
+  "priority": 0.8                      # Relevance priority
+}
+```
+
+### Custom Metadata
+
+Add any custom fields:
+
+```python
+{
+  "metadata": {
+    "department": "Engineering",
+    "project": "Project Alpha",
+    "classification": "internal",
+    "expires_at": "2025-01-01",
+    "custom_field": "custom_value"
+  }
+}
+```
+
+### Metadata Usage
+
+Metadata is stored as node properties and can be:
+- Searched in vector queries
+- Filtered in graph queries
+- Used for relationship inference
+- Displayed in query results
+
+## Chunking Strategy
+
+Documents are split into chunks for optimal processing and retrieval.
+
+### Chunking Parameters
+
+Configure in `.env`:
+
+```bash
+CHUNK_SIZE=512        # Tokens per chunk
+CHUNK_OVERLAP=50      # Overlap between chunks (tokens)
+```
+
+### Chunk Size Guidelines
+
+| Document Type | Recommended Chunk Size | Reasoning |
+|--------------|----------------------|-----------|
+| Technical docs | 512-1024 | Preserve code context |
+| Articles | 256-512 | Natural paragraph breaks |
+| Code files | 1024-2048 | Keep function context |
+| Short content | 128-256 | Small FAQs, snippets |
+
+### Overlap Benefits
+
+Chunk overlap ensures context preservation:
+
+```
+Chunk 1: [tokens 0-512] with overlap [462-512]
+Chunk 2: [tokens 462-974] with overlap [924-974]
+Chunk 3: [tokens 924-1436] ...
+```
+
+Benefits:
+- ✅ Maintains sentence continuity
+- ✅ Preserves context across boundaries
+- ✅ Improves retrieval accuracy
+- ✅ Reduces information loss
+
+## Embedding Generation
+
+### Embedding Providers
+
+Choose from multiple providers:
+
+#### 1. Ollama (Local, Free)
+
+```bash
+# .env configuration
+EMBEDDING_PROVIDER=ollama
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text  # 768 dimensions
+OLLAMA_BASE_URL=http://localhost:11434
+
+# Available models:
+# - nomic-embed-text (768d) - Recommended
+# - mxbai-embed-large (1024d) - Higher quality
+# - all-minilm (384d) - Faster, smaller
+```
+
+#### 2. OpenAI (Cloud, High Quality)
+
+```bash
+# .env configuration
+EMBEDDING_PROVIDER=openai
+OPENAI_EMBEDDING_MODEL=text-embedding-3-small  # 1536 dimensions
+OPENAI_API_KEY=sk-...
+
+# Available models:
+# - text-embedding-3-small (1536d) - Cost-effective
+# - text-embedding-3-large (3072d) - Best quality
+# - text-embedding-ada-002 (1536d) - Legacy
+```
+
+#### 3. Google Gemini (Cloud, Cost-Effective)
+
+```bash
+# .env configuration
+EMBEDDING_PROVIDER=gemini
+GEMINI_EMBEDDING_MODEL=models/embedding-001  # 768 dimensions
+GOOGLE_API_KEY=AIza...
+```
+
+#### 4. HuggingFace (Local, Customizable)
+
+```bash
+# .env configuration
+EMBEDDING_PROVIDER=huggingface
+HF_EMBEDDING_MODEL=BAAI/bge-small-en-v1.5  # 384 dimensions
+
+# Popular models:
+# - BAAI/bge-small-en-v1.5 (384d) - Fast
+# - BAAI/bge-base-en-v1.5 (768d) - Balanced
+# - BAAI/bge-large-en-v1.5 (1024d) - Best quality
+```
+
+### Vector Dimensions
+
+Different models produce vectors of different dimensions:
+
+```bash
+# Configure in .env
+VECTOR_DIMENSION=768  # Must match your embedding model
+
+# Common dimensions:
+# 384 - Small models (fast, less accurate)
+# 768 - Medium models (balanced)
+# 1024 - Large models (slower, more accurate)
+# 1536 - OpenAI models
+# 3072 - OpenAI large model
+```
+
+### Embedding Performance
+
+| Provider | Speed | Quality | Cost | Privacy |
+|----------|-------|---------|------|---------|
+| Ollama | Medium | Good | Free | 100% Private |
+| OpenAI | Fast | Excellent | $0.13/1M tokens | Cloud |
+| Gemini | Fast | Very Good | Lower cost | Cloud |
+| HuggingFace | Fast-Slow | Varies | Free | 100% Private |
+
+## Neo4j Storage
+
+### Node Structure
+
+Each document chunk becomes a Neo4j node:
+
+```cypher
+(:Document {
+  id: "doc_123",
+  title: "Document Title",
+  content: "Chunk content...",
+  embedding: [0.123, 0.456, ...],  // Vector embedding
+  chunk_index: 0,
+  total_chunks: 10,
+  metadata: {...},
+  created_at: datetime(),
+  updated_at: datetime()
+})
+```
+
+### Relationships
+
+Documents can have relationships:
+
+```cypher
+// Document parts
+(doc:Document)-[:HAS_CHUNK]->(chunk:DocumentChunk)
+
+// Document references
+(doc1:Document)-[:REFERENCES]->(doc2:Document)
+
+// Topic relationships
+(doc:Document)-[:ABOUT]->(topic:Topic)
+
+// Source relationships
+(doc:Document)-[:FROM_FILE]->(file:File)
+```
+
+### Vector Index
+
+Neo4j creates a vector index for fast similarity search:
+
+```cypher
+// Created automatically
+CREATE VECTOR INDEX document_embeddings
+FOR (d:Document)
+ON d.embedding
+OPTIONS {
+  indexConfig: {
+    `vector.dimensions`: 768,
+    `vector.similarity_function`: 'cosine'
+  }
+}
+```
+
+## Document Management
+
+### List Documents
+
+```bash
+# HTTP API
+curl http://localhost:8000/api/v1/knowledge/documents
+```
+
+Response:
+```json
+{
+  "documents": [
+    {
+      "id": "doc_123",
+      "title": "Document Title",
+      "chunks": 10,
+      "created_at": "2024-01-15T10:00:00Z",
+      "metadata": {...}
+    }
+  ],
+  "total": 1
+}
+```
+
+### Get Document Details
+
+```bash
+# HTTP API
+curl http://localhost:8000/api/v1/knowledge/documents/doc_123
+```
+
+### Update Document
+
+```bash
+# HTTP API
+curl -X PUT http://localhost:8000/api/v1/knowledge/documents/doc_123 \
+  -H "Content-Type: application/json" \
+  -d '{
+    "title": "Updated Title",
+    "metadata": {"updated": true}
+  }'
+```
+
+### Delete Document
+
+```bash
+# HTTP API
+curl -X DELETE http://localhost:8000/api/v1/knowledge/documents/doc_123
+```
+
+**Note**: Deletion removes the document and all its chunks from the graph.
+
+## Best Practices
+
+### 1. Document Preparation
+
+**Clean your content**:
+```python
+# Remove excessive whitespace
+content = " ".join(content.split())
+
+# Remove special characters if needed
+import re
+content = re.sub(r'[^\w\s\.\,\!\?]', '', content)
+
+# Normalize line endings
+content = content.replace('\r\n', '\n')
+```
+
+### 2. Metadata Strategy
+
+**Use consistent metadata**:
+```python
+# Good: Consistent structure
+metadata = {
+    "type": "tutorial",      # Always use "type"
+    "difficulty": "beginner", # Standardized values
+    "tags": ["python", "async"]  # Normalized tags
+}
+
+# Bad: Inconsistent
+metadata = {
+    "kind": "tutorial",      # Different field name
+    "level": "easy",         # Different values
+    "categories": "python"   # Wrong type
+}
+```
+
+### 3. Batch Processing
+
+**Process large collections efficiently**:
+```python
+# Good: Use directory processing
+add_directory("/docs", recursive=True)
+
+# Avoid: Individual file uploads
+for file in files:  # Don't do this for many files
+    add_file(file)
+```
+
+### 4. Error Handling
+
+**Handle failures gracefully**:
+```python
+try:
+    result = await add_document(content, title)
+    if not result["success"]:
+        logger.error(f"Failed: {result['error']}")
+except Exception as e:
+    logger.error(f"Exception: {e}")
+```
+
+### 5. Resource Management
+
+**Monitor system resources**:
+- Check task queue length
+- Monitor Neo4j memory usage
+- Track embedding generation time
+- Watch disk space
+
+## Troubleshooting
+
+### Issue: Document Not Found
+
+**Symptom**: Queries don't return the expected document
+
+**Solutions**:
+1. Verify document was added successfully
+2. Check embeddings were generated
+3. Verify vector index exists
+4. Try different query terms
+
+### Issue: Slow Processing
+
+**Symptom**: Documents take a long time to process
+
+**Solutions**:
+1. Check chunk size (reduce if too large)
+2. Verify embedding provider is responsive
+3. Monitor Neo4j performance
+4. Use async processing for large docs
+
+### Issue: Poor Search Results
+
+**Symptom**: Queries return irrelevant documents
+
+**Solutions**:
+1. Adjust chunk size/overlap
+2. Try a different embedding model
+3. Add more descriptive metadata
+4. Use a different query mode (hybrid/vector/graph)
+
+### Issue: Out of Memory
+
+**Symptom**: Embedding generation fails
+
+**Solutions**:
+1. Reduce batch size
+2. Increase system memory
+3. Use smaller embedding model
+4. Process documents in smaller batches
+
+## Advanced Techniques
+
+### 1. Custom Document Loaders
+
+Create specialized loaders for custom formats:
+
+```python
+from llama_index.core import Document
+
+def load_custom_format(file_path):
+    # Your custom parsing logic
+    content = parse_custom_file(file_path)
+
+    return Document(
+        text=content,
+        metadata={
+            "source": file_path,
+            "format": "custom"
+        }
+    )
+```
+
+### 2. Document Versioning
+
+Track document versions:
+
+```python
+metadata = {
+    "version": "2.0",
+    "previous_version": "1.0",
+    "change_summary": "Updated API examples",
+    "updated_by": "user@example.com"
+}
+```
+
+### 3. Multi-language Support
+
+Process documents in multiple languages:
+
+```python
+# Specify language in metadata
+metadata = {
+    "language": "es",  # Spanish
+    "original_language": "en",
+    "translated": True
+}
+
+# Use language-specific embedding models
+EMBEDDING_MODEL = "paraphrase-multilingual-mpnet-base-v2"
+```
+
+### 4. Incremental Updates
+
+Add a new version of a document while keeping the previous version for history:
+
+```python
+# Add new version while keeping history
+new_content = updated_document_content
+metadata = {
+    "replaces": "old_doc_id",
+    "version": "2.0"
+}
+add_document(new_content, metadata=metadata)
+```
+
+## Performance Optimization
+
+### Embedding Cache
+
+Cache embeddings for repeated content:
+
+```bash
+# Configure cache in .env
+ENABLE_EMBEDDING_CACHE=true
+CACHE_SIZE=10000  # Number of embeddings to cache
+```
+
+### Batch Processing
+
+Process multiple documents in batches:
+
+```python
+# Use directory processing for efficiency
+add_directory("/docs", recursive=True)
+
+# Or implement custom batching
+for batch in chunks(documents, batch_size=10):
+    process_batch(batch)
+```
+
+### Parallel Processing
+
+Enable parallel processing for large collections:
+
+```bash
+# Configure in .env
+MAX_WORKERS=4  # Parallel document processing threads
+```
+
+## Next Steps
+
+- **[Query Guide](query.md)**: Learn to query your knowledge base effectively
+- **[MCP Integration](../mcp/overview.md)**: Connect to AI assistants
+- **[Performance Tuning](../../deployment/production.md)**: Optimize for production
+
+## Additional Resources
+
+- **LlamaIndex Documentation**: https://docs.llamaindex.ai/
+- **Neo4j Vector Search**: https://neo4j.com/docs/cypher-manual/current/indexes-for-vector-search/
+- **Embedding Models**: https://huggingface.co/spaces/mteb/leaderboard
diff --git a/docs/guide/knowledge/overview.md b/docs/guide/knowledge/overview.md
new file mode 100644
index 0000000..299102c
--- /dev/null
+++ b/docs/guide/knowledge/overview.md
@@ -0,0 +1,306 @@
+# Knowledge RAG Overview
+
+Knowledge RAG (Retrieval-Augmented Generation) is the document processing and intelligent Q&A system of the Code Graph Knowledge System. It combines vector search, graph databases, and LLM integration to provide context-aware answers to questions about your documents.
+
+## What is Knowledge RAG?
+
+Knowledge RAG transforms your documents into an intelligent knowledge base that can:
+
+- **Understand Context**: Process documents and extract semantic meaning
+- **Find Relevant Information**: Use vector similarity to find related content
+- **Generate Intelligent Answers**: Use LLMs to synthesize information from multiple sources
+- **Maintain Relationships**: Store knowledge as a graph with rich connections
+
+## Architecture
+
+```
+Documents → Chunking → Embeddings → Neo4j Graph + Vector Index
+                                              ↓
+Query → Vector Search + Graph Traversal → LLM → Intelligent Answer
+```
+
+### Key Components
+
+1. **Document Processing**
+   - Chunking: Break documents into semantic chunks (configurable size)
+   - Embedding: Convert text to vector representations
+   - Graph Storage: Store chunks as nodes with relationships
+
+2. **Query Engine**
+   - Vector Search: Find similar content using embeddings
+   - Graph Traversal: Navigate relationships between nodes
+   - LLM Generation: Synthesize answers from retrieved context
+
+3. **Multi-Provider Support**
+   - **LLM Providers**: Ollama, OpenAI, Google Gemini, OpenRouter
+   - **Embedding Providers**: Ollama, OpenAI, Google Gemini, HuggingFace
+
+## Feature Set
+
+### Document Processing
+- ✅ Text files (.txt, .md, .rst)
+- ✅ Code files (all major languages)
+- ✅ PDF documents
+- ✅ Web pages (HTML)
+- ✅ Batch directory processing
+- ✅ Recursive subdirectory scanning
+
+### Query Modes
+- **Hybrid** (Default): Combines vector search + graph traversal for best results
+- **Vector Only**: Pure similarity search using embeddings
+- **Graph Only**: Uses only graph relationships
+
+### Intelligent Features
+- **Semantic Search**: Find documents by meaning, not just keywords
+- **Context-Aware Answers**: LLM generates answers using relevant sources
+- **Source Attribution**: Every answer includes source nodes
+- **Relationship Discovery**: Find connections between documents
+
+## Deployment Modes
+
+Knowledge RAG is available **only in Full mode** because it requires both LLM and embedding models.
+
+### Full Mode Requirements
+- ✅ Neo4j database with vector index support
+- ✅ LLM provider (for answer generation)
+- ✅ Embedding provider (for vector search)
+
+### Not Available In:
+- ❌ Lite mode (no LLM/embeddings)
+- ❌ Graph-only mode (no RAG features)
+
+## Quick Start Example
+
+### 1. Add Documents
+```python
+# Via MCP Tool
+{
+  "tool": "add_document",
+  "input": {
+    "content": "Machine learning is a subset of artificial intelligence...",
+    "title": "ML Introduction",
+    "metadata": {"type": "tutorial", "difficulty": "beginner"}
+  }
+}
+```
+
+### 2. Query Knowledge Base
+```python
+# Via MCP Tool
+{
+  "tool": "query_knowledge",
+  "input": {
+    "question": "What is machine learning?",
+    "mode": "hybrid"
+  }
+}
+
+# Response:
+{
+  "answer": "Machine learning is a subset of artificial intelligence that...",
+  "sources": [
+    {"title": "ML Introduction", "content": "...", "score": 0.92}
+  ]
+}
+```
+
+### 3. Search Similar Content
+```python
+# Via MCP Tool
+{
+  "tool": "search_similar_nodes",
+  "input": {
+    "query": "neural networks",
+    "top_k": 5
+  }
+}
+```
+
+## Use Cases
+
+### 1. Documentation Search
+Build searchable knowledge bases from your documentation:
+- Technical documentation
+- API references
+- User manuals
+- Internal wikis
+
+### 2. Codebase Understanding
+Index your codebase for intelligent code search:
+- Find implementations by description
+- Understand code context
+- Discover related components
+- Navigate large codebases
+
+### 3. Research Assistant
+Create research knowledge bases:
+- Academic papers
+- Research notes
+- Literature reviews
+- Citation discovery
+
+### 4. Customer Support
+Build intelligent support systems:
+- Product documentation
+- FAQ databases
+- Troubleshooting guides
+- Knowledge articles
+
+### 5. Learning Platform
+Create interactive learning experiences:
+- Course materials
+- Tutorial content
+- Educational resources
+- Study guides
+
+## Configuration
+
+Knowledge RAG is configured via environment variables. Key settings:
+
+```bash
+# Required for Knowledge RAG
+DEPLOYMENT_MODE=full
+ENABLE_KNOWLEDGE_RAG=true
+
+# LLM Configuration
+LLM_PROVIDER=ollama              # ollama/openai/gemini/openrouter
+OLLAMA_MODEL=llama3.2            # or gpt-4, gemini-pro, etc.
+
+# Embedding Configuration
+EMBEDDING_PROVIDER=ollama         # ollama/openai/gemini/huggingface
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
+# Processing Settings
+CHUNK_SIZE=512                    # Tokens per chunk
+CHUNK_OVERLAP=50                  # Overlap between chunks
+TOP_K=5                          # Number of results to retrieve
+
+# Timeout Settings
+OPERATION_TIMEOUT=120             # Standard operations (seconds)
+LARGE_DOCUMENT_TIMEOUT=300        # Large document processing (seconds)
+```
+
+## System Requirements
+
+### With Local LLM (Ollama)
+- **CPU**: 8+ cores recommended
+- **RAM**: 16GB minimum (32GB for large models)
+- **GPU**: Optional but highly recommended (8GB+ VRAM)
+- **Storage**: 50GB+ for models and data
+
+### With Cloud LLM (OpenAI/Gemini)
+- **CPU**: 4+ cores
+- **RAM**: 8GB minimum
+- **Storage**: 20GB+ for data
+- **Network**: Stable internet connection
+
+## Performance Characteristics
+
+### Processing Speed
+- **Small documents** (<10KB): Synchronous, <1s
+- **Medium documents** (10-50KB): Async queue, 1-10s
+- **Large documents** (>50KB): Async queue, 10-60s
+- **Directories**: Async queue, varies by size
+
+### Query Performance
+- **Vector search**: 50-200ms
+- **Hybrid mode**: 100-500ms
+- **LLM generation**: 1-5s (local), 0.5-2s (cloud)
+
+### Scaling Considerations
+- **Document size**: Up to 10MB per document recommended
+- **Total documents**: Scales to millions with proper Neo4j tuning
+- **Concurrent queries**: 10-50 depending on hardware
+- **Embedding cache**: Speeds up repeated queries
+
+## Integration Points
+
+Knowledge RAG integrates with other system components:
+
+### 1. Task Queue System
+- Async processing for large documents
+- Background directory ingestion
+- Progress tracking
+- Error handling and retries
+
+### 2. MCP Tools
+- 5 knowledge tools available via MCP
+- Integration with Claude Desktop, VS Code
+- Real-time query capabilities
+
+### 3. Memory Store
+- Suggest memories from Q&A sessions
+- Auto-extract knowledge from queries
+- Cross-reference with project memories
+
+### 4. Code Graph
+- Complement code-specific analysis
+- Provide documentation context
+- Enhance code understanding
+
+## Limitations and Considerations
+
+### Current Limitations
+1. **Text-based only**: Images and binary files not supported
+2. **Token limits**: Retrieved chunks (not whole documents) must fit in the LLM context window
+3. **Language**: Best results with English (depends on embedding model)
+4. **Real-time**: Not suitable for rapidly changing documents
+
+### Best Practices
+1. **Document size**: Keep documents focused and well-structured
+2. **Chunking**: Adjust chunk size for your content type
+3. **Metadata**: Add rich metadata for better filtering
+4. **Updates**: Re-process documents when content changes
+5. **Query formulation**: Ask specific, well-formed questions
+
+## Security and Privacy
+
+### Data Storage
+- Documents stored in Neo4j database
+- Embeddings stored as node properties
+- No external data transmission (with local LLM)
+
+### Privacy Options
+- **Full privacy**: Use Ollama for local processing
+- **Cloud processing**: OpenAI/Gemini send data to cloud
+- **Hybrid**: Local embeddings + cloud LLM
+
+### Access Control
+- No built-in authentication (add via reverse proxy)
+- Neo4j database access control
+- MCP tool isolation per user
+
+## Cost Considerations
+
+### Local Deployment (Ollama)
+- **Hardware**: $0-2000 one-time (GPU recommended)
+- **Hosting**: $40-200/month (VPS/cloud)
+- **LLM**: $0 (free)
+- **Embeddings**: $0 (free)
+- **Total ongoing**: $40-200/month
+
+### Cloud Deployment (OpenAI)
+- **Hosting**: $10-20/month (small VPS)
+- **LLM**: $0.01-0.10 per query (GPT-4o-mini)
+- **Embeddings**: $0.0001 per 1K tokens
+- **Total**: $50-500/month (usage-dependent)
+
+### Hybrid Deployment
+- **Hosting**: $10-20/month
+- **LLM**: $0.01-0.10 per query
+- **Embeddings**: $0 (local Ollama)
+- **Total**: $30-300/month
+
+## Next Steps
+
+- **[Document Processing Guide](documents.md)**: Learn how to add and manage documents
+- **[Query Guide](query.md)**: Master intelligent querying techniques
+- **[MCP Integration](../mcp/overview.md)**: Connect to AI assistants
+- **[Full Mode Deployment](../../deployment/full.md)**: Deploy with all features
+
+## Additional Resources
+
+- **Examples**: See `examples/` directory for code samples
+- **API Reference**: HTTP REST API documentation
+- **MCP Tools**: Tool definitions and schemas
+- **Configuration**: Complete `.env` settings guide
diff --git a/docs/guide/knowledge/query.md b/docs/guide/knowledge/query.md
new file mode 100644
index 0000000..7b49b87
--- /dev/null
+++ b/docs/guide/knowledge/query.md
@@ -0,0 +1,828 @@
+# Intelligent Query Guide
+
+Master the art of querying your knowledge base using RAG (Retrieval-Augmented Generation) for intelligent, context-aware answers.
+
+## Overview
+
+The Knowledge RAG query system combines three powerful techniques:
+
+1. **Vector Search**: Find semantically similar content using embeddings
+2. **Graph Traversal**: Navigate relationships between documents
+3. **LLM Generation**: Synthesize intelligent answers from retrieved context
+
+## Query Modes
+
+### 1. Hybrid Mode (Recommended)
+
+Combines vector search and graph traversal for best results.
+
+**When to use**:
+- General-purpose queries
+- Complex questions requiring multiple sources
+- When you want comprehensive answers
+- Default choice for most use cases
+
+**Example**:
+```json
+{
+  "tool": "query_knowledge",
+  "input": {
+    "question": "How does JWT authentication work in the system?",
+    "mode": "hybrid"
+  }
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "answer": "JWT authentication in the system works by...",
+  "sources": [
+    {
+      "node_id": "node_123",
+      "content": "The JWT middleware validates tokens...",
+      "score": 0.92,
+      "metadata": {"file": "auth.py", "type": "code"}
+    },
+    {
+      "node_id": "node_456",
+      "content": "JWT tokens contain user claims...",
+      "score": 0.87,
+      "metadata": {"file": "jwt_docs.md", "type": "docs"}
+    }
+  ],
+  "mode": "hybrid",
+  "retrieval_time_ms": 150,
+  "generation_time_ms": 2300
+}
+```
+
+### 2. Vector-Only Mode
+
+Pure similarity search using embeddings.
+
+**When to use**:
+- Finding similar documents
+- Semantic search without graph (relationship) context
+- Fast lookups
+- When graph relationships aren't important
+
+**Example**:
+```json
+{
+  "tool": "query_knowledge",
+  "input": {
+    "question": "authentication security",
+    "mode": "vector_only"
+  }
+}
+```
+
+**Characteristics**:
+- ✅ Fast (50-200ms)
+- ✅ Good for keyword-like queries
+- ✅ Scales well with large datasets
+- ❌ Misses relationship context
+- ❌ May return disconnected results
+
+### 3. Graph-Only Mode
+
+Uses only graph relationships and structure.
+
+**When to use**:
+- Exploring document relationships
+- Finding connected concepts
+- When semantic similarity isn't needed
+- Structured knowledge navigation
+
+**Example**:
+```json
+{
+  "tool": "query_knowledge",
+  "input": {
+    "question": "Show all API documentation",
+    "mode": "graph_only"
+  }
+}
+```
+
+**Characteristics**:
+- ✅ Preserves document structure
+- ✅ Good for hierarchical queries
+- ✅ Finds related documents
+- ❌ Requires well-structured graph
+- ❌ May miss semantically similar content
+
+## Query Techniques
+
+### 1. Simple Questions
+
+Direct, straightforward questions:
+
+```json
+{
+  "question": "What is the purpose of the Memory Store?"
+}
+
+{
+  "question": "How do I configure Neo4j?"
+}
+
+{
+  "question": "What are the system requirements?"
+}
+```
+
+### 2. Comparative Questions
+
+Compare different concepts or approaches:
+
+```json
+{
+  "question": "What's the difference between Ollama and OpenAI?"
+}
+
+{
+  "question": "Compare vector_only and hybrid query modes"
+}
+
+{
+  "question": "Should I use local or cloud LLM for my use case?"
+}
+```
+
+### 3. How-To Questions
+
+Step-by-step instructions:
+
+```json
+{
+  "question": "How do I deploy the system with Docker?"
+}
+
+{
+  "question": "How to add documents to the knowledge base?"
+}
+
+{
+  "question": "How to configure MCP in Claude Desktop?"
+}
+```
+
+### 4. Conceptual Questions
+
+Understanding concepts:
+
+```json
+{
+  "question": "Explain how RAG works in this system"
+}
+
+{
+  "question": "What is the architecture of the Code Graph?"
+}
+
+{
+  "question": "Describe the document processing pipeline"
+}
+```
+
+### 5. Code-Related Questions
+
+When your knowledge base includes code:
+
+```json
+{
+  "question": "Show me how to use the memory_store API"
+}
+
+{
+  "question": "What parameters does add_document accept?"
+}
+
+{
+  "question": "Find examples of async function implementation"
+}
+```
+
+## Advanced Query Features
+
+### Top-K Results
+
+Control the number of source documents retrieved (this example retrieves the 10 most relevant chunks):
+
+```json
+{
+  "tool": "query_knowledge",
+  "input": {
+    "question": "What are deployment options?",
+    "top_k": 10
+  }
+}
+```
+
+**Guidelines**:
+- `top_k=3-5`: Focused, specific answers
+- `top_k=10`: Comprehensive, detailed answers
+- `top_k=20+`: Exhaustive, may include noise
+
+**Default**: `TOP_K=5` (from `.env` configuration)
+
+### Similarity Search
+
+Find similar documents without LLM generation:
+
+```json
+{
+  "tool": "search_similar_nodes",
+  "input": {
+    "query": "authentication implementation",
+    "top_k": 5
+  }
+}
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "results": [
+    {
+      "node_id": "node_789",
+      "content": "JWT authentication middleware...",
+      "score": 0.94,
+      "metadata": {"file": "auth.py"}
+    },
+    {
+      "node_id": "node_790",
+      "content": "OAuth2 implementation...",
+      "score": 0.89,
+      "metadata": {"file": "oauth.py"}
+    }
+  ],
+  "total_results": 5,
+  "search_time_ms": 85
+}
+```
+
+**Use cases**:
+- Finding related documents
+- Building custom result displays
+- Quick content discovery
+- Bypassing LLM for speed
+
+## Query Optimization
+
+### 1. Formulate Clear Questions
+
+**Good questions**:
+```
+✅ "How do I configure Ollama for local LLM?"
+✅ "What are the differences between lite and full mode?"
+✅ "Show me code examples for adding memories"
+```
+
+**Poor questions**:
+```
+❌ "ollama?"  (Too vague)
+❌ "Tell me everything"  (Too broad)
+❌ "thing about the stuff"  (Unclear)
+```
+
+### 2. Use Specific Terms
+
+Include technical terms and keywords:
+
+```json
+// Good: Specific technical terms
+{
+  "question": "How does Neo4j vector index improve search performance?"
+}
+
+// Less effective: Generic terms
+{
+  "question": "How does the database make things faster?"
+}
+```
+
+### 3. Provide Context
+
+Add context for ambiguous terms:
+
+```json
+// Good: Contextual
+{
+  "question": "How do I configure JWT authentication in the FastAPI application?"
+}
+
+// Ambiguous: Lacks context
+{
+  "question": "How do I configure authentication?"
+}
+```
+
+### 4. Break Down Complex Queries
+
+Split complex questions:
+
+```json
+// Instead of:
+{
+  "question": "How do I set up the system with Docker using Ollama with GPU support and configure Neo4j for production?"
+}
+
+// Do this:
+// Query 1:
+{
+  "question": "How do I set up the system with Docker?"
+}
+
+// Query 2:
+{
+  "question": "How do I configure Ollama with GPU support?"
+}
+
+// Query 3:
+{
+  "question": "How do I configure Neo4j for production?"
+}
+```
+
+## Understanding Query Results
+
+### Result Structure
+
+```json
+{
+  "success": true,
+  "answer": "Generated answer text...",
+  "sources": [
+    {
+      "node_id": "unique_node_id",
+      "content": "Source content snippet...",
+      "score": 0.92,  // Similarity score (0-1)
+      "metadata": {
+        "title": "Document Title",
+        "file": "path/to/file",
+        "chunk_index": 0,
+        "type": "documentation"
+      }
+    }
+  ],
+  "mode": "hybrid",
+  "retrieval_time_ms": 150,
+  "generation_time_ms": 2300,
+  "total_time_ms": 2450
+}
+```
+
+### Interpreting Scores
+
+Similarity scores indicate relevance:
+
+- **0.90 - 1.00**: Highly relevant, near-exact semantic match
+- **0.80 - 0.89**: Very relevant, strong semantic match
+- **0.70 - 0.79**: Relevant, good match
+- **0.60 - 0.69**: Somewhat relevant, partial match
+- **0.50 - 0.59**: Weakly relevant, tangential
+- **< 0.50**: Likely not relevant
+
+### Source Attribution
+
+Each answer includes source nodes for verification:
+
+```python
+# Example: Verify answer sources
+result = query_knowledge("How does RAG work?")
+
+print(f"Answer: {result['answer']}\n")
+print("Sources:")
+for source in result['sources']:
+    print(f"  - {source['metadata']['title']} (score: {source['score']:.2f})")
+    print(f"    {source['content'][:100]}...")
+```
+
+## HTTP API Usage
+
+### Basic Query
+
+```bash
+curl -X POST http://localhost:8000/api/v1/knowledge/query \
+  -H "Content-Type: application/json" \
+  -d '{
+    "question": "What is the deployment architecture?",
+    "mode": "hybrid",
+    "top_k": 5
+  }'
+```
+
+### Similarity Search
+
+```bash
+curl -X POST http://localhost:8000/api/v1/knowledge/search \
+  -H "Content-Type: application/json" \
+  -d '{
+    "query": "docker configuration",
+    "top_k": 10
+  }'
+```
+
+### Python Client
+
+```python
+import httpx
+import asyncio
+
+async def query_knowledge(question: str, mode: str = "hybrid", top_k: int = 5):
+    async with httpx.AsyncClient() as client:
+        response = await client.post(
+            "http://localhost:8000/api/v1/knowledge/query",
+            json={
+                "question": question,
+                "mode": mode,
+                "top_k": top_k
+            },
+            timeout=30.0
+        )
+        return response.json()
+
+# Usage
+result = asyncio.run(query_knowledge(
+    "How do I configure the system?"
+))
+print(result['answer'])
+```
+
+## Query Performance
+
+### Performance Characteristics
+
+| Operation | Speed | Quality | Use Case |
+|-----------|-------|---------|----------|
+| Vector search | 50-200ms | Good | Fast lookups |
+| Hybrid mode | 100-500ms | Excellent | General queries |
+| Graph-only | 100-300ms | Good | Structured data |
+| LLM generation | 1-5s | Excellent | Answer synthesis |
+
+### Performance Tips
+
+1. **Use vector-only for speed**:
+   ```json
+   {"question": "quick lookup", "mode": "vector_only"}
+   ```
+
+2. **Cache frequent queries** (implement client-side):
+   ```python
+   query_cache = {}
+
+   def cached_query(question):
+       if question in query_cache:
+           return query_cache[question]
+       result = query_knowledge(question)
+       query_cache[question] = result
+       return result
+   ```
+
+3. **Adjust top_k based on needs**:
+   ```python
+   # Fast: fewer sources
+   query_knowledge(q, top_k=3)
+
+   # Comprehensive: more sources
+   query_knowledge(q, top_k=10)
+   ```
+
+4. **Use similarity search for bulk operations**:
+   ```python
+   # Faster than multiple query_knowledge calls
+   results = search_similar_nodes(query, top_k=20)
+   ```
+
+## Common Query Patterns
+
+### 1. Documentation Lookup
+
+Finding specific documentation:
+
+```json
+{
+  "question": "Show me the API documentation for the memory store"
+}
+```
+
+### 2. Configuration Help
+
+Getting configuration guidance:
+
+```json
+{
+  "question": "What are the required environment variables for Full mode?"
+}
+```
+
+### 3. Code Examples
+
+Finding code snippets:
+
+```json
+{
+  "question": "Show me examples of using the add_document function"
+}
+```
+
+### 4. Troubleshooting
+
+Getting help with issues:
+
+```json
+{
+  "question": "Why is my Ollama connection failing?"
+}
+```
+
+### 5. Comparison
+
+Comparing options:
+
+```json
+{
+  "question": "Compare the performance of different embedding providers"
+}
+```
+
+## Integration with Other Tools
+
+### Memory Store Integration
+
+Query knowledge base and save important findings:
+
+```python
+# Query for information
+result = query_knowledge("How does authentication work?")
+
+# Save as memory
+add_memory(
+    project_id="myapp",
+    memory_type="note",
+    title="Authentication Overview",
+    content=result['answer'],
+    importance=0.7,
+    tags=["authentication", "security"]
+)
+```
+
+### Code Graph Integration
+
+Combine code analysis with documentation:
+
+```python
+# Find code implementations
+code_results = code_graph_search("authentication")
+
+# Find related documentation
+doc_results = query_knowledge("authentication implementation guide")
+
+# Correlate results
+combined_context = {
+    "code": code_results,
+    "docs": doc_results
+}
+```
+
+## Error Handling
+
+### Common Errors
+
+**1. No results found**:
+```json
+{
+  "success": true,
+  "answer": "I couldn't find relevant information about...",
+  "sources": [],
+  "note": "Try rephrasing your question or adding more context"
+}
+```
+
+**2. LLM timeout**:
+```json
+{
+  "success": false,
+  "error": "LLM generation timeout",
+  "retrieval_successful": true,
+  "sources": [...]  // Sources still available
+}
+```
+
+**3. Empty knowledge base**:
+```json
+{
+  "success": false,
+  "error": "Knowledge base is empty. Add documents first."
+}
+```
+
+### Error Handling Code
+
+```python
+try:
+    result = query_knowledge(question)
+
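+    if not result['success']:
+        # The server reported a failure (e.g. LLM timeout, empty knowledge base)
+        print(f"Query failed: {result['error']}")
+    elif not result['sources']:
+        # The query succeeded but retrieved nothing relevant
+        print("No relevant information found. Try different keywords.")
+    else:
+        # Success: show the generated answer
+        print(result['answer'])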
+
+except httpx.TimeoutException:
+    print("Query timeout. Try a simpler question or check system load.")
+except Exception as e:
+    print(f"Unexpected error: {e}")
+```
+
+## Best Practices
+
+### 1. Start Broad, Then Narrow
+
+```python
+# First query: broad
+result1 = query_knowledge("deployment options")
+
+# Follow-up: specific
+result2 = query_knowledge("how to deploy with Docker Compose")
+```
+
+### 2. Verify Sources
+
+Always check source documents:
+
+```python
+result = query_knowledge(question)
+
+# Review sources
+print(f"Answer based on {len(result['sources'])} sources:")
+for src in result['sources']:
+    print(f"  - {src['metadata']['title']} (score: {src['score']})")
+```
+
+### 3. Use Right Mode for Task
+
+```python
+# Exploration: hybrid
+query_knowledge(q, mode="hybrid")
+
+# Quick lookup: vector
+query_knowledge(q, mode="vector_only")
+
+# Structured navigation: graph
+query_knowledge(q, mode="graph_only")
+```
+
+### 4. Monitor Performance
+
+```python
+result = query_knowledge(question)
+
+print(f"Retrieval: {result['retrieval_time_ms']}ms")
+print(f"Generation: {result['generation_time_ms']}ms")
+print(f"Total: {result['total_time_ms']}ms")
+
+# Adjust if too slow
+if result['total_time_ms'] > 5000:
+    # Consider vector_only or reduce top_k
+    pass
+```
+
+## Advanced Techniques
+
+### 1. Multi-Query Strategy
+
+Ask related questions for comprehensive understanding:
+
+```python
+questions = [
+    "What is the system architecture?",
+    "What are the core components?",
+    "How do components interact?"
+]
+
+results = [query_knowledge(q) for q in questions]
+```
+
+### 2. Result Aggregation
+
+Combine results from multiple queries:
+
+```python
+def comprehensive_search(topic):
+    results = []
+
+    # Different query angles
+    queries = [
+        f"What is {topic}?",
+        f"How to use {topic}?",
+        f"{topic} examples and best practices"
+    ]
+
+    for q in queries:
+        result = query_knowledge(q)
+        results.append(result)
+
+    return aggregate_results(results)
+```
+
+### 3. Context Building
+
+Build context from related queries:
+
+```python
+# Initial query
+base_result = query_knowledge("JWT authentication")
+
+# Extract key terms from answer
+key_terms = extract_key_terms(base_result['answer'])
+
+# Query for each key term
+context = {}
+for term in key_terms:
+    context[term] = search_similar_nodes(term, top_k=3)
+```
+
+### 4. Feedback Loop
+
+Use query results to refine questions:
+
+```python
+def iterative_query(initial_question, max_iterations=3):
+    question = initial_question
+
+    for i in range(max_iterations):
+        result = query_knowledge(question)
+
+        if result['success'] and result['sources']:
+            return result
+
+        # Refine question based on failure
+        question = refine_question(question, result)
+
+    return result
+```
+
+## Troubleshooting
+
+### Poor Quality Answers
+
+**Symptoms**:
+- Irrelevant answers
+- Incomplete information
+- Contradictory results
+
+**Solutions**:
+1. Add more documents to knowledge base
+2. Improve document metadata
+3. Adjust chunk size/overlap
+4. Try different embedding model
+5. Rephrase question
+
+### Slow Query Performance
+
+**Symptoms**:
+- Queries taking >5 seconds
+- Timeouts
+
+**Solutions**:
+1. Reduce `top_k` value
+2. Use `vector_only` mode
+3. Check Neo4j performance
+4. Verify LLM provider responsiveness
+5. Enable query caching
+
+### No Results Found
+
+**Symptoms**:
+- Empty sources list
+- Generic "no information" answers
+
+**Solutions**:
+1. Verify documents are indexed
+2. Check embeddings were generated
+3. Try broader query terms
+4. Use different query mode
+5. Inspect Neo4j vector index
+
+## Next Steps
+
+- **[MCP Integration](../mcp/overview.md)**: Connect to AI assistants
+- **[Claude Desktop Setup](../mcp/claude-desktop.md)**: Use queries in Claude
+- **[VS Code Integration](../mcp/vscode.md)**: Query from your editor
+- **[Production Optimization](../../deployment/production.md)**: Scale query performance
+
+## Additional Resources
+
+- **LlamaIndex Query Engine**: https://docs.llamaindex.ai/en/stable/module_guides/deploying/query_engine/
+- **RAG Techniques**: https://docs.llamaindex.ai/en/stable/optimizing/production_rag/
+- **Neo4j Vector Search**: https://neo4j.com/docs/cypher-manual/current/indexes-for-vector-search/
diff --git a/docs/guide/mcp/claude-desktop.md b/docs/guide/mcp/claude-desktop.md
new file mode 100644
index 0000000..f5c687c
--- /dev/null
+++ b/docs/guide/mcp/claude-desktop.md
@@ -0,0 +1,721 @@
+# Claude Desktop MCP Setup
+
+Complete guide to integrating Code Graph Knowledge System with Claude Desktop using the Model Context Protocol.
+
+## Overview
+
+Claude Desktop is Anthropic's official desktop application that supports MCP (Model Context Protocol) integration. This allows Claude to directly call tools in your Code Graph Knowledge System, providing:
+
+- **Direct tool access**: Claude can query your knowledge base, search code, manage memories
+- **Natural language interface**: Ask questions in plain English
+- **Context awareness**: Claude remembers your project knowledge
+- **Real-time responses**: Immediate tool execution and results
+
+## Prerequisites
+
+### 1. Claude Desktop
+
+Download and install Claude Desktop:
+
+**Download Links**:
+- **macOS**: https://claude.ai/download
+- **Windows**: https://claude.ai/download
+- **Linux**: Not officially supported yet (use VS Code extension)
+
+**Minimum Version**: 0.7.0+ (MCP support added in v0.7.0)
+
+### 2. Code Graph Knowledge System
+
+You need a running instance:
+
+```bash
+# Option 1: Docker deployment
+docker-compose -f docker/docker-compose.full.yml up -d
+
+# Option 2: Local development
+python start_mcp.py
+
+# Verify it's running
+ps aux | grep start_mcp.py
+```
+
+### 3. Python Environment
+
+Claude Desktop needs to invoke your MCP server:
+
+```bash
+# Check Python version (3.10+ required)
+python --version
+
+# Verify dependencies are installed
+cd /path/to/codebase-rag
+pip install -e .
+
+# Or with uv
+uv pip install -e .
+```
+
+## Configuration
+
+### Step 1: Locate Configuration File
+
+Claude Desktop stores MCP configuration in a JSON file:
+
+**macOS**:
+```
+~/Library/Application Support/Claude/claude_desktop_config.json
+```
+
+**Windows**:
+```
+%APPDATA%\Claude\claude_desktop_config.json
+```
+
+### Step 2: Create Configuration
+
+If the file doesn't exist, create it:
+
+```bash
+# macOS
+mkdir -p ~/Library/Application\ Support/Claude
+touch ~/Library/Application\ Support/Claude/claude_desktop_config.json
+
+# Windows (PowerShell) - skip this if the file already exists: -Force overwrites it
+New-Item -Path "$env:APPDATA\Claude\claude_desktop_config.json" -Force
+```
+
+### Step 3: Add MCP Server Configuration
+
+Edit `claude_desktop_config.json`:
+
+#### Basic Configuration
+
+```json
+{
+  "mcpServers": {
+    "code-graph": {
+      "command": "python",
+      "args": ["/absolute/path/to/codebase-rag/start_mcp.py"]
+    }
+  }
+}
+```
+
+#### With Environment Variables
+
+```json
+{
+  "mcpServers": {
+    "code-graph": {
+      "command": "python",
+      "args": ["/absolute/path/to/codebase-rag/start_mcp.py"],
+      "env": {
+        "PYTHONPATH": "/absolute/path/to/codebase-rag",
+        "NEO4J_URI": "bolt://localhost:7687",
+        "NEO4J_USER": "neo4j",
+        "NEO4J_PASSWORD": "your_password",
+        "LLM_PROVIDER": "ollama",
+        "OLLAMA_HOST": "http://localhost:11434"
+      }
+    }
+  }
+}
+```
+
+#### Using uv (Recommended)
+
+```json
+{
+  "mcpServers": {
+    "code-graph": {
+      "command": "uv",
+      "args": ["run", "mcp_server"],
+      "cwd": "/absolute/path/to/codebase-rag"
+    }
+  }
+}
+```
+
+#### Docker-based Setup
+
+```json
+{
+  "mcpServers": {
+    "code-graph": {
+      "command": "docker",
+      "args": [
+        "exec",
+        "-i",
+        "codebase-rag-mcp",
+        "python",
+        "/app/start_mcp.py"
+      ]
+    }
+  }
+}
+```
+
+### Step 4: Restart Claude Desktop
+
+After configuration changes:
+
+1. **Quit Claude Desktop** completely (not just close window)
+   - macOS: Cmd+Q
+   - Windows: File → Exit
+
+2. **Restart Claude Desktop**
+
+3. **Verify MCP connection** (see Verification section)
+
+## Configuration Examples
+
+### Example 1: Full Mode with Ollama
+
+```json
+{
+  "mcpServers": {
+    "code-graph-full": {
+      "command": "python",
+      "args": ["/Users/john/projects/codebase-rag/start_mcp.py"],
+      "env": {
+        "PYTHONPATH": "/Users/john/projects/codebase-rag",
+        "DEPLOYMENT_MODE": "full",
+        "ENABLE_KNOWLEDGE_RAG": "true",
+        "ENABLE_AUTO_EXTRACTION": "true",
+        "NEO4J_URI": "bolt://localhost:7687",
+        "NEO4J_USER": "neo4j",
+        "NEO4J_PASSWORD": "mypassword",
+        "LLM_PROVIDER": "ollama",
+        "OLLAMA_HOST": "http://localhost:11434",
+        "OLLAMA_MODEL": "llama3.2",
+        "EMBEDDING_PROVIDER": "ollama",
+        "OLLAMA_EMBEDDING_MODEL": "nomic-embed-text"
+      }
+    }
+  }
+}
+```
+
+### Example 2: Standard Mode (No RAG)
+
+```json
+{
+  "mcpServers": {
+    "code-graph-standard": {
+      "command": "python",
+      "args": ["/home/user/codebase-rag/start_mcp.py"],
+      "env": {
+        "PYTHONPATH": "/home/user/codebase-rag",
+        "DEPLOYMENT_MODE": "standard",
+        "ENABLE_KNOWLEDGE_RAG": "false",
+        "NEO4J_URI": "bolt://localhost:7687",
+        "NEO4J_USER": "neo4j",
+        "NEO4J_PASSWORD": "password"
+      }
+    }
+  }
+}
+```
+
+### Example 3: Multiple MCP Servers
+
+```json
+{
+  "mcpServers": {
+    "code-graph-project-a": {
+      "command": "python",
+      "args": ["/path/to/project-a/codebase-rag/start_mcp.py"],
+      "env": {
+        "NEO4J_DATABASE": "project_a"
+      }
+    },
+    "code-graph-project-b": {
+      "command": "python",
+      "args": ["/path/to/project-b/codebase-rag/start_mcp.py"],
+      "env": {
+        "NEO4J_DATABASE": "project_b"
+      }
+    }
+  }
+}
+```
+
+## Verification
+
+### Check MCP Connection
+
+After restarting Claude Desktop:
+
+1. **Start a new conversation**
+
+2. **Look for tool indicator**:
+   - You should see a tools icon or "Tools available" indicator
+   - Click to view available tools
+
+3. **Verify tools are listed**:
+   - You should see 20-30 tools (depending on deployment mode)
+   - Tool names: `query_knowledge`, `add_memory`, `code_graph_ingest_repo`, etc.
+
+### Test Basic Functionality
+
+Try these test prompts:
+
+```
+"List available MCP tools"
+```
+
+Expected: Claude lists all available tools
+
+```
+"Use the get_statistics tool to check system stats"
+```
+
+Expected: Claude calls `get_statistics` and shows results
+
+```
+"Query the knowledge base about deployment"
+```
+
+Expected: Claude calls `query_knowledge` with your question
+
+### Check Logs
+
+If tools don't appear:
+
+**MCP Server Logs**:
+```bash
+# View server logs
+tail -f /path/to/codebase-rag/mcp_server.log
+
+# Enable debug mode
+MCP_LOG_LEVEL=DEBUG python start_mcp.py
+```
+
+**Claude Desktop Logs**:
+
+macOS:
+```bash
+tail -f ~/Library/Logs/Claude/mcp-server-code-graph.log
+```
+
+Windows:
+```powershell
+Get-Content "$env:APPDATA\Claude\Logs\mcp-server-code-graph.log" -Wait
+```
+
+## Usage Patterns
+
+### 1. Query Knowledge Base
+
+**Prompt**:
+```
+"Query the knowledge base: How do I configure Docker deployment?"
+```
+
+**What Claude Does**:
+1. Calls `query_knowledge` tool
+2. Passes your question
+3. Receives answer with sources
+4. Formats and presents results
+
+**Response Example**:
+```
+Based on the knowledge base, here's how to configure Docker deployment:
+
+[Claude presents the answer with source citations]
+
+Sources:
+- Docker Guide (score: 0.92)
+- Deployment Documentation (score: 0.87)
+```
+
+### 2. Search Code
+
+**Prompt**:
+```
+"Search the code graph for authentication implementations"
+```
+
+**What Claude Does**:
+1. Calls `code_graph_fulltext_search`
+2. Finds matching code files
+3. Presents results with context
+
+### 3. Manage Memories
+
+**Prompt**:
+```
+"Add a memory: We decided to use PostgreSQL for the main database
+because it has better JSON support. This is an important architectural decision."
+```
+
+**What Claude Does**:
+1. Extracts memory details
+2. Calls `add_memory` tool
+3. Saves to memory store
+4. Confirms success
+
+### 4. Analyze Code Repository
+
+**Prompt**:
+```
+"Ingest the repository at /path/to/my/project in incremental mode"
+```
+
+**What Claude Does**:
+1. Calls `code_graph_ingest_repo`
+2. Processes repository
+3. Reports statistics
+
+### 5. Monitor Tasks
+
+**Prompt**:
+```
+"Check the status of task task_abc123"
+```
+
+**What Claude Does**:
+1. Calls `get_task_status`
+2. Returns current status
+3. Shows progress if available
+
+## Advanced Usage
+
+### Chained Tool Calls
+
+Claude can chain multiple tool calls:
+
+**Prompt**:
+```
+"Index the /path/to/docs directory, then query it about deployment"
+```
+
+**What Claude Does**:
+1. Calls `add_directory` to index docs
+2. Waits for completion (or gets task_id)
+3. Calls `query_knowledge` with your question
+4. Presents combined results
+
+### Context Building
+
+**Prompt**:
+```
+"Search my memories for database decisions,
+then query the knowledge base for PostgreSQL configuration examples"
+```
+
+**What Claude Does**:
+1. Calls `search_memories` with "database"
+2. Calls `query_knowledge` with "PostgreSQL configuration"
+3. Synthesizes information from both sources
+
+### Memory Extraction
+
+**Prompt**:
+```
+"Extract memories from the last 50 commits in /path/to/repo"
+```
+
+**What Claude Does**:
+1. Calls `batch_extract_from_repository`
+2. Analyzes commits
+3. Extracts decisions and learnings
+4. Saves as memories
+
+### Impact Analysis
+
+**Prompt**:
+```
+"Analyze the impact of changing the authentication module"
+```
+
+**What Claude Does**:
+1. Calls `code_graph_impact_analysis`
+2. Finds dependent files
+3. Assesses risk level
+4. Presents findings
+
+## Best Practices
+
+### 1. Be Specific with Tool Names
+
+**Good**:
+```
+"Use query_knowledge to find information about Docker"
+```
+
+**Less effective**:
+```
+"Find Docker information"  # Claude might not use the right tool
+```
+
+### 2. Provide Complete Paths
+
+**Good**:
+```
+"Ingest repository at /Users/john/projects/myapp"
+```
+
+**Bad**:
+```
+"Ingest myapp"  # Relative paths don't work
+```
+
+### 3. Check Tool Availability
+
+Before using a tool:
+
+```
+"What deployment mode are we running? List available tools."
+```
+
+### 4. Handle Async Operations
+
+For long-running tasks:
+
+```
+"Add the /docs directory. If it's async, give me the task_id
+so I can check status later."
+```
+
+### 5. Verify Results
+
+After tool calls:
+
+```
+"Show me the sources used for that answer"
+"Confirm the memory was saved"
+"Verify the repository was ingested successfully"
+```
+
+## Troubleshooting
+
+### Issue: Tools Not Appearing
+
+**Symptoms**:
+- No tools icon in Claude Desktop
+- Claude says "I don't have access to tools"
+
+**Solutions**:
+
+1. **Verify configuration file location**:
+   ```bash
+   # macOS
+   cat ~/Library/Application\ Support/Claude/claude_desktop_config.json
+
+   # Windows
+   type %APPDATA%\Claude\claude_desktop_config.json
+   ```
+
+2. **Check JSON syntax**:
+   ```bash
+   # Use JSON validator
+   python -m json.tool claude_desktop_config.json
+   ```
+
+3. **Verify absolute paths**:
+   - All paths must be absolute, not relative
+   - Expand ~ to full home path
+
+4. **Restart completely**:
+   - Force quit Claude Desktop
+   - Kill any remaining processes
+   - Start fresh
+
+### Issue: Connection Errors
+
+**Symptoms**:
+- "Failed to connect to MCP server"
+- Tools listed but calls fail
+
+**Solutions**:
+
+1. **Check server is running**:
+   ```bash
+   ps aux | grep start_mcp.py
+   ```
+
+2. **Verify Neo4j connection**:
+   ```bash
+   curl http://localhost:7474
+   ```
+
+3. **Check environment variables**:
+   ```bash
+   # Test the command manually
+   cd /path/to/codebase-rag
+   python start_mcp.py
+   ```
+
+4. **Review MCP server logs**:
+   ```bash
+   tail -f mcp_server.log
+   ```
+
+### Issue: Tool Calls Timeout
+
+**Symptoms**:
+- "Tool call timeout"
+- Long delays before failure
+
+**Solutions**:
+
+1. **Increase timeout** in `.env`:
+   ```bash
+   OPERATION_TIMEOUT=300  # 5 minutes
+   ```
+
+2. **Check system resources**:
+   ```bash
+   # CPU and memory usage
+   top
+   ```
+
+3. **Use async mode** for large operations:
+   - Directory ingestion
+   - Large document processing
+   - Batch memory extraction
+
+### Issue: Permission Errors
+
+**Symptoms**:
+- "Permission denied" when starting MCP server
+- Cannot read configuration files
+
+**Solutions**:
+
+1. **Fix file permissions**:
+   ```bash
+   chmod +x start_mcp.py
+   chmod 600 .env
+   ```
+
+2. **Check directory permissions**:
+   ```bash
+   ls -la /path/to/codebase-rag
+   ```
+
+3. **Run with correct user**:
+   ```bash
+   # Ensure Neo4j is accessible by your user
+   whoami
+   ```
+
+### Issue: Tools Return Errors
+
+**Symptoms**:
+- "Tool execution failed"
+- Error messages in responses
+
+**Solutions**:
+
+1. **Check backend services**:
+   ```bash
+   # Verify Neo4j
+   cypher-shell "RETURN 1"
+
+   # Verify Ollama (if using)
+   curl http://localhost:11434/api/tags
+   ```
+
+2. **Review tool-specific logs**:
+   ```bash
+   grep "ERROR" mcp_server.log
+   ```
+
+3. **Test tools directly** via HTTP API:
+   ```bash
+   curl -X POST http://localhost:8000/api/v1/knowledge/query \
+     -H "Content-Type: application/json" \
+     -d '{"question": "test"}'
+   ```
+
+## Performance Optimization
+
+### Reduce Latency
+
+1. **Use local LLM** (Ollama) to avoid API delays
+2. **Enable caching** in configuration
+3. **Use incremental mode** for code ingestion
+4. **Reduce top_k** for faster queries
+
+### Improve Response Quality
+
+1. **Add more documents** to knowledge base
+2. **Use better embeddings** (larger models)
+3. **Add rich metadata** to documents
+4. **Curate project memories** regularly
+
+### Handle Large Operations
+
+1. **Use async mode** for:
+   - Directory ingestion
+   - Batch memory extraction
+   - Large document processing
+
+2. **Monitor with watch_task**:
+   ```
+   "Add directory /docs then watch the task until complete"
+   ```
+
+## Security Considerations
+
+### Configuration Security
+
+1. **Protect config file**:
+   ```bash
+   chmod 600 ~/Library/Application\ Support/Claude/claude_desktop_config.json
+   ```
+
+2. **Don't commit credentials**:
+   - Use environment variables
+   - Keep passwords in `.env` file
+   - Add `.env` to `.gitignore`
+
+3. **Use authentication**:
+   ```bash
+   # Add Neo4j authentication
+   NEO4J_USER=readonly_user
+   NEO4J_PASSWORD=secure_password
+   ```
+
+### Network Security
+
+1. **Bind to localhost**:
+   ```bash
+   # In .env
+   HOST=127.0.0.1  # Don't expose to network
+   ```
+
+2. **Use firewall rules**:
+   ```bash
+   # Allow local access to Neo4j first, then block everything else (ufw applies the first matching rule)
+   sudo ufw allow from 127.0.0.1 to any port 7687
+   sudo ufw deny 7687
+   ```
+
+### Tool Restrictions
+
+1. **Disable destructive tools** if needed:
+   - Modify `start_mcp.py` to exclude certain tools
+   - Implement tool-level access control
+
+2. **Read-only mode**:
+   ```bash
+   # Configure read-only Neo4j user
+   ENABLE_WRITE_OPERATIONS=false
+   ```
+
+## Next Steps
+
+- **[MCP Overview](overview.md)**: Learn about MCP protocol
+- **[VS Code Setup](vscode.md)**: Configure VS Code extension
+- **[Knowledge RAG Guide](../knowledge/overview.md)**: Use query tools
+- **[Memory Store Guide](../memory/overview.md)**: Manage project memories
+
+## Additional Resources
+
+- **Claude Desktop**: https://claude.ai/download
+- **MCP Documentation**: https://modelcontextprotocol.io/
+- **Configuration Examples**: `/examples/mcp_configs/`
+- **Troubleshooting**: https://docs.anthropic.com/claude/docs/mcp
diff --git a/docs/guide/mcp/overview.md b/docs/guide/mcp/overview.md
new file mode 100644
index 0000000..bfcedb8
--- /dev/null
+++ b/docs/guide/mcp/overview.md
@@ -0,0 +1,606 @@
+# MCP Integration Overview
+
+The Model Context Protocol (MCP) is an open standard that enables AI assistants like Claude Desktop and VS Code extensions to connect to external tools and data sources. The Code Graph Knowledge System provides a complete MCP server implementation with 30 specialized tools.
+
+## What is MCP?
+
+MCP (Model Context Protocol) is an open protocol developed by Anthropic that allows AI assistants to:
+
+- **Access External Tools**: Call functions in external applications
+- **Retrieve Context**: Fetch data from databases, APIs, and services
+- **Execute Actions**: Perform operations on behalf of users
+- **Stream Responses**: Receive real-time updates
+
+Think of MCP as a standardized way for AI assistants to "talk to" your applications.
+
+## Architecture
+
+```
+┌─────────────────┐
+│  AI Assistant   │  (Claude Desktop, VS Code, etc.)
+│  (MCP Client)   │
+└────────┬────────┘
+         │ MCP Protocol
+         │ (stdio, SSE, WebSocket)
+         ↓
+┌─────────────────┐
+│   MCP Server    │  (This Application)
+│  (start_mcp.py) │
+└────────┬────────┘
+         │
+         ↓
+┌─────────────────────────────────────────┐
+│  Backend Services                        │
+│  ┌────────────────────────────────────┐ │
+│  │ Knowledge RAG  │  Code Graph       │ │
+│  │ Memory Store   │  Task Queue       │ │
+│  │ Neo4j Database │  Git Integration  │ │
+│  └────────────────────────────────────┘ │
+└─────────────────────────────────────────┘
+```
+
+## System Components
+
+### 1. MCP Server (`start_mcp.py`)
+
+The main server that:
+- Implements MCP protocol using official SDK
+- Exposes 30 tools across 6 categories
+- Manages connections from AI clients
+- Routes requests to backend services
+
+**Key Features**:
+- ✅ Official MCP SDK (`mcp>=1.1.0`)
+- ✅ Modular architecture (310-line main file)
+- ✅ Session management
+- ✅ Streaming support
+- ✅ Multi-transport (stdio, SSE, WebSocket)
+
+### 2. MCP Clients
+
+AI assistants that connect to the server:
+
+**Supported Clients**:
+- **Claude Desktop**: Official Anthropic desktop app
+- **VS Code**: Via MCP extension
+- **Custom Clients**: Using MCP SDK
+
+### 3. Backend Services
+
+The actual functionality exposed via MCP:
+- Knowledge RAG for document Q&A
+- Code Graph for repository analysis
+- Memory Store for project knowledge
+- Task Queue for async operations
+- Git utilities for repository operations
+
+## Available Tools (30 Total)
+
+### Knowledge Base Tools (5)
+
+Process and query documents using RAG:
+
+1. **query_knowledge**: Ask questions, get LLM-generated answers
+2. **search_similar_nodes**: Find similar documents via vector search
+3. **add_document**: Add document content to knowledge base
+4. **add_file**: Process single file
+5. **add_directory**: Batch process directory
+
+**Availability**: Full mode only (requires LLM + embeddings)
+
+### Code Graph Tools (4)
+
+Analyze code repositories:
+
+1. **code_graph_ingest_repo**: Index repository structure
+2. **code_graph_fulltext_search**: Search code by text
+3. **code_graph_impact_analysis**: Analyze change impact
+4. **code_graph_pack_context**: Build context for LLM
+
+**Availability**: All modes
+
+### Memory Management Tools (7)
+
+Store project knowledge for AI agents:
+
+1. **add_memory**: Save decisions, preferences, experiences
+2. **search_memories**: Find relevant memories
+3. **get_memory**: Retrieve by ID
+4. **update_memory**: Modify existing memory
+5. **delete_memory**: Remove memory (soft delete)
+6. **supersede_memory**: Replace with history preservation
+7. **get_project_summary**: Get overview
+
+**Availability**: All modes
+
+### Memory Extraction Tools (5)
+
+Automatically extract memories (v0.7):
+
+1. **extract_from_conversation**: Analyze AI conversations
+2. **extract_from_git_commit**: Mine git commits
+3. **extract_from_code_comments**: Extract TODOs, FIXMEs
+4. **suggest_memory_from_query**: Suggest from Q&A
+5. **batch_extract_from_repository**: Comprehensive extraction
+
+**Availability**: Full mode only (requires LLM)
+
+### Task Management Tools (6)
+
+Monitor async operations:
+
+1. **get_task_status**: Check task status
+2. **watch_task**: Monitor single task
+3. **watch_tasks**: Monitor multiple tasks
+4. **list_tasks**: List all tasks
+5. **cancel_task**: Cancel running task
+6. **get_queue_stats**: Get queue statistics
+
+**Availability**: All modes
+
+### System Tools (3)
+
+System information and management:
+
+1. **get_graph_schema**: Get Neo4j schema
+2. **get_statistics**: Get system statistics
+3. **clear_knowledge_base**: Clear all data
+
+**Availability**: All modes
+
+## Tool Usage Pattern
+
+### Example: Query Knowledge Base
+
+```json
+{
+  "tool": "query_knowledge",
+  "input": {
+    "question": "How do I configure Docker deployment?",
+    "mode": "hybrid"
+  }
+}
+```
+
+**Response**:
+```json
+{
+  "answer": "To configure Docker deployment, you need to...",
+  "sources": [
+    {"title": "Docker Guide", "score": 0.92, "content": "..."}
+  ],
+  "mode": "hybrid",
+  "retrieval_time_ms": 150,
+  "generation_time_ms": 2300
+}
+```
+
+### Example: Add Memory
+
+```json
+{
+  "tool": "add_memory",
+  "input": {
+    "project_id": "myapp",
+    "memory_type": "decision",
+    "title": "Use PostgreSQL for main database",
+    "content": "Selected PostgreSQL over MySQL",
+    "reason": "Need advanced JSON support and better performance",
+    "importance": 0.9,
+    "tags": ["database", "architecture"]
+  }
+}
+```
+
+### Example: Code Graph Analysis
+
+```json
+{
+  "tool": "code_graph_ingest_repo",
+  "input": {
+    "repo_path": "/path/to/repo",
+    "mode": "incremental"
+  }
+}
+```
+
+## MCP Protocol Details
+
+### Transport Methods
+
+MCP supports multiple transport protocols:
+
+1. **stdio** (Standard Input/Output)
+   - Used by Claude Desktop
+   - Process-based communication
+   - Most common for desktop apps
+
+2. **SSE** (Server-Sent Events)
+   - HTTP-based streaming
+   - Used by web applications
+   - Good for browser-based clients
+
+3. **WebSocket**
+   - Bidirectional streaming
+   - Real-time updates
+   - Low latency
+
+**Our Implementation**: Supports all three via official MCP SDK
+
+### Message Types
+
+MCP uses the JSON-RPC 2.0 protocol:
+
+**Request**:
+```json
+{
+  "jsonrpc": "2.0",
+  "id": "req-123",
+  "method": "tools/call",
+  "params": {
+    "name": "query_knowledge",
+    "arguments": {
+      "question": "What is RAG?"
+    }
+  }
+}
+```
+
+**Response**:
+```json
+{
+  "jsonrpc": "2.0",
+  "id": "req-123",
+  "result": {
+    "content": [
+      {
+        "type": "text",
+        "text": "RAG (Retrieval-Augmented Generation) is..."
+      }
+    ]
+  }
+}
+```
+
+### Session Management
+
+MCP maintains sessions for:
+- User context preservation
+- State management
+- Resource tracking
+- Connection lifecycle
+
+## Deployment Modes
+
+MCP server adapts to your deployment mode:
+
+### Full Mode (All Features)
+
+```bash
+DEPLOYMENT_MODE=full
+ENABLE_KNOWLEDGE_RAG=true
+ENABLE_AUTO_EXTRACTION=true
+```
+
+**Tools Available**: 30 tools (all features)
+
+### Standard Mode (No RAG)
+
+```bash
+DEPLOYMENT_MODE=standard
+ENABLE_KNOWLEDGE_RAG=false
+ENABLE_AUTO_EXTRACTION=false
+```
+
+**Tools Available**: 20 tools (no Knowledge RAG, no auto-extraction)
+
+### Minimal Mode (Graph + Memory Only)
+
+```bash
+DEPLOYMENT_MODE=minimal
+```
+
+**Tools Available**: 20 tools (Code Graph + Memory + Tasks + System)
+
+## Configuration
+
+### Server Configuration
+
+Configure MCP server in `.env`:
+
+```bash
+# MCP Server Settings
+MCP_SERVER_NAME="Code Graph Knowledge System"
+MCP_SERVER_VERSION="2.0"
+MCP_LOG_LEVEL=INFO
+
+# Feature Flags
+ENABLE_KNOWLEDGE_RAG=true
+ENABLE_AUTO_EXTRACTION=true
+ENABLE_CODE_GRAPH=true
+ENABLE_MEMORY_STORE=true
+```
+
+### Starting the Server
+
+```bash
+# Direct execution
+python start_mcp.py
+
+# Using uv
+uv run mcp_server
+
+# With custom config
+MCP_LOG_LEVEL=DEBUG python start_mcp.py
+```
+
+### Client Configuration
+
+Configure in Claude Desktop or VS Code settings:
+
+**Claude Desktop** (`claude_desktop_config.json`):
+```json
+{
+  "mcpServers": {
+    "knowledge-graph": {
+      "command": "python",
+      "args": ["/path/to/start_mcp.py"],
+      "env": {
+        "PYTHONPATH": "/path/to/codebase-rag"
+      }
+    }
+  }
+}
+```
+
+**VS Code** (settings.json):
+```json
+{
+  "mcp.servers": {
+    "knowledge-graph": {
+      "command": "python",
+      "args": ["/path/to/start_mcp.py"]
+    }
+  }
+}
+```
+
+## Use Cases
+
+### 1. AI-Powered Code Assistant
+
+**Tools Used**:
+- `code_graph_ingest_repo`: Index codebase
+- `code_graph_fulltext_search`: Find code
+- `code_graph_impact_analysis`: Analyze changes
+- `add_memory`: Remember decisions
+
+**Workflow**:
+1. Ingest repository
+2. Ask questions about code
+3. Analyze impact of changes
+4. Save architectural decisions
+
+### 2. Documentation Assistant
+
+**Tools Used**:
+- `add_directory`: Index documentation
+- `query_knowledge`: Answer questions
+- `search_similar_nodes`: Find related docs
+- `suggest_memory_from_query`: Extract insights
+
+**Workflow**:
+1. Index documentation
+2. Query for information
+3. Get AI-generated answers
+4. Save important findings
+
+### 3. Development Memory
+
+**Tools Used**:
+- `add_memory`: Save knowledge
+- `search_memories`: Find past decisions
+- `extract_from_git_commit`: Mine commits
+- `batch_extract_from_repository`: Auto-extract
+
+**Workflow**:
+1. Extract from git history
+2. Mine code comments
+3. Store decisions manually
+4. Query when needed
+
+### 4. Code Review Assistant
+
+**Tools Used**:
+- `code_graph_impact_analysis`: Analyze changes
+- `query_knowledge`: Check documentation
+- `search_memories`: Find conventions
+- `extract_from_conversation`: Save findings
+
+**Workflow**:
+1. Analyze code changes
+2. Check against documentation
+3. Verify conventions
+4. Save review insights
+
+## Benefits of MCP Integration
+
+### For Users
+
+1. **Natural Language Interface**: Ask questions in plain English
+2. **Context Awareness**: AI remembers project knowledge
+3. **Automated Tasks**: Background processing of large operations
+4. **Unified Experience**: Same tools across different AI assistants
+
+### For Developers
+
+1. **Standardized Protocol**: No custom API clients needed
+2. **Tool Discovery**: AI automatically discovers available tools
+3. **Type Safety**: JSON schemas for all tool inputs
+4. **Error Handling**: Structured error responses
+
+### For Organizations
+
+1. **Vendor Independence**: Works with any MCP-compatible client
+2. **Security**: Local execution, no data sent to external services
+3. **Customization**: Easy to add new tools
+4. **Integration**: Connects to existing infrastructure
+
+## Security Considerations
+
+### Data Privacy
+
+- **Local Execution**: MCP server runs on your infrastructure
+- **No External Calls**: Data stays in your network (with local LLM)
+- **Access Control**: Implement authentication at proxy level
+
+### Tool Permissions
+
+Tools have different permission levels:
+
+**Read-only tools**:
+- `query_knowledge`
+- `search_memories`
+- `code_graph_fulltext_search`
+
+**Write tools**:
+- `add_document`
+- `add_memory`
+- `code_graph_ingest_repo`
+
+**Destructive tools**:
+- `delete_memory`
+- `clear_knowledge_base`
+
+**Best Practice**: Implement tool-level access control in production
+
+### Network Security
+
+```bash
+# Run MCP server in isolated environment
+docker run --network isolated-net mcp-server
+
+# Use authentication proxy:
+#   nginx → (auth) → MCP server
+
+# Restrict tool access by user
+ALLOWED_TOOLS=query_knowledge,search_memories
+```
+
+## Performance Considerations
+
+### Tool Execution Time
+
+| Tool Category | Typical Time | Notes |
+|--------------|--------------|-------|
+| Query | 1-5s | Depends on LLM |
+| Search | 100-500ms | Vector search |
+| Memory | 50-200ms | Graph queries |
+| Code Graph | 200ms-2s | Varies by size |
+| Ingestion | 10s-5min | Background task |
+
+### Concurrent Requests
+
+The server handles concurrent requests:
+
+```python
+# Configure in server
+MAX_CONCURRENT_REQUESTS=10
+REQUEST_TIMEOUT=30  # seconds
+```
+
+### Caching Strategy
+
+**Client-side caching**:
+- Cache frequent queries
+- Store tool schemas
+- Cache project summaries
+
+**Server-side caching**:
+- Embedding cache
+- Query result cache
+- Graph query cache
+
+## Monitoring and Debugging
+
+### Server Logs
+
+```bash
+# View MCP server logs
+tail -f mcp_server.log
+
+# Enable debug logging
+MCP_LOG_LEVEL=DEBUG python start_mcp.py
+```
+
+### Tool Call Tracing
+
+Monitor tool calls:
+
+```text
+# Each tool call is logged
+[INFO] Query: "How does auth work?" (mode: hybrid)
+[INFO] Add memory: "Use JWT authentication" (project: myapp)
+[INFO] Code ingest: /path/to/repo (mode: incremental)
+```
+
+### Health Monitoring
+
+```bash
+# Check MCP server health
+python -c "from mcp_server import server; print(server.health_check())"
+
+# Check backend services
+curl http://localhost:8000/api/v1/health
+```
+
+## Limitations
+
+### Current Limitations
+
+1. **Single User**: No built-in multi-user support
+2. **No Authentication**: Implement at proxy level
+3. **Tool Discovery**: Static tool list (no runtime addition)
+4. **Session Persistence**: In-memory only (no database)
+
+### Planned Features
+
+1. **Multi-user support**: User-specific contexts
+2. **Tool marketplace**: Dynamically load tools
+3. **Enhanced streaming**: Progress updates for long operations
+4. **Webhook support**: External event notifications
+
+## Comparison with Alternatives
+
+### MCP vs REST API
+
+| Feature | MCP | REST API |
+|---------|-----|----------|
+| Tool Discovery | Automatic | Manual |
+| Type Safety | Built-in | Manual |
+| Streaming | Native | SSE/WebSocket |
+| AI Integration | Optimized | Generic |
+| Learning Curve | Low | Medium |
+
+### MCP vs Function Calling
+
+| Feature | MCP | Function Calling |
+|---------|-----|-----------------|
+| Protocol | Standardized | Provider-specific |
+| Transport | Multiple | HTTP only |
+| Session Mgmt | Built-in | Manual |
+| Tool Composability | High | Medium |
+
+## Next Steps
+
+- **[Claude Desktop Setup](claude-desktop.md)**: Configure Claude Desktop
+- **[VS Code Setup](vscode.md)**: Configure VS Code extension
+- **[Deployment Guide](../../deployment/full.md)**: Deploy MCP server
+- **[Contributing Guide](../../development/contributing.md)**: Extend with custom tools
+
+## Additional Resources
+
+- **MCP Documentation**: https://modelcontextprotocol.io/
+- **MCP SDK**: https://github.com/modelcontextprotocol/python-sdk
+- **Claude Desktop**: https://claude.ai/download
+- **MCP Specification**: https://spec.modelcontextprotocol.io/
diff --git a/docs/guide/mcp/vscode.md b/docs/guide/mcp/vscode.md
new file mode 100644
index 0000000..7651a43
--- /dev/null
+++ b/docs/guide/mcp/vscode.md
@@ -0,0 +1,820 @@
+# VS Code MCP Extension Setup
+
+Complete guide to integrating Code Graph Knowledge System with Visual Studio Code using the MCP (Model Context Protocol) extension.
+
+## Overview
+
+VS Code MCP integration allows you to use Code Graph Knowledge System tools directly within your editor:
+
+- **Inline queries**: Query knowledge base from the editor
+- **Code analysis**: Analyze code without leaving VS Code
+- **Memory management**: Save and retrieve project memories
+- **Context awareness**: AI assistant with your codebase context
+- **Seamless workflow**: No context switching
+
+## Prerequisites
+
+### 1. Visual Studio Code
+
+Download and install VS Code:
+
+- **Download**: https://code.visualstudio.com/download
+- **Minimum version**: 1.85.0+
+- **Platforms**: Windows, macOS, Linux
+
+### 2. MCP Extension
+
+Install the MCP extension for VS Code:
+
+**Option 1: VS Code Marketplace**
+1. Open VS Code
+2. Press `Ctrl+Shift+X` (Windows/Linux) or `Cmd+Shift+X` (macOS)
+3. Search for "Model Context Protocol" or "MCP"
+4. Click Install
+
+**Option 2: Command Line**
+```bash
+code --install-extension anthropic.mcp
+```
+
+**Option 3: Extensions View**
+- Open Command Palette: `Ctrl+Shift+P` / `Cmd+Shift+P`
+- Type: "Extensions: Install Extensions"
+- Search: "MCP"
+- Install
+
+### 3. Code Graph Knowledge System
+
+Ensure the MCP server is accessible:
+
+```bash
+# Running locally
+cd /path/to/codebase-rag
+python start_mcp.py
+
+# Or via Docker
+docker-compose -f docker/docker-compose.full.yml up -d
+
+# Verify
+ps aux | grep start_mcp.py
+```
+
+### 4. Python Environment
+
+```bash
+# Python 3.10+ required
+python --version
+
+# Install dependencies
+cd /path/to/codebase-rag
+pip install -e .
+
+# Or with uv
+uv pip install -e .
+```
+
+## Configuration
+
+### Method 1: Settings UI
+
+1. **Open Settings**:
+   - Press `Ctrl+,` (Windows/Linux) or `Cmd+,` (macOS)
+   - Or: File → Preferences → Settings
+
+2. **Search for MCP**:
+   - Type "mcp" in settings search
+   - Look for "MCP: Servers" section
+
+3. **Add Server**:
+   - Click "Edit in settings.json"
+   - Add configuration (see examples below)
+
+### Method 2: settings.json
+
+**Open settings.json**:
+- Command Palette: `Ctrl+Shift+P` / `Cmd+Shift+P`
+- Type: "Preferences: Open User Settings (JSON)"
+- Or: "Preferences: Open Workspace Settings (JSON)"
+
+**Basic Configuration**:
+
+```json
+{
+  "mcp.servers": {
+    "code-graph": {
+      "command": "python",
+      "args": ["/absolute/path/to/codebase-rag/start_mcp.py"],
+      "env": {
+        "PYTHONPATH": "/absolute/path/to/codebase-rag"
+      }
+    }
+  }
+}
+```
+
+## Configuration Examples
+
+### Example 1: Full Mode with Environment Variables
+
+```json
+{
+  "mcp.servers": {
+    "code-graph-full": {
+      "command": "python",
+      "args": ["/Users/developer/projects/codebase-rag/start_mcp.py"],
+      "env": {
+        "PYTHONPATH": "/Users/developer/projects/codebase-rag",
+        "DEPLOYMENT_MODE": "full",
+        "ENABLE_KNOWLEDGE_RAG": "true",
+        "ENABLE_AUTO_EXTRACTION": "true",
+        "NEO4J_URI": "bolt://localhost:7687",
+        "NEO4J_USER": "neo4j",
+        "NEO4J_PASSWORD": "mypassword",
+        "LLM_PROVIDER": "ollama",
+        "OLLAMA_HOST": "http://localhost:11434",
+        "OLLAMA_MODEL": "llama3.2",
+        "EMBEDDING_PROVIDER": "ollama",
+        "OLLAMA_EMBEDDING_MODEL": "nomic-embed-text"
+      }
+    }
+  }
+}
+```
+
+### Example 2: Using uv
+
+```json
+{
+  "mcp.servers": {
+    "code-graph": {
+      "command": "uv",
+      "args": ["run", "mcp_server"],
+      "cwd": "/absolute/path/to/codebase-rag"
+    }
+  }
+}
+```
+
+### Example 3: Docker Container
+
+```json
+{
+  "mcp.servers": {
+    "code-graph-docker": {
+      "command": "docker",
+      "args": [
+        "exec",
+        "-i",
+        "codebase-rag-mcp",
+        "python",
+        "/app/start_mcp.py"
+      ],
+      "env": {
+        "DOCKER_HOST": "unix:///var/run/docker.sock"
+      }
+    }
+  }
+}
+```
+
+### Example 4: Remote Server via SSH
+
+```json
+{
+  "mcp.servers": {
+    "code-graph-remote": {
+      "command": "ssh",
+      "args": [
+        "user@remote-server",
+        "cd /path/to/codebase-rag && python start_mcp.py"
+      ]
+    }
+  }
+}
+```
+
+### Example 5: Workspace-Specific Configuration
+
+Save in `.vscode/settings.json` within your project:
+
+```json
+{
+  "mcp.servers": {
+    "project-knowledge": {
+      "command": "python",
+      "args": ["${workspaceFolder}/../codebase-rag/start_mcp.py"],
+      "env": {
+        "PYTHONPATH": "${workspaceFolder}/../codebase-rag",
+        "NEO4J_DATABASE": "project_specific_db"
+      }
+    }
+  }
+}
+```
+
+### Example 6: Multiple Servers
+
+```json
+{
+  "mcp.servers": {
+    "project-a-knowledge": {
+      "command": "python",
+      "args": ["/path/to/project-a/codebase-rag/start_mcp.py"],
+      "env": {
+        "NEO4J_DATABASE": "project_a"
+      }
+    },
+    "project-b-knowledge": {
+      "command": "python",
+      "args": ["/path/to/project-b/codebase-rag/start_mcp.py"],
+      "env": {
+        "NEO4J_DATABASE": "project_b"
+      }
+    }
+  }
+}
+```
+
+## Verification
+
+### Check MCP Connection
+
+1. **Open Command Palette**: `Ctrl+Shift+P` / `Cmd+Shift+P`
+
+2. **Run**: "MCP: Show Connected Servers"
+
+3. **Expected output**:
+   - Server name: "code-graph" (or your configured name)
+   - Status: "Connected"
+   - Tools: 20-30 tools listed
+
+### Test Tool Access
+
+1. **Open Command Palette**
+
+2. **Run**: "MCP: List Available Tools"
+
+3. **Verify tools** are listed:
+   - `query_knowledge`
+   - `add_memory`
+   - `code_graph_ingest_repo`
+   - `search_memories`
+   - etc.
+
+### Test Basic Query
+
+1. **Open Command Palette**
+
+2. **Run**: "MCP: Execute Tool"
+
+3. **Select**: `get_statistics`
+
+4. **Expected**: System statistics displayed
+
+## Usage Patterns
+
+### 1. Query Knowledge Base
+
+**Method A: Command Palette**
+1. Press `Ctrl+Shift+P` / `Cmd+Shift+P`
+2. Type: "MCP: Execute Tool"
+3. Select: `query_knowledge`
+4. Enter question: "How do I configure Docker?"
+5. View results
+
+**Method B: Quick Input**
+1. Select text in editor
+2. Right-click → "Query Knowledge Base"
+3. Results appear in panel
+
+**Method C: Keyboard Shortcut**
+```json
+// Add to keybindings.json
+{
+  "key": "ctrl+shift+k",
+  "command": "mcp.executeQuery",
+  "args": {
+    "tool": "query_knowledge"
+  }
+}
+```
+
+### 2. Search Code
+
+**Method A: Search Current Project**
+1. Open Command Palette
+2. "MCP: Execute Tool" → `code_graph_fulltext_search`
+3. Enter search term
+4. View matching files
+
+**Method B: Context Menu**
+1. Right-click in editor
+2. "Search Code Graph" → enters selected text
+3. View results
+
+### 3. Add Memory
+
+**Interactive Mode**:
+1. Command Palette → "MCP: Add Memory"
+2. Fill in prompts:
+   - Project ID: `myproject`
+   - Type: `decision`
+   - Title: "Use PostgreSQL"
+   - Content: "Selected PostgreSQL for main database"
+   - Reason: "Better JSON support"
+   - Importance: `0.9`
+   - Tags: `database, architecture`
+
+**Quick Mode**:
+1. Select text in editor
+2. Right-click → "Save as Memory"
+3. Choose memory type
+4. Confirm
+
+### 4. Analyze Code Impact
+
+1. Open file to analyze
+2. Command Palette → "MCP: Execute Tool"
+3. Select: `code_graph_impact_analysis`
+4. Enter file path (or current file)
+5. View impact report
+
+### 5. Monitor Tasks
+
+**For async operations**:
+1. Submit directory processing
+2. Get task_id
+3. Command Palette → "MCP: Watch Task"
+4. Enter task_id
+5. View real-time progress
+
+## Workspace Integration
+
+### Project-Specific Configuration
+
+Create `.vscode/settings.json` in your project:
+
+```json
+{
+  "mcp.servers": {
+    "this-project": {
+      "command": "python",
+      "args": ["${workspaceFolder}/.mcp/start_mcp.py"],
+      "env": {
+        "PROJECT_NAME": "${workspaceFolderBasename}",
+        "NEO4J_DATABASE": "${workspaceFolderBasename}_db"
+      }
+    }
+  },
+  "mcp.defaultServer": "this-project"
+}
+```
+
+### Task Integration
+
+Add MCP tasks to `.vscode/tasks.json`:
+
+```json
+{
+  "version": "2.0.0",
+  "tasks": [
+    {
+      "label": "Index Codebase",
+      "type": "shell",
+      "command": "python",
+      "args": [
+        "-c",
+        "import asyncio; from mcp_tools import handle_code_graph_ingest_repo; asyncio.run(handle_code_graph_ingest_repo({'repo_path': '${workspaceFolder}', 'mode': 'incremental'}))"
+      ],
+      "presentation": {
+        "reveal": "always",
+        "panel": "new"
+      }
+    },
+    {
+      "label": "Query Knowledge",
+      "type": "shell",
+      "command": "python",
+      "args": [
+        "-c",
+        "import sys; from mcp_tools import handle_query_knowledge; print(handle_query_knowledge({'question': sys.argv[1]}))",
+        "${input:question}"
+      ]
+    }
+  ],
+  "inputs": [
+    {
+      "id": "question",
+      "type": "promptString",
+      "description": "Enter your question:"
+    }
+  ]
+}
+```
+
+### Keyboard Shortcuts
+
+Add to `keybindings.json`:
+
+```json
+[
+  {
+    "key": "ctrl+shift+k",
+    "command": "mcp.queryKnowledge",
+    "when": "editorTextFocus"
+  },
+  {
+    "key": "ctrl+shift+m",
+    "command": "mcp.addMemory",
+    "when": "editorHasSelection"
+  },
+  {
+    "key": "ctrl+shift+i",
+    "command": "mcp.ingestRepository",
+    "when": "workspaceFolderCount > 0"
+  },
+  {
+    "key": "ctrl+shift+s",
+    "command": "mcp.searchCode"
+  }
+]
+```
+
+## Advanced Features
+
+### Snippets Integration
+
+Create custom snippets that use MCP tools:
+
+`.vscode/snippets.code-snippets`:
+```json
+{
+  "Query Knowledge": {
+    "prefix": "mcp-query",
+    "body": [
+      "// Query: ${1:question}",
+      "// Answer: ${2:Use MCP to query}",
+      "$0"
+    ],
+    "description": "Insert MCP query placeholder"
+  },
+  "Add Memory": {
+    "prefix": "mcp-memory",
+    "body": [
+      "// MEMORY: ${1:title}",
+      "// Type: ${2|decision,preference,experience,convention|}",
+      "// Importance: ${3:0.8}",
+      "// ${4:description}",
+      "$0"
+    ],
+    "description": "Memory marker for auto-extraction"
+  }
+}
+```
+
+### Extension Integration
+
+Create custom VS Code extension:
+
+```typescript
+import * as vscode from 'vscode';
+
+export function activate(context: vscode.ExtensionContext) {
+  // Register command to query knowledge
+  let disposable = vscode.commands.registerCommand(
+    'extension.queryKnowledge',
+    async () => {
+      const question = await vscode.window.showInputBox({
+        prompt: 'Enter your question'
+      });
+
+      if (question) {
+        const result = await vscode.commands.executeCommand(
+          'mcp.executeTool',
+          {
+            server: 'code-graph',
+            tool: 'query_knowledge',
+            args: { question, mode: 'hybrid' }
+          }
+        );
+
+        // Display result
+        const panel = vscode.window.createWebviewPanel(
+          'mcpResult',
+          'Knowledge Query Result',
+          vscode.ViewColumn.Two,
+          {}
+        );
+        panel.webview.html = formatResult(result);
+      }
+    }
+  );
+
+  context.subscriptions.push(disposable);
+}
+```
+
+### Code Lens Provider
+
+Add inline code lenses:
+
+```typescript
+export class MCPCodeLensProvider implements vscode.CodeLensProvider {
+  provideCodeLenses(document: vscode.TextDocument): vscode.CodeLens[] {
+    const codeLenses: vscode.CodeLens[] = [];
+    const text = document.getText();
+
+    // Find memory markers
+    const memoryRegex = /\/\/ MEMORY: (.+)/g;
+    let match;
+
+    while ((match = memoryRegex.exec(text)) !== null) {
+      const line = document.lineAt(
+        document.positionAt(match.index).line
+      );
+
+      codeLenses.push(
+        new vscode.CodeLens(line.range, {
+          title: '💾 Save as Memory',
+          command: 'mcp.addMemory',
+          arguments: [match[1]]
+        })
+      );
+    }
+
+    return codeLenses;
+  }
+}
+```
+
+## Best Practices
+
+### 1. Workspace Configuration
+
+Use workspace settings for project-specific config:
+
+```json
+// .vscode/settings.json
+{
+  "mcp.servers": {
+    "local": {
+      "command": "python",
+      "args": ["${workspaceFolder}/../codebase-rag/start_mcp.py"]
+    }
+  },
+  "mcp.autoConnect": true,
+  "mcp.logLevel": "info"
+}
+```
+
+### 2. Environment Management
+
+Use `.env` files for sensitive data:
+
+```bash
+# .env (in codebase-rag directory)
+NEO4J_PASSWORD=secret
+OPENAI_API_KEY=sk-...
+```
+
+Reference in VS Code:
+```json
+{
+  "mcp.servers": {
+    "code-graph": {
+      "command": "python",
+      "args": ["${workspaceFolder}/start_mcp.py"],
+      "envFile": "${workspaceFolder}/.env"
+    }
+  }
+}
+```
+
+### 3. Multi-Project Setup
+
+For multiple projects, use workspace folders:
+
+```json
+{
+  "folders": [
+    {
+      "path": "/path/to/project-a",
+      "name": "Project A"
+    },
+    {
+      "path": "/path/to/project-b",
+      "name": "Project B"
+    }
+  ],
+  "settings": {
+    "mcp.servers": {
+      "project-a": {
+        "command": "python",
+        "args": ["/path/to/project-a/codebase-rag/start_mcp.py"]
+      },
+      "project-b": {
+        "command": "python",
+        "args": ["/path/to/project-b/codebase-rag/start_mcp.py"]
+      }
+    }
+  }
+}
+```
+
+### 4. Performance Optimization
+
+**Lazy Loading**:
+```json
+{
+  "mcp.servers": {
+    "code-graph": {
+      "command": "python",
+      "args": ["start_mcp.py"],
+      "lazyLoad": true,  // Only start when needed
+      "timeout": 30000
+    }
+  }
+}
+```
+
+**Connection Pooling**:
+```json
+{
+  "mcp.connectionPool": {
+    "maxConnections": 5,
+    "idleTimeout": 60000
+  }
+}
+```
+
+## Troubleshooting
+
+### Issue: Server Not Connecting
+
+**Symptoms**:
+- "Failed to connect to MCP server"
+- No tools available
+
+**Solutions**:
+
+1. **Check configuration syntax**:
+   ```bash
+   # Validate JSON (strict parser: remove any // comments first)
+   python -m json.tool .vscode/settings.json
+   ```
+
+2. **Verify command works**:
+   ```bash
+   cd /path/to/codebase-rag
+   python start_mcp.py
+   # Should not exit immediately
+   ```
+
+3. **Check VS Code output**:
+   - View → Output
+   - Select "MCP" from dropdown
+   - Check for error messages
+
+4. **Reload window**:
+   - Command Palette → "Developer: Reload Window"
+
+### Issue: Tools Failing
+
+**Symptoms**:
+- Tool calls return errors
+- Timeout messages
+
+**Solutions**:
+
+1. **Increase timeout**:
+   ```json
+   {
+     "mcp.servers": {
+       "code-graph": {
+         "timeout": 60000  // 60 seconds
+       }
+     }
+   }
+   ```
+
+2. **Check backend services**:
+   ```bash
+   # Neo4j
+   cypher-shell "RETURN 1"
+
+   # Ollama (if using)
+   curl http://localhost:11434/api/tags
+   ```
+
+3. **View MCP server logs**:
+   ```bash
+   tail -f /path/to/codebase-rag/mcp_server.log
+   ```
+
+### Issue: Permission Denied
+
+**Symptoms**:
+- Cannot start MCP server
+- Permission errors in output
+
+**Solutions**:
+
+1. **Fix file permissions**:
+   ```bash
+   chmod +x start_mcp.py
+   ```
+
+2. **Check VS Code has access**:
+   ```bash
+   # macOS: Grant Full Disk Access via
+   # System Preferences → Security & Privacy → Full Disk Access → Add VS Code
+   ```
+
+3. **Run as correct user**:
+   ```bash
+   whoami
+   # Ensure this matches the Neo4j user
+   ```
+
+### Issue: Slow Performance
+
+**Symptoms**:
+- Long delays for tool calls
+- VS Code freezing
+
+**Solutions**:
+
+1. **Use async mode**:
+   - Directory processing
+   - Large document ingestion
+
+2. **Reduce query scope**:
+   ```json
+   {
+     "question": "specific query",
+     "top_k": 3  // Reduce from default 5
+   }
+   ```
+
+3. **Enable caching**:
+   ```bash
+   # In .env
+   ENABLE_QUERY_CACHE=true
+   CACHE_TTL=3600
+   ```
+
+## Integration with Other Extensions
+
+### GitHub Copilot
+
+Combine with Copilot:
+
+```json
+{
+  "github.copilot.advanced": {
+    "contextSources": ["mcp-code-graph"]
+  }
+}
+```
+
+### GitLens
+
+Extract memories from git commits:
+
+```json
+{
+  "gitlens.advanced.messages": {
+    "suppressCommitNotFoundWarning": true
+  },
+  "mcp.git.autoExtract": true
+}
+```
+
+### REST Client
+
+Test MCP tools via HTTP:
+
+```http
+### Query Knowledge
+POST http://localhost:8000/api/v1/knowledge/query
+Content-Type: application/json
+
+{
+  "question": "How do I configure Docker?",
+  "mode": "hybrid"
+}
+```
+
+## Next Steps
+
+- **[MCP Overview](overview.md)**: Learn about MCP protocol
+- **[Claude Desktop Setup](claude-desktop.md)**: Configure Claude Desktop
+- **[Knowledge RAG](../knowledge/overview.md)**: Use query tools effectively
+- **[Code Graph](../code-graph/overview.md)**: Analyze your codebase
+
+## Additional Resources
+
+- **VS Code MCP Extension**: https://marketplace.visualstudio.com/items?itemName=anthropic.mcp
+- **MCP Documentation**: https://modelcontextprotocol.io/
+- **VS Code API**: https://code.visualstudio.com/api
+- **Extension Development**: https://code.visualstudio.com/api/get-started/your-first-extension
diff --git a/docs/guide/memory/extraction.md b/docs/guide/memory/extraction.md
new file mode 100644
index 0000000..289d6e1
--- /dev/null
+++ b/docs/guide/memory/extraction.md
@@ -0,0 +1,1212 @@
+# Automatic Memory Extraction Guide (v0.7)
+
+Comprehensive guide to automatic memory extraction features. Learn how to extract memories from conversations, git commits, code comments, and entire repositories.
+
+## Table of Contents
+
+- [Extraction Overview](#extraction-overview)
+- [Conversation Extraction](#conversation-extraction)
+- [Git Commit Extraction](#git-commit-extraction)
+- [Code Comment Mining](#code-comment-mining)
+- [Query-Based Suggestions](#query-based-suggestions)
+- [Batch Repository Extraction](#batch-repository-extraction)
+- [Integration Patterns](#integration-patterns)
+- [Configuration](#configuration)
+- [Best Practices](#best-practices)
+
+---
+
+## Extraction Overview
+
+Memory Store v0.7 introduces automatic extraction capabilities that use LLM analysis to identify and extract important project knowledge from various sources.
+
+### Extraction Sources
+
+1. **Conversations** - AI conversations with users
+2. **Git Commits** - Commit messages and file changes
+3. **Code Comments** - TODO, FIXME, BUG, NOTE, IMPORTANT, DECISION markers
+4. **Knowledge Queries** - Q&A interactions
+5. **Repository Batch** - Comprehensive codebase analysis
+
+### How It Works
+
+```
+Source Content
+     ↓
+LLM Analysis
+     ↓
+Memory Extraction
+     ↓
+Confidence Scoring
+     ↓
+Auto-save (optional) or Suggestions
+```
+
+**Confidence Threshold**: Memories with confidence ≥ 0.7 can be auto-saved
+
+### Key Features
+
+- **LLM-Powered**: Uses project's configured LLM for intelligent analysis
+- **Confidence Scores**: Each extraction includes confidence rating
+- **Auto-Save Option**: High-confidence memories can be saved automatically
+- **Structured Output**: Extracts proper memory type, importance, tags
+- **Batch Processing**: Handle multiple sources efficiently
+
+---
+
+## Conversation Extraction
+
+### Overview
+
+Extract memories from AI-user conversations by analyzing dialogue for important decisions, preferences, and learnings.
+
+**Best For**:
+- Design discussions
+- Technical decision-making conversations
+- Problem-solving sessions
+- Architecture planning discussions
+
+### Basic Usage
+
+**MCP Tool**:
+```python
+extract_from_conversation(
+    project_id="my-project",
+    conversation=[
+        {
+            "role": "user",
+            "content": "Should we use Redis or Memcached for caching?"
+        },
+        {
+            "role": "assistant",
+            "content": "I recommend Redis because it supports data persistence, has richer data structures, and provides better tooling. Redis also allows you to use it as both cache and message queue."
+        },
+        {
+            "role": "user",
+            "content": "Great, let's go with Redis then."
+        }
+    ],
+    auto_save=True
+)
+```
+
+**HTTP API**:
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/extract/conversation \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "my-project",
+    "conversation": [
+      {"role": "user", "content": "Should we use Redis or Memcached?"},
+      {"role": "assistant", "content": "I recommend Redis because..."}
+    ],
+    "auto_save": true
+  }'
+```
+
+**Python Service**:
+```python
+from services.memory_extractor import memory_extractor
+
+result = await memory_extractor.extract_from_conversation(
+    project_id="my-project",
+    conversation=[
+        {"role": "user", "content": "Should we use Redis or Memcached?"},
+        {"role": "assistant", "content": "I recommend Redis..."}
+    ],
+    auto_save=True
+)
+
+print(f"Auto-saved: {result['auto_saved_count']} memories")
+print(f"Suggestions: {len(result['suggestions'])} memories")
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "extracted_memories": [
+    {
+      "type": "decision",
+      "title": "Use Redis for caching",
+      "content": "Decided to use Redis over Memcached for caching layer",
+      "reason": "Redis supports persistence, richer data structures, and better tooling",
+      "tags": ["cache", "redis", "infrastructure"],
+      "importance": 0.8,
+      "memory_id": "550e8400-...",
+      "auto_saved": true
+    }
+  ],
+  "auto_saved_count": 1,
+  "suggestions": [],
+  "total_extracted": 1
+}
+```
+
+### Advanced: Conversation Analysis
+
+**Multi-turn Discussions**:
+```python
+conversation = [
+    {"role": "user", "content": "How should we handle user authentication?"},
+    {"role": "assistant", "content": "For your use case, I'd recommend JWT tokens with refresh token rotation..."},
+    {"role": "user", "content": "What about session storage?"},
+    {"role": "assistant", "content": "For JWTs, you don't need server-side sessions. Store tokens in httpOnly cookies..."},
+    {"role": "user", "content": "Should we use Redis for token blacklisting?"},
+    {"role": "assistant", "content": "Yes, Redis is perfect for token blacklisting with TTL support..."}
+]
+
+result = await memory_extractor.extract_from_conversation(
+    project_id="web-app",
+    conversation=conversation,
+    auto_save=False  # Review before saving
+)
+
+# Review suggestions
+for suggestion in result['suggestions']:
+    print(f"Type: {suggestion['type']}")
+    print(f"Title: {suggestion['title']}")
+    print(f"Confidence: {suggestion['confidence']}")
+    print(f"Importance: {suggestion['importance']}")
+    print()
+
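+# Manually save high-value suggestions
+for suggestion in result['suggestions']:
+    if suggestion['confidence'] >= 0.8:
+        # Map fields explicitly: add_memory expects memory_type (not type) and no confidence
+        fields = {k: suggestion[k] for k in ('title', 'content', 'reason', 'tags', 'importance')}
+        await memory_store.add_memory(
+            project_id="web-app", memory_type=suggestion['type'], **fields)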
+```
+
+### Extraction Quality
+
+**What Gets Extracted**:
+- ✅ Technical decisions and rationale
+- ✅ Technology choices with reasoning
+- ✅ Architectural patterns discussed
+- ✅ Problems and solutions
+- ✅ Best practices agreed upon
+- ✅ Security considerations
+
+**What Doesn't Get Extracted**:
+- ❌ Casual greetings
+- ❌ Clarifying questions
+- ❌ Routine code snippets
+- ❌ Trivial preferences
+- ❌ Temporary experiments
+
+### Auto-Save vs Manual Review
+
+**Auto-Save (auto_save=true)**:
+```python
+# Automatically save memories with confidence >= 0.7
+result = await memory_extractor.extract_from_conversation(
+    project_id="my-project",
+    conversation=conversation,
+    auto_save=True
+)
+
+# Only high-confidence memories are saved
+print(f"Auto-saved: {result['auto_saved_count']}")
+```
+
+**Manual Review (auto_save=false)**:
+```python
+# Get suggestions, review before saving
+result = await memory_extractor.extract_from_conversation(
+    project_id="my-project",
+    conversation=conversation,
+    auto_save=False
+)
+
+# Review each suggestion
+for suggestion in result['suggestions']:
+    print(f"Review: {suggestion['title']}")
+    print(f"Confidence: {suggestion['confidence']}")
+
+    # Manually save selected ones
+    if user_approves(suggestion):
+        await memory_store.add_memory(
+            project_id="my-project",
+            memory_type=suggestion['type'],
+            title=suggestion['title'],
+            content=suggestion['content'],
+            reason=suggestion['reason'],
+            tags=suggestion['tags'],
+            importance=suggestion['importance']
+        )
+```
+
+---
+
+## Git Commit Extraction
+
+### Overview
+
+Extract memories from git commits by analyzing commit messages, changed files, and commit types.
+
+**Best For**:
+- Feature additions (decisions)
+- Bug fixes (experiences)
+- Breaking changes (critical decisions)
+- Refactoring (experiences/conventions)
+
+### Basic Usage
+
+**MCP Tool**:
+```python
+extract_from_git_commit(
+    project_id="my-project",
+    commit_sha="abc123def456",
+    commit_message="feat: add JWT authentication\n\nImplemented JWT-based authentication for API endpoints. Tokens expire after 24 hours with refresh token support.",
+    changed_files=[
+        "src/auth/jwt.py",
+        "src/middleware/auth.py",
+        "tests/test_auth.py"
+    ],
+    auto_save=True
+)
+```
+
+**HTTP API**:
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/extract/commit \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "my-project",
+    "commit_sha": "abc123def456",
+    "commit_message": "feat: add JWT authentication",
+    "changed_files": ["src/auth/jwt.py", "src/middleware/auth.py"],
+    "auto_save": true
+  }'
+```
+
+**Python Service**:
+```python
+from services.memory_extractor import memory_extractor
+
+result = await memory_extractor.extract_from_git_commit(
+    project_id="my-project",
+    commit_sha="abc123def456",
+    commit_message="feat: add JWT authentication\n\nImplemented JWT for API",
+    changed_files=["src/auth/jwt.py"],
+    auto_save=True
+)
+
+print(f"Commit type: {result['commit_type']}")
+print(f"Extracted: {result['auto_saved_count']} memories")
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "extracted_memories": [
+    {
+      "type": "decision",
+      "title": "Add JWT authentication",
+      "content": "Implemented JWT-based authentication for API endpoints with 24-hour token expiry and refresh token support",
+      "reason": "Provide secure, stateless authentication for API clients",
+      "tags": ["auth", "jwt", "security", "feat"],
+      "importance": 0.8,
+      "memory_id": "550e8400-...",
+      "metadata": {
+        "source": "git_commit",
+        "commit_sha": "abc123def456",
+        "changed_files": ["src/auth/jwt.py", "src/middleware/auth.py"],
+        "confidence": 0.85
+      }
+    }
+  ],
+  "auto_saved_count": 1,
+  "suggestions": [],
+  "commit_type": "feat"
+}
+```
+
+### Commit Type Classification
+
+The extractor automatically classifies commits:
+
+| Commit Type | Memory Type | Importance Range | Example |
+|-------------|-------------|------------------|---------|
+| `feat` | decision | 0.7-0.9 | "feat: add OAuth support" |
+| `fix` | experience | 0.5-0.8 | "fix: resolve Redis timeout in Docker" |
+| `refactor` | experience | 0.4-0.7 | "refactor: improve auth middleware" |
+| `docs` | convention | 0.3-0.6 | "docs: add API naming conventions" |
+| `breaking` | decision | 0.9-1.0 | "feat!: migrate to PostgreSQL" |
+| `chore` | note | 0.2-0.4 | "chore: update dependencies" |
+
+### Integration with Git Hooks
+
+**Post-commit hook** (.git/hooks/post-commit):
+```bash
+#!/bin/bash
+
+# Get commit details
+COMMIT_SHA=$(git rev-parse HEAD)
+COMMIT_MSG=$(git log -1 --pretty=%B)
+CHANGED_FILES=$(git diff-tree --no-commit-id --name-only -r HEAD)
+
+# Build the JSON payload with jq (required) so quotes/newlines in the message are escaped
+PAYLOAD=$(jq -n --arg sha "$COMMIT_SHA" --arg msg "$COMMIT_MSG" --arg files "$CHANGED_FILES" \
+  '{project_id: "my-project", commit_sha: $sha, commit_message: $msg,
+    changed_files: ($files | split("\n") | map(select(length > 0)))}')
+
+# Extract memories
+curl -X POST http://localhost:8000/api/v1/memory/extract/commit \
+  -H "Content-Type: application/json" \
+  -d "$PAYLOAD"
+```
+
+### Batch Git History Analysis
+
+```python
+import subprocess
+from pathlib import Path
+
+async def analyze_git_history(
+    project_id: str,
+    repo_path: str,
+    max_commits: int = 50
+):
+    """Analyze recent git commits and extract memories"""
+
+    # Get recent commits; %x1f separates fields and %x1e ends each record,
+    # so multi-line bodies and "|" characters in messages parse correctly
+    log_output = subprocess.run(
+        ["git", "log", f"-{max_commits}", "--pretty=format:%H%x1f%s%x1f%b%x1e"],
+        cwd=repo_path,
+        capture_output=True,
+        text=True
+    ).stdout
+
+    extracted_count = 0
+
+    for record in log_output.split('\x1e'):
+        if not record.strip():
+            continue
+        parts = record.strip('\n').split('\x1f', 2)
+        commit_sha = parts[0]
+        subject = parts[1] if len(parts) > 1 else ""
+        body = parts[2] if len(parts) > 2 else ""
+        commit_message = f"{subject}\n{body}".strip()
+
+        # Get changed files
+        files_result = subprocess.run(
+            ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", commit_sha],
+            cwd=repo_path,
+            capture_output=True,
+            text=True
+        )
+        changed_files = [f for f in files_result.stdout.splitlines() if f]
+
+        # Extract memories
+        result = await memory_extractor.extract_from_git_commit(
+            project_id=project_id,
+            commit_sha=commit_sha,
+            commit_message=commit_message,
+            changed_files=changed_files,
+            auto_save=True
+        )
+
+        if result.get('success'):
+            extracted_count += result.get('auto_saved_count', 0)
+
+    print(f"Analyzed {max_commits} commits, extracted {extracted_count} memories")
+    return extracted_count
+
+# Usage
+await analyze_git_history("my-project", "/path/to/repo", max_commits=100)
+```
+
+---
+
+## Code Comment Mining
+
+### Overview
+
+Extract memories from code comments by identifying special markers: TODO, FIXME, NOTE, DECISION, IMPORTANT, BUG.
+
+**Best For**:
+- TODOs and future work
+- Known bugs and issues
+- Important implementation notes
+- Documented decisions
+
+### Basic Usage
+
+**MCP Tool**:
+```python
+extract_from_code_comments(
+    project_id="my-project",
+    file_path="/path/to/project/src/service.py"
+)
+```
+
+**HTTP API**:
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/extract/comments \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "my-project",
+    "file_path": "/path/to/project/src/service.py"
+  }'
+```
+
+**Python Service**:
+```python
+from services.memory_extractor import memory_extractor
+
+result = await memory_extractor.extract_from_code_comments(
+    project_id="my-project",
+    file_path="/path/to/project/src/service.py"
+)
+
+print(f"Total comments: {result['total_comments']}")
+print(f"Extracted: {result['total_extracted']} memories")
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "extracted_memories": [
+    {
+      "type": "plan",
+      "title": "Add rate limiting to API endpoints",
+      "content": "TODO: Add rate limiting to API endpoints",
+      "importance": 0.4,
+      "tags": ["todo", "py"],
+      "memory_id": "550e8400-...",
+      "line": 45
+    },
+    {
+      "type": "experience",
+      "title": "Redis connection pool needs minimum 10 connections",
+      "content": "FIXME: Redis connection pool needs minimum 10 connections",
+      "importance": 0.6,
+      "tags": ["bug", "fixme", "py"],
+      "memory_id": "550e8400-...",
+      "line": 78
+    }
+  ],
+  "total_comments": 15,
+  "total_extracted": 2
+}
+```
+
+### Comment Marker Mapping
+
+| Marker | Memory Type | Importance | Use Case |
+|--------|-------------|------------|----------|
+| `TODO:` | plan | 0.4 | Future work, planned improvements |
+| `FIXME:` | experience | 0.6 | Known bugs, issues to fix |
+| `BUG:` | experience | 0.6 | Documented bugs |
+| `NOTE:` | convention | 0.5 | Important notes, gotchas |
+| `IMPORTANT:` | convention | 0.5 | Critical information |
+| `DECISION:` | decision | 0.7 | Documented decisions |
+
+### Example Code with Markers
+
+**Python**:
+```python
+class UserService:
+    def __init__(self):
+        # DECISION: Using Redis for session storage instead of database
+        # Reason: Need sub-millisecond latency for session lookups
+        self.redis_client = RedisClient()
+
+        # NOTE: Connection pool must have minimum 10 connections
+        # Lower values cause connection timeout under load
+        self.pool_size = 10
+
+    def authenticate(self, token: str):
+        # TODO: Add refresh token rotation for better security
+        # This will require database changes and client updates
+        pass
+
+    def get_user(self, user_id: int):
+        # FIXME: Cache invalidation doesn't work for user updates
+        # Need to implement pub/sub pattern for cache invalidation
+        return self._fetch_from_db(user_id)
+```
+
+**JavaScript**:
+```javascript
+class AuthService {
+  constructor() {
+    // DECISION: Using JWT with 15-minute expiry
+    // Short expiry reduces risk of token theft
+    this.tokenExpiry = 15 * 60; // 15 minutes
+
+    // TODO: Implement token blacklist for logout
+    // Will need Redis for fast blacklist lookups
+  }
+
+  async verifyToken(token) {
+    // IMPORTANT: Must check token expiry AND signature
+    // Checking only signature is a security vulnerability
+    const decoded = jwt.verify(token, SECRET);
+
+    // FIXME: Token validation doesn't handle clock skew
+    // Need to add leeway parameter to jwt.verify()
+    return decoded;
+  }
+}
+```
+
+### Batch Comment Extraction
+
+```python
+from pathlib import Path
+
+async def extract_from_all_files(
+    project_id: str,
+    repo_path: str,
+    file_patterns: list = ["*.py", "*.js", "*.ts"]
+):
+    """Extract comments from all matching files"""
+
+    repo = Path(repo_path)
+    total_extracted = 0
+
+    for pattern in file_patterns:
+        for file_path in repo.rglob(pattern):
+            try:
+                result = await memory_extractor.extract_from_code_comments(
+                    project_id=project_id,
+                    file_path=str(file_path)
+                )
+
+                if result.get('success'):
+                    count = result.get('total_extracted', 0)
+                    total_extracted += count
+                    print(f"{file_path.name}: {count} memories")
+
+            except Exception as e:
+                print(f"Error processing {file_path}: {e}")
+
+    print(f"Total extracted: {total_extracted} memories")
+    return total_extracted
+
+# Usage
+await extract_from_all_files(
+    "my-project",
+    "/path/to/repo",
+    file_patterns=["*.py", "*.js", "*.ts", "*.java"]
+)
+```
+
+---
+
+## Query-Based Suggestions
+
+### Overview
+
+Analyze knowledge base Q&A interactions and suggest creating memories for important information.
+
+**Best For**:
+- Frequently asked questions
+- Non-obvious solutions
+- Architectural information
+- Important conventions
+
+### Basic Usage
+
+**MCP Tool**:
+```python
+suggest_memory_from_query(
+    project_id="my-project",
+    query="How does the authentication system work?",
+    answer="The system uses JWT tokens with refresh token rotation. Access tokens expire after 15 minutes, refresh tokens after 7 days. Tokens are stored in httpOnly cookies to prevent XSS attacks."
+)
+```
+
+**HTTP API**:
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/suggest \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "my-project",
+    "query": "How does authentication work?",
+    "answer": "The system uses JWT tokens with refresh token rotation..."
+  }'
+```
+
+**Python Service**:
+```python
+from services.memory_extractor import memory_extractor
+
+result = await memory_extractor.suggest_memory_from_query(
+    project_id="my-project",
+    query="How does the authentication system work?",
+    answer="The system uses JWT tokens with refresh token rotation..."
+)
+
+if result['should_save']:
+    suggested = result['suggested_memory']
+    print(f"Suggested: {suggested['title']}")
+    print(f"Type: {suggested['type']}")
+    print(f"Importance: {suggested['importance']}")
+
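+    # Manually save if approved (add_memory takes memory_type, not type)
+    fields = {k: v for k, v in suggested.items() if k != 'type'}
+    await memory_store.add_memory(
+        project_id="my-project", memory_type=suggested['type'], **fields
+    )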
+```
+
+**Response (should save)**:
+```json
+{
+  "success": true,
+  "should_save": true,
+  "suggested_memory": {
+    "type": "note",
+    "title": "Authentication system uses JWT with refresh tokens",
+    "content": "System uses JWT tokens with refresh token rotation. Access tokens: 15min expiry. Refresh tokens: 7 days. Stored in httpOnly cookies for XSS protection",
+    "reason": "Core authentication architecture - important for future development",
+    "tags": ["auth", "jwt", "security"],
+    "importance": 0.7
+  },
+  "query": "How does the authentication system work?",
+  "answer_excerpt": "The system uses JWT tokens with..."
+}
+```
+
+**Response (should not save)**:
+```json
+{
+  "success": true,
+  "should_save": false,
+  "reason": "Routine question about standard library function - not project-specific",
+  "query": "How do I use datetime.now()?"
+}
+```
+
+### Integration with Knowledge Service
+
+```python
+from services.neo4j_knowledge_service import knowledge_service
+from services.memory_extractor import memory_extractor
+
+async def query_with_memory_suggestion(
+    project_id: str,
+    query: str
+):
+    """Query knowledge base and suggest saving as memory if important"""
+
+    # Query knowledge base
+    result = await knowledge_service.query_knowledge(query)
+    answer = result.get('answer', '')
+
+    # Suggest memory
+    suggestion = await memory_extractor.suggest_memory_from_query(
+        project_id=project_id,
+        query=query,
+        answer=answer
+    )
+
+    # Auto-save if highly important
+    if suggestion.get('should_save'):
+        suggested = suggestion['suggested_memory']
+
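+        if suggested['importance'] >= 0.8:
+            # Auto-save critical information (add_memory takes memory_type, not type)
+            fields = {k: v for k, v in suggested.items() if k != 'type'}
+            await memory_store.add_memory(
+                project_id=project_id, memory_type=suggested['type'],
+                **fields, metadata={'source': 'auto_query'}
+            )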
+            print(f"Auto-saved memory: {suggested['title']}")
+
+    return {
+        'answer': answer,
+        'memory_suggested': suggestion.get('should_save', False)
+    }
+```
+
+---
+
+## Batch Repository Extraction
+
+### Overview
+
+Comprehensive analysis of entire repository: git commits, code comments, and documentation files.
+
+**Best For**:
+- Initial project setup
+- Onboarding AI agents
+- Knowledge base bootstrapping
+- Project audits
+
+### Basic Usage
+
+**MCP Tool**:
+```python
+batch_extract_from_repository(
+    project_id="my-project",
+    repo_path="/path/to/repository",
+    max_commits=50,
+    file_patterns=["*.py", "*.js", "*.ts"]
+)
+```
+
+**HTTP API**:
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/extract/batch \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "my-project",
+    "repo_path": "/path/to/repository",
+    "max_commits": 50,
+    "file_patterns": ["*.py", "*.js"]
+  }'
+```
+
+**Python Service**:
+```python
+from services.memory_extractor import memory_extractor
+
+result = await memory_extractor.batch_extract_from_repository(
+    project_id="my-project",
+    repo_path="/path/to/repository",
+    max_commits=50,
+    file_patterns=["*.py", "*.js", "*.ts"]
+)
+
+print(f"Total extracted: {result['total_extracted']}")
+print(f"From commits: {result['by_source']['git_commits']}")
+print(f"From comments: {result['by_source']['code_comments']}")
+print(f"From docs: {result['by_source']['documentation']}")
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "total_extracted": 45,
+  "by_source": {
+    "git_commits": 12,
+    "code_comments": 28,
+    "documentation": 5
+  },
+  "extracted_memories": [...],
+  "repository": "/path/to/repository"
+}
+```
+
+### What Gets Analyzed
+
+**1. Git Commits**:
+- Recent commits (up to `max_commits`)
+- Commit messages (title + body)
+- Changed files
+- Commit type (feat, fix, refactor, etc.)
+
+**2. Code Comments**:
+- Source files matching `file_patterns`
+- TODO, FIXME, BUG, NOTE, IMPORTANT, DECISION markers
+- Up to 30 files sampled for performance
+
+**3. Documentation**:
+- README.md - Project overview
+- CHANGELOG.md - Project evolution
+- CONTRIBUTING.md - Conventions
+- CLAUDE.md - AI agent instructions
+
+### Configuration Options
+
+```python
+# Default configuration
+await memory_extractor.batch_extract_from_repository(
+    project_id="my-project",
+    repo_path="/path/to/repo",
+    max_commits=50,  # Last 50 commits
+    file_patterns=["*.py", "*.js", "*.ts", "*.java", "*.go", "*.rs"]
+)
+
+# Focused on recent commits
+await memory_extractor.batch_extract_from_repository(
+    project_id="my-project",
+    repo_path="/path/to/repo",
+    max_commits=20,  # Just last 20
+    file_patterns=None  # Skip code comments
+)
+
+# Deep codebase analysis
+await memory_extractor.batch_extract_from_repository(
+    project_id="my-project",
+    repo_path="/path/to/repo",
+    max_commits=100,  # More commits
+    file_patterns=["*.py", "*.js", "*.ts", "*.java", "*.go", "*.rs", "*.cpp", "*.c"]
+)
+```
+
+### Performance Considerations
+
+**Limits**:
+- Max 20 commits processed (even if max_commits is higher)
+- Max 30 source files sampled
+- Max 3 memories per marker type per file
+
+**Processing Time**:
+- Small repo (< 100 files): 1-2 minutes
+- Medium repo (100-1000 files): 3-5 minutes
+- Large repo (> 1000 files): 5-10 minutes
+
+**Optimization**:
+```python
+# Quick bootstrap
+await memory_extractor.batch_extract_from_repository(
+    project_id="my-project",
+    repo_path="/path/to/repo",
+    max_commits=10,  # Limited commits
+    file_patterns=["*.py"]  # Single language
+)
+
+# Comprehensive analysis (run overnight)
+await memory_extractor.batch_extract_from_repository(
+    project_id="my-project",
+    repo_path="/path/to/repo",
+    max_commits=100,
+    file_patterns=["*.py", "*.js", "*.ts", "*.java", "*.go"]
+)
+```
+
+---
+
+## Integration Patterns
+
+### Pattern 1: Post-Session Extraction
+
+Extract memories after AI coding session:
+
+```python
+async def post_session_extraction(
+    project_id: str,
+    conversation: list,
+    repo_path: str
+):
+    """Extract memories after coding session"""
+
+    # 1. Extract from conversation
+    conv_result = await memory_extractor.extract_from_conversation(
+        project_id=project_id,
+        conversation=conversation,
+        auto_save=True
+    )
+
+    # 2. Get recent commit (if any)
+    import subprocess
+    try:
+        commit_sha = subprocess.check_output(
+            ["git", "rev-parse", "HEAD"],
+            cwd=repo_path,
+            text=True
+        ).strip()
+
+        commit_msg = subprocess.check_output(
+            ["git", "log", "-1", "--pretty=%B"],
+            cwd=repo_path,
+            text=True
+        ).strip()
+
+        changed_files = subprocess.check_output(
+            ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", "HEAD"],
+            cwd=repo_path,
+            text=True
+        ).strip().split('\n')
+
+        # Extract from commit
+        commit_result = await memory_extractor.extract_from_git_commit(
+            project_id=project_id,
+            commit_sha=commit_sha,
+            commit_message=commit_msg,
+            changed_files=changed_files,
+            auto_save=True
+        )
+
+    except Exception as e:
+        print(f"No recent commit: {e}")
+        commit_result = {}  # Nothing extracted; keeps the return below from raising NameError
+    return {
+        'conversation_memories': conv_result['auto_saved_count'],
+        'commit_memories': commit_result.get('auto_saved_count', 0)
+    }
+```
+
+### Pattern 2: Continuous Git Hook Integration
+
+Extract on every commit:
+
+```python
+#!/usr/bin/env python3
+# .git/hooks/post-commit (the shebang must be the first line of the file)
+
+import asyncio
+import subprocess
+import sys
+sys.path.insert(0, '/path/to/project')
+
+from services.memory_extractor import memory_extractor
+
+async def main():
+    # Get commit details
+    commit_sha = subprocess.check_output(
+        ["git", "rev-parse", "HEAD"],
+        text=True
+    ).strip()
+
+    commit_msg = subprocess.check_output(
+        ["git", "log", "-1", "--pretty=%B"],
+        text=True
+    ).strip()
+
+    changed_files = subprocess.check_output(
+        ["git", "diff-tree", "--no-commit-id", "--name-only", "-r", "HEAD"],
+        text=True
+    ).strip().split('\n')
+
+    # Extract memories
+    result = await memory_extractor.extract_from_git_commit(
+        project_id="my-project",
+        commit_sha=commit_sha,
+        commit_message=commit_msg,
+        changed_files=changed_files,
+        auto_save=True
+    )
+
+    if result.get('auto_saved_count', 0) > 0:
+        print(f"✅ Extracted {result['auto_saved_count']} memories from commit")
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+
+### Pattern 3: Scheduled Repository Scans
+
+Daily/weekly full repository analysis:
+
+```python
+import schedule
+import asyncio
+
+async def daily_repository_scan():
+    """Daily scan of repository for new knowledge"""
+
+    result = await memory_extractor.batch_extract_from_repository(
+        project_id="my-project",
+        repo_path="/path/to/repo",
+        max_commits=10,  # Last day's commits
+        file_patterns=["*.py", "*.js"]
+    )
+
+    print(f"Daily scan: {result['total_extracted']} new memories")
+
+    # Send notification
+    if result['total_extracted'] > 5:
+        send_slack_notification(
+            f"⚠️ {result['total_extracted']} new memories extracted from codebase"
+        )
+
+# Schedule daily at 2 AM (jobs only fire while schedule.run_pending() is polled in a loop)
+schedule.every().day.at("02:00").do(lambda: asyncio.run(daily_repository_scan()))
+```
+
+---
+
+## Configuration
+
+### LLM Settings
+
+Extraction uses the project's configured LLM:
+
+```bash
+# .env file
+LLM_PROVIDER=openai  # or ollama, gemini, openrouter
+OPENAI_API_KEY=your-key
+```
+
+### Confidence Threshold
+
+Adjust auto-save threshold (default: 0.7):
+
+```python
+from services.memory_extractor import memory_extractor
+
+# Lower threshold (more auto-saves)
+memory_extractor.confidence_threshold = 0.6
+
+# Higher threshold (fewer auto-saves, higher quality)
+memory_extractor.confidence_threshold = 0.8
+```
+
+### Processing Limits
+
+Adjust processing limits:
+
+```python
+from services.memory_extractor import MemoryExtractor
+
+# Custom limits
+MemoryExtractor.MAX_COMMITS_TO_PROCESS = 30
+MemoryExtractor.MAX_FILES_TO_SAMPLE = 50
+MemoryExtractor.MAX_ITEMS_PER_TYPE = 5
+```
+
+---
+
+## Best Practices
+
+### 1. Choose Appropriate Auto-Save Settings
+
+```python
+# Critical production project: Review before saving
+auto_save=False
+
+# Personal project: Auto-save high confidence
+auto_save=True
+```
+
+### 2. Batch Operations During Off-Hours
+
+```python
+# Run comprehensive analysis during off-hours
+# Avoid running during active development
+
+# Good: 2 AM daily scan
+schedule.every().day.at("02:00").do(run_batch_extraction)
+
+# Bad: Every 10 minutes during work hours
+```
+
+### 3. Monitor Extraction Quality
+
+```python
+result = await memory_extractor.extract_from_conversation(
+    project_id="my-project",
+    conversation=conversation,
+    auto_save=False
+)
+
+# Review confidence distribution
+high_confidence = sum(1 for s in result['suggestions'] if s['confidence'] >= 0.8)
+medium_confidence = sum(1 for s in result['suggestions'] if 0.6 <= s['confidence'] < 0.8)
+low_confidence = sum(1 for s in result['suggestions'] if s['confidence'] < 0.6)
+
+print(f"High: {high_confidence}, Medium: {medium_confidence}, Low: {low_confidence}")
+```
+
+### 4. Customize Marker Importance
+
+```python
+# For your codebase, adjust marker importance in memory_extractor.py
+# Example: Make TODOs more important in your project
+
+# Override classification
+def custom_classify_comment(text: str) -> dict:
+    if "TODO:" in text.upper():
+        # Higher importance for TODOs in your project
+        return {
+            "type": "plan",
+            "importance": 0.7  # Instead of default 0.4
+        }
+    # ... other markers
+```
+
+### 5. Review and Clean Periodically
+
+```python
+async def review_auto_extracted_memories(project_id: str):
+    """Review and clean auto-extracted memories"""
+
+    # Find all auto-extracted memories
+    all_memories = await memory_store.search_memories(
+        project_id=project_id,
+        limit=100
+    )
+
+    auto_extracted = [
+        m for m in all_memories['memories']
+        if m.get('metadata', {}).get('source') in ['conversation', 'git_commit', 'code_comment']
+    ]
+
+    # Review low-importance ones
+    for memory in auto_extracted:
+        if memory['importance'] < 0.4:
+            print(f"Review: {memory['title']} (importance: {memory['importance']})")
+            # Manually decide: keep, delete, or update importance
+```
+
+---
+
+## Troubleshooting
+
+### No Memories Extracted
+
+**Problem**: Extraction returns 0 memories
+
+**Solutions**:
+```python
+# 1. Check LLM is configured
+from llama_index.core import Settings
+print(f"LLM configured: {Settings.llm is not None}")
+
+# 2. Verify source content is substantial
+# Short conversations may not yield memories
+
+# 3. Check confidence threshold
+memory_extractor.confidence_threshold = 0.5  # Lower threshold
+
+# 4. Disable auto_save to see all suggestions
+result = await memory_extractor.extract_from_conversation(
+    project_id="my-project",
+    conversation=conversation,
+    auto_save=False
+)
+print(f"Suggestions: {len(result['suggestions'])}")
+```
+
+### Low Quality Extractions
+
+**Problem**: Extracted memories are not useful
+
+**Solutions**:
+```python
+# 1. Increase confidence threshold
+memory_extractor.confidence_threshold = 0.8
+
+# 2. Use manual review
+auto_save=False
+
+# 3. Provide better source content
+# More detailed conversations yield better extractions
+```
+
+### Extraction Too Slow
+
+**Problem**: Batch extraction takes too long
+
+**Solutions**:
+```python
+# 1. Reduce max_commits
+max_commits=10  # Instead of 50
+
+# 2. Limit file patterns
+file_patterns=["*.py"]  # Just Python
+
+# 3. Use sampling
+# MemoryExtractor already samples (MAX_FILES_TO_SAMPLE = 30)
+```
+
+---
+
+## Next Steps
+
+- **Manual Management**: See [manual.md](./manual.md) for CRUD operations
+- **Search Strategies**: See [search.md](./search.md) for finding memories
+- **Overview**: See [overview.md](./overview.md) for system introduction
+- **API Reference**: See `/api/v1/memory` endpoints
diff --git a/docs/guide/memory/manual.md b/docs/guide/memory/manual.md
new file mode 100644
index 0000000..bf0ff63
--- /dev/null
+++ b/docs/guide/memory/manual.md
@@ -0,0 +1,997 @@
+# Manual Memory Management Guide
+
+This guide covers manual memory management operations in the Memory Store (v0.6 features). Learn how to create, search, update, delete, and evolve project knowledge.
+
+## Table of Contents
+
+- [Core Operations](#core-operations)
+- [Adding Memories](#adding-memories)
+- [Retrieving Memories](#retrieving-memories)
+- [Updating Memories](#updating-memories)
+- [Deleting Memories](#deleting-memories)
+- [Memory Evolution](#memory-evolution)
+- [Project Summaries](#project-summaries)
+- [Advanced Patterns](#advanced-patterns)
+- [Best Practices](#best-practices)
+
+---
+
+## Core Operations
+
+Memory Store provides seven core manual operations:
+
+1. **add_memory** - Create new memory
+2. **search_memories** - Find memories by query/filters
+3. **get_memory** - Retrieve specific memory by ID
+4. **update_memory** - Modify existing memory
+5. **delete_memory** - Remove memory
+6. **supersede_memory** - Create new memory that replaces old one
+7. **get_project_summary** - Get project overview
+
+Each operation is available via:
+- **MCP Tools** - For AI assistants (Claude Desktop, VS Code)
+- **HTTP API** - For web applications
+- **Python Service** - For direct integration
+
+---
+
+## Adding Memories
+
+### Basic Memory Creation
+
+**MCP Tool**:
+```python
+add_memory(
+    project_id="my-project",
+    memory_type="decision",
+    title="Use JWT for authentication",
+    content="Decided to use JWT tokens instead of session-based authentication",
+    reason="Need stateless authentication for mobile clients and microservices",
+    importance=0.9,
+    tags=["auth", "security", "architecture"]
+)
+```
+
+**HTTP API**:
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/add \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "my-project",
+    "memory_type": "decision",
+    "title": "Use JWT for authentication",
+    "content": "Decided to use JWT tokens instead of session-based authentication",
+    "reason": "Need stateless authentication for mobile clients",
+    "importance": 0.9,
+    "tags": ["auth", "security"]
+  }'
+```
+
+**Python Service**:
+```python
+from services.memory_store import memory_store
+
+result = await memory_store.add_memory(
+    project_id="my-project",
+    memory_type="decision",
+    title="Use JWT for authentication",
+    content="Decided to use JWT tokens",
+    reason="Need stateless auth for mobile clients",
+    importance=0.9,
+    tags=["auth", "security"]
+)
+
+memory_id = result['memory_id']
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "memory_id": "550e8400-e29b-41d4-a716-446655440000",
+  "type": "decision",
+  "title": "Use JWT for authentication"
+}
+```
+
+### Memory Types and Examples
+
+#### 1. Decision Memory
+**Use For**: Architecture choices, technology selections, design decisions
+
+```python
+add_memory(
+    project_id="web-app",
+    memory_type="decision",
+    title="Adopt microservices architecture",
+    content="Transitioning from monolith to microservices for user service, order service, and payment service",
+    reason="Need independent scaling and deployment. User service has different load patterns than order service",
+    importance=0.95,
+    tags=["architecture", "microservices", "scaling"],
+    related_refs=[
+        "ref://file/docs/architecture/microservices.md",
+        "ref://file/services/user-service/README.md"
+    ]
+)
+```
+
+#### 2. Preference Memory
+**Use For**: Team coding styles, tool preferences
+
+```python
+add_memory(
+    project_id="web-app",
+    memory_type="preference",
+    title="Use functional components in React",
+    content="Team prefers functional components with hooks over class components",
+    reason="Hooks provide better code reuse and easier testing. Team is more familiar with functional approach",
+    importance=0.6,
+    tags=["react", "frontend", "coding-style"]
+)
+```
+
+#### 3. Experience Memory
+**Use For**: Problems encountered and solutions
+
+```python
+add_memory(
+    project_id="web-app",
+    memory_type="experience",
+    title="Redis connection timeout in Docker Compose",
+    content="Redis connections were timing out when using 'localhost:6379' in Docker environment",
+    reason="Docker Compose networking requires using service name 'redis:6379' instead of 'localhost'. Docker creates an internal network where services communicate by name",
+    importance=0.7,
+    tags=["docker", "redis", "networking", "deployment"],
+    related_refs=["ref://file/docker-compose.yml#L15"]
+)
+```
+
+#### 4. Convention Memory
+**Use For**: Team rules, naming standards
+
+```python
+add_memory(
+    project_id="web-app",
+    memory_type="convention",
+    title="API endpoints use kebab-case",
+    content="All REST API endpoints must use kebab-case naming convention. Example: /api/user-profiles instead of /api/userProfiles",
+    reason="Consistency across API, better readability in URLs",
+    importance=0.5,
+    tags=["api", "naming", "conventions"]
+)
+```
+
+#### 5. Plan Memory
+**Use For**: Future improvements, TODOs
+
+```python
+add_memory(
+    project_id="web-app",
+    memory_type="plan",
+    title="Add rate limiting to public API",
+    content="Plan to implement rate limiting on all public API endpoints. Use Redis-based rate limiter with sliding window algorithm. Limit: 100 requests per minute per IP",
+    reason="Prevent API abuse and ensure fair usage",
+    importance=0.6,
+    tags=["api", "security", "todo", "performance"]
+)
+```
+
+#### 6. Note Memory
+**Use For**: General information
+
+```python
+add_memory(
+    project_id="web-app",
+    memory_type="note",
+    title="Production database backup location",
+    content="Production PostgreSQL backups are stored in S3 bucket 'prod-db-backups' with 30-day retention. Daily backups run at 2 AM UTC",
+    reason="Critical operational information for disaster recovery",
+    importance=0.7,
+    tags=["operations", "backup", "production"]
+)
+```
+
+### Advanced: Linking to Code References
+
+Use `ref://` handles to link memories to specific code:
+
+```python
+add_memory(
+    project_id="api-service",
+    memory_type="decision",
+    title="Use dependency injection for services",
+    content="Implemented dependency injection pattern for all service classes",
+    reason="Improves testability and reduces coupling",
+    importance=0.8,
+    tags=["architecture", "patterns"],
+    related_refs=[
+        "ref://file/src/core/container.py",
+        "ref://file/src/services/user_service.py#L25",
+        "ref://symbol/UserService",
+        "ref://symbol/inject_dependencies"
+    ]
+)
+```
+
+**Supported ref:// formats**:
+- `ref://file/path/to/file.py` - Link to entire file
+- `ref://file/path/to/file.py#L45` - Link to specific line
+- `ref://symbol/ClassName` - Link to class/function
+- `ref://file/path#section` - Link to section
+
+### Advanced: Custom Metadata
+
+Add custom metadata for application-specific needs:
+
+```python
+add_memory(
+    project_id="web-app",
+    memory_type="decision",
+    title="Switch to PostgreSQL 15",
+    content="Upgraded from PostgreSQL 13 to 15",
+    reason="Better performance and new JSON features",
+    importance=0.8,
+    tags=["database", "upgrade"],
+    metadata={
+        "migration_date": "2024-11-01",
+        "downtime": "5 minutes",
+        "rollback_plan": "restore from backup",
+        "approved_by": "tech-lead"
+    }
+)
+```
+
+---
+
+## Retrieving Memories
+
+### Get Specific Memory
+
+**MCP Tool**:
+```python
+get_memory(memory_id="550e8400-e29b-41d4-a716-446655440000")
+```
+
+**HTTP API**:
+```bash
+curl http://localhost:8000/api/v1/memory/550e8400-e29b-41d4-a716-446655440000
+```
+
+**Python Service**:
+```python
+result = await memory_store.get_memory("550e8400-e29b-41d4-a716-446655440000")
+
+if result['success']:
+    memory = result['memory']
+    print(f"Title: {memory['title']}")
+    print(f"Content: {memory['content']}")
+    print(f"Related refs: {memory['related_refs']}")
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "memory": {
+    "id": "550e8400-e29b-41d4-a716-446655440000",
+    "type": "decision",
+    "title": "Use JWT for authentication",
+    "content": "Decided to use JWT tokens...",
+    "reason": "Need stateless authentication...",
+    "tags": ["auth", "security"],
+    "importance": 0.9,
+    "created_at": "2025-11-06T10:00:00Z",
+    "updated_at": "2025-11-06T10:00:00Z",
+    "metadata": {},
+    "related_refs": [
+      {"type": "File", "path": "src/auth/jwt.py"}
+    ]
+  }
+}
+```
+
+---
+
+## Updating Memories
+
+### Update Memory Fields
+
+**MCP Tool**:
+```python
+update_memory(
+    memory_id="550e8400-e29b-41d4-a716-446655440000",
+    importance=0.95,
+    tags=["auth", "security", "critical", "production"]
+)
+```
+
+**HTTP API**:
+```bash
+curl -X PUT http://localhost:8000/api/v1/memory/550e8400-e29b-41d4-a716-446655440000 \
+  -H "Content-Type: application/json" \
+  -d '{
+    "importance": 0.95,
+    "tags": ["auth", "security", "critical"]
+  }'
+```
+
+**Python Service**:
+```python
+result = await memory_store.update_memory(
+    memory_id="550e8400-e29b-41d4-a716-446655440000",
+    title="Use JWT with refresh token rotation",
+    content="Updated implementation to include refresh token rotation",
+    importance=0.95,
+    tags=["auth", "security", "critical"]
+)
+```
+
+**Updatable Fields**:
+- `title` - Change the title
+- `content` - Update the content
+- `reason` - Modify the rationale
+- `tags` - Replace the tag list (tags are overwritten, not appended, so provide the full list)
+- `importance` - Adjust importance score
+
+**Note**: Only provided fields are updated. Omitted fields remain unchanged.
+
+### Common Update Scenarios
+
+#### Increase Importance After Production Issue
+```python
+# Security vulnerability discovered
+update_memory(
+    memory_id=auth_memory_id,
+    importance=1.0,
+    tags=["auth", "security", "critical", "vulnerability"]
+)
+```
+
+#### Add Context to Existing Memory
+```python
+# Found additional information
+update_memory(
+    memory_id=redis_memory_id,
+    content=original_content + "\n\nUpdate: Also affects Redis Sentinel configuration. Must use sentinel service names.",
+    reason=original_reason + " Additionally, Sentinel failover requires proper service name configuration."
+)
+```
+
+#### Reclassify Memory Importance
+```python
+# Initially thought important, but turned out routine
+update_memory(
+    memory_id=config_memory_id,
+    importance=0.4  # Downgrade from 0.7
+)
+```
+
+---
+
+## Deleting Memories
+
+### Delete Memory
+
+**MCP Tool**:
+```python
+delete_memory(memory_id="550e8400-e29b-41d4-a716-446655440000")
+```
+
+**HTTP API**:
+```bash
+curl -X DELETE http://localhost:8000/api/v1/memory/550e8400-e29b-41d4-a716-446655440000
+```
+
+**Python Service**:
+```python
+result = await memory_store.delete_memory("550e8400-e29b-41d4-a716-446655440000")
+
+if result['success']:
+    print("Memory deleted")
+else:
+    print(f"Error: {result['error']}")
+```
+
+**Note**: This is a **hard delete** - the memory is permanently removed from the database. For preserving history when decisions change, use `supersede_memory` instead.
+
+### When to Delete vs Supersede
+
+**Use Delete When**:
+- ❌ Memory was created by mistake
+- ❌ Information is completely wrong
+- ❌ Duplicate memory exists
+- ❌ Memory is obsolete and not worth preserving
+
+**Use Supersede When**:
+- ✅ Decision has changed and you want to preserve its history
+- ✅ Solution was improved and old approach is deprecated
+- ✅ Convention evolved and old one should be marked outdated
+
+---
+
+## Memory Evolution
+
+### Superseding Memories
+
+When decisions change, use `supersede_memory` to preserve history:
+
+**MCP Tool**:
+```python
+supersede_memory(
+    old_memory_id="abc-123-def-456",
+    new_memory_type="decision",
+    new_title="Migrate from MySQL to PostgreSQL",
+    new_content="Migrated from MySQL to PostgreSQL for production database",
+    new_reason="Need advanced JSON support, full-text search, and better geospatial features",
+    new_tags=["database", "postgresql", "migration"],
+    new_importance=0.9
+)
+```
+
+**HTTP API**:
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/supersede \
+  -H "Content-Type: application/json" \
+  -d '{
+    "old_memory_id": "abc-123-def-456",
+    "new_memory_type": "decision",
+    "new_title": "Migrate from MySQL to PostgreSQL",
+    "new_content": "Migrated from MySQL to PostgreSQL",
+    "new_reason": "Need advanced JSON and full-text search",
+    "new_importance": 0.9
+  }'
+```
+
+**Python Service**:
+```python
+result = await memory_store.supersede_memory(
+    old_memory_id="abc-123-def-456",
+    new_memory_data={
+        "memory_type": "decision",
+        "title": "Migrate from MySQL to PostgreSQL",
+        "content": "Migrated from MySQL to PostgreSQL",
+        "reason": "Need advanced features",
+        "tags": ["database", "postgresql"],
+        "importance": 0.9
+    }
+)
+
+new_memory_id = result['new_memory_id']
+old_memory_id = result['old_memory_id']
+```
+
+**What Happens**:
+1. A new memory is created with your data
+2. A `SUPERSEDES` relationship is created: `(new)-[:SUPERSEDES]->(old)`
+3. The old memory gets `superseded_by` and `superseded_at` fields
+4. The old memory remains in the database but is marked as superseded
+5. Both memories belong to the same project
+
+### Evolution Example: Database Decision
+
+**Phase 1: Original Decision (January)**
+```python
+mysql_memory = add_memory(
+    project_id="web-app",
+    memory_type="decision",
+    title="Use MySQL as primary database",
+    content="Selected MySQL for application database",
+    reason="Team familiarity and existing infrastructure",
+    importance=0.7,
+    tags=["database", "mysql"]
+)
+# memory_id: "original-123"
+```
+
+**Phase 2: Requirements Change (March)**
+```python
+postgres_v1 = supersede_memory(
+    old_memory_id="original-123",
+    new_memory_type="decision",
+    new_title="Migrate to PostgreSQL",
+    new_content="Migrating from MySQL to PostgreSQL",
+    new_reason="Need better JSON support and full-text search",
+    new_tags=["database", "postgresql", "migration"],
+    new_importance=0.8
+)
+# new_memory_id: "update-456"
+# "original-123" now marked as superseded
+```
+
+**Phase 3: Full Migration Complete (June)**
+```python
+postgres_v2 = supersede_memory(
+    old_memory_id="update-456",
+    new_memory_type="decision",
+    new_title="PostgreSQL 15 in production",
+    new_content="Completed migration to PostgreSQL 15. All services migrated",
+    new_reason="Migration successful. Using advanced features: JSONB, GiST indexes, full-text search",
+    new_tags=["database", "postgresql", "production"],
+    new_importance=0.9
+)
+# new_memory_id: "final-789"
+# "update-456" now marked as superseded
+```
+
+**Result**: Complete decision history preserved:
+```
+"original-123" (MySQL)
+    ← superseded by "update-456" (PostgreSQL migration)
+        ← superseded by "final-789" (PostgreSQL production)
+```
+
+---
+
+## Project Summaries
+
+### Get Project Overview
+
+**MCP Tool**:
+```python
+get_project_summary(project_id="web-app")
+```
+
+**HTTP API**:
+```bash
+curl http://localhost:8000/api/v1/memory/project/web-app/summary
+```
+
+**Python Service**:
+```python
+result = await memory_store.get_project_summary("web-app")
+
+summary = result['summary']
+print(f"Total memories: {summary['total_memories']}")
+
+for memory_type, data in summary['by_type'].items():
+    print(f"{memory_type}: {data['count']}")
+    for mem in data['top_memories'][:3]:
+        print(f"  - {mem['title']} (importance: {mem['importance']})")
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "summary": {
+    "project_id": "web-app",
+    "total_memories": 56,
+    "by_type": {
+      "decision": {
+        "count": 15,
+        "top_memories": [
+          {
+            "id": "...",
+            "title": "Adopt microservices architecture",
+            "importance": 0.95
+          },
+          {
+            "id": "...",
+            "title": "Use JWT for authentication",
+            "importance": 0.9
+          }
+        ]
+      },
+      "preference": {
+        "count": 8,
+        "top_memories": [...]
+      },
+      "experience": {
+        "count": 12,
+        "top_memories": [...]
+      },
+      "convention": {
+        "count": 6,
+        "top_memories": [...]
+      },
+      "plan": {
+        "count": 10,
+        "top_memories": [...]
+      },
+      "note": {
+        "count": 5,
+        "top_memories": [...]
+      }
+    }
+  }
+}
+```
+
+**Use Cases**:
+- Onboarding new team members or AI agents
+- Project health checks
+- Memory audit and cleanup
+- Understanding project knowledge distribution
+
+---
+
+## Advanced Patterns
+
+### Pattern 1: AI Agent Session Workflow
+
+```python
+# Start of session: Get context
+async def start_session(project_id: str, task_area: str):
+    # 1. Get project overview
+    summary = await memory_store.get_project_summary(project_id)
+    print(f"Project has {summary['summary']['total_memories']} memories")
+
+    # 2. Search for relevant context
+    context = await memory_store.search_memories(
+        project_id=project_id,
+        query=task_area,
+        min_importance=0.6
+    )
+
+    # 3. Review top decisions and experiences
+    for memory in context['memories'][:5]:
+        print(f"Relevant: {memory['title']}")
+
+    return context
+
+# During work: Check conventions
+async def check_conventions(project_id: str, area: str):
+    conventions = await memory_store.search_memories(
+        project_id=project_id,
+        memory_type="convention",
+        tags=[area]
+    )
+    return conventions
+
+# End of session: Save learnings
+async def save_learnings(project_id: str, new_knowledge: dict):
+    result = await memory_store.add_memory(
+        project_id=project_id,
+        **new_knowledge
+    )
+    return result['memory_id']
+```
+
+### Pattern 2: Memory Cleanup and Maintenance
+
+```python
+async def cleanup_low_value_memories(project_id: str):
+    """Remove low-importance notes older than 6 months"""
+
+    # Fetch notes at every importance level; the low-importance filter is applied below
+    old_notes = await memory_store.search_memories(
+        project_id=project_id,
+        memory_type="note",
+        min_importance=0.0,
+        limit=100
+    )
+
+    from datetime import datetime, timedelta
+    cutoff_date = datetime.utcnow() - timedelta(days=180)
+
+    deleted_count = 0
+    for memory in old_notes['memories']:
+        created_at = datetime.fromisoformat(memory['created_at'])
+
+        if memory['importance'] < 0.3 and created_at < cutoff_date:
+            await memory_store.delete_memory(memory['id'])
+            deleted_count += 1
+            print(f"Deleted old note: {memory['title']}")
+
+    return deleted_count
+```
+
+### Pattern 3: Memory Migration Between Projects
+
+```python
+async def migrate_memory(memory_id: str, from_project: str, to_project: str):
+    """Copy memory from one project to another"""
+
+    # Get original memory
+    result = await memory_store.get_memory(memory_id)
+    memory = result['memory']
+
+    # Create copy in new project
+    new_result = await memory_store.add_memory(
+        project_id=to_project,
+        memory_type=memory['type'],
+        title=f"[Migrated] {memory['title']}",
+        content=memory['content'],
+        reason=memory.get('reason'),
+        tags=memory.get('tags', []) + ['migrated'],
+        importance=memory.get('importance', 0.5),
+        metadata={
+            **memory.get('metadata', {}),
+            'migrated_from': from_project,
+            'original_memory_id': memory_id
+        }
+    )
+
+    return new_result['memory_id']
+```
+
+### Pattern 4: Tag-Based Memory Organization
+
+```python
+async def organize_by_tags(project_id: str):
+    """Get memories organized by tag"""
+
+    # Fetch the project's memories (capped at 100 by the limit below)
+    all_memories = await memory_store.search_memories(
+        project_id=project_id,
+        limit=100
+    )
+
+    # Organize by tag
+    by_tag = {}
+    for memory in all_memories['memories']:
+        for tag in memory.get('tags', []):
+            if tag not in by_tag:
+                by_tag[tag] = []
+            by_tag[tag].append({
+                'id': memory['id'],
+                'title': memory['title'],
+                'importance': memory['importance']
+            })
+
+    # Sort tags by memory count
+    sorted_tags = sorted(
+        by_tag.items(),
+        key=lambda x: len(x[1]),
+        reverse=True
+    )
+
+    print("Top tags:")
+    for tag, memories in sorted_tags[:10]:
+        print(f"{tag}: {len(memories)} memories")
+
+    return by_tag
+```
+
+---
+
+## Best Practices
+
+### 1. Importance Scoring
+
+**Be Consistent**:
+```python
+# Critical architecture decision
+importance=0.95
+
+# Important feature decision
+importance=0.8
+
+# Team preference
+importance=0.6
+
+# Minor convention
+importance=0.4
+
+# Future plan
+importance=0.3
+```
+
+**Adjust Over Time**:
+```python
+# Initially seemed important
+add_memory(..., importance=0.7)
+
+# Later found to be critical
+update_memory(memory_id, importance=0.95)
+```
+
+### 2. Effective Tagging
+
+**Use Hierarchical Tags**:
+```python
+tags = [
+    "auth",           # Domain
+    "security",       # Category
+    "jwt",            # Technology
+    "production",     # Environment
+    "critical"        # Status
+]
+```
+
+**Be Specific**:
+```python
+# ❌ Too vague
+tags = ["backend", "code"]
+
+# ✅ Specific and useful
+tags = ["auth", "jwt", "refresh-token", "security"]
+```
+
+### 3. Writing Good Content
+
+**Include Context**:
+```python
+# ❌ Too brief
+content = "Using Redis"
+
+# ✅ Comprehensive
+content = """
+Implementing Redis as caching layer for user sessions and API responses.
+
+Configuration:
+- Redis 7.0 in cluster mode
+- 3 master nodes, 3 replicas
+- Maxmemory policy: allkeys-lru
+- Persistence: RDB + AOF
+
+Cache strategy:
+- User sessions: TTL 24h
+- API responses: TTL 5min
+- Invalidation on data updates
+"""
+```
+
+**Explain Why**:
+```python
+# ❌ No rationale
+reason = "Better performance"
+
+# ✅ Clear rationale
+reason = """
+Need to reduce database load by 70%. Current response times averaging 500ms,
+target is 100ms. Redis provides:
+1. Sub-millisecond latency
+2. Horizontal scaling
+3. Built-in data structures
+4. Proven at scale by similar companies
+"""
+```
+
+### 4. Related References
+
+**Link to Relevant Code**:
+```python
+related_refs = [
+    "ref://file/src/cache/redis_client.py",
+    "ref://file/config/redis.yml",
+    "ref://file/docs/cache-strategy.md"
+]
+```
+
+**Be Specific**:
+```python
+# ✅ Point to exact implementation
+related_refs = [
+    "ref://file/src/auth/jwt.py#L45",  # JWT generation
+    "ref://symbol/verify_token"         # Verification function
+]
+```
+
+### 5. Memory Lifecycle Management
+
+**Regular Reviews**:
+```python
+# Monthly review
+async def monthly_review(project_id: str):
+    # Check for outdated plans
+    plans = await memory_store.search_memories(
+        project_id=project_id,
+        memory_type="plan",
+        min_importance=0.0
+    )
+
+    # Review and update or delete completed plans
+    for plan in plans['memories']:
+        print(f"Review: {plan['title']}")
+        # Manual decision: delete, update, or keep
+```
+
+**Update on Changes**:
+```python
+# When decision evolves, supersede instead of update
+# Preserves history
+await memory_store.supersede_memory(old_id, new_data)
+```
+
+---
+
+## Common Patterns
+
+### Pattern: Feature Development Workflow
+
+```python
+async def feature_workflow(project_id: str, feature_name: str):
+    # 1. Check existing decisions
+    decisions = await memory_store.search_memories(
+        project_id=project_id,
+        query=feature_name,
+        memory_type="decision"
+    )
+
+    # 2. Check conventions
+    conventions = await memory_store.search_memories(
+        project_id=project_id,
+        memory_type="convention"
+    )
+
+    # 3. Check past experiences
+    experiences = await memory_store.search_memories(
+        project_id=project_id,
+        query=feature_name,
+        memory_type="experience"
+    )
+
+    # 4. Implement feature...
+
+    # 5. Save new knowledge
+    await memory_store.add_memory(
+        project_id=project_id,
+        memory_type="decision",
+        title=f"Implemented {feature_name}",
+        content="...",
+        reason="...",
+        importance=0.7
+    )
+```
+
+### Pattern: Debugging Workflow
+
+```python
+async def document_bug_fix(project_id: str, bug_description: str, solution: str):
+    # Save the experience
+    result = await memory_store.add_memory(
+        project_id=project_id,
+        memory_type="experience",
+        title=f"Bug: {bug_description}",
+        content=f"Problem: {bug_description}\n\nSolution: {solution}",
+        reason="Prevent recurrence of this issue",
+        importance=0.7,
+        tags=["bug", "debugging"]
+    )
+
+    return result['memory_id']
+```
+
+---
+
+## Troubleshooting
+
+### Memory Not Found
+
+```python
+result = await memory_store.get_memory(memory_id)
+
+if not result['success']:
+    if "not found" in result['error'].lower():
+        print("Memory doesn't exist or was deleted")
+    else:
+        print(f"Error: {result['error']}")
+```
+
+### Update Not Applied
+
+```python
+# Make sure at least one field is provided
+result = await memory_store.update_memory(
+    memory_id=memory_id,
+    importance=0.9  # At least one field required
+)
+
+if not result['success']:
+    if "No updates" in result['error']:
+        print("Must provide at least one field to update")
+```
+
+### Search Returns No Results
+
+```python
+# Try broader search
+result = await memory_store.search_memories(
+    project_id=project_id,
+    query="auth",  # Remove filters
+    limit=100      # Increase limit
+)
+
+if result['total_count'] == 0:
+    print("No memories found for this project")
+```
+
+---
+
+## Next Steps
+
+- **Search Guide**: Learn advanced search strategies in [search.md](./search.md)
+- **Auto-Extraction**: Discover automatic memory extraction in [extraction.md](./extraction.md)
+- **API Reference**: Full API documentation at `/api/v1/memory`
+- **Examples**: See `/examples/memory_usage_example.py`
diff --git a/docs/guide/memory/overview.md b/docs/guide/memory/overview.md
new file mode 100644
index 0000000..4ca0268
--- /dev/null
+++ b/docs/guide/memory/overview.md
@@ -0,0 +1,653 @@
+# Memory Store Overview
+
+The Memory Store is a project knowledge persistence system designed specifically for AI agents to maintain continuity across development sessions. Unlike short-term conversation history, the Memory Store preserves curated, structured project knowledge.
+
+## Table of Contents
+
+- [What is Memory Store?](#what-is-memory-store)
+- [Why Memory Store Matters](#why-memory-store-matters)
+- [Core Concepts](#core-concepts)
+- [Memory Types](#memory-types)
+- [Architecture](#architecture)
+- [Operation Modes](#operation-modes)
+- [Quick Start](#quick-start)
+- [Use Cases](#use-cases)
+
+---
+
+## What is Memory Store?
+
+Memory Store is a Neo4j-based knowledge management system that allows AI agents and developers to:
+
+- **Save Important Decisions**: Architectural choices, technology selections, and their rationale
+- **Record Preferences**: Coding styles, tool choices, and team conventions
+- **Document Experiences**: Problems encountered and their solutions
+- **Track Plans**: Future improvements, TODOs, and roadmap items
+- **Preserve Context**: Maintain project knowledge across sessions, weeks, and months
+
+**Key Principle**: Memory = Structured Project Knowledge
+
+Instead of re-explaining project context every session, AI agents can search memories and immediately answer questions such as:
+- "Why did we choose PostgreSQL over MySQL?"
+- "What's our convention for API endpoint naming?"
+- "What Redis issues did we encounter in Docker?"
+
+---
+
+## Why Memory Store Matters
+
+### Problem: Context Loss Across Sessions
+
+Without Memory Store, AI agents suffer from:
+- ❌ Repeating the same questions every session
+- ❌ Forgetting why decisions were made
+- ❌ Making inconsistent choices
+- ❌ Re-encountering solved problems
+- ❌ Breaking established conventions
+
+### Solution: Long-term Project Memory
+
+With Memory Store, AI agents gain:
+- ✅ **Cross-session continuity** - Remember decisions from previous sessions
+- ✅ **Avoid repeating mistakes** - Recall past problems and solutions
+- ✅ **Maintain consistency** - Follow established patterns and conventions
+- ✅ **Track evolution** - Document how decisions change over time
+- ✅ **Preserve rationale** - Remember *why* something was done, not just *what*
+
+---
+
+## Core Concepts
+
+### 1. Memory as Knowledge
+
+Each memory represents a discrete piece of project knowledge:
+
+```python
+{
+  "id": "uuid-here",
+  "type": "decision",
+  "title": "Use JWT for authentication",
+  "content": "Decided to use JWT tokens instead of session-based auth",
+  "reason": "Need stateless authentication for mobile clients",
+  "importance": 0.9,
+  "tags": ["auth", "architecture"],
+  "created_at": "2025-11-06T10:30:00Z",
+  "updated_at": "2025-11-06T10:30:00Z"
+}
+```
+
+### 2. Project Organization
+
+Memories belong to projects, enabling multi-project knowledge management:
+
+```
+Project: web-app
+├── Decisions: 15 memories
+├── Preferences: 8 memories
+├── Experiences: 12 memories
+├── Conventions: 6 memories
+├── Plans: 10 memories
+└── Notes: 5 memories
+```
+
+### 3. Knowledge Evolution
+
+Memories can supersede each other, preserving decision history:
+
+```
+Original Decision (2024-01-15)
+  ↓ superseded by
+New Decision (2024-03-20)
+  ↓ superseded by
+Current Decision (2024-11-06)
+```
+
+### 4. Code Integration
+
+Memories can link to code via `ref://` handles:
+
+```python
+related_refs = [
+  "ref://file/src/auth/jwt.py",
+  "ref://symbol/authenticate_user",
+  "ref://file/config/database.py#L45"
+]
+```
+
+---
+
+## Memory Types
+
+The Memory Store supports six memory types, each serving a specific purpose:
+
+### 1. Decision
+**Purpose**: Architectural choices, technology selections, and major design decisions
+
+**Importance Range**: 0.7 - 1.0 (high importance)
+
+**Examples**:
+- "Use JWT tokens for stateless authentication"
+- "Adopt microservices architecture for scalability"
+- "Choose PostgreSQL over MySQL for JSON support"
+
+**When to Use**:
+- Making technology stack choices
+- Deciding on architectural patterns
+- Selecting third-party services or libraries
+- Establishing security policies
+
+### 2. Preference
+**Purpose**: Team coding styles, tool preferences, and development practices
+
+**Importance Range**: 0.5 - 0.7 (medium importance)
+
+**Examples**:
+- "Use raw SQL instead of ORM for database queries"
+- "Prefer functional components in React"
+- "Use kebab-case for API endpoint naming"
+
+**When to Use**:
+- Establishing coding style guidelines
+- Choosing between equivalent approaches
+- Setting team tool preferences
+- Defining code review standards
+
+### 3. Experience
+**Purpose**: Problems encountered and their solutions, bug fixes, gotchas
+
+**Importance Range**: 0.5 - 0.9 (varies by severity)
+
+**Examples**:
+- "Redis fails with 'localhost' in Docker - use service name instead"
+- "Large file uploads timeout - need to increase nginx client_max_body_size"
+- "Date parsing breaks in Safari - must use ISO 8601 format"
+
+**When to Use**:
+- Documenting bugs and their fixes
+- Recording deployment issues
+- Noting platform-specific quirks
+- Sharing debugging insights
+
+### 4. Convention
+**Purpose**: Team rules, naming standards, and established practices
+
+**Importance Range**: 0.4 - 0.6 (medium importance)
+
+**Examples**:
+- "All API endpoints must use kebab-case"
+- "Test files must be in __tests__ directory"
+- "Environment variables must use UPPER_SNAKE_CASE"
+
+**When to Use**:
+- Documenting naming conventions
+- Establishing file organization rules
+- Setting commit message standards
+- Defining code structure patterns
+
+### 5. Plan
+**Purpose**: Future improvements, TODOs, roadmap items
+
+**Importance Range**: 0.3 - 0.7 (varies by priority)
+
+**Examples**:
+- "Migrate to PostgreSQL 16 for performance improvements"
+- "Add rate limiting to public API endpoints"
+- "Refactor authentication middleware for better testability"
+
+**When to Use**:
+- Tracking technical debt
+- Planning future features
+- Recording optimization opportunities
+- Documenting refactoring needs
+
+### 6. Note
+**Purpose**: General information that doesn't fit other categories
+
+**Importance Range**: 0.2 - 0.8 (varies widely)
+
+**Examples**:
+- "Production database backups stored in S3 bucket prod-backups"
+- "Weekly deployment window is Thursdays 2-4 PM EST"
+- "API rate limit is 100 requests per minute per IP"
+
+**When to Use**:
+- Recording operational information
+- Documenting deployment procedures
+- Noting configuration details
+- Capturing miscellaneous knowledge
+
+---
+
+## Architecture
+
+### Storage: Neo4j Graph Database
+
+Memory Store uses Neo4j for flexible, connected knowledge storage:
+
+```cypher
+// Node Types
+(Memory)  - Individual memory record
+(Project) - Project container
+
+// Relationships
+(Memory)-[:BELONGS_TO]->(Project)
+(Memory)-[:SUPERSEDES]->(Memory)
+(Memory)-[:RELATES_TO]->(File)
+(Memory)-[:RELATES_TO]->(Symbol)
+```
+
+**Why Neo4j?**
+- **Graph Relationships**: Natural modeling of memory connections
+- **Fulltext Search**: Fast search across title, content, reason, tags
+- **Vector Integration**: Future support for semantic search
+- **Flexible Schema**: Easy to add new memory types and relationships
+
+### Components
+
+```
+┌─────────────────────────────────────────────────┐
+│           Application Layer                     │
+│  ┌──────────────┐  ┌──────────────┐            │
+│  │  MCP Server  │  │  HTTP API    │            │
+│  │  (30 tools)  │  │  (FastAPI)   │            │
+│  └──────────────┘  └──────────────┘            │
+└─────────────────────────────────────────────────┘
+                      ↓
+┌─────────────────────────────────────────────────┐
+│           Service Layer                         │
+│  ┌──────────────┐  ┌──────────────┐            │
+│  │MemoryStore   │  │MemoryExtractor│           │
+│  │  (manual)    │  │  (auto v0.7) │            │
+│  └──────────────┘  └──────────────┘            │
+└─────────────────────────────────────────────────┘
+                      ↓
+┌─────────────────────────────────────────────────┐
+│         Data Layer (Neo4j)                      │
+│                                                 │
+│  ┌────────┐  ┌────────┐  ┌────────┐           │
+│  │ Memory │──│Project │  │  Code  │           │
+│  │ Nodes  │  │ Nodes  │  │  Refs  │           │
+│  └────────┘  └────────┘  └────────┘           │
+└─────────────────────────────────────────────────┘
+```
+
+---
+
+## Operation Modes
+
+Memory Store operates in two modes based on your configuration:
+
+### Standard Mode (Fulltext Search)
+**Requirements**: Neo4j database only
+
+**Features**:
+- ✅ Add, update, delete memories
+- ✅ Fulltext search across title, content, reason, tags
+- ✅ Filter by type, tags, importance
+- ✅ Manual memory management (v0.6)
+- ✅ Automatic extraction (v0.7)
+
+**Limitations**:
+- ❌ No semantic similarity search
+- ❌ No embedding-based retrieval
+
+**Best For**: Projects that don't need semantic search
+
+### Full Mode (With Embeddings)
+**Requirements**: Neo4j + Embedding provider (OpenAI/Gemini/HuggingFace)
+
+**Features**:
+- ✅ All Standard Mode features
+- ✅ Semantic similarity search
+- ✅ Embedding-based memory retrieval
+- ✅ Find conceptually related memories
+
+**Best For**: Large projects with extensive knowledge bases
+
+**Configuration**:
+```bash
+# .env file
+EMBEDDING_PROVIDER=openai  # or gemini, huggingface
+OPENAI_API_KEY=your-key-here
+```
+
+---
+
+## Quick Start
+
+### 1. Using MCP Tools (Recommended for AI Agents)
+
+If you're using Claude Desktop, VSCode with MCP, or other MCP-compatible clients:
+
+```python
+# Add a decision memory
+add_memory(
+    project_id="my-project",
+    memory_type="decision",
+    title="Use JWT for authentication",
+    content="Decided to use JWT tokens for stateless auth",
+    reason="Need mobile client support and horizontal scaling",
+    importance=0.9,
+    tags=["auth", "security"]
+)
+
+# Search for memories
+search_memories(
+    project_id="my-project",
+    query="authentication",
+    memory_type="decision",
+    min_importance=0.7
+)
+
+# Get project summary
+get_project_summary(project_id="my-project")
+```
+
+### 2. Using HTTP API
+
+For web applications and custom integrations:
+
+```bash
+# Add a memory
+curl -X POST http://localhost:8000/api/v1/memory/add \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "my-project",
+    "memory_type": "decision",
+    "title": "Use JWT for authentication",
+    "content": "Decided to use JWT tokens for stateless auth",
+    "reason": "Need mobile client support",
+    "importance": 0.9,
+    "tags": ["auth", "security"]
+  }'
+
+# Search memories
+curl -X POST http://localhost:8000/api/v1/memory/search \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "my-project",
+    "query": "authentication",
+    "min_importance": 0.7
+  }'
+```
+
+### 3. Using Python Service Directly
+
+For Python applications:
+
+```python
+from services.memory_store import memory_store
+import asyncio
+
+async def main():
+    # Initialize
+    await memory_store.initialize()
+
+    # Add memory
+    result = await memory_store.add_memory(
+        project_id="my-project",
+        memory_type="decision",
+        title="Use JWT for authentication",
+        content="Decided to use JWT tokens",
+        reason="Need stateless auth",
+        importance=0.9,
+        tags=["auth"]
+    )
+
+    print(f"Added memory: {result['memory_id']}")
+
+    # Search
+    results = await memory_store.search_memories(
+        project_id="my-project",
+        query="authentication"
+    )
+
+    for memory in results['memories']:
+        print(f"- {memory['title']}")
+
+asyncio.run(main())
+```
+
+---
+
+## Use Cases
+
+### Use Case 1: AI Agent Development Session
+
+**Scenario**: AI agent starts working on a new feature
+
+**Workflow**:
+1. **Search memories** for related decisions and conventions
+2. **Review experiences** to avoid known issues
+3. **Implement feature** following established patterns
+4. **Save new learnings** as memories for future sessions
+
+**Example**:
+```python
+# Session starts
+memories = search_memories(
+    project_id="web-app",
+    query="database migration",
+    memory_type="experience"
+)
+# AI learns: "Always backup before migrations"
+
+# After implementation
+add_memory(
+    project_id="web-app",
+    memory_type="decision",
+    title="Use Alembic for database migrations",
+    content="Adopted Alembic for schema migrations",
+    reason="Better than custom scripts, team familiar with it",
+    importance=0.8
+)
+```
+
+### Use Case 2: Team Onboarding
+
+**Scenario**: A new team member or AI agent needs to understand the project
+
+**Workflow**:
+```python
+# Get project overview
+summary = get_project_summary(project_id="web-app")
+# Shows: 15 decisions, 8 preferences, 12 experiences
+
+# Review top decisions
+decisions = search_memories(
+    project_id="web-app",
+    memory_type="decision",
+    min_importance=0.8
+)
+# Quickly understand key architectural choices
+
+# Check coding conventions
+conventions = search_memories(
+    project_id="web-app",
+    memory_type="convention"
+)
+# Learn team standards and practices
+```
+
+### Use Case 3: Knowledge Evolution
+
+**Scenario**: A decision needs to change, but its history must be preserved
+
+**Workflow**:
+```python
+# Original decision
+old_memory = add_memory(
+    memory_type="decision",
+    title="Use MySQL as database",
+    importance=0.7
+)
+
+# Requirements change, decision evolves
+supersede_memory(
+    old_memory_id=old_memory['memory_id'],
+    new_memory_type="decision",
+    new_title="Migrate to PostgreSQL",
+    new_content="Switched from MySQL to PostgreSQL",
+    new_reason="Need advanced JSON support and full-text search",
+    new_importance=0.9
+)
+# Old decision preserved but marked as superseded
+# History maintained for audit trail
+```
+
+### Use Case 4: Bug Prevention
+
+**Scenario**: Team encounters a tricky bug, wants to prevent recurrence
+
+**Workflow**:
+```python
+# Document the experience
+add_memory(
+    project_id="mobile-app",
+    memory_type="experience",
+    title="iOS date parsing fails without explicit timezone",
+    content="Date.parse() in iOS Safari fails on dates without explicit timezone",
+    reason="Safari is stricter than Chrome about date formats",
+    importance=0.7,
+    tags=["ios", "safari", "datetime", "bug"],
+    related_refs=["ref://file/src/utils/dateParser.js"]
+)
+
+# Future sessions
+# AI agent searches for "date parsing" before implementing
+# Finds the experience, avoids the bug
+```
+
+### Use Case 5: Automatic Knowledge Capture (v0.7)
+
+**Scenario**: Extract memories from conversations, git history, and code
+
+**Workflow**:
+```python
+# Extract from conversation
+extract_from_conversation(
+    project_id="my-app",
+    conversation=[
+        {"role": "user", "content": "Should we use Redis or Memcached?"},
+        {"role": "assistant", "content": "Redis is better because..."}
+    ],
+    auto_save=True
+)
+# Automatically extracts and saves the decision
+
+# Extract from git commits
+extract_from_git_commit(
+    project_id="my-app",
+    commit_sha="abc123",
+    commit_message="feat: add JWT authentication",
+    changed_files=["src/auth/jwt.py"],
+    auto_save=True
+)
+# Extracts architectural decision from commit
+
+# Batch extract from repository
+batch_extract_from_repository(
+    project_id="my-app",
+    repo_path="/path/to/repo",
+    max_commits=50
+)
+# Comprehensive analysis: commits, comments, docs
+```
+
+---
+
+## Best Practices
+
+### 1. Importance Scoring Guidelines
+
+| Score | Category | Examples |
+|-------|----------|----------|
+| 0.9-1.0 | Critical | Security decisions, breaking changes, data model changes |
+| 0.7-0.8 | Important | Architecture choices, major features, API contracts |
+| 0.5-0.6 | Moderate | Preferences, conventions, common patterns |
+| 0.3-0.4 | Low | Plans, future work, minor notes |
+| 0.0-0.2 | Minimal | Temporary notes, experimental ideas |
+
+### 2. Tagging Strategy
+
+**Use Domain Tags**:
+```python
+tags = ["auth", "database", "api", "frontend", "backend"]
+```
+
+**Use Category Tags**:
+```python
+tags = ["security", "performance", "testing", "deployment"]
+```
+
+**Use Status Tags**:
+```python
+tags = ["critical", "deprecated", "experimental", "production"]
+```
+
+**Combine Multiple Levels**:
+```python
+tags = ["auth", "security", "jwt", "production", "critical"]
+```
+
+### 3. When to Create Memories
+
+**DO Create Memories For**:
+- ✅ Architecture decisions
+- ✅ Technology choices
+- ✅ Tricky bugs and solutions
+- ✅ Team conventions
+- ✅ Deployment procedures
+- ✅ Security findings
+- ✅ Performance optimizations
+
+**DON'T Create Memories For**:
+- ❌ Routine code changes
+- ❌ Trivial fixes
+- ❌ Temporary experiments
+- ❌ Information already in documentation
+- ❌ Standard best practices
+
+### 4. Memory Maintenance
+
+**Regular Review**:
+- Review memories every sprint/month
+- Update importance scores as project evolves
+- Supersede outdated decisions
+- Delete obsolete notes
+
+**Quality Over Quantity**:
+- Better to have 20 high-quality memories than 200 low-quality ones
+- Focus on non-obvious knowledge
+- Prioritize "why" over "what"
+
+---
+
+## Next Steps
+
+- **Manual Memory Management**: See [Manual Guide](./manual.md)
+- **Search Strategies**: See [Search Guide](./search.md)
+- **Automatic Extraction**: See [Extraction Guide](./extraction.md)
+- **API Reference**: See `/api/v1/memory` endpoints
+- **MCP Tools**: See MCP server documentation
+
+---
+
+## Version History
+
+- **v0.6** - Manual memory management with fulltext search
+- **v0.7** - Automatic extraction from conversations, commits, code comments
+  - `extract_from_conversation`: LLM-powered conversation analysis
+  - `extract_from_git_commit`: Analyze git commits for decisions
+  - `extract_from_code_comments`: Mine TODO, FIXME, NOTE markers
+  - `suggest_memory_from_query`: Auto-suggest from knowledge queries
+  - `batch_extract_from_repository`: Comprehensive repository analysis
+
+---
+
+## Support
+
+For issues or questions:
+- Check the documentation in `/docs/guide/memory/`
+- Review examples in `/examples/memory_usage_example.py`
+- See test cases in `/tests/test_memory_store.py`
diff --git a/docs/guide/memory/search.md b/docs/guide/memory/search.md
new file mode 100644
index 0000000..85c3f1a
--- /dev/null
+++ b/docs/guide/memory/search.md
@@ -0,0 +1,1001 @@
+# Memory Search Guide
+
+Comprehensive guide to searching and retrieving memories in the Memory Store. Learn fulltext search, filtering strategies, and advanced query patterns.
+
+## Table of Contents
+
+- [Search Overview](#search-overview)
+- [Basic Search](#basic-search)
+- [Search Filters](#search-filters)
+- [Search Modes](#search-modes)
+- [Advanced Patterns](#advanced-patterns)
+- [Search Strategies](#search-strategies)
+- [Performance Tips](#performance-tips)
+- [Troubleshooting](#troubleshooting)
+
+---
+
+## Search Overview
+
+Memory Store provides powerful search capabilities:
+
+**Search Methods**:
+- **Fulltext Search** - Search across title, content, reason, tags (Standard Mode)
+- **Vector Search** - Semantic similarity search (Full Mode with embeddings)
+
+**Filter Options**:
+- **Memory Type** - Filter by type (decision, preference, etc.)
+- **Tags** - Filter by one or more tags
+- **Importance** - Minimum importance threshold
+- **Limit** - Control number of results
+
+**Ranking**:
+- Search results are ranked by relevance score
+- Secondary sorting by importance and creation date
+
+---
+
+## Basic Search
+
+### Simple Text Search
+
+**MCP Tool**:
+```python
+search_memories(
+    project_id="my-project",
+    query="authentication"
+)
+```
+
+**HTTP API**:
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/search \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "my-project",
+    "query": "authentication"
+  }'
+```
+
+**Python Service**:
+```python
+from services.memory_store import memory_store
+
+result = await memory_store.search_memories(
+    project_id="my-project",
+    query="authentication"
+)
+
+for memory in result['memories']:
+    print(f"[{memory['type']}] {memory['title']}")
+    print(f"  Score: {memory['search_score']}")
+    print(f"  Importance: {memory['importance']}")
+```
+
+**Response**:
+```json
+{
+  "success": true,
+  "memories": [
+    {
+      "id": "550e8400-e29b-41d4-a716-446655440000",
+      "type": "decision",
+      "title": "Use JWT for authentication",
+      "content": "Decided to use JWT tokens...",
+      "reason": "Need stateless authentication...",
+      "tags": ["auth", "security", "jwt"],
+      "importance": 0.9,
+      "created_at": "2025-11-06T10:00:00Z",
+      "updated_at": "2025-11-06T10:00:00Z",
+      "search_score": 2.45
+    }
+  ],
+  "total_count": 5
+}
+```
+
+### Search Without Query
+
+Get all memories, sorted by importance:
+
+```python
+# Get all memories (no search query)
+result = await memory_store.search_memories(
+    project_id="my-project",
+    limit=20
+)
+
+# Returns memories sorted by importance, then creation date
+```
+
+---
+
+## Search Filters
+
+### Filter by Memory Type
+
+Find only decisions:
+
+```python
+search_memories(
+    project_id="my-project",
+    memory_type="decision"
+)
+```
+
+Find only experiences (bug fixes, gotchas):
+
+```python
+search_memories(
+    project_id="my-project",
+    memory_type="experience"
+)
+```
+
+### Filter by Tags
+
+Single tag:
+
+```python
+search_memories(
+    project_id="my-project",
+    tags=["security"]
+)
+```
+
+Multiple tags (OR logic - matches any tag):
+
+```python
+search_memories(
+    project_id="my-project",
+    tags=["security", "auth", "jwt"]
+)
+# Returns memories with ANY of these tags
+```
+
+### Filter by Importance
+
+Get only critical memories:
+
+```python
+search_memories(
+    project_id="my-project",
+    min_importance=0.9
+)
+```
+
+Get medium to high importance:
+
+```python
+search_memories(
+    project_id="my-project",
+    min_importance=0.6
+)
+```
+
+### Combine Filters
+
+```python
+# Find critical security decisions
+search_memories(
+    project_id="my-project",
+    query="authentication authorization",
+    memory_type="decision",
+    tags=["security"],
+    min_importance=0.8,
+    limit=10
+)
+```
+
+---
+
+## Search Modes
+
+### Standard Mode: Fulltext Search
+
+**Available in**: All installations
+
+**How it works**:
+- Uses Neo4j fulltext index
+- Searches across: title, content, reason, tags
+- Returns relevance score based on term frequency
+- Case-insensitive
+- Supports partial word matching
+
+**Example**:
+```python
+# Query: "redis cache"
+# Matches:
+#   - Title: "Redis configuration for caching"
+#   - Content: "...using Redis as cache layer..."
+#   - Tags: ["redis", "cache", "performance"]
+```
+
+**Search Syntax**:
+```python
+# Single word
+query="authentication"
+
+# Multiple words (AND logic)
+query="jwt token refresh"
+
+# Phrase search (use quotes in query string)
+query="'refresh token rotation'"
+
+# Wildcard (automatic partial matching)
+query="auth"  # Matches "authentication", "authorize", etc.
+```
+
+### Full Mode: Vector/Semantic Search
+
+**Available in**: Installations with embedding provider configured
+
+**How it works**:
+- Converts query to embedding vector
+- Finds semantically similar memories
+- Understands concept similarity
+- Language-independent
+
+**Example**:
+```python
+# Query: "user login system"
+# Semantically matches:
+#   - "JWT authentication implementation"
+#   - "OAuth 2.0 authorization"
+#   - "Session management strategy"
+# Even if exact words don't match
+```
+
+**Configuration**:
+```bash
+# .env file
+EMBEDDING_PROVIDER=openai
+OPENAI_API_KEY=your-key
+
+# Or use Gemini
+EMBEDDING_PROVIDER=gemini
+GEMINI_API_KEY=your-key
+```
+
+**Note**: Vector search is a planned feature (coming soon).
+
+---
+
+## Advanced Patterns
+
+### Pattern 1: Hierarchical Search
+
+Start broad, then narrow down:
+
+```python
+async def hierarchical_search(project_id: str, topic: str):
+    # Step 1: Broad search
+    broad = await memory_store.search_memories(
+        project_id=project_id,
+        query=topic,
+        limit=50
+    )
+
+    print(f"Found {broad['total_count']} total matches")
+
+    # Step 2: Filter for decisions only
+    decisions = [m for m in broad['memories'] if m['type'] == 'decision']
+    print(f"Found {len(decisions)} decisions")
+
+    # Step 3: Get high-importance only
+    critical = [m for m in decisions if m['importance'] >= 0.8]
+    print(f"Found {len(critical)} critical decisions")
+
+    return critical
+```
+
+### Pattern 2: Multi-Query Search
+
+Search multiple related terms:
+
+```python
+async def multi_query_search(project_id: str, queries: list):
+    all_results = {}
+
+    for query in queries:
+        result = await memory_store.search_memories(
+            project_id=project_id,
+            query=query
+        )
+
+        for memory in result['memories']:
+            memory_id = memory['id']
+            if memory_id not in all_results:
+                all_results[memory_id] = memory
+            else:
+                # Boost score for multiple matches
+                all_results[memory_id]['search_score'] += memory['search_score']
+
+    # Sort by combined score
+    sorted_results = sorted(
+        all_results.values(),
+        key=lambda m: m['search_score'],
+        reverse=True
+    )
+
+    return sorted_results
+
+# Usage
+results = await multi_query_search(
+    "my-project",
+    ["authentication", "user login", "jwt token"]
+)
+```
+
+### Pattern 3: Tag-Based Discovery
+
+Find all memories with a specific tag:
+
+```python
+async def discover_by_tag(project_id: str, tag: str):
+    result = await memory_store.search_memories(
+        project_id=project_id,
+        tags=[tag],
+        limit=100
+    )
+
+    # Group by type
+    by_type = {}
+    for memory in result['memories']:
+        mem_type = memory['type']
+        if mem_type not in by_type:
+            by_type[mem_type] = []
+        by_type[mem_type].append(memory)
+
+    # Show distribution
+    for mem_type, memories in by_type.items():
+        print(f"{mem_type}: {len(memories)}")
+        for m in memories[:3]:  # Top 3
+            print(f"  - {m['title']} (importance: {m['importance']})")
+
+    return by_type
+```
+
+### Pattern 4: Time-Based Search
+
+Find recent memories:
+
+```python
+from datetime import datetime, timedelta
+
+async def find_recent_memories(project_id: str, days: int = 7):
+    # Get all memories (search doesn't filter by date)
+    result = await memory_store.search_memories(
+        project_id=project_id,
+        limit=100
+    )
+
+    # Filter by date (timestamps are UTC; drop the "Z" suffix to compare as naive UTC)
+    cutoff = datetime.utcnow() - timedelta(days=days)
+    recent = []
+
+    for memory in result['memories']:
+        created_at = datetime.fromisoformat(memory['created_at'].rstrip("Z"))
+        if created_at > cutoff:
+            recent.append(memory)
+
+    return recent
+
+# Find what changed in last week
+recent = await find_recent_memories("my-project", days=7)
+print(f"Found {len(recent)} memories from last 7 days")
+```
+
+### Pattern 5: Related Memories
+
+Find memories related to a specific memory:
+
+```python
+async def find_related_memories(project_id: str, memory_id: str):
+    # Get original memory
+    original = await memory_store.get_memory(memory_id)
+    memory = original['memory']
+
+    # Search using same tags
+    related_by_tags = await memory_store.search_memories(
+        project_id=project_id,
+        tags=memory['tags'],
+        limit=20
+    )
+
+    # Search using title/content keywords
+    keywords = extract_keywords(memory['title'], memory['content'])
+    related_by_content = await memory_store.search_memories(
+        project_id=project_id,
+        query=" ".join(keywords),
+        limit=20
+    )
+
+    # Combine and deduplicate
+    all_related = {}
+    for m in related_by_tags['memories'] + related_by_content['memories']:
+        if m['id'] != memory_id:  # Exclude original
+            all_related[m['id']] = m
+
+    return list(all_related.values())
+
+def extract_keywords(title: str, content: str) -> list:
+    # Simple keyword extraction (can be improved)
+    import re
+    words = re.findall(r'\w+', (title + " " + content).lower())
+    # Remove common words
+    stopwords = {'the', 'a', 'an', 'and', 'or', 'but', 'in', 'on', 'at', 'to', 'for'}
+    keywords = [w for w in words if w not in stopwords and len(w) > 3]
+    # Return top 10 most frequent
+    from collections import Counter
+    return [word for word, count in Counter(keywords).most_common(10)]
+```
+
+---
+
+## Search Strategies
+
+### Strategy 1: Task-Based Search
+
+When starting a task, search for relevant context:
+
+```python
+async def search_for_task(project_id: str, task_description: str):
+    """Search memories relevant to a task"""
+
+    # 1. Search for related decisions
+    print("Searching for related decisions...")
+    decisions = await memory_store.search_memories(
+        project_id=project_id,
+        query=task_description,
+        memory_type="decision",
+        min_importance=0.6
+    )
+
+    # 2. Search for conventions
+    print("Checking team conventions...")
+    conventions = await memory_store.search_memories(
+        project_id=project_id,
+        memory_type="convention"
+    )
+
+    # 3. Search for past experiences
+    print("Looking for past experiences...")
+    experiences = await memory_store.search_memories(
+        project_id=project_id,
+        query=task_description,
+        memory_type="experience"
+    )
+
+    return {
+        'decisions': decisions['memories'],
+        'conventions': conventions['memories'],
+        'experiences': experiences['memories']
+    }
+
+# Usage
+context = await search_for_task(
+    "web-app",
+    "implement user authentication with OAuth"
+)
+
+print(f"Found {len(context['decisions'])} relevant decisions")
+print(f"Found {len(context['conventions'])} conventions to follow")
+print(f"Found {len(context['experiences'])} past experiences")
+```
+
+### Strategy 2: Progressive Refinement
+
+Start broad, refine based on results:
+
+```python
+async def progressive_search(project_id: str, initial_query: str):
+    # Round 1: Broad search
+    print(f"Searching: {initial_query}")
+    round1 = await memory_store.search_memories(
+        project_id=project_id,
+        query=initial_query,
+        limit=50
+    )
+
+    if round1['total_count'] == 0:
+        print("No results, broadening search...")
+        # Try single words from query
+        words = initial_query.split()
+        for word in words:
+            result = await memory_store.search_memories(
+                project_id=project_id,
+                query=word,
+                limit=10
+            )
+            if result['total_count'] > 0:
+                print(f"Found results for: {word}")
+                return result
+
+    elif round1['total_count'] > 20:
+        print("Too many results, refining...")
+        # Add importance filter
+        round2 = await memory_store.search_memories(
+            project_id=project_id,
+            query=initial_query,
+            min_importance=0.7,
+            limit=50
+        )
+        return round2
+
+    return round1
+```
+
+### Strategy 3: Category-First Search
+
+Search by category, then by content:
+
+```python
+async def category_first_search(project_id: str, category: str, query: str):
+    """Search within a specific category first"""
+
+    # Map category to memory type and tags
+    category_mapping = {
+        'security': {
+            'types': ['decision', 'experience', 'convention'],
+            'tags': ['security', 'auth', 'encryption']
+        },
+        'database': {
+            'types': ['decision', 'preference', 'experience'],
+            'tags': ['database', 'sql', 'migration']
+        },
+        'api': {
+            'types': ['decision', 'convention'],
+            'tags': ['api', 'rest', 'graphql']
+        }
+    }
+
+    config = category_mapping.get(category, {})
+
+    # Search within category
+    results = []
+    for mem_type in config.get('types', []):
+        result = await memory_store.search_memories(
+            project_id=project_id,
+            query=query,
+            memory_type=mem_type,
+            tags=config.get('tags'),
+            limit=20
+        )
+        results.extend(result['memories'])
+
+    # Sort by relevance
+    results.sort(key=lambda m: m['search_score'], reverse=True)
+
+    return results
+
+# Usage
+security_results = await category_first_search(
+    "my-project",
+    "security",
+    "password hashing"
+)
+```
+
+### Strategy 4: Importance-Weighted Search
+
+Prioritize critical memories:
+
+```python
+async def importance_weighted_search(project_id: str, query: str):
+    """Search with importance-weighted scoring"""
+
+    result = await memory_store.search_memories(
+        project_id=project_id,
+        query=query,
+        limit=50
+    )
+
+    # Calculate weighted score
+    for memory in result['memories']:
+        search_score = memory['search_score']
+        importance = memory['importance']
+
+        # Weighted score: 70% relevance, 30% importance
+        memory['weighted_score'] = (search_score * 0.7) + (importance * 10 * 0.3)
+
+    # Re-sort by weighted score
+    result['memories'].sort(key=lambda m: m['weighted_score'], reverse=True)
+
+    return result
+
+# Critical memories will rank higher even if search score is lower
+```
+
+### Strategy 5: Type-Specific Search
+
+Different search strategies for different memory types:
+
+```python
+async def type_specific_search(project_id: str):
+    """Use different search strategies per type"""
+
+    # For decisions: prioritize high importance
+    decisions = await memory_store.search_memories(
+        project_id=project_id,
+        memory_type="decision",
+        min_importance=0.7,
+        limit=20
+    )
+
+    # For experiences: get all (even low importance can be useful)
+    experiences = await memory_store.search_memories(
+        project_id=project_id,
+        memory_type="experience",
+        min_importance=0.0,
+        limit=50
+    )
+
+    # For conventions: latest first
+    conventions = await memory_store.search_memories(
+        project_id=project_id,
+        memory_type="convention",
+        limit=20
+    )
+    # Sort by creation date
+    conventions['memories'].sort(
+        key=lambda m: m['created_at'],
+        reverse=True
+    )
+
+    # For plans: filter out old ones
+    plans = await memory_store.search_memories(
+        project_id=project_id,
+        memory_type="plan",
+        limit=30
+    )
+    from datetime import datetime, timedelta
+    cutoff = datetime.utcnow() - timedelta(days=90)
+    recent_plans = [
+        p for p in plans['memories']
+        if datetime.fromisoformat(p['created_at']) > cutoff
+    ]
+
+    return {
+        'decisions': decisions['memories'],
+        'experiences': experiences['memories'],
+        'conventions': conventions['memories'],
+        'plans': recent_plans
+    }
+```
+
+---
+
+## Performance Tips
+
+### 1. Use Appropriate Limits
+
+```python
+# For quick overview
+search_memories(project_id, query="auth", limit=10)
+
+# For comprehensive search
+search_memories(project_id, query="auth", limit=50)
+
+# For exhaustive search (use sparingly)
+search_memories(project_id, query="auth", limit=100)
+```
+
+**Recommendation**: Start with limit=20, increase if needed
+
+### 2. Filter Early
+
+```python
+# ❌ Slower: Get all, filter in Python
+all_results = await memory_store.search_memories(project_id, limit=100)
+critical = [m for m in all_results['memories'] if m['importance'] >= 0.8]
+
+# ✅ Faster: Filter in database
+critical = await memory_store.search_memories(
+    project_id=project_id,
+    min_importance=0.8,
+    limit=20
+)
+```
+
+### 3. Reuse Search Results
+
+```python
+# Cache search results if doing multiple operations
+search_cache = {}
+
+async def cached_search(project_id: str, query: str):
+    cache_key = f"{project_id}:{query}"
+
+    if cache_key not in search_cache:
+        result = await memory_store.search_memories(project_id, query=query)
+        search_cache[cache_key] = result
+
+    return search_cache[cache_key]
+```
+
+### 4. Use Specific Queries
+
+```python
+# ❌ Too vague, returns many irrelevant results
+query="system"
+
+# ✅ Specific, returns focused results
+query="authentication system jwt implementation"
+```
+
+### 5. Leverage Tags
+
+```python
+# ❌ Broad search
+search_memories(project_id, query="bug")
+
+# ✅ Narrow with tags
+search_memories(
+    project_id,
+    query="timeout",
+    tags=["database", "performance"],
+    memory_type="experience"
+)
+```
+
+---
+
+## Search Result Ranking
+
+### Default Ranking
+
+Results are sorted by:
+1. **Search Score** (primary) - Relevance to query
+2. **Importance** (secondary) - Memory importance
+3. **Created Date** (tertiary) - Newer first
+
+### Understanding Search Scores
+
+```python
+for memory in result['memories']:
+    score = memory['search_score']
+
+    if score > 3.0:
+        print("Excellent match")
+    elif score > 2.0:
+        print("Good match")
+    elif score > 1.0:
+        print("Moderate match")
+    else:
+        print("Weak match")
+```
+
+**Score Factors**:
+- Term frequency in title (highest weight)
+- Term frequency in content
+- Term frequency in reason
+- Tag matches
+- Exact phrase matches
+
+### Custom Ranking
+
+Implement custom ranking logic:
+
+```python
+async def custom_ranked_search(project_id: str, query: str, preferences: dict):
+    result = await memory_store.search_memories(
+        project_id=project_id,
+        query=query,
+        limit=50
+    )
+
+    # Custom scoring
+    for memory in result['memories']:
+        score = 0
+
+        # Base search score
+        score += memory['search_score'] * preferences.get('relevance_weight', 0.5)
+
+        # Importance factor
+        score += memory['importance'] * 10 * preferences.get('importance_weight', 0.3)
+
+        # Recency factor
+        from datetime import datetime
+        age_days = (datetime.utcnow() - datetime.fromisoformat(memory['created_at'])).days
+        recency_score = max(0, 1 - (age_days / 365))  # Decay over 1 year
+        score += recency_score * 10 * preferences.get('recency_weight', 0.2)
+
+        # Type preference
+        type_weights = preferences.get('type_weights', {})
+        score += type_weights.get(memory['type'], 1.0)
+
+        memory['custom_score'] = score
+
+    # Sort by custom score
+    result['memories'].sort(key=lambda m: m['custom_score'], reverse=True)
+
+    return result
+
+# Usage: Prioritize recent, high-importance decisions
+results = await custom_ranked_search(
+    "my-project",
+    "database migration",
+    preferences={
+        'relevance_weight': 0.4,
+        'importance_weight': 0.4,
+        'recency_weight': 0.2,
+        'type_weights': {
+            'decision': 2.0,
+            'experience': 1.5,
+            'preference': 1.0
+        }
+    }
+)
+```
+
+---
+
+## Troubleshooting
+
+### No Results Found
+
+**Problem**: Search returns 0 results
+
+**Solutions**:
+
+```python
+# 1. Try broader query
+search_memories(project_id, query="auth")  # Instead of "authentication jwt token"
+
+# 2. Remove filters
+search_memories(project_id, query="auth")  # Remove memory_type, tags filters
+
+# 3. Check if project has any memories
+summary = await memory_store.get_project_summary(project_id)
+print(f"Total memories: {summary['summary']['total_memories']}")
+
+# 4. Verify project_id is correct
+```
+
+### Too Many Results
+
+**Problem**: Search returns hundreds of low-relevance results
+
+**Solutions**:
+
+```python
+# 1. Add importance filter
+search_memories(project_id, query="database", min_importance=0.7)
+
+# 2. Add type filter
+search_memories(project_id, query="database", memory_type="decision")
+
+# 3. Add tag filter
+search_memories(project_id, query="database", tags=["postgresql"])
+
+# 4. Use more specific query
+search_memories(project_id, query="postgresql migration script")
+```
+
+### Low Relevance Results
+
+**Problem**: Results don't match what you're looking for
+
+**Solutions**:
+
+```python
+# 1. Use exact phrases
+search_memories(project_id, query="'refresh token rotation'")
+
+# 2. Use multiple specific keywords
+search_memories(project_id, query="oauth refresh token jwt")
+
+# 3. Combine query with filters
+search_memories(
+    project_id,
+    query="token",
+    tags=["auth", "security"],
+    memory_type="decision"
+)
+```
+
+### Slow Search
+
+**Problem**: Search takes too long
+
+**Solutions**:
+
+```python
+# 1. Reduce limit
+search_memories(project_id, query="auth", limit=20)  # Instead of 100
+
+# 2. Add filters to narrow scope
+search_memories(
+    project_id,
+    query="auth",
+    memory_type="decision",
+    min_importance=0.7
+)
+
+# 3. Check database indexes
+# Ensure Neo4j fulltext index exists (automatic in Memory Store)
+```
+
+---
+
+## Best Practices
+
+### 1. Start Broad, Then Narrow
+
+```python
+# First search: broad
+broad = await search_memories(project_id, query="authentication")
+
+# Analyze results
+if len(broad['memories']) > 30:
+    # Too many, narrow down
+    narrow = await search_memories(
+        project_id,
+        query="authentication",
+        memory_type="decision",
+        min_importance=0.8
+    )
+```
+
+### 2. Use Type Filters Appropriately
+
+```python
+# Looking for past decisions
+search_memories(project_id, query="database", memory_type="decision")
+
+# Looking for known issues
+search_memories(project_id, query="timeout", memory_type="experience")
+
+# Looking for standards
+search_memories(project_id, memory_type="convention")
+```
+
+### 3. Tag Strategically
+
+```python
+# Search by domain
+search_memories(project_id, tags=["auth"])
+
+# Search by technology
+search_memories(project_id, tags=["redis", "cache"])
+
+# Search by status
+search_memories(project_id, tags=["critical", "production"])
+```
+
+### 4. Consider Importance Thresholds
+
+```python
+# Critical only
+search_memories(project_id, min_importance=0.9)
+
+# Important and above
+search_memories(project_id, min_importance=0.7)
+
+# All memories (including low importance)
+search_memories(project_id, min_importance=0.0)
+```
+
+### 5. Check Search Quality
+
+```python
+result = await search_memories(project_id, query="authentication")
+
+# Review top results
+print("Top 5 results:")
+for memory in result['memories'][:5]:
+    print(f"Score: {memory['search_score']:.2f}")
+    print(f"  {memory['title']}")
+    print(f"  Type: {memory['type']}, Importance: {memory['importance']}")
+
+# If top results aren't relevant, refine query
+```
+
+---
+
+## Next Steps
+
+- **Manual Management**: See [manual.md](./manual.md) for CRUD operations
+- **Auto-Extraction**: See [extraction.md](./extraction.md) for automatic memory capture
+- **Overview**: See [overview.md](./overview.md) for system introduction
diff --git a/docs/index.md b/docs/index.md
new file mode 100644
index 0000000..66026b4
--- /dev/null
+++ b/docs/index.md
@@ -0,0 +1,203 @@
+# Code Graph Knowledge System
+
+<div align="center">
+
+**AI-Powered Code Intelligence & Knowledge Management**
+
+[![Docker Hub](https://img.shields.io/docker/pulls/royisme/codebase-rag?style=flat-square)](https://hub.docker.com/r/royisme/codebase-rag)
+[![GitHub](https://img.shields.io/github/stars/royisme/codebase-rag?style=flat-square)](https://github.com/royisme/codebase-rag)
+[![License](https://img.shields.io/badge/license-MIT-blue.svg?style=flat-square)](https://github.com/royisme/codebase-rag/blob/main/LICENSE)
+
+</div>
+
+---
+
+## What is Code Graph Knowledge System?
+
+Code Graph Knowledge System is an enterprise-grade solution that transforms unstructured development documentation and code into a structured, queryable knowledge graph. By combining **vector search**, **graph database technology**, and **large language models**, it provides intelligent code analysis, documentation management, and development assistance capabilities.
+
+## ✨ Key Features
+
+### 🎯 Deployment Modes
+
+Choose the right deployment mode for your needs:
+
+| Feature | Minimal | Standard | Full |
+|---------|---------|----------|------|
+| **Code Graph** | | | |
+| └ Repository ingestion | ✅ | ✅ | ✅ |
+| └ File relationship search | ✅ | ✅ | ✅ |
+| └ Impact analysis | ✅ | ✅ | ✅ |
+| └ Context packing | ✅ | ✅ | ✅ |
+| **Memory Store** | | | |
+| └ Manual management | ❌ | ✅ | ✅ |
+| └ Vector search | ❌ | ✅ | ✅ |
+| └ Auto extraction (Git) | ❌ | ❌ | ✅ |
+| └ Auto extraction (Conversations) | ❌ | ❌ | ✅ |
+| **Knowledge RAG** | | | |
+| └ Document vectorization | ❌ | ❌ | ✅ |
+| └ Intelligent Q&A | ❌ | ❌ | ✅ |
+| **Requirements** | | | |
+| └ Neo4j | ✅ | ✅ | ✅ |
+| └ Embedding Model | ❌ | ✅ | ✅ |
+| └ LLM | ❌ | ❌ | ✅ |
+| **Image Size** | ~500MB | ~600MB | ~800MB |
+| **Startup Time** | ~5s | ~8s | ~15s |
+
+### 🚀 Core Capabilities
+
+=== "Code Graph"
+    **No LLM Required** - Pure graph-based code intelligence
+
+    - **Repository Ingestion**: Parse and index entire codebases
+    - **Relationship Discovery**: Find file dependencies and imports
+    - **Impact Analysis**: Understand the blast radius of changes
+    - **Context Packing**: Generate AI-friendly context bundles
+
+    ```bash
+    # Start minimal deployment
+    make docker-minimal
+    ```
+
+=== "Memory Store"
+    **Embedding Required** - Long-term project knowledge
+
+    - **Manual Memory Management**: Add, search, update memories
+    - **Vector Search**: Find relevant project decisions
+    - **Auto Extraction**: Extract from git commits and conversations
+    - **Knowledge Evolution**: Track decision changes over time
+
+    ```bash
+    # Start standard deployment
+    make docker-standard
+    ```
+
+=== "Knowledge RAG"
+    **LLM + Embedding Required** - Full AI capabilities
+
+    - **Document Processing**: Index documentation and code
+    - **Intelligent Q&A**: Answer questions about your codebase
+    - **Multi-format Support**: Markdown, PDF, code files
+    - **Hybrid Search**: Combine vector and graph traversal
+
+    ```bash
+    # Start full deployment
+    make docker-full
+    ```
+
+## 🎯 Quick Start
+
+### 1. Choose Your Deployment Mode
+
+```bash
+# Minimal - Code Graph only (No LLM needed)
+make docker-minimal
+
+# Standard - Code Graph + Memory (Embedding needed)
+make docker-standard
+
+# Full - All features (LLM + Embedding needed)
+make docker-full
+```
+
+### 2. Access the System
+
+- **API Documentation**: [http://localhost:8000/docs](http://localhost:8000/docs)
+- **Neo4j Browser**: [http://localhost:7474](http://localhost:7474)
+- **Health Check**: [http://localhost:8000/api/v1/health](http://localhost:8000/api/v1/health)
+
+### 3. Ingest Your Repository
+
+=== "MCP (Claude Desktop)"
+    ```javascript
+    // In Claude Desktop, use MCP tools:
+    code_graph_ingest_repo({
+      "local_path": "/path/to/your/repo",
+      "mode": "incremental"
+    })
+    ```
+
+=== "REST API"
+    ```bash
+    curl -X POST http://localhost:8000/api/v1/code-graph/ingest \
+      -H "Content-Type: application/json" \
+      -d '{
+        "local_path": "/path/to/your/repo",
+        "mode": "incremental"
+      }'
+    ```
+
+## 🎨 Use Cases
+
+### For Individual Developers
+
+- **Learn Large Codebases**: Quickly understand unfamiliar projects
+- **Code Navigation**: Find relationships and dependencies
+- **Impact Assessment**: See what breaks before making changes
+
+### For Development Teams
+
+- **Project Knowledge Base**: Preserve team decisions and context
+- **Onboarding**: Help new team members get up to speed
+- **Documentation**: Auto-generate context for AI coding assistants
+
+### For AI-Assisted Development
+
+- **Claude Desktop Integration**: Use as MCP server for enhanced code understanding
+- **VS Code Integration**: Access code graph directly in your editor
+- **Context Generation**: Create optimal context for LLM queries
+
+## 📚 Documentation
+
+- [**Quick Start Guide**](getting-started/quickstart.md) - Get running in 5 minutes
+- [**Deployment Overview**](deployment/overview.md) - Choose the right mode
+- [**Code Graph Guide**](guide/code-graph/overview.md) - Learn code intelligence features
+- [**MCP Integration**](guide/mcp/overview.md) - Use with Claude Desktop
+- [**API Reference**](api/mcp-tools.md) - Complete tool documentation
+
+## 🌟 Why Code Graph Knowledge System?
+
+### 🎯 Flexible Architecture
+
+- **No Vendor Lock-in**: Use Ollama, OpenAI, Gemini, or any LLM
+- **Scalable**: From single developer to enterprise teams
+- **Modular**: Only use what you need
+
+### 🚀 Performance Optimized
+
+- **Incremental Updates**: 60x faster than full re-indexing
+- **Smart Caching**: Reduce redundant processing
+- **Efficient Storage**: Neo4j native vector indexes
+
+### 🔒 Privacy Focused
+
+- **Self-Hosted**: Keep your code on your infrastructure
+- **No Data Leaks**: Optional local LLM support
+- **Secure**: Enterprise-grade Neo4j backend
+
+## 🛠️ Technology Stack
+
+- **Backend**: Python 3.13, FastAPI
+- **Database**: Neo4j 5.15+ with APOC
+- **AI**: LlamaIndex, Multiple LLM providers
+- **Protocol**: Model Context Protocol (MCP)
+- **Deployment**: Docker, Docker Compose
+
+## 🤝 Community
+
+- **GitHub**: [royisme/codebase-rag](https://github.com/royisme/codebase-rag)
+- **Docker Hub**: [royisme/codebase-rag](https://hub.docker.com/r/royisme/codebase-rag)
+- **Issues**: [Report bugs](https://github.com/royisme/codebase-rag/issues)
+- **Discussions**: [Community forum](https://github.com/royisme/codebase-rag/discussions)
+
+## 📝 License
+
+This project is licensed under the MIT License - see the [LICENSE](https://github.com/royisme/codebase-rag/blob/main/LICENSE) file for details.
+
+---
+
+<div align="center">
+
+**Ready to get started?** → [Quick Start Guide](getting-started/quickstart.md)
+
+</div>
diff --git a/docs/troubleshooting.md b/docs/troubleshooting.md
new file mode 100644
index 0000000..e0df1b1
--- /dev/null
+++ b/docs/troubleshooting.md
@@ -0,0 +1,1245 @@
+# Troubleshooting Guide
+
+This guide helps you diagnose and resolve common issues with the Code Graph Knowledge System.
+
+## Table of Contents
+
+- [Quick Diagnostics](#quick-diagnostics)
+- [Connection Issues](#connection-issues)
+- [Docker Deployment Issues](#docker-deployment-issues)
+- [Neo4j Problems](#neo4j-problems)
+- [LLM Provider Issues](#llm-provider-issues)
+- [Performance Problems](#performance-problems)
+- [Memory Issues](#memory-issues)
+- [MCP Server Problems](#mcp-server-problems)
+- [API Errors](#api-errors)
+- [Installation Issues](#installation-issues)
+- [Data and Storage Issues](#data-and-storage-issues)
+- [Common Error Messages](#common-error-messages)
+
+## Quick Diagnostics
+
+### Health Check
+
+Start with the health check endpoint:
+
+```bash
+curl http://localhost:8000/api/v1/health
+```
+
+**Healthy Response:**
+```json
+{
+  "status": "healthy",
+  "neo4j": "connected",
+  "llm_provider": "ollama",
+  "version": "0.7.0"
+}
+```
+
+**Unhealthy Response:**
+```json
+{
+  "status": "unhealthy",
+  "neo4j": "disconnected",
+  "llm_provider": "error",
+  "error": "Connection timeout"
+}
+```
+
+### System Check Command
+
+```bash
+# Check all services
+python start.py --check
+
+# Check logs
+tail -f logs/application.log
+
+# Check Docker containers
+docker ps -a
+docker logs code-graph-api
+docker logs code-graph-neo4j
+```
+
+### Common Issues Quick Reference
+
+| Symptom | Likely Cause | Quick Fix |
+|---------|--------------|-----------|
+| Application won't start | Missing dependencies | `uv pip install -e .` |
+| Connection timeout | Neo4j not running | `docker-compose up -d neo4j` |
+| 502 Bad Gateway | Service crashed | Check logs, restart service |
+| Slow responses | Memory/CPU limits | Increase Docker resources |
+| Import errors | Wrong Python path | `export PYTHONPATH=$PWD` |
+| API 500 errors | Configuration issue | Check .env file |
+
+## Connection Issues
+
+### Neo4j Connection Timeout
+
+**Symptom:**
+```
+neo4j.exceptions.ServiceUnavailable: Failed to establish connection to bolt://localhost:7687
+```
+
+**Diagnosis:**
+```bash
+# Check if Neo4j is running
+docker ps | grep neo4j
+# or
+sudo systemctl status neo4j
+
+# Try connecting manually
+docker exec -it code-graph-neo4j cypher-shell -u neo4j -p yourpassword
+```
+
+**Solutions:**
+
+1. **Neo4j not running:**
+   ```bash
+   # Docker
+   docker-compose up -d neo4j
+
+   # Native
+   sudo systemctl start neo4j
+   ```
+
+2. **Wrong connection details:**
+   ```bash
+   # Check .env file
+   cat .env | grep NEO4J
+
+   # Should be:
+   NEO4J_URI=bolt://localhost:7687
+   NEO4J_USER=neo4j
+   NEO4J_PASSWORD=your-password
+   ```
+
+3. **Neo4j not ready yet:**
+   ```bash
+   # Wait for Neo4j to fully start (30-60 seconds)
+   docker logs -f code-graph-neo4j
+   # Look for: "Started."
+   ```
+
+4. **Firewall blocking connection:**
+   ```bash
+   # Check port accessibility
+   telnet localhost 7687
+   nc -zv localhost 7687
+
+   # Allow port in firewall
+   sudo ufw allow 7687
+   ```
+
+5. **Docker network issues:**
+   ```bash
+   # Check network
+   docker network ls
+   docker network inspect code-graph_default
+
+   # Recreate network
+   docker-compose down
+   docker-compose up -d
+   ```
+
+### Ollama Connection Failed
+
+**Symptom:**
+```
+httpx.ConnectError: [Errno 111] Connection refused
+```
+
+**Diagnosis:**
+```bash
+# Check if Ollama is running
+curl http://localhost:11434/api/tags
+ps aux | grep ollama
+```
+
+**Solutions:**
+
+1. **Ollama not running:**
+   ```bash
+   # Start Ollama
+   ollama serve
+
+   # Or run in background
+   nohup ollama serve > /dev/null 2>&1 &
+   ```
+
+2. **Model not downloaded:**
+   ```bash
+   # Check available models
+   ollama list
+
+   # Pull required models
+   ollama pull llama3.2:3b
+   ollama pull nomic-embed-text
+   ```
+
+3. **Wrong Ollama URL:**
+   ```bash
+   # Check .env
+   OLLAMA_BASE_URL=http://localhost:11434
+
+   # If Ollama is on different host
+   OLLAMA_BASE_URL=http://192.168.1.100:11434
+   ```
+
+### OpenAI API Connection Issues
+
+**Symptom:**
+```
+openai.error.AuthenticationError: Invalid API key
+```
+
+**Solutions:**
+
+1. **Invalid API key:**
+   ```bash
+   # Check .env file
+   cat .env | grep OPENAI_API_KEY
+
+   # Get new key from: https://platform.openai.com/api-keys
+   OPENAI_API_KEY=sk-your-key-here
+   ```
+
+2. **Network connectivity:**
+   ```bash
+   # Test OpenAI connectivity
+   curl https://api.openai.com/v1/models -H "Authorization: Bearer $OPENAI_API_KEY"
+   ```
+
+3. **Rate limiting:**
+   ```bash
+   # Wait and retry
+   # Implement exponential backoff in code
+   # Upgrade OpenAI plan for higher limits
+   ```
+
+## Docker Deployment Issues
+
+### Container Won't Start
+
+**Symptom:**
+```bash
+docker ps -a
+# Shows container with status "Exited (1)"
+```
+
+**Diagnosis:**
+```bash
+# Check container logs
+docker logs code-graph-api
+
+# Check all container logs
+docker-compose logs
+```
+
+**Common Issues:**
+
+1. **Missing environment variables:**
+   ```bash
+   # Check docker-compose.yml has environment section
+   environment:
+     - NEO4J_URI=bolt://neo4j:7687
+     - NEO4J_USER=neo4j
+     - NEO4J_PASSWORD=${NEO4J_PASSWORD}
+
+   # Check .env file exists
+   ls -la .env
+   ```
+
+2. **Port already in use:**
+   ```bash
+   # Check what's using port 8000
+   lsof -i :8000  # Linux/macOS
+   netstat -ano | findstr :8000  # Windows
+
+   # Kill process or change port
+   docker-compose down
+   # Edit docker-compose.yml to use different port
+   ports:
+     - "8001:8000"
+   docker-compose up -d
+   ```
+
+3. **Out of disk space:**
+   ```bash
+   # Check disk space
+   df -h
+
+   # Clean up Docker (CAUTION: removes stopped containers, unused images and unused volumes; back up Neo4j data first)
+   docker system prune -a
+   docker volume prune
+   ```
+
+4. **Memory limits:**
+   ```bash
+   # Increase Docker memory
+   # Docker Desktop: Settings > Resources > Memory (increase to 4GB+)
+
+   # Or in docker-compose.yml
+   services:
+     api:
+       deploy:
+         resources:
+           limits:
+             memory: 2G
+   ```
+
+### Docker Compose Errors
+
+**Error: "Version is not supported"**
+```bash
+# Update Docker Compose
+sudo apt-get update
+sudo apt-get install docker-compose-plugin
+
+# Or use docker compose (without hyphen)
+docker compose up -d
+```
+
+**Error: "Network not found"**
+```bash
+# Recreate network
+docker-compose down
+docker network prune
+docker-compose up -d
+```
+
+**Error: "Volume not found"**
+```bash
+# List volumes
+docker volume ls
+
+# Recreate volumes (CAUTION: -v deletes the volumes and all data stored in them)
+docker-compose down -v
+docker-compose up -d
+```
+
+### Container Networking Issues
+
+**Containers can't communicate:**
+
+```bash
+# Check network
+docker network ls
+docker network inspect code-graph_default
+
+# Ensure containers are on same network
+docker-compose ps
+
+# Test connectivity
+docker exec code-graph-api ping neo4j
+```
+
+**DNS resolution fails:**
+
+```bash
+# Use service name, not localhost
+NEO4J_URI=bolt://neo4j:7687  # Correct in Docker
+NEO4J_URI=bolt://localhost:7687  # Wrong in Docker
+```
+
+## Neo4j Problems
+
+### Neo4j Out of Memory
+
+**Symptom:**
+```
+Neo4j heap memory exceeded
+OutOfMemoryError: Java heap space
+```
+
+**Solutions:**
+
+1. **Increase heap size:**
+   ```yaml
+   # docker-compose.yml
+   services:
+     neo4j:
+       environment:
+         - NEO4J_dbms_memory_heap_max__size=4G
+         - NEO4J_dbms_memory_pagecache_size=2G
+   ```
+
+2. **Clear old data:**
+   ```cypher
+   // In Neo4j Browser (http://localhost:7474)
+
+   // Delete old nodes
+   MATCH (n:Document) WHERE n.created < datetime() - duration('P30D')
+   DETACH DELETE n
+
+   // Or clear all data (CAUTION!)
+   MATCH (n) DETACH DELETE n
+   ```
+
+3. **Optimize queries:**
+   ```cypher
+   // Add indexes
+   CREATE INDEX document_id IF NOT EXISTS FOR (d:Document) ON (d.id);
+   CREATE INDEX memory_project IF NOT EXISTS FOR (m:Memory) ON (m.project_id);
+   ```
+
+### Neo4j Browser Not Accessible
+
+**Symptom:**
+Cannot access http://localhost:7474
+
+**Solutions:**
+
+1. **Check port mapping:**
+   ```bash
+   docker ps | grep neo4j
+   # Should show: 0.0.0.0:7474->7474/tcp
+
+   # If not, check docker-compose.yml
+   ports:
+     - "7474:7474"
+     - "7687:7687"
+   ```
+
+2. **Neo4j not ready:**
+   ```bash
+   # Wait for startup
+   docker logs -f code-graph-neo4j
+   # Look for: "Started."
+   ```
+
+3. **Firewall blocking:**
+   ```bash
+   sudo ufw allow 7474
+   ```
+
+### APOC Plugin Issues
+
+**Symptom:**
+```
+There is no procedure with the name `apoc.meta.data` registered
+```
+
+**Solutions:**
+
+1. **APOC not installed:**
+   ```yaml
+   # docker-compose.yml
+   services:
+     neo4j:
+       environment:
+         - NEO4J_PLUGINS=["apoc"]
+         - NEO4J_dbms_security_procedures_unrestricted=apoc.*
+   ```
+
+2. **Restart Neo4j:**
+   ```bash
+   docker-compose restart neo4j
+   ```
+
+3. **Verify APOC:**
+   ```cypher
+   // In Neo4j Browser
+   RETURN apoc.version()
+   ```
+
+### Neo4j Authentication Failed
+
+**Symptom:**
+```
+Neo4j.ClientError.Security.Unauthorized
+```
+
+**Solutions:**
+
+```bash
+# Reset password
+docker exec -it code-graph-neo4j cypher-shell -u neo4j -p neo4j
+# Follow prompts to change password
+
+# Update .env
+NEO4J_PASSWORD=new-password
+
+# Restart application
+docker-compose restart api
+```
+
+## LLM Provider Issues
+
+### Ollama Model Not Found
+
+**Symptom:**
+```
+Error: model 'llama3.2:3b' not found
+```
+
+**Solutions:**
+
+```bash
+# List available models
+ollama list
+
+# Pull required model
+ollama pull llama3.2:3b
+ollama pull nomic-embed-text
+
+# Verify in .env
+OLLAMA_MODEL=llama3.2:3b
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+```
+
+### Ollama Out of Memory
+
+**Symptom:**
+```
+Error loading model: insufficient memory
+```
+
+**Solutions:**
+
+1. **Use smaller model:**
+   ```bash
+   # 3B model instead of 7B
+   ollama pull llama3.2:3b
+
+   # Update .env
+   OLLAMA_MODEL=llama3.2:3b
+   ```
+
+2. **Increase system memory:**
+   - Close other applications
+   - Increase Docker memory limit
+   - Use smaller context window
+
+3. **Use CPU offloading:**
+   ```bash
+   # Ollama automatically offloads to CPU when GPU memory is full
+   # Monitor with:
+   ollama ps
+   ```
+
+### OpenAI Rate Limit Exceeded
+
+**Symptom:**
+```
+openai.error.RateLimitError: Rate limit exceeded
+```
+
+**Solutions:**
+
+1. **Wait and retry:**
+   ```python
+   # Application has exponential backoff
+   # Wait 1 minute and try again
+   ```
+
+2. **Upgrade OpenAI plan:**
+   - Visit OpenAI dashboard
+   - Increase rate limits
+   - Add payment method
+
+3. **Switch to Ollama:**
+   ```bash
+   # No rate limits with local Ollama
+   LLM_PROVIDER=ollama
+   ```
+
+### Gemini API Quota Exceeded
+
+**Symptom:**
+```
+google.api_core.exceptions.ResourceExhausted: Quota exceeded
+```
+
+**Solutions:**
+
+1. **Check quota:**
+   - Visit Google AI Studio
+   - Check daily quota usage
+   - Wait for quota reset (midnight Pacific)
+
+2. **Request quota increase:**
+   - Contact Google support
+   - Upgrade to paid plan
+
+## Performance Problems
+
+### Slow Query Responses
+
+**Symptom:**
+Queries take > 30 seconds
+
+**Diagnosis:**
+
+```bash
+# Check system resources
+top
+htop
+docker stats
+
+# Check Neo4j query performance
+# In Neo4j Browser, run with PROFILE:
+PROFILE MATCH (n:Document) RETURN n LIMIT 10
+```
+
+**Solutions:**
+
+1. **Add Neo4j indexes:**
+   ```cypher
+   // Create indexes on frequently queried fields
+   CREATE INDEX document_content IF NOT EXISTS
+   FOR (d:Document) ON (d.content);
+
+   CREATE INDEX memory_tags IF NOT EXISTS
+   FOR (m:Memory) ON (m.tags);
+
+   // Check indexes
+   SHOW INDEXES
+   ```
+
+2. **Optimize chunk size:**
+   ```bash
+   # In .env
+   CHUNK_SIZE=512  # Smaller chunks for faster search
+   CHUNK_OVERLAP=50
+   ```
+
+3. **Use smaller embedding model:**
+   ```bash
+   # Ollama
+   OLLAMA_EMBEDDING_MODEL=nomic-embed-text  # Fast
+
+   # OpenAI
+   OPENAI_EMBEDDING_MODEL=text-embedding-3-small  # Fast & cheap
+   ```
+
+4. **Increase timeouts:**
+   ```bash
+   # .env
+   OPERATION_TIMEOUT=600  # 10 minutes
+   LARGE_DOCUMENT_TIMEOUT=1200  # 20 minutes
+   ```
+
+### High Memory Usage
+
+**Symptom:**
+```bash
+docker stats
+# Shows 90%+ memory usage
+```
+
+**Solutions:**
+
+1. **Increase Docker memory:**
+   ```bash
+   # Docker Desktop: Settings > Resources
+   # Increase memory to 8GB or more
+   ```
+
+2. **Reduce Neo4j memory:**
+   ```yaml
+   # docker-compose.yml
+   services:
+     neo4j:
+       environment:
+         - NEO4J_dbms_memory_heap_max__size=2G  # Reduce from 4G
+   ```
+
+3. **Process documents in batches:**
+   ```python
+   # Use directory processing with batch size
+   # MCP: ingest_directory with smaller batches
+   # API: Process files one at a time
+   ```
+
+4. **Clear cache:**
+   ```bash
+   # Restart services
+   docker-compose restart
+
+   # Clear Neo4j query caches
+   # In Neo4j Browser:
+   CALL db.clearQueryCaches()
+   ```
+
+### High CPU Usage
+
+**Symptom:**
+CPU at 100% constantly
+
+**Solutions:**
+
+1. **Check what's consuming CPU:**
+   ```bash
+   docker stats
+   # Identify the container
+
+   docker top code-graph-api
+   ```
+
+2. **Ollama consuming CPU:**
+   ```bash
+   # Normal during inference
+   # Use GPU if available
+   # Reduce concurrent requests
+   ```
+
+3. **Limit concurrent operations:**
+   ```python
+   # In code, limit concurrent tasks
+   # Default: 5 concurrent operations
+   MAX_CONCURRENT_OPERATIONS=3
+   ```
+
+## Memory Issues
+
+### Memory Store Search Slow
+
+**Symptom:**
+Memory search takes > 5 seconds
+
+**Solutions:**
+
+1. **Add fulltext index:**
+   ```cypher
+   // In Neo4j Browser
+   CREATE FULLTEXT INDEX memory_fulltext IF NOT EXISTS
+   FOR (m:Memory)
+   ON EACH [m.title, m.content, m.reason, m.tags]
+
+   // Verify
+   SHOW INDEXES
+   ```
+
+2. **Limit search results:**
+   ```python
+   # When searching, use limit
+   search_memories(query="...", limit=20)  # Instead of 100
+   ```
+
+3. **Filter by importance:**
+   ```python
+   # Only search important memories
+   search_memories(query="...", min_importance=0.7)
+   ```
+
+### Memory Not Found After Adding
+
+**Symptom:**
+Memory added successfully but search doesn't find it
+
+**Diagnosis:**
+
+```cypher
+// Check if memory exists
+MATCH (m:Memory {project_id: "your-project"})
+RETURN m LIMIT 10
+
+// Check memory count
+MATCH (m:Memory {project_id: "your-project"})
+RETURN count(m)
+```
+
+**Solutions:**
+
+1. **Index not updated:**
+   ```cypher
+   // Rebuild fulltext index
+   DROP INDEX memory_fulltext IF EXISTS;
+   CREATE FULLTEXT INDEX memory_fulltext
+   FOR (m:Memory)
+   ON EACH [m.title, m.content, m.reason, m.tags]
+   ```
+
+2. **Search query too specific:**
+   ```python
+   # Use broader search terms
+   # Instead of: "PostgreSQL database configuration"
+   # Try: "PostgreSQL" or "database"
+   ```
+
+3. **Project ID mismatch:**
+   ```bash
+   # Check project ID is consistent
+   echo $PROJECT_ID
+   ```
+
+## MCP Server Problems
+
+### MCP Server Won't Start
+
+**Symptom:**
+```
+Error: MCP server failed to start
+```
+
+**Diagnosis:**
+
+```bash
+# Try starting manually
+python start_mcp.py
+
+# Check logs
+python start_mcp.py 2>&1 | tee mcp.log
+```
+
+**Solutions:**
+
+1. **Missing MCP package:**
+   ```bash
+   uv pip install "mcp>=1.1.0"
+   ```
+
+2. **Port already in use:**
+   ```bash
+   # Check ports
+   lsof -i :8001
+
+   # Kill process or use different port
+   # MCP server uses stdio by default (no port needed)
+   ```
+
+3. **Neo4j not accessible:**
+   ```bash
+   # MCP server needs Neo4j
+   # Check Neo4j is running
+   docker ps | grep neo4j
+   ```
+
+### MCP Tools Not Appearing in Claude
+
+**Symptom:**
+MCP tools don't show up in Claude Desktop
+
+**Solutions:**
+
+1. **Check Claude config:**
+   ```bash
+   # macOS
+   cat ~/Library/Application\ Support/Claude/claude_desktop_config.json
+
+   # Should contain:
+   {
+     "mcpServers": {
+       "code-graph": {
+         "command": "python",
+         "args": ["/path/to/codebase-rag/start_mcp.py"]
+       }
+     }
+   }
+   ```
+
+2. **Check MCP server path:**
+   ```bash
+   # Use absolute path
+   "args": ["/home/user/codebase-rag/start_mcp.py"]
+
+   # Not relative path
+   "args": ["./start_mcp.py"]  # Wrong
+   ```
+
+3. **Restart Claude Desktop:**
+   ```bash
+   # Completely quit and restart Claude Desktop
+   # Check MCP status in Claude: cmd/ctrl + ,
+   ```
+
+4. **Check Python environment:**
+   ```json
+   {
+     "mcpServers": {
+       "code-graph": {
+         "command": "/path/to/.venv/bin/python",
+         "args": ["/path/to/start_mcp.py"],
+         "env": {
+           "PYTHONPATH": "/path/to/codebase-rag"
+         }
+       }
+     }
+   }
+   ```
+
+### MCP Tool Execution Fails
+
+**Symptom:**
+```
+Error executing tool: Connection timeout
+```
+
+**Solutions:**
+
+1. **Increase timeouts:**
+   ```bash
+   # .env
+   OPERATION_TIMEOUT=600
+   CONNECTION_TIMEOUT=60
+   ```
+
+2. **Check Neo4j connection:**
+   ```bash
+   # MCP tools need working Neo4j
+   python -c "from neo4j import GraphDatabase; driver = GraphDatabase.driver('bolt://localhost:7687', auth=('neo4j', 'password')); driver.verify_connectivity(); print('OK')"
+   ```
+
+3. **Check file paths:**
+   ```bash
+   # Use absolute paths in MCP tool calls
+   # Not: "documents/file.txt"
+   # Use: "/home/user/project/documents/file.txt"
+   ```
+
+## API Errors
+
+### 500 Internal Server Error
+
+**Diagnosis:**
+
+```bash
+# Check application logs
+docker logs code-graph-api
+
+# Or local logs
+tail -f logs/application.log
+```
+
+**Common Causes:**
+
+1. **Configuration error:**
+   ```bash
+   # Check .env file
+   cat .env
+   # Ensure all required variables are set
+   ```
+
+2. **Database connection lost:**
+   ```bash
+   # Restart Neo4j
+   docker-compose restart neo4j
+   ```
+
+3. **Unhandled exception:**
+   ```bash
+   # Check logs for stack trace
+   # Report bug with full error message
+   ```
+
+### 422 Validation Error
+
+**Symptom:**
+```json
+{
+  "detail": [
+    {
+      "loc": ["body", "memory_type"],
+      "msg": "field required",
+      "type": "value_error.missing"
+    }
+  ]
+}
+```
+
+**Solution:**
+Check your request body includes all required fields.
+
+**Example correct request:**
+```bash
+curl -X POST http://localhost:8000/api/v1/memory/add \
+  -H "Content-Type: application/json" \
+  -d '{
+    "project_id": "myapp",
+    "memory_type": "decision",
+    "title": "Use PostgreSQL",
+    "content": "Selected PostgreSQL as database",
+    "importance": 0.8
+  }'
+```
+
+### 404 Not Found
+
+**Diagnosis:**
+
+```bash
+# Check available endpoints
+curl http://localhost:8000/openapi.json | jq '.paths | keys'
+```
+
+**Solutions:**
+
+1. **Check API version:**
+   ```bash
+   # Correct
+   curl http://localhost:8000/api/v1/health
+
+   # Wrong
+   curl http://localhost:8000/health
+   ```
+
+2. **Check URL spelling:**
+   ```bash
+   # Correct
+   /api/v1/memory/add
+
+   # Wrong
+   /api/v1/memories/add
+   ```
+
+## Installation Issues
+
+### uv Installation Fails
+
+**Symptom:**
+```
+curl: command not found
+```
+
+**Solution:**
+
+```bash
+# Install curl first
+sudo apt-get install curl  # Debian/Ubuntu
+brew install curl  # macOS
+
+# Then install uv
+curl -LsSf https://astral.sh/uv/install.sh | sh
+```
+
+### Python Version Issues
+
+**Symptom:**
+```
+ERROR: This package requires Python 3.13+
+```
+
+**Solutions:**
+
+```bash
+# Check Python version
+python --version
+
+# Install Python 3.13
+## Ubuntu/Debian
+sudo add-apt-repository ppa:deadsnakes/ppa
+sudo apt update
+sudo apt install python3.13
+
+## macOS
+brew install python@3.13
+
+# Create venv with correct Python
+python3.13 -m venv .venv
+source .venv/bin/activate
+```
+
+### Dependency Installation Fails
+
+**Symptom:**
+```
+ERROR: Could not build wheels for llama-index
+```
+
+**Solutions:**
+
+1. **Install build dependencies:**
+   ```bash
+   # Debian/Ubuntu
+   sudo apt-get install build-essential python3.13-dev
+
+   # macOS
+   xcode-select --install
+   ```
+
+2. **Clear cache and retry:**
+   ```bash
+   uv cache clean
+   uv pip install -e .
+   ```
+
+3. **Use pre-built wheels:**
+   ```bash
+   uv pip install --only-binary :all: -e .
+   ```
+
+## Data and Storage Issues
+
+### Disk Space Full
+
+**Symptom:**
+```
+OSError: [Errno 28] No space left on device
+```
+
+**Solutions:**
+
+```bash
+# Check disk space
+df -h
+
+# Clean up Docker (CAUTION: removes all unused images; volume prune deletes unused volumes and their data)
+docker system prune -a
+docker volume prune
+
+# Clean up Neo4j data (CAUTION: deletes all data)
+docker-compose down -v
+docker-compose up -d
+
+# Clean up logs
+rm -rf logs/*.log.*
+```
+
+### Data Corruption
+
+**Symptom:**
+```
+Neo4j database files are corrupted
+```
+
+**Solutions:**
+
+1. **Backup and restore:**
+   ```bash
+   # Stop Neo4j
+   docker-compose stop neo4j
+
+   # Backup data
+   docker cp code-graph-neo4j:/data ./neo4j-backup
+
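+   # Remove the stopped container first (a volume still in use cannot be removed), then the corrupted volume
+   docker-compose rm -f neo4j && docker volume rm code-graph_neo4j_data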
+
+   # Restart and restore
+   docker-compose up -d neo4j
+   # Re-import your data
+   ```
+
+2. **Check disk health:**
+   ```bash
+   # Check for disk errors (run fsck only on an unmounted filesystem)
+   dmesg | grep -i error
+   sudo fsck /dev/sda1
+   ```
+
+## Common Error Messages
+
+### "Cannot connect to Neo4j"
+- Check Neo4j is running: `docker ps | grep neo4j`
+- Check connection string in `.env`
+- Check network connectivity
+
+### "LLM provider not configured"
+- Set `LLM_PROVIDER` in `.env`
+- Install provider (Ollama/OpenAI/Gemini)
+- Download models if using Ollama
+
+### "Memory not found"
+- Check project_id is correct
+- Verify memory exists: `MATCH (m:Memory) RETURN m`
+- Rebuild search index
+
+### "Operation timeout"
+- Increase timeout in `.env`
+- Check system resources
+- Process smaller documents
+
+### "Permission denied"
+- Check file permissions: `ls -la`
+- Fix with: `chmod +x start.py`
+- Check Docker permissions: Add user to docker group
+
+### "Module not found"
+- Install dependencies: `uv pip install -e .`
+- Check virtual environment is activated
+- Set PYTHONPATH: `export PYTHONPATH=$PWD`
+
+## Getting More Help
+
+### Collect Diagnostic Information
+
+When reporting issues, include:
+
+```bash
+# System information
+uname -a
+python --version
+docker --version
+
+# Application logs
+docker logs code-graph-api > api.log
+docker logs code-graph-neo4j > neo4j.log
+
+# Configuration (remove sensitive data!)
+sed -E 's/^([A-Za-z0-9_]*(PASSWORD|API_KEY|SECRET|TOKEN)[A-Za-z0-9_]*)=.*/\1=REDACTED/' .env
+
+# Neo4j information
+# In Neo4j Browser:
+CALL dbms.components();
+RETURN apoc.version();
+```
+
+### Where to Get Help
+
+1. **Documentation**: https://code-graph.vantagecraft.dev
+2. **GitHub Issues**: Search existing issues or create new one
+3. **GitHub Discussions**: Ask questions
+4. **Discord/Slack**: Community chat (if available)
+
+### Creating a Good Bug Report
+
+Include:
+
+1. **Environment**: OS, Python version, deployment mode
+2. **Steps to reproduce**: Exact commands and actions
+3. **Expected behavior**: What should happen
+4. **Actual behavior**: What actually happens
+5. **Logs**: Relevant error messages and stack traces
+6. **Configuration**: Sanitized `.env` file contents
+7. **Attempts to fix**: What you've already tried
+
+### Example Bug Report Template
+
+```markdown
+## Environment
+- OS: Ubuntu 22.04
+- Python: 3.13.1
+- Deployment: Docker Compose
+- Version: 0.7.0
+
+## Issue
+Memory search returns no results despite having memories in database.
+
+## Steps to Reproduce
+1. Add memory: `curl -X POST ...`
+2. Search memory: `curl -X POST ...`
+3. Gets empty result: `{"memories": []}`
+
+## Expected Behavior
+Should return the memory added in step 1.
+
+## Logs
+~~~
+[ERROR] Memory search failed: Index not found
+~~~
+
+## Configuration
+~~~env
+NEO4J_URI=bolt://neo4j:7687
+LLM_PROVIDER=ollama
+~~~
+
+## Attempts to Fix
+- Restarted Neo4j: No change
+- Checked memory exists: Confirmed with Cypher query
+- Rebuilt index: Same error
+```
+
+## Need Immediate Help?
+
+For critical production issues:
+
+1. Check [Status Page](https://status.example.com) (if available)
+2. Rollback to previous version
+3. Enable debug logging: `LOG_LEVEL=DEBUG`
+4. Contact maintainers directly
+5. Post in #urgent channel (if community exists)
+
+Remember: Most issues are configuration-related. Double-check your `.env` file and ensure all services are running!
diff --git a/mcp-registry-submission/README.md b/mcp-registry-submission/README.md
new file mode 100644
index 0000000..6f481d9
--- /dev/null
+++ b/mcp-registry-submission/README.md
@@ -0,0 +1,80 @@
+# MCP Registry Submission for Code Graph Knowledge System
+
+This directory contains the submission files for registering Code Graph Knowledge System in the Docker MCP Registry.
+
+## Three Deployment Variants
+
+We provide three separate MCP server entries to match different user needs:
+
+### 1. codebase-rag-minimal (Code Graph Only)
+- **No LLM required** - Pure graph-based code intelligence
+- **Image**: `royisme/codebase-rag:minimal`
+- **Tools**: 4 (code graph operations)
+- **Use case**: Developers who want code navigation without LLM overhead
+
+### 2. codebase-rag-standard (Code Graph + Memory)
+- **Embedding required** - Vector-powered memory search
+- **Image**: `royisme/codebase-rag:standard`
+- **Tools**: 11 (code graph + memory management)
+- **Use case**: Teams building project knowledge bases
+
+### 3. codebase-rag-full (All Features)
+- **LLM + Embedding required** - Complete AI capabilities
+- **Image**: `royisme/codebase-rag:full`
+- **Tools**: 30 (code graph + memory + RAG + auto-extraction)
+- **Use case**: AI-assisted development workflows
+
+## Submission Process
+
+1. Fork https://github.com/docker/mcp-registry
+2. Copy the three directories to `servers/`:
+   ```bash
+   cp -r mcp-registry-submission/codebase-rag-* /path/to/mcp-registry/servers/
+   ```
+3. Test locally (if possible)
+4. Create pull request with title: "Add Code Graph Knowledge System (3 variants)"
+5. Wait for Docker team review
+
+## File Structure
+
+Each variant contains:
+- `server.yaml` - MCP server configuration
+- `tools.json` - Static tool list (required for servers needing pre-configuration)
+
+## Requirements Met
+
+- ✅ **License**: MIT (confirmed in repository)
+- ✅ **Type**: Local (containerized) - Docker builds and hosts images
+- ✅ **Dockerfile**: Available in repository (`docker/Dockerfile.*`)
+- ✅ **Tools List**: Provided (`tools.json`) - prevents build failures
+- ✅ **Public Repository**: https://github.com/royisme/codebase-rag
+- ✅ **Documentation**: https://code-graph.vantagecraft.dev (once deployed)
+
+## Configuration Requirements
+
+### Minimal
+- Neo4j connection (URI, user, password)
+
+### Standard
+- Neo4j connection
+- Embedding provider (Ollama/OpenAI/Gemini)
+
+### Full
+- Neo4j connection
+- LLM provider (Ollama/OpenAI/Gemini/OpenRouter)
+- Embedding provider
+
+## Notes
+
+- All three images will be built and hosted by Docker
+- Images support both AMD64 and ARM64 architectures
+- Neo4j database runs as separate container (configured in docker-compose)
+- Users can choose cloud or local LLM providers
+- Documentation available at https://code-graph.vantagecraft.dev
+
+## Links
+
+- **Repository**: https://github.com/royisme/codebase-rag
+- **Docker Hub**: https://hub.docker.com/r/royisme/codebase-rag
+- **Documentation**: https://code-graph.vantagecraft.dev
+- **Issues**: https://github.com/royisme/codebase-rag/issues
diff --git a/mcp-registry-submission/SUBMISSION_GUIDE.md b/mcp-registry-submission/SUBMISSION_GUIDE.md
new file mode 100644
index 0000000..abef34d
--- /dev/null
+++ b/mcp-registry-submission/SUBMISSION_GUIDE.md
@@ -0,0 +1,265 @@
+# Docker MCP Registry Submission Guide
+
+## Prerequisites Checklist
+
+Before submitting, ensure:
+
+- [x] **License**: Project has MIT or Apache 2 license (GPL not allowed)
+- [x] **Docker Images**: Built and available on Docker Hub
+  - `royisme/codebase-rag:minimal`
+  - `royisme/codebase-rag:standard`
+  - `royisme/codebase-rag:full`
+- [x] **Dockerfiles**: Present in repository
+- [x] **Tools List**: `tools.json` files created for all variants
+- [x] **Server Config**: `server.yaml` files completed
+- [x] **Public Repository**: GitHub repository is public
+- [ ] **Documentation**: code-graph.vantagecraft.dev is live (pending DNS)
+- [ ] **CI Passing**: GitHub Actions workflows succeed
+
+## Step-by-Step Submission
+
+### 1. Verify Docker Images Are Published
+
+```bash
+# Check images are available
+docker pull royisme/codebase-rag:minimal
+docker pull royisme/codebase-rag:standard
+docker pull royisme/codebase-rag:full
+
+# Verify they work
+docker run --rm royisme/codebase-rag:minimal --help
+```
+
+### 2. Fork MCP Registry
+
+1. Go to https://github.com/docker/mcp-registry
+2. Click "Fork" button
+3. Clone your fork:
+   ```bash
+   git clone https://github.com/YOUR_USERNAME/mcp-registry.git
+   cd mcp-registry
+   ```
+
+### 3. Add Server Configurations
+
+```bash
+# Copy our submission files
+cp -r /path/to/codebase-rag/mcp-registry-submission/codebase-rag-minimal servers/
+cp -r /path/to/codebase-rag/mcp-registry-submission/codebase-rag-standard servers/
+cp -r /path/to/codebase-rag/mcp-registry-submission/codebase-rag-full servers/
+
+# Verify structure
+ls -la servers/codebase-rag-*
+```
+
+Each directory should contain:
+- `server.yaml`
+- `tools.json`
+
+### 4. Test Locally (Optional)
+
+If the registry has a test command:
+
+```bash
+# Validate each server entry (requires the `task` tool to be installed)
+task validate server=codebase-rag-minimal
+task validate server=codebase-rag-standard
+task validate server=codebase-rag-full
+```
+
+### 5. Create Pull Request
+
+```bash
+# Create branch
+git checkout -b add-codebase-rag
+
+# Stage files
+git add servers/codebase-rag-*
+
+# Commit
+git commit -m "Add Code Graph Knowledge System (3 variants: minimal, standard, full)
+
+This PR adds Code Graph Knowledge System, an AI-powered code intelligence
+and knowledge management system.
+
+Three deployment variants:
+
+1. codebase-rag-minimal (Code Graph only, no LLM)
+   - Repository analysis and indexing
+   - File relationship discovery
+   - Impact analysis
+   - Context packing for AI assistants
+
+2. codebase-rag-standard (Code Graph + Memory Store)
+   - All minimal features
+   - Manual memory management
+   - Vector-based memory search
+   - Requires: Embedding model
+
+3. codebase-rag-full (All features)
+   - All standard features
+   - Automatic memory extraction (git, conversations, comments)
+   - Knowledge base RAG (document Q&A)
+   - Batch repository analysis
+   - Requires: LLM + Embedding
+
+Repository: https://github.com/royisme/codebase-rag
+Docker Hub: https://hub.docker.com/r/royisme/codebase-rag
+Documentation: https://code-graph.vantagecraft.dev
+License: MIT"
+
+# Push
+git push origin add-codebase-rag
+```
+
+### 6. Open Pull Request on GitHub
+
+1. Go to your fork on GitHub
+2. Click "Contribute" → "Open pull request"
+3. Title: `Add Code Graph Knowledge System (3 variants)`
+4. Description:
+   ```markdown
+   ## Overview
+
+   Adding Code Graph Knowledge System - an AI-powered code intelligence system
+   with three deployment modes based on LLM requirements.
+
+   ## Variants Included
+
+   1. **codebase-rag-minimal** - Code Graph only (no LLM)
+   2. **codebase-rag-standard** - + Memory Store (embedding required)
+   3. **codebase-rag-full** - All features (LLM + embedding)
+
+   ## Links
+
+   - Repository: https://github.com/royisme/codebase-rag
+   - Docker Hub: https://hub.docker.com/r/royisme/codebase-rag
+   - Documentation: https://code-graph.vantagecraft.dev
+
+   ## Testing
+
+   All three variants have been tested locally with:
+   - Neo4j 5.15
+   - Multiple LLM providers (Ollama, OpenAI, Gemini)
+   - Various repository sizes
+
+   ## License
+
+   MIT License - compliant with Docker MCP Registry requirements.
+
+   ## Checklist
+
+   - [x] Dockerfiles present in repository
+   - [x] Images published to Docker Hub
+   - [x] Tools list provided (tools.json)
+   - [x] Configuration schema defined
+   - [x] Multi-architecture support (AMD64, ARM64)
+   - [x] Public repository
+   - [ ] CI passing (waiting for initial push)
+   ```
+5. Click "Create pull request"
+
+### 7. Monitor PR Status
+
+1. Watch for CI/CD checks
+2. Address any failures reported by automated checks
+3. Respond to reviewer comments
+4. Wait for Docker team approval
+
+## Common Issues and Solutions
+
+### Issue: Build Failures
+
+**Cause**: Tools can't be listed without configuration
+
+**Solution**: We've provided `tools.json` files to avoid this
+
+### Issue: CI Failures
+
+**Cause**: Invalid YAML syntax or schema
+
+**Solution**: Validate YAML files:
+```bash
+# Install yamllint
+pip install yamllint
+
+# Validate
+yamllint servers/codebase-rag-*/server.yaml
+```
+
+### Issue: Image Not Found
+
+**Cause**: Docker images not yet pushed to Docker Hub
+
+**Solution**:
+```bash
+# Ensure images are published
+make docker-build-all
+make docker-push
+```
+
+### Issue: Missing Required Fields
+
+**Cause**: server.yaml missing required fields
+
+**Solution**: Check all required fields are present:
+- `name`
+- `type`
+- `meta.category`
+- `about.title`
+- `about.description`
+- `source.project`
+
+## Expected Timeline
+
+1. **PR Opened**: You submit the PR
+2. **Automated Checks**: ~5-10 minutes
+3. **Docker Team Review**: 1-7 days
+4. **Approval & Merge**: Same day as approval
+5. **Catalog Update**: Within 24 hours
+6. **Available Everywhere**:
+   - MCP catalog
+   - Docker Desktop toolkit
+   - Docker Hub `mcp` namespace
+
+## Post-Approval
+
+Once approved and merged:
+
+1. **Verify Listing**:
+   - Check Docker MCP catalog
+   - Check Docker Desktop's MCP toolkit
+   - Search for "codebase-rag" or "code graph"
+
+2. **Update Documentation**:
+   - Add badge to README
+   - Update docs with MCP registry information
+   - Add quick start guide for Docker Desktop users
+
+3. **Announce**:
+   - GitHub Discussions
+   - Social media
+   - Blog post (optional)
+
+## Badge for README
+
+Once approved, add this badge to your README:
+
+```markdown
+[![MCP Registry](https://img.shields.io/badge/MCP%20Registry-Available-blue?logo=docker)](https://mcp-registry.docker.com/search?q=codebase-rag)
+```
+
+## Support
+
+If you encounter issues:
+
+1. Check Docker MCP Registry documentation
+2. Review other approved servers for examples
+3. Open issue in mcp-registry repository
+4. Ask in Docker Community forums
+
+## Contact
+
+For questions about this submission:
+- GitHub Issues: https://github.com/royisme/codebase-rag/issues
+- Repository: https://github.com/royisme/codebase-rag
diff --git a/mcp-registry-submission/codebase-rag-full/server.yaml b/mcp-registry-submission/codebase-rag-full/server.yaml
new file mode 100644
index 0000000..985665a
--- /dev/null
+++ b/mcp-registry-submission/codebase-rag-full/server.yaml
@@ -0,0 +1,111 @@
+name: codebase-rag-full
+image: royisme/codebase-rag:full
+type: server
+meta:
+  category: development
+  tags:
+    - code-analysis
+    - knowledge-management
+    - rag
+    - ai-assistant
+    - developer-tools
+    - llm
+about:
+  title: Code Graph Knowledge System (Full)
+  description: |
+    Complete AI-powered code intelligence and knowledge management system.
+    Combines code graph analysis, memory management, and RAG capabilities.
+
+    Features:
+    - All Code Graph features (repository analysis, impact analysis)
+    - All Memory Store features (manual + vector search)
+    - Automatic memory extraction from:
+      • Git commits and history
+      • AI conversations
+      • Code comments (TODO/FIXME/NOTE)
+    - Knowledge Base RAG:
+      • Document ingestion and indexing
+      • Intelligent Q&A over codebase
+      • Multi-format support (Markdown, PDF, code)
+    - Batch repository analysis
+
+    Requires: LLM + Embedding model (Ollama/OpenAI/Gemini/OpenRouter)
+    Perfect for AI-assisted development workflows.
+  icon: https://avatars.githubusercontent.com/u/182288589?s=200&v=4
+source:
+  project: https://github.com/royisme/codebase-rag
+  commit: main
+config:
+  description: Configure Neo4j connection, LLM provider, and embedding model
+  env:
+    - name: NEO4J_URI
+      example: bolt://neo4j:7687
+      value: '{{codebase-rag-full.neo4j_uri}}'
+    - name: NEO4J_USER
+      example: neo4j
+      value: '{{codebase-rag-full.neo4j_user}}'
+    - name: NEO4J_DATABASE
+      example: neo4j
+      value: '{{codebase-rag-full.neo4j_database}}'
+    - name: LLM_PROVIDER
+      example: ollama
+      value: '{{codebase-rag-full.llm_provider}}'
+    - name: EMBEDDING_PROVIDER
+      example: ollama
+      value: '{{codebase-rag-full.embedding_provider}}'
+    - name: OLLAMA_BASE_URL
+      example: http://host.docker.internal:11434
+      value: '{{codebase-rag-full.ollama_base_url}}'
+    - name: OLLAMA_MODEL
+      example: llama3.2
+      value: '{{codebase-rag-full.ollama_model}}'
+    - name: OLLAMA_EMBEDDING_MODEL
+      example: nomic-embed-text
+      value: '{{codebase-rag-full.ollama_embedding_model}}'
+  secrets:
+    - name: codebase-rag-full.neo4j_password
+      env: NEO4J_PASSWORD
+      example: your_neo4j_password
+    - name: codebase-rag-full.openai_api_key
+      env: OPENAI_API_KEY
+      example: sk-...
+    - name: codebase-rag-full.google_api_key
+      env: GOOGLE_API_KEY
+      example: your_gemini_api_key
+    - name: codebase-rag-full.openrouter_api_key
+      env: OPENROUTER_API_KEY
+      example: your_openrouter_api_key
+  parameters:
+    type: object
+    properties:
+      neo4j_uri:
+        type: string
+        default: bolt://neo4j:7687
+      neo4j_user:
+        type: string
+        default: neo4j
+      neo4j_database:
+        type: string
+        default: neo4j
+      llm_provider:
+        type: string
+        enum: [ollama, openai, gemini, openrouter]
+        default: ollama
+      embedding_provider:
+        type: string
+        enum: [ollama, openai, gemini]
+        default: ollama
+      ollama_base_url:
+        type: string
+        default: http://host.docker.internal:11434
+      ollama_model:
+        type: string
+        default: llama3.2
+      ollama_embedding_model:
+        type: string
+        default: nomic-embed-text
+    required:
+      - neo4j_uri
+      - neo4j_user
+      - llm_provider
+      - embedding_provider
diff --git a/mcp-registry-submission/codebase-rag-full/tools.json b/mcp-registry-submission/codebase-rag-full/tools.json
new file mode 100644
index 0000000..71b59aa
--- /dev/null
+++ b/mcp-registry-submission/codebase-rag-full/tools.json
@@ -0,0 +1,364 @@
+{
+  "tools": [
+    {
+      "name": "code_graph_ingest_repo",
+      "description": "Ingest a code repository into the graph database",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "local_path": {"type": "string"},
+          "repo_url": {"type": "string"},
+          "mode": {"type": "string", "enum": ["full", "incremental"], "default": "incremental"}
+        },
+        "required": ["local_path"]
+      }
+    },
+    {
+      "name": "code_graph_related",
+      "description": "Find files related to a query",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "query": {"type": "string"},
+          "repo_id": {"type": "string"},
+          "limit": {"type": "integer", "default": 30}
+        },
+        "required": ["query", "repo_id"]
+      }
+    },
+    {
+      "name": "code_graph_impact",
+      "description": "Analyze impact of file changes",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "repo_id": {"type": "string"},
+          "file_path": {"type": "string"},
+          "depth": {"type": "integer", "default": 2}
+        },
+        "required": ["repo_id", "file_path"]
+      }
+    },
+    {
+      "name": "context_pack",
+      "description": "Build context pack for AI assistants",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "repo_id": {"type": "string"},
+          "keywords": {"type": "array", "items": {"type": "string"}},
+          "budget": {"type": "integer", "default": 8000},
+          "stage": {"type": "string", "enum": ["plan", "implement", "review"], "default": "implement"}
+        },
+        "required": ["repo_id", "keywords"]
+      }
+    },
+    {
+      "name": "add_memory",
+      "description": "Add a new memory to project knowledge base",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {"type": "string"},
+          "memory_type": {"type": "string", "enum": ["decision", "preference", "experience", "convention", "plan", "note"]},
+          "title": {"type": "string"},
+          "content": {"type": "string"},
+          "reason": {"type": "string"},
+          "importance": {"type": "number", "minimum": 0, "maximum": 1, "default": 0.5},
+          "tags": {"type": "array", "items": {"type": "string"}}
+        },
+        "required": ["project_id", "memory_type", "title", "content"]
+      }
+    },
+    {
+      "name": "search_memories",
+      "description": "Search project memories using vector similarity",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {"type": "string"},
+          "query": {"type": "string"},
+          "memory_type": {"type": "string"},
+          "min_importance": {"type": "number"},
+          "tags": {"type": "array", "items": {"type": "string"}},
+          "limit": {"type": "integer", "default": 10}
+        },
+        "required": ["project_id", "query"]
+      }
+    },
+    {
+      "name": "get_memory",
+      "description": "Get a specific memory by ID",
+      "inputSchema": {
+        "type": "object",
+        "properties": {"memory_id": {"type": "string"}},
+        "required": ["memory_id"]
+      }
+    },
+    {
+      "name": "update_memory",
+      "description": "Update an existing memory",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "memory_id": {"type": "string"},
+          "title": {"type": "string"},
+          "content": {"type": "string"},
+          "reason": {"type": "string"},
+          "importance": {"type": "number"},
+          "tags": {"type": "array", "items": {"type": "string"}}
+        },
+        "required": ["memory_id"]
+      }
+    },
+    {
+      "name": "delete_memory",
+      "description": "Delete a memory (soft delete)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {"memory_id": {"type": "string"}},
+        "required": ["memory_id"]
+      }
+    },
+    {
+      "name": "supersede_memory",
+      "description": "Create a new memory that supersedes an old one",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "old_memory_id": {"type": "string"},
+          "new_title": {"type": "string"},
+          "new_content": {"type": "string"},
+          "new_reason": {"type": "string"},
+          "new_importance": {"type": "number"}
+        },
+        "required": ["old_memory_id", "new_title", "new_content"]
+      }
+    },
+    {
+      "name": "get_project_summary",
+      "description": "Get summary of all memories for a project",
+      "inputSchema": {
+        "type": "object",
+        "properties": {"project_id": {"type": "string"}},
+        "required": ["project_id"]
+      }
+    },
+    {
+      "name": "extract_from_conversation",
+      "description": "Extract memories from AI conversation history using LLM analysis",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {"type": "string"},
+          "conversation": {"type": "array", "items": {"type": "object"}},
+          "auto_save": {"type": "boolean", "default": false}
+        },
+        "required": ["project_id", "conversation"]
+      }
+    },
+    {
+      "name": "extract_from_git_commit",
+      "description": "Extract memories from git commit using LLM analysis",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {"type": "string"},
+          "commit_sha": {"type": "string"},
+          "commit_message": {"type": "string"},
+          "changed_files": {"type": "array", "items": {"type": "string"}},
+          "auto_save": {"type": "boolean", "default": false}
+        },
+        "required": ["project_id", "commit_message"]
+      }
+    },
+    {
+      "name": "extract_from_code_comments",
+      "description": "Extract memories from code comments (TODO, FIXME, NOTE, DECISION)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {"type": "string"},
+          "file_path": {"type": "string"}
+        },
+        "required": ["project_id", "file_path"]
+      }
+    },
+    {
+      "name": "suggest_memory_from_query",
+      "description": "Suggest memory from knowledge base Q&A session",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {"type": "string"},
+          "query": {"type": "string"},
+          "answer": {"type": "string"},
+          "auto_save": {"type": "boolean", "default": false}
+        },
+        "required": ["project_id", "query", "answer"]
+      }
+    },
+    {
+      "name": "batch_extract_from_repository",
+      "description": "Batch extract memories from entire repository (git history + code comments)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {"type": "string"},
+          "repo_path": {"type": "string"},
+          "max_commits": {"type": "integer", "default": 50},
+          "file_patterns": {"type": "array", "items": {"type": "string"}}
+        },
+        "required": ["project_id", "repo_path"]
+      }
+    },
+    {
+      "name": "add_document",
+      "description": "Add document to knowledge base (supports large documents via async processing)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "content": {"type": "string"},
+          "title": {"type": "string"},
+          "metadata": {"type": "object"}
+        },
+        "required": ["content"]
+      }
+    },
+    {
+      "name": "add_file",
+      "description": "Add file to knowledge base",
+      "inputSchema": {
+        "type": "object",
+        "properties": {"file_path": {"type": "string"}},
+        "required": ["file_path"]
+      }
+    },
+    {
+      "name": "add_directory",
+      "description": "Add directory to knowledge base (async processing)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "directory_path": {"type": "string"},
+          "recursive": {"type": "boolean", "default": true}
+        },
+        "required": ["directory_path"]
+      }
+    },
+    {
+      "name": "query_knowledge",
+      "description": "Query knowledge base using Neo4j GraphRAG with multiple modes",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "question": {"type": "string"},
+          "mode": {"type": "string", "enum": ["hybrid", "graph_only", "vector_only"], "default": "hybrid"}
+        },
+        "required": ["question"]
+      }
+    },
+    {
+      "name": "search_similar_nodes",
+      "description": "Search for similar nodes using vector similarity",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "query": {"type": "string"},
+          "top_k": {"type": "integer", "default": 10}
+        },
+        "required": ["query"]
+      }
+    },
+    {
+      "name": "get_task_status",
+      "description": "Get status of an async task",
+      "inputSchema": {
+        "type": "object",
+        "properties": {"task_id": {"type": "string"}},
+        "required": ["task_id"]
+      }
+    },
+    {
+      "name": "watch_task",
+      "description": "Monitor task progress until completion (real-time)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "task_id": {"type": "string"},
+          "timeout": {"type": "integer", "default": 300},
+          "interval": {"type": "number", "default": 1.0}
+        },
+        "required": ["task_id"]
+      }
+    },
+    {
+      "name": "watch_tasks",
+      "description": "Monitor multiple tasks until all complete",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "task_ids": {"type": "array", "items": {"type": "string"}},
+          "timeout": {"type": "integer", "default": 300},
+          "interval": {"type": "number", "default": 2.0}
+        },
+        "required": ["task_ids"]
+      }
+    },
+    {
+      "name": "list_tasks",
+      "description": "List recent tasks with optional status filter",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "status": {"type": "string", "enum": ["pending", "running", "completed", "failed"]},
+          "limit": {"type": "integer", "default": 10}
+        }
+      }
+    },
+    {
+      "name": "cancel_task",
+      "description": "Cancel a running task",
+      "inputSchema": {
+        "type": "object",
+        "properties": {"task_id": {"type": "string"}},
+        "required": ["task_id"]
+      }
+    },
+    {
+      "name": "get_queue_stats",
+      "description": "Get task queue statistics",
+      "inputSchema": {
+        "type": "object",
+        "properties": {}
+      }
+    },
+    {
+      "name": "get_graph_schema",
+      "description": "Get Neo4j graph schema (node labels and relationships)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {}
+      }
+    },
+    {
+      "name": "get_statistics",
+      "description": "Get system statistics (nodes, relationships, etc.)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {}
+      }
+    },
+    {
+      "name": "clear_knowledge_base",
+      "description": "Clear entire knowledge base (use with caution!)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "confirm": {"type": "boolean", "default": false}
+        },
+        "required": ["confirm"]
+      }
+    }
+  ]
+}
diff --git a/mcp-registry-submission/codebase-rag-minimal/server.yaml b/mcp-registry-submission/codebase-rag-minimal/server.yaml
new file mode 100644
index 0000000..3cb4a55
--- /dev/null
+++ b/mcp-registry-submission/codebase-rag-minimal/server.yaml
@@ -0,0 +1,59 @@
+name: codebase-rag-minimal
+image: royisme/codebase-rag:minimal
+type: server
+meta:
+  category: development
+  tags:
+    - code-analysis
+    - graph-database
+    - developer-tools
+    - code-intelligence
+about:
+  title: Code Graph Knowledge System (Minimal)
+  description: |
+    Lightweight code intelligence and analysis system powered by Neo4j graph database.
+    Perfect for code navigation without LLM overhead.
+
+    Features:
+    - Repository ingestion and indexing
+    - File relationship discovery (imports, dependencies)
+    - Impact analysis (reverse dependencies)
+    - Context packing for AI assistants
+    - Full-text search on code
+
+    No LLM or embedding model required - pure graph-based intelligence.
+  icon: https://avatars.githubusercontent.com/u/182288589?s=200&v=4
+source:
+  project: https://github.com/royisme/codebase-rag
+  commit: main
+config:
+  description: Configure Neo4j connection for code graph storage
+  env:
+    - name: NEO4J_URI
+      example: bolt://neo4j:7687
+      value: '{{codebase-rag-minimal.neo4j_uri}}'
+    - name: NEO4J_USER
+      example: neo4j
+      value: '{{codebase-rag-minimal.neo4j_user}}'
+    - name: NEO4J_DATABASE
+      example: neo4j
+      value: '{{codebase-rag-minimal.neo4j_database}}'
+  secrets:
+    - name: codebase-rag-minimal.neo4j_password
+      env: NEO4J_PASSWORD
+      example: your_neo4j_password
+  parameters:
+    type: object
+    properties:
+      neo4j_uri:
+        type: string
+        default: bolt://neo4j:7687
+      neo4j_user:
+        type: string
+        default: neo4j
+      neo4j_database:
+        type: string
+        default: neo4j
+    required:
+      - neo4j_uri
+      - neo4j_user
diff --git a/mcp-registry-submission/codebase-rag-minimal/tools.json b/mcp-registry-submission/codebase-rag-minimal/tools.json
new file mode 100644
index 0000000..e8abf99
--- /dev/null
+++ b/mcp-registry-submission/codebase-rag-minimal/tools.json
@@ -0,0 +1,117 @@
+{
+  "tools": [
+    {
+      "name": "code_graph_ingest_repo",
+      "description": "Ingest a code repository into the graph database.\n\nModes:\n- full: Complete re-ingestion (slow but thorough)\n- incremental: Only changed files (60x faster)\n\nExtracts:\n- File nodes\n- Symbol nodes (functions, classes)\n- IMPORTS relationships\n- Code structure",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "local_path": {
+            "type": "string",
+            "description": "Local repository path"
+          },
+          "repo_url": {
+            "type": "string",
+            "description": "Repository URL (optional)"
+          },
+          "mode": {
+            "type": "string",
+            "enum": ["full", "incremental"],
+            "default": "incremental",
+            "description": "Ingestion mode"
+          }
+        },
+        "required": ["local_path"]
+      }
+    },
+    {
+      "name": "code_graph_related",
+      "description": "Find files related to a query using fulltext search.\n\nReturns ranked list of relevant files with ref:// handles.",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "query": {
+            "type": "string",
+            "description": "Search query"
+          },
+          "repo_id": {
+            "type": "string",
+            "description": "Repository identifier"
+          },
+          "limit": {
+            "type": "integer",
+            "minimum": 1,
+            "maximum": 100,
+            "default": 30,
+            "description": "Max results"
+          }
+        },
+        "required": ["query", "repo_id"]
+      }
+    },
+    {
+      "name": "code_graph_impact",
+      "description": "Analyze impact of changes to a file.\n\nFinds all files that depend on the given file (reverse dependencies).\nUseful for understanding blast radius of changes.",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "repo_id": {
+            "type": "string",
+            "description": "Repository identifier"
+          },
+          "file_path": {
+            "type": "string",
+            "description": "File path to analyze"
+          },
+          "depth": {
+            "type": "integer",
+            "minimum": 1,
+            "maximum": 5,
+            "default": 2,
+            "description": "Traversal depth"
+          }
+        },
+        "required": ["repo_id", "file_path"]
+      }
+    },
+    {
+      "name": "context_pack",
+      "description": "Build context pack for AI coding assistants.\n\nPacks relevant files and symbols into a token-budgeted context suitable for LLM consumption.\n\nStages:\n- plan: Planning phase context\n- implement: Implementation context\n- review: Code review context",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "repo_id": {
+            "type": "string",
+            "description": "Repository identifier"
+          },
+          "keywords": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Keywords to search for"
+          },
+          "budget": {
+            "type": "integer",
+            "default": 8000,
+            "description": "Token budget"
+          },
+          "stage": {
+            "type": "string",
+            "enum": ["plan", "implement", "review"],
+            "default": "implement",
+            "description": "Development stage"
+          },
+          "focus_paths": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Paths to prioritize"
+          }
+        },
+        "required": ["repo_id", "keywords"]
+      }
+    }
+  ]
+}
diff --git a/mcp-registry-submission/codebase-rag-standard/server.yaml b/mcp-registry-submission/codebase-rag-standard/server.yaml
new file mode 100644
index 0000000..8d08e97
--- /dev/null
+++ b/mcp-registry-submission/codebase-rag-standard/server.yaml
@@ -0,0 +1,84 @@
+name: codebase-rag-standard
+image: royisme/codebase-rag:standard
+type: server
+meta:
+  category: development
+  tags:
+    - code-analysis
+    - knowledge-management
+    - memory-store
+    - developer-tools
+    - vector-search
+about:
+  title: Code Graph Knowledge System (Standard)
+  description: |
+    Advanced code intelligence with project memory management.
+    Combines graph-based code analysis with vector-powered memory search.
+
+    Features:
+    - All Code Graph features (repository analysis, impact analysis)
+    - Manual memory management (decisions, preferences, experiences)
+    - Vector-based memory search
+    - Project knowledge summaries
+    - Memory superseding (track decision evolution)
+
+    Requires: Embedding model (Ollama/OpenAI/Gemini)
+    No LLM required for core functionality.
+  icon: https://avatars.githubusercontent.com/u/182288589?s=200&v=4
+source:
+  project: https://github.com/royisme/codebase-rag
+  commit: main
+config:
+  description: Configure Neo4j connection and embedding provider
+  env:
+    - name: NEO4J_URI
+      example: bolt://neo4j:7687
+      value: '{{codebase-rag-standard.neo4j_uri}}'
+    - name: NEO4J_USER
+      example: neo4j
+      value: '{{codebase-rag-standard.neo4j_user}}'
+    - name: NEO4J_DATABASE
+      example: neo4j
+      value: '{{codebase-rag-standard.neo4j_database}}'
+    - name: EMBEDDING_PROVIDER
+      example: ollama
+      value: '{{codebase-rag-standard.embedding_provider}}'
+    - name: OLLAMA_BASE_URL
+      example: http://host.docker.internal:11434
+      value: '{{codebase-rag-standard.ollama_base_url}}'
+    - name: OLLAMA_EMBEDDING_MODEL
+      example: nomic-embed-text
+      value: '{{codebase-rag-standard.ollama_embedding_model}}'
+  secrets:
+    - name: codebase-rag-standard.neo4j_password
+      env: NEO4J_PASSWORD
+      example: your_neo4j_password
+    - name: codebase-rag-standard.openai_api_key
+      env: OPENAI_API_KEY
+      example: sk-...
+  parameters:
+    type: object
+    properties:
+      neo4j_uri:
+        type: string
+        default: bolt://neo4j:7687
+      neo4j_user:
+        type: string
+        default: neo4j
+      neo4j_database:
+        type: string
+        default: neo4j
+      embedding_provider:
+        type: string
+        enum: [ollama, openai, gemini]
+        default: ollama
+      ollama_base_url:
+        type: string
+        default: http://host.docker.internal:11434
+      ollama_embedding_model:
+        type: string
+        default: nomic-embed-text
+    required:
+      - neo4j_uri
+      - neo4j_user
+      - embedding_provider
diff --git a/mcp-registry-submission/codebase-rag-standard/tools.json b/mcp-registry-submission/codebase-rag-standard/tools.json
new file mode 100644
index 0000000..eb7fab4
--- /dev/null
+++ b/mcp-registry-submission/codebase-rag-standard/tools.json
@@ -0,0 +1,268 @@
+{
+  "tools": [
+    {
+      "name": "code_graph_ingest_repo",
+      "description": "Ingest a code repository into the graph database",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "local_path": {
+            "type": "string"
+          },
+          "repo_url": {
+            "type": "string"
+          },
+          "mode": {
+            "type": "string",
+            "enum": ["full", "incremental"],
+            "default": "incremental"
+          }
+        },
+        "required": ["local_path"]
+      }
+    },
+    {
+      "name": "code_graph_related",
+      "description": "Find files related to a query",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "query": {
+            "type": "string"
+          },
+          "repo_id": {
+            "type": "string"
+          },
+          "limit": {
+            "type": "integer",
+            "default": 30
+          }
+        },
+        "required": ["query", "repo_id"]
+      }
+    },
+    {
+      "name": "code_graph_impact",
+      "description": "Analyze impact of file changes",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "repo_id": {
+            "type": "string"
+          },
+          "file_path": {
+            "type": "string"
+          },
+          "depth": {
+            "type": "integer",
+            "default": 2
+          }
+        },
+        "required": ["repo_id", "file_path"]
+      }
+    },
+    {
+      "name": "context_pack",
+      "description": "Build context pack for AI assistants",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "repo_id": {
+            "type": "string"
+          },
+          "keywords": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          },
+          "budget": {
+            "type": "integer",
+            "default": 8000
+          },
+          "stage": {
+            "type": "string",
+            "enum": ["plan", "implement", "review"],
+            "default": "implement"
+          }
+        },
+        "required": ["repo_id", "keywords"]
+      }
+    },
+    {
+      "name": "add_memory",
+      "description": "Add a new memory to project knowledge base.\n\nMemory types:\n- decision: Architecture and technical choices\n- preference: Team coding styles and preferences\n- experience: Problems encountered and solutions\n- convention: Team rules and standards\n- plan: Future work and improvements\n- note: General notes",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {
+            "type": "string",
+            "description": "Project identifier"
+          },
+          "memory_type": {
+            "type": "string",
+            "enum": ["decision", "preference", "experience", "convention", "plan", "note"],
+            "description": "Type of memory"
+          },
+          "title": {
+            "type": "string",
+            "description": "Memory title"
+          },
+          "content": {
+            "type": "string",
+            "description": "Memory content"
+          },
+          "reason": {
+            "type": "string",
+            "description": "Reason or rationale"
+          },
+          "importance": {
+            "type": "number",
+            "minimum": 0,
+            "maximum": 1,
+            "default": 0.5,
+            "description": "Importance score (0-1)"
+          },
+          "tags": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "Tags for categorization"
+          }
+        },
+        "required": ["project_id", "memory_type", "title", "content"]
+      }
+    },
+    {
+      "name": "search_memories",
+      "description": "Search project memories using vector similarity and filters",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {
+            "type": "string"
+          },
+          "query": {
+            "type": "string"
+          },
+          "memory_type": {
+            "type": "string",
+            "enum": ["decision", "preference", "experience", "convention", "plan", "note"]
+          },
+          "min_importance": {
+            "type": "number",
+            "minimum": 0,
+            "maximum": 1
+          },
+          "tags": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          },
+          "limit": {
+            "type": "integer",
+            "default": 10
+          }
+        },
+        "required": ["project_id", "query"]
+      }
+    },
+    {
+      "name": "get_memory",
+      "description": "Get a specific memory by ID",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "memory_id": {
+            "type": "string"
+          }
+        },
+        "required": ["memory_id"]
+      }
+    },
+    {
+      "name": "update_memory",
+      "description": "Update an existing memory",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "memory_id": {
+            "type": "string"
+          },
+          "title": {
+            "type": "string"
+          },
+          "content": {
+            "type": "string"
+          },
+          "reason": {
+            "type": "string"
+          },
+          "importance": {
+            "type": "number"
+          },
+          "tags": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            }
+          }
+        },
+        "required": ["memory_id"]
+      }
+    },
+    {
+      "name": "delete_memory",
+      "description": "Delete a memory (soft delete)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "memory_id": {
+            "type": "string"
+          }
+        },
+        "required": ["memory_id"]
+      }
+    },
+    {
+      "name": "supersede_memory",
+      "description": "Create a new memory that supersedes an old one (tracks evolution)",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "old_memory_id": {
+            "type": "string"
+          },
+          "new_title": {
+            "type": "string"
+          },
+          "new_content": {
+            "type": "string"
+          },
+          "new_reason": {
+            "type": "string"
+          },
+          "new_importance": {
+            "type": "number"
+          }
+        },
+        "required": ["old_memory_id", "new_title", "new_content"]
+      }
+    },
+    {
+      "name": "get_project_summary",
+      "description": "Get summary of all memories for a project",
+      "inputSchema": {
+        "type": "object",
+        "properties": {
+          "project_id": {
+            "type": "string"
+          }
+        },
+        "required": ["project_id"]
+      }
+    }
+  ]
+}
diff --git a/mkdocs.yml b/mkdocs.yml
new file mode 100644
index 0000000..ebbe78c
--- /dev/null
+++ b/mkdocs.yml
@@ -0,0 +1,193 @@
+site_name: Code Graph Knowledge System
+site_url: https://code-graph.vantagecraft.dev
+site_description: AI-powered code intelligence and knowledge management system
+site_author: VantageCraft
+repo_url: https://github.com/royisme/codebase-rag
+repo_name: codebase-rag
+edit_uri: edit/main/docs/
+
+theme:
+  name: material
+  language: en
+  features:
+    - announce.dismiss
+    - content.action.edit
+    - content.action.view
+    - content.code.annotate
+    - content.code.copy
+    - content.tooltips
+    - navigation.expand
+    - navigation.footer
+    - navigation.indexes
+    - navigation.sections
+    - navigation.tabs
+    - navigation.tabs.sticky
+    - navigation.top
+    - navigation.tracking
+    - search.highlight
+    - search.share
+    - search.suggest
+    - toc.follow
+  palette:
+    # Light mode
+    - media: "(prefers-color-scheme: light)"
+      scheme: default
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/brightness-7
+        name: Switch to dark mode
+    # Dark mode
+    - media: "(prefers-color-scheme: dark)"
+      scheme: slate
+      primary: indigo
+      accent: indigo
+      toggle:
+        icon: material/brightness-4
+        name: Switch to light mode
+  font:
+    text: Roboto
+    code: Roboto Mono
+  favicon: assets/favicon.svg
+  logo: assets/logo.svg
+  icon:
+    repo: fontawesome/brands/github
+
+extra:
+  homepage: https://vantagecraft.dev
+  social:
+    - icon: fontawesome/brands/github
+      link: https://github.com/royisme/codebase-rag
+      name: GitHub Repository
+    - icon: fontawesome/brands/docker
+      link: https://hub.docker.com/r/royisme/codebase-rag
+      name: Docker Hub
+  version:
+    provider: mike
+    default: latest
+  analytics:
+    feedback:
+      title: Was this page helpful?
+      ratings:
+        - icon: material/emoticon-happy-outline
+          name: This page was helpful
+          data: 1
+          note: >-
+            Thanks for your feedback!
+        - icon: material/emoticon-sad-outline
+          name: This page could be improved
+          data: 0
+          note: >-
+            Thanks for your feedback! Help us improve by
+            <a href="https://github.com/royisme/codebase-rag/issues/new" target="_blank" rel="noopener">opening an issue</a>.
+
+plugins:
+  - search:
+      separator: '[\s\-,:!=\[\]()"`/]+|\.(?!\d)|&[lg]t;|(?!\b)(?=[A-Z][a-z])'
+  - minify:
+      minify_html: true
+  - git-revision-date-localized:
+      enable_creation_date: true
+      type: timeago
+
+markdown_extensions:
+  - abbr
+  - admonition
+  - attr_list
+  - def_list
+  - footnotes
+  - md_in_html
+  - toc:
+      permalink: true
+      toc_depth: 3
+  - pymdownx.arithmatex:
+      generic: true
+  - pymdownx.betterem:
+      smart_enable: all
+  - pymdownx.caret
+  - pymdownx.details
+  - pymdownx.emoji:
+      emoji_generator: !!python/name:material.extensions.emoji.to_svg
+      emoji_index: !!python/name:material.extensions.emoji.twemoji
+  - pymdownx.highlight:
+      anchor_linenums: true
+      line_spans: __span
+      pygments_lang_class: true
+  - pymdownx.inlinehilite
+  - pymdownx.keys
+  - pymdownx.magiclink:
+      repo_url_shorthand: true
+      user: royisme
+      repo: codebase-rag
+  - pymdownx.mark
+  - pymdownx.smartsymbols
+  - pymdownx.superfences:
+      custom_fences:
+        - name: mermaid
+          class: mermaid
+          format: !!python/name:pymdownx.superfences.fence_code_format
+  - pymdownx.tabbed:
+      alternate_style: true
+  - pymdownx.tasklist:
+      custom_checkbox: true
+  - pymdownx.tilde
+
+nav:
+  - Home: index.md
+  - Getting Started:
+    - Quick Start: getting-started/quickstart.md
+    - Installation: getting-started/installation.md
+    - Configuration: getting-started/configuration.md
+  - Deployment:
+    - Overview: deployment/overview.md
+    - Minimal Mode: deployment/minimal.md
+    - Standard Mode: deployment/standard.md
+    - Full Mode: deployment/full.md
+    - Docker Guide: deployment/docker.md
+    - Production Setup: deployment/production.md
+  - User Guide:
+    - Code Graph:
+      - Overview: guide/code-graph/overview.md
+      - Repository Ingestion: guide/code-graph/ingestion.md
+      - Search & Discovery: guide/code-graph/search.md
+      - Impact Analysis: guide/code-graph/impact.md
+      - Context Packing: guide/code-graph/context.md
+    - Memory Store:
+      - Overview: guide/memory/overview.md
+      - Manual Management: guide/memory/manual.md
+      - Vector Search: guide/memory/search.md
+      - Auto Extraction: guide/memory/extraction.md
+    - Knowledge RAG:
+      - Overview: guide/knowledge/overview.md
+      - Document Processing: guide/knowledge/documents.md
+      - Intelligent Q&A: guide/knowledge/query.md
+    - MCP Integration:
+      - Overview: guide/mcp/overview.md
+      - Claude Desktop: guide/mcp/claude-desktop.md
+      - VS Code: guide/mcp/vscode.md
+  - API Reference:
+    - REST API: api/rest.md
+    - MCP Tools: api/mcp-tools.md
+    - Python SDK: api/python-sdk.md
+  - Architecture:
+    - System Design: architecture/design.md
+    - Components: architecture/components.md
+    - Data Flow: architecture/dataflow.md
+  - Development:
+    - Contributing: development/contributing.md
+    - Development Setup: development/setup.md
+    - Testing: development/testing.md
+    - Version Management: development/version-management.md
+    - Changelog Automation: development/changelog-automation.md
+    - Release Process: development/release.md
+  - Technical Notes:
+    - Implementation Summary: IMPLEMENTATION_SUMMARY.md
+    - MCP Migration Guide: MCP_MIGRATION_GUIDE.md
+    - MCP V2 Modularization: MCP_V2_MODULARIZATION.md
+    - v0.6 Summary: v0.6-SUMMARY.md
+  - Troubleshooting: troubleshooting.md
+  - FAQ: faq.md
+  - Changelog: changelog.md
+
+copyright: |
+  &copy; 2024 <a href="https://vantagecraft.dev" target="_blank" rel="noopener">VantageCraft</a>
diff --git a/pyproject.toml b/pyproject.toml
index ef8367e..6d0ff49 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "code-graph"
-version = "0.3.0"
-description = "Add your description here"
+version = "0.7.0"
+description = "AI-powered code intelligence and knowledge management system with graph database"
 readme = "README.md"
 requires-python = ">=3.13"
 dependencies = [
@@ -35,6 +35,17 @@ dependencies = [
     "prometheus-client>=0.21.0",
 ]
 
+[project.optional-dependencies]
+dev = [
+    "bump-my-version>=0.26.0",
+    "pytest>=8.0.0",
+    "pytest-asyncio>=0.23.0",
+    "pytest-cov>=4.1.0",
+    "black>=24.0.0",
+    "isort>=5.13.0",
+    "ruff>=0.6.0",
+]
+
 [project.scripts]
 server = "start:main"
 mcp_client = "start_mcp:main"
diff --git a/scripts/bump-version.sh b/scripts/bump-version.sh
new file mode 100755
index 0000000..f8453c4
--- /dev/null
+++ b/scripts/bump-version.sh
@@ -0,0 +1,148 @@
+#!/bin/bash
+# Automated version bumping script using bump-my-version
+# Usage: ./scripts/bump-version.sh [major|minor|patch] [--dry-run] [--no-changelog]
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+# Check if bump-my-version is installed
+if ! command -v bump-my-version &> /dev/null; then
+    echo -e "${RED}Error: bump-my-version is not installed${NC}"
+    echo "Install it with: pip install bump-my-version"
+    exit 1
+fi
+
+# Parse arguments. Flags may appear in any position; the first argument that
+# is not a known flag is taken as the bump type, so `--dry-run` alone works.
+BUMP_TYPE=""
+DRY_RUN=""
+GENERATE_CHANGELOG=true
+
+for arg in "$@"; do
+    case $arg in
+        --dry-run)      DRY_RUN="--dry-run" ;;
+        --no-changelog) GENERATE_CHANGELOG=false ;;
+        # Anything else: only the first such argument becomes the bump type
+        *)              BUMP_TYPE=${BUMP_TYPE:-$arg} ;;
+    esac
+done
+
+# Default to patch when no bump type was given, then validate it
+BUMP_TYPE=${BUMP_TYPE:-patch}
+if [[ ! "$BUMP_TYPE" =~ ^(major|minor|patch)$ ]]; then
+    echo -e "${RED}Error: Invalid bump type '$BUMP_TYPE'${NC}"
+    echo "Usage: $0 [major|minor|patch] [--dry-run] [--no-changelog]"
+    exit 1
+fi
+
+# Get current version
+CURRENT_VERSION=$(grep '^version = ' pyproject.toml | cut -d'"' -f2)
+
+# Calculate new version
+case "$BUMP_TYPE" in
+    major)
+        NEW_VERSION=$(echo "$CURRENT_VERSION" | awk -F. '{print $1+1".0.0"}')
+        ;;
+    minor)
+        NEW_VERSION=$(echo "$CURRENT_VERSION" | awk -F. '{print $1"."$2+1".0"}')
+        ;;
+    patch)
+        NEW_VERSION=$(echo "$CURRENT_VERSION" | awk -F. '{print $1"."$2"."$3+1}')
+        ;;
+esac
+
+echo -e "${YELLOW}=== Version Bump Tool ===${NC}"
+echo -e "Current version: ${GREEN}$CURRENT_VERSION${NC}"
+echo -e "Bump type:       ${GREEN}$BUMP_TYPE${NC}"
+echo -e "New version:     ${GREEN}$NEW_VERSION${NC}"
+echo ""
+
+if [[ -n "$DRY_RUN" ]]; then
+    echo -e "${YELLOW}DRY RUN MODE - No changes will be made${NC}"
+    echo ""
+fi
+
+# Check for uncommitted changes
+if [[ -z "$DRY_RUN" ]] && ! git diff-index --quiet HEAD --; then
+    echo -e "${RED}Error: You have uncommitted changes${NC}"
+    echo "Please commit or stash your changes before bumping version"
+    git status --short
+    exit 1
+fi
+
+# Confirm with user (unless dry run)
+if [[ -z "$DRY_RUN" ]]; then
+    echo -e "${YELLOW}This will:${NC}"
+    if [[ "$GENERATE_CHANGELOG" == true ]]; then
+        echo "  1. Generate changelog from git commits"
+        echo "  2. Update version in pyproject.toml, src/__version__.py"
+        echo "  3. Create a git commit"
+        echo "  4. Create a git tag v$NEW_VERSION"
+    else
+        echo "  1. Update version in pyproject.toml, src/__version__.py"
+        echo "  2. Create a git commit"
+        echo "  3. Create a git tag v$NEW_VERSION"
+    fi
+    echo ""
+    read -p "Continue? (y/N) " -n 1 -r
+    echo
+    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
+        echo "Aborted"
+        exit 0
+    fi
+fi
+
+# Generate changelog if enabled
+if [[ "$GENERATE_CHANGELOG" == true ]] && [[ -z "$DRY_RUN" ]]; then
+    echo ""
+    echo -e "${BLUE}Generating changelog from commits...${NC}"
+
+    # Check if generate-changelog.py exists
+    if [[ -f "scripts/generate-changelog.py" ]]; then
+        if python3 scripts/generate-changelog.py --update --version "$NEW_VERSION"; then
+            echo -e "${GREEN}✓ Changelog generated and updated${NC}"
+            git add docs/changelog.md
+        else
+            echo -e "${YELLOW}⚠ Changelog generation failed, continuing anyway${NC}"
+        fi
+    else
+        echo -e "${YELLOW}⚠ scripts/generate-changelog.py not found, skipping changelog generation${NC}"
+    fi
+fi
+
+# Run bump-my-version. The call is the `if` condition itself: with `set -e`
+# a failing standalone call would abort before the failure branch could run.
+echo ""
+echo -e "${GREEN}Running bump-my-version...${NC}"
+BUMP_ARGS=("$BUMP_TYPE" --verbose)
+if [[ -n "$DRY_RUN" ]]; then
+    BUMP_ARGS+=(--dry-run --allow-dirty)
+elif ! git diff --cached --quiet; then
+    # Staged changes (the changelog) would trip the dirty-tree check; allow them
+    BUMP_ARGS+=(--allow-dirty)
+fi
+if bump-my-version bump "${BUMP_ARGS[@]}"; then
+    echo ""
+    echo -e "${GREEN}✓ Version bumped successfully!${NC}"
+    echo ""
+    if [[ -z "$DRY_RUN" ]]; then
+        echo -e "${YELLOW}Next steps:${NC}"
+        echo "  1. Review the changes: git show"
+        echo "  2. Push changes: git push origin main"
+        echo "  3. Push tag: git push origin v$NEW_VERSION"
+        echo ""
+        echo -e "${GREEN}GitHub Actions will automatically build and publish Docker images${NC}"
+    else
+        echo -e "${YELLOW}This was a dry run. No changes were made.${NC}"
+        echo "Run without --dry-run to apply changes."
+    fi
+else
+    echo -e "${RED}✗ Version bump failed${NC}"
+    exit 1
+fi
diff --git a/scripts/docker-deploy.sh b/scripts/docker-deploy.sh
new file mode 100755
index 0000000..1c0cfc0
--- /dev/null
+++ b/scripts/docker-deploy.sh
@@ -0,0 +1,254 @@
+#!/bin/bash
+# Interactive deployment script for Code Graph Knowledge System.
+# Asks for a deployment mode, prepares .env, then starts the stack with docker-compose.
+set -e  # abort on the first unhandled command failure
+
+# ANSI color escape sequences, stored literally and expanded by `echo -e`
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+BLUE='\033[0;34m'
+NC='\033[0m' # No Color
+
+echo -e "${BLUE}"  # the banner below is printed in blue
+echo "╔════════════════════════════════════════════════════════════╗"
+echo "║     Code Graph Knowledge System - Docker Deployment       ║"
+echo "╚════════════════════════════════════════════════════════════╝"
+echo -e "${NC}"
+echo ""
+
+# Status-line helpers: each prints a colored symbol, two spaces, then the message in $1.
+print_info() {  # blue "ℹ": informational
+    echo -e "${BLUE}ℹ${NC}  $1"
+}
+
+print_success() {  # green "✓": a step succeeded
+    echo -e "${GREEN}✓${NC}  $1"
+}
+
+print_warning() {  # yellow "⚠": something needs attention
+    echo -e "${YELLOW}⚠${NC}  $1"
+}
+
+print_error() {  # red "✗": failure (written to stdout like the others)
+    echo -e "${RED}✗${NC}  $1"
+}
+
+# Check Docker is installed
+if ! command -v docker &> /dev/null; then
+    print_error "Docker is not installed. Please install Docker first."
+    exit 1
+fi
+
+if ! command -v docker-compose &> /dev/null; then
+    print_error "docker-compose is not installed. Please install docker-compose first."
+    exit 1
+fi
+
+print_success "Docker and docker-compose are installed"
+echo ""
+
+# Describe the three deployment modes, then read the selection into $choice.
+# Only the mode titles carry color codes and need `echo -e`.
+printf '%s\n' "Choose deployment mode:" ""
+echo -e "${GREEN}1) Minimal${NC}  - Code Graph only"
+printf '%s\n' "   ✓ Repository ingestion and code analysis" \
+    "   ✓ File search and relationship discovery" \
+    "   ✓ Impact analysis and context packing" \
+    "   ✗ No LLM or Embedding required" \
+    ""
+echo -e "${YELLOW}2) Standard${NC} - Code Graph + Memory Store"
+printf '%s\n' "   ✓ All Minimal features" \
+    "   ✓ Manual memory management" \
+    "   ✓ Vector-based memory search" \
+    "   ⚠  Embedding provider required" \
+    ""
+echo -e "${BLUE}3) Full${NC}     - All features"
+printf '%s\n' "   ✓ All Standard features" \
+    "   ✓ Automatic memory extraction" \
+    "   ✓ Knowledge base RAG" \
+    "   ✓ Document processing and Q&A" \
+    "   ⚠  LLM + Embedding required" \
+    ""
+
+read -p "Enter choice [1-3]: " choice
+echo ""
+
+# Map the menu answer to MODE; anything other than 1, 2 or 3 ends the script.
+case "$choice" in
+    1) MODE="minimal" ;;
+    2) MODE="standard" ;;
+    3) MODE="full" ;;
+    *)
+        print_error "Invalid choice"
+        exit 1
+        ;;
+esac
+COMPOSE_FILE="docker/docker-compose.$MODE.yml"  # one compose file per mode
+
+# Full mode only: optionally run Ollama as a container via the compose profile
+# "with-ollama". In the other modes OLLAMA_PROFILE is not assigned at all.
+if [ "$MODE" = "full" ]; then
+    echo "Do you want to include local Ollama in Docker?"
+    echo "(If you already have Ollama on your host, choose No)"
+    read -p "[y/N]: " include_ollama
+
+    case "$include_ollama" in
+        y|Y)
+            OLLAMA_PROFILE="--profile with-ollama"
+            print_info "Will start Ollama in Docker container"
+            ;;
+        *)
+            OLLAMA_PROFILE=""
+            print_info "Assuming Ollama or other LLM on host/cloud"
+            ;;
+    esac
+    echo ""
+fi
+
+# First run: seed .env from the template that matches the chosen mode and let
+# the user fill it in before deployment continues.
+if [ ! -f ".env" ]; then
+    env_template="docker/.env.template/.env.$MODE"
+    print_warning ".env file not found"
+    print_info "Creating .env from template..."
+
+    # Without a template there is nothing to copy, so give up here.
+    if [ ! -f "$env_template" ]; then
+        print_error "Template file not found: $env_template"
+        exit 1
+    fi
+
+    cp "$env_template" .env
+    print_success ".env file created"
+    echo ""
+    print_warning "Please edit .env file with your configuration"
+    print_info "Required settings for $MODE mode:"
+
+    # NEO4J_PASSWORD is listed for every mode; the rest depends on the mode.
+    echo "  - NEO4J_PASSWORD"
+    case $MODE in
+        standard)
+            echo "  - EMBEDDING_PROVIDER (ollama/openai/gemini)"
+            echo "  - Provider-specific settings (API keys, etc.)"
+            ;;
+        full)
+            echo "  - LLM_PROVIDER (ollama/openai/gemini/openrouter)"
+            echo "  - EMBEDDING_PROVIDER"
+            echo "  - Provider-specific settings (API keys, etc.)"
+            ;;
+    esac
+
+    echo ""
+    read -p "Press Enter after configuring .env file..."
+fi
+
+# Display pre-deployment summary
+echo ""
+print_info "Deployment Summary:"
+echo "  Mode: $MODE"
+echo "  Compose file: $COMPOSE_FILE"
+if [ -n "$OLLAMA_PROFILE" ]; then
+    echo "  Ollama: Included in Docker"
+fi
+echo ""
+
+# Confirm deployment
+read -p "Proceed with deployment? [Y/n]: " confirm
+if [ "$confirm" = "n" ] || [ "$confirm" = "N" ]; then
+    print_info "Deployment cancelled"
+    exit 0
+fi
+
+echo ""
+print_info "Starting deployment..."
+
+# Images: only the exact answers "pull" and "build" act; a failed pull becomes a build
+read -p "Pull images from Docker Hub or build locally? [pull/build]: " build_choice
+
+if [ "$build_choice" = "pull" ]; then
+    print_info "Pulling images from Docker Hub..."
+    if ! docker pull royisme/codebase-rag:$MODE; then
+        print_warning "Failed to pull image, will build locally"
+        build_choice="build"
+    fi
+fi
+
+if [ "$build_choice" = "build" ]; then
+    print_info "Building Docker images..."
+    docker-compose -f $COMPOSE_FILE build
+    print_success "Build complete"
+fi
+
+# Start services. $OLLAMA_PROFILE stays unquoted on purpose: when this script
+# sets it, it is either empty or the two words "--profile with-ollama".
+echo ""
+print_info "Starting services..."
+docker-compose -f $COMPOSE_FILE $OLLAMA_PROFILE up -d
+
+# Wait for services: a fixed five-second pause, nothing is polled
+echo ""
+print_info "Waiting for services to be healthy..."
+sleep 5
+
+# Check health: a service counts as up when its name appears in the `docker ps`
+# listing; the first one missing ends the script with status 1.
+if ! docker ps | grep -q "codebase-rag-mcp"; then
+    print_error "MCP service failed to start"
+    print_info "Check logs with: docker-compose -f $COMPOSE_FILE logs"
+    exit 1
+fi
+print_success "MCP service is running"
+
+if ! docker ps | grep -q "codebase-rag-neo4j"; then
+    print_error "Neo4j failed to start"
+    exit 1
+fi
+print_success "Neo4j is running"
+
+# Success banner, then the local endpoints and a few handy compose commands.
+# The rule is stored once; `echo -e` expands the color escapes inside it.
+banner_rule="${GREEN}═══════════════════════════════════════════════════════════${NC}"
+echo ""
+echo -e "$banner_rule"
+print_success "Deployment successful!"
+echo -e "$banner_rule"
+echo ""
+printf '%s\n' "Access points:" \
+    "  • API:          http://localhost:8000" \
+    "  • API Docs:     http://localhost:8000/docs" \
+    "  • Neo4j:        http://localhost:7474" \
+    "  • Health Check: http://localhost:8000/api/v1/health"
+
+# The Ollama endpoint is listed only when OLLAMA_PROFILE is non-empty
+[ -z "$OLLAMA_PROFILE" ] || echo "  • Ollama:       http://localhost:11434"
+
+printf '%s\n' "" "Useful commands:" \
+    "  • View logs:    docker-compose -f $COMPOSE_FILE logs -f" \
+    "  • Stop:         docker-compose -f $COMPOSE_FILE down" \
+    "  • Restart:      docker-compose -f $COMPOSE_FILE restart" \
+    ""
+
+case $MODE in
+    minimal)
+        print_info "Minimal mode - Available MCP tools:"
+        echo "  • code_graph_ingest_repo"
+        echo "  • code_graph_related"
+        echo "  • code_graph_impact"
+        echo "  • context_pack"
+        ;;
+    standard)
+        print_info "Standard mode - Available MCP tools:"
+        echo "  • All Code Graph tools"
+        echo "  • add_memory, get_memory, update_memory, delete_memory"
+        echo "  • search_memories (vector search)"
+        ;;
+    full)
+        print_info "Full mode - All MCP tools available"
+        echo "  • Code Graph, Memory, Knowledge RAG"
+        echo "  • Automatic extraction from git/conversations"
+        ;;
+esac
+
+echo ""
+print_success "Ready to use!"
diff --git a/scripts/generate-changelog.py b/scripts/generate-changelog.py
new file mode 100755
index 0000000..6c81ba2
--- /dev/null
+++ b/scripts/generate-changelog.py
@@ -0,0 +1,350 @@
+#!/usr/bin/env python3
+"""
+Automatic Changelog Generator from Git Commits
+
+Generates changelog entries from git commits following Conventional Commits format:
+- feat: New features
+- fix: Bug fixes
+- docs: Documentation changes
+- perf: Performance improvements
+- refactor: Code refactoring
+- test: Test updates
+- chore: Build/tooling changes
+"""
+
+import re
+import subprocess
+import sys
+from collections import defaultdict
+from datetime import datetime
+from typing import Dict, List, Tuple
+
+
+# Conventional Commits type -> Markdown heading of the changelog section it is filed under
+COMMIT_TYPE_MAP = {
+    "feat": "### Added",
+    "fix": "### Fixed",
+    "docs": "### Documentation",
+    "perf": "### Performance",
+    "refactor": "### Changed",  # shares a section with "style"
+    "test": "### Testing",
+    "style": "### Changed",
+    "build": "### Build System",
+    "ci": "### CI/CD",
+    "chore": "### Maintenance",  # also the type given to messages that are not conventional commits
+}
+
+# Substrings that mark a commit as breaking when found in its body text
+BREAKING_CHANGE_MARKERS = ["BREAKING CHANGE:", "BREAKING-CHANGE:", "!:"]
+
+
+def get_git_commits(from_tag: str = None, to_ref: str = "HEAD") -> List[Tuple[str, str, str]]:
+    """
+    Get git commits between two references.
+
+    Returns: List of (hash, date, message) tuples
+    """
+    if from_tag:
+        range_spec = f"{from_tag}..{to_ref}"
+    else:
+        # Get all commits
+        range_spec = to_ref
+
+    try:
+        result = subprocess.run(
+            ["git", "log", range_spec, "--pretty=format:%H|%as|%s%n%b%n---END---"],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+
+        commits = []
+        current_commit = []
+
+        for line in result.stdout.split('\n'):
+            if line == '---END---':
+                if current_commit:
+                    full_message = '\n'.join(current_commit[1:])
+                    hash_date_subject = current_commit[0].split('|')
+                    if len(hash_date_subject) >= 3:
+                        commits.append((
+                            hash_date_subject[0],
+                            hash_date_subject[1],
+                            full_message
+                        ))
+                current_commit = []
+            else:
+                current_commit.append(line)
+
+        return commits
+    except subprocess.CalledProcessError as e:
+        print(f"Error getting git commits: {e}", file=sys.stderr)
+        return []
+
+
+def parse_commit_message(message: str) -> Dict:
+    """
+    Parse a conventional commit message.
+
+    Returns dict with: type, scope, subject, body, breaking
+    """
+    # Match conventional commit format: type(scope): subject
+    match = re.match(r'^(\w+)(?:\(([^)]+)\))?(!)?:\s*(.+?)(?:\n\n(.*))?$',
+                     message, re.DOTALL)
+
+    if not match:
+        # Not a conventional commit, treat as misc
+        return {
+            "type": "chore",
+            "scope": None,
+            "subject": message.split('\n')[0],
+            "body": '\n'.join(message.split('\n')[1:]),
+            "breaking": False
+        }
+
+    type_, scope, breaking_mark, subject, body = match.groups()
+    body = body or ""
+
+    # Check for breaking changes
+    is_breaking = bool(breaking_mark) or any(
+        marker in body for marker in BREAKING_CHANGE_MARKERS
+    )
+
+    return {
+        "type": type_.lower(),
+        "scope": scope,
+        "subject": subject.strip(),
+        "body": body.strip(),
+        "breaking": is_breaking
+    }
+
+
+def group_commits(commits: List[Tuple[str, str, str]]) -> Dict[str, List[Dict]]:
+    """
+    Group commits by type.
+
+    Returns: Dict mapping section name to list of commits
+    """
+    grouped = defaultdict(list)
+    breaking_changes = []
+
+    for hash_, date, message in commits:
+        parsed = parse_commit_message(message)
+        parsed["hash"] = hash_[:7]  # Short hash
+        parsed["date"] = date
+
+        # Handle breaking changes separately
+        if parsed["breaking"]:
+            breaking_changes.append(parsed)
+
+        # Group by type
+        section = COMMIT_TYPE_MAP.get(parsed["type"], "### Other")
+        grouped[section].append(parsed)
+
+    # Add breaking changes section at the top if any
+    if breaking_changes:
+        grouped["### ⚠️ Breaking Changes"] = breaking_changes
+
+    return dict(grouped)
+
+
+def format_changelog_entry(commits: List[Dict]) -> str:
+    """
+    Format commits into changelog entry lines.
+    """
+    lines = []
+    for commit in commits:
+        scope = f"**{commit['scope']}**: " if commit['scope'] else ""
+        subject = commit['subject']
+
+        # Capitalize first letter if not already
+        if subject and subject[0].islower():
+            subject = subject[0].upper() + subject[1:]
+
+        lines.append(f"- {scope}{subject}")
+
+    return '\n'.join(lines)
+
+
+def generate_changelog_section(version: str, date: str, grouped_commits: Dict[str, List[Dict]]) -> str:
+    """
+    Generate a complete changelog section for a version.
+    """
+    lines = [
+        f"## [{version}] - {date}",
+        ""
+    ]
+
+    # Define preferred order
+    preferred_order = [
+        "### ⚠️ Breaking Changes",
+        "### Added",
+        "### Fixed",
+        "### Changed",
+        "### Performance",
+        "### Documentation",
+        "### Testing",
+        "### Build System",
+        "### CI/CD",
+        "### Maintenance",
+        "### Other",
+    ]
+
+    # Add sections in preferred order
+    for section in preferred_order:
+        if section in grouped_commits:
+            lines.append(section)
+            lines.append("")
+            lines.append(format_changelog_entry(grouped_commits[section]))
+            lines.append("")
+
+    return '\n'.join(lines)
+
+
+def get_latest_tag() -> str:
+    """Get the most recent git tag."""
+    try:
+        result = subprocess.run(
+            ["git", "describe", "--tags", "--abbrev=0"],
+            capture_output=True,
+            text=True,
+            check=True
+        )
+        return result.stdout.strip()
+    except subprocess.CalledProcessError:
+        return None
+
+
+def get_current_version() -> str:
+    """Get version from pyproject.toml."""
+    try:
+        with open("pyproject.toml", "r") as f:
+            for line in f:
+                if line.startswith("version = "):
+                    return line.split('"')[1]
+    except FileNotFoundError:
+        pass
+    return "0.1.0"
+
+
+def main():
+    """Main function to generate changelog."""
+    import argparse
+
+    parser = argparse.ArgumentParser(
+        description="Generate changelog from git commits"
+    )
+    parser.add_argument(
+        "--from-tag",
+        help="Start from this tag (default: latest tag or all commits)"
+    )
+    parser.add_argument(
+        "--to-ref",
+        default="HEAD",
+        help="End at this ref (default: HEAD)"
+    )
+    parser.add_argument(
+        "--version",
+        help="Version for this changelog entry (default: from pyproject.toml)"
+    )
+    parser.add_argument(
+        "--date",
+        help="Date for this entry (default: today)"
+    )
+    parser.add_argument(
+        "--output",
+        help="Output file (default: print to stdout)"
+    )
+    parser.add_argument(
+        "--update",
+        action="store_true",
+        help="Update docs/changelog.md by prepending new section"
+    )
+
+    args = parser.parse_args()
+
+    # Determine from_tag
+    from_tag = args.from_tag
+    if not from_tag:
+        from_tag = get_latest_tag()
+        if from_tag:
+            print(f"Using latest tag as starting point: {from_tag}", file=sys.stderr)
+        else:
+            print("No tags found, processing all commits", file=sys.stderr)
+
+    # Get commits
+    commits = get_git_commits(from_tag, args.to_ref)
+
+    if not commits:
+        print("No commits found to process", file=sys.stderr)
+        return 1
+
+    print(f"Processing {len(commits)} commits...", file=sys.stderr)
+
+    # Group commits
+    grouped = group_commits(commits)
+
+    # Determine version and date
+    version = args.version or get_current_version()
+    date = args.date or datetime.now().strftime("%Y-%m-%d")
+
+    # Generate changelog section
+    changelog_section = generate_changelog_section(version, date, grouped)
+
+    # Output
+    if args.update:
+        # Read existing changelog
+        try:
+            with open("docs/changelog.md", "r") as f:
+                existing_content = f.read()
+        except FileNotFoundError:
+            print("docs/changelog.md not found, creating new file", file=sys.stderr)
+            existing_content = """# Changelog
+
+All notable changes to the Code Graph Knowledge System will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+"""
+
+        # Find [Unreleased] section and insert after it
+        unreleased_match = re.search(r'## \[Unreleased\].*?\n\n', existing_content, re.DOTALL)
+
+        if unreleased_match:
+            insert_pos = unreleased_match.end()
+            new_content = (
+                existing_content[:insert_pos] +
+                changelog_section + "\n\n" +
+                existing_content[insert_pos:]
+            )
+        else:
+            # Just prepend after header
+            header_end = existing_content.find('\n\n') + 2
+            new_content = (
+                existing_content[:header_end] +
+                changelog_section + "\n\n" +
+                existing_content[header_end:]
+            )
+
+        # Write back
+        with open("docs/changelog.md", "w") as f:
+            f.write(new_content)
+
+        print(f"✅ Updated docs/changelog.md with v{version} entries", file=sys.stderr)
+
+    elif args.output:
+        with open(args.output, "w") as f:
+            f.write(changelog_section)
+        print(f"✅ Wrote changelog to {args.output}", file=sys.stderr)
+
+    else:
+        print(changelog_section)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/__version__.py b/src/__version__.py
new file mode 100644
index 0000000..79d4b33
--- /dev/null
+++ b/src/__version__.py
@@ -0,0 +1,30 @@
+"""Version information for Code Graph Knowledge System."""
+
+__version__ = "0.7.0"
+__version_info__ = tuple(int(i) for i in __version__.split("."))  # e.g. (0, 7, 0); int() fails on non-numeric parts
+
+# Feature flags; all hard-coded to True here, the notes give the version each feature is available since
+FEATURES = {
+    "code_graph": True,           # Available since 0.1.0
+    "memory_store": True,          # Available since 0.6.0
+    "auto_extraction": True,       # Available since 0.7.0
+    "knowledge_rag": True,         # Available since 0.2.0
+}
+
+# Names of the deployment modes
+DEPLOYMENT_MODES = ["minimal", "standard", "full"]
+
+
+def get_version() -> str:
+    """Return the package version string, i.e. the module-level ``__version__``."""
+    return __version__
+
+
+def get_version_info() -> tuple:
+    """Return ``__version_info__``: the version's dot-separated parts as a tuple of ints."""
+    return __version_info__
+
+
+def get_features() -> dict:
+    """Return a shallow copy of ``FEATURES``, so changes by the caller do not touch the module-level dict."""
+    return FEATURES.copy()