From 1faca8d673e2aec9a19196fa9d00d0b4b9989e3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9mie=20Gisserot?= Date: Thu, 26 Mar 2026 20:30:10 +0100 Subject: [PATCH] feat: Implement comprehensive security scanning including Gitleaks, Unicode checks, custom scripts, and Git hooks managed by Lefthook. --- .githooks/commit-msg | 71 +++++++++++ .githooks/pre-commit | 71 +++++++++++ .githooks/pre-push | 71 +++++++++++ .github/workflows/security.yml | 74 ++++++++++++ .gitignore | 4 + .gitleaks.toml | 38 ++++++ CONTRIBUTING.md | 96 ++++++++++++++- SECURITY.md | 129 +++++++++++++++++++- lefthook.yml | 56 +++++++++ scripts/security-scan.py | 215 +++++++++++++++++++++++++++++++++ scripts/unicode-check.py | 134 ++++++++++++++++++++ 11 files changed, 952 insertions(+), 7 deletions(-) create mode 100755 .githooks/commit-msg create mode 100755 .githooks/pre-commit create mode 100755 .githooks/pre-push create mode 100644 .github/workflows/security.yml create mode 100644 .gitleaks.toml create mode 100644 lefthook.yml create mode 100755 scripts/security-scan.py create mode 100755 scripts/unicode-check.py diff --git a/.githooks/commit-msg b/.githooks/commit-msg new file mode 100755 index 0000000..ce8a2fc --- /dev/null +++ b/.githooks/commit-msg @@ -0,0 +1,71 @@ +#!/bin/sh + +if [ "$LEFTHOOK_VERBOSE" = "1" -o "$LEFTHOOK_VERBOSE" = "true" ]; then + set -x +fi + +if [ "$LEFTHOOK" = "0" ]; then + exit 0 +fi + +call_lefthook() +{ + if test -n "$LEFTHOOK_BIN" + then + "$LEFTHOOK_BIN" "$@" + elif lefthook -h >/dev/null 2>&1 + then + lefthook "$@" + elif /opt/homebrew/bin/lefthook -h >/dev/null 2>&1 + then + /opt/homebrew/bin/lefthook "$@" + else + dir="$(git rev-parse --show-toplevel)" + osArch=$(uname | tr '[:upper:]' '[:lower:]') + cpuArch=$(uname -m | sed 's/aarch64/arm64/;s/x86_64/x64/') + if test -f "$dir/node_modules/lefthook-${osArch}-${cpuArch}/bin/lefthook" + then + "$dir/node_modules/lefthook-${osArch}-${cpuArch}/bin/lefthook" "$@" + elif test -f 
"$dir/node_modules/@evilmartians/lefthook/bin/lefthook-${osArch}-${cpuArch}/lefthook" + then + "$dir/node_modules/@evilmartians/lefthook/bin/lefthook-${osArch}-${cpuArch}/lefthook" "$@" + elif test -f "$dir/node_modules/@evilmartians/lefthook-installer/bin/lefthook" + then + "$dir/node_modules/@evilmartians/lefthook-installer/bin/lefthook" "$@" + elif test -f "$dir/node_modules/lefthook/bin/index.js" + then + "$dir/node_modules/lefthook/bin/index.js" "$@" + elif go tool lefthook -h >/dev/null 2>&1 + then + go tool lefthook "$@" + elif bundle exec lefthook -h >/dev/null 2>&1 + then + bundle exec lefthook "$@" + elif yarn lefthook -h >/dev/null 2>&1 + then + yarn lefthook "$@" + elif pnpm lefthook -h >/dev/null 2>&1 + then + pnpm lefthook "$@" + elif swift package lefthook >/dev/null 2>&1 + then + swift package --build-path .build/lefthook --disable-sandbox lefthook "$@" + elif command -v mint >/dev/null 2>&1 + then + mint run csjones/lefthook-plugin "$@" + elif uv run lefthook -h >/dev/null 2>&1 + then + uv run lefthook "$@" + elif mise exec -- lefthook -h >/dev/null 2>&1 + then + mise exec -- lefthook "$@" + elif devbox run lefthook -h >/dev/null 2>&1 + then + devbox run lefthook "$@" + else + echo "Can't find lefthook in PATH" + fi + fi +} + +call_lefthook run "commit-msg" "$@" diff --git a/.githooks/pre-commit b/.githooks/pre-commit new file mode 100755 index 0000000..2132d1c --- /dev/null +++ b/.githooks/pre-commit @@ -0,0 +1,71 @@ +#!/bin/sh + +if [ "$LEFTHOOK_VERBOSE" = "1" -o "$LEFTHOOK_VERBOSE" = "true" ]; then + set -x +fi + +if [ "$LEFTHOOK" = "0" ]; then + exit 0 +fi + +call_lefthook() +{ + if test -n "$LEFTHOOK_BIN" + then + "$LEFTHOOK_BIN" "$@" + elif lefthook -h >/dev/null 2>&1 + then + lefthook "$@" + elif /opt/homebrew/bin/lefthook -h >/dev/null 2>&1 + then + /opt/homebrew/bin/lefthook "$@" + else + dir="$(git rev-parse --show-toplevel)" + osArch=$(uname | tr '[:upper:]' '[:lower:]') + cpuArch=$(uname -m | sed 's/aarch64/arm64/;s/x86_64/x64/') + if 
test -f "$dir/node_modules/lefthook-${osArch}-${cpuArch}/bin/lefthook" + then + "$dir/node_modules/lefthook-${osArch}-${cpuArch}/bin/lefthook" "$@" + elif test -f "$dir/node_modules/@evilmartians/lefthook/bin/lefthook-${osArch}-${cpuArch}/lefthook" + then + "$dir/node_modules/@evilmartians/lefthook/bin/lefthook-${osArch}-${cpuArch}/lefthook" "$@" + elif test -f "$dir/node_modules/@evilmartians/lefthook-installer/bin/lefthook" + then + "$dir/node_modules/@evilmartians/lefthook-installer/bin/lefthook" "$@" + elif test -f "$dir/node_modules/lefthook/bin/index.js" + then + "$dir/node_modules/lefthook/bin/index.js" "$@" + elif go tool lefthook -h >/dev/null 2>&1 + then + go tool lefthook "$@" + elif bundle exec lefthook -h >/dev/null 2>&1 + then + bundle exec lefthook "$@" + elif yarn lefthook -h >/dev/null 2>&1 + then + yarn lefthook "$@" + elif pnpm lefthook -h >/dev/null 2>&1 + then + pnpm lefthook "$@" + elif swift package lefthook >/dev/null 2>&1 + then + swift package --build-path .build/lefthook --disable-sandbox lefthook "$@" + elif command -v mint >/dev/null 2>&1 + then + mint run csjones/lefthook-plugin "$@" + elif uv run lefthook -h >/dev/null 2>&1 + then + uv run lefthook "$@" + elif mise exec -- lefthook -h >/dev/null 2>&1 + then + mise exec -- lefthook "$@" + elif devbox run lefthook -h >/dev/null 2>&1 + then + devbox run lefthook "$@" + else + echo "Can't find lefthook in PATH" + fi + fi +} + +call_lefthook run "pre-commit" "$@" diff --git a/.githooks/pre-push b/.githooks/pre-push new file mode 100755 index 0000000..1c83448 --- /dev/null +++ b/.githooks/pre-push @@ -0,0 +1,71 @@ +#!/bin/sh + +if [ "$LEFTHOOK_VERBOSE" = "1" -o "$LEFTHOOK_VERBOSE" = "true" ]; then + set -x +fi + +if [ "$LEFTHOOK" = "0" ]; then + exit 0 +fi + +call_lefthook() +{ + if test -n "$LEFTHOOK_BIN" + then + "$LEFTHOOK_BIN" "$@" + elif lefthook -h >/dev/null 2>&1 + then + lefthook "$@" + elif /opt/homebrew/bin/lefthook -h >/dev/null 2>&1 + then + /opt/homebrew/bin/lefthook "$@" + 
else + dir="$(git rev-parse --show-toplevel)" + osArch=$(uname | tr '[:upper:]' '[:lower:]') + cpuArch=$(uname -m | sed 's/aarch64/arm64/;s/x86_64/x64/') + if test -f "$dir/node_modules/lefthook-${osArch}-${cpuArch}/bin/lefthook" + then + "$dir/node_modules/lefthook-${osArch}-${cpuArch}/bin/lefthook" "$@" + elif test -f "$dir/node_modules/@evilmartians/lefthook/bin/lefthook-${osArch}-${cpuArch}/lefthook" + then + "$dir/node_modules/@evilmartians/lefthook/bin/lefthook-${osArch}-${cpuArch}/lefthook" "$@" + elif test -f "$dir/node_modules/@evilmartians/lefthook-installer/bin/lefthook" + then + "$dir/node_modules/@evilmartians/lefthook-installer/bin/lefthook" "$@" + elif test -f "$dir/node_modules/lefthook/bin/index.js" + then + "$dir/node_modules/lefthook/bin/index.js" "$@" + elif go tool lefthook -h >/dev/null 2>&1 + then + go tool lefthook "$@" + elif bundle exec lefthook -h >/dev/null 2>&1 + then + bundle exec lefthook "$@" + elif yarn lefthook -h >/dev/null 2>&1 + then + yarn lefthook "$@" + elif pnpm lefthook -h >/dev/null 2>&1 + then + pnpm lefthook "$@" + elif swift package lefthook >/dev/null 2>&1 + then + swift package --build-path .build/lefthook --disable-sandbox lefthook "$@" + elif command -v mint >/dev/null 2>&1 + then + mint run csjones/lefthook-plugin "$@" + elif uv run lefthook -h >/dev/null 2>&1 + then + uv run lefthook "$@" + elif mise exec -- lefthook -h >/dev/null 2>&1 + then + mise exec -- lefthook "$@" + elif devbox run lefthook -h >/dev/null 2>&1 + then + devbox run lefthook "$@" + else + echo "Can't find lefthook in PATH" + fi + fi +} + +call_lefthook run "pre-push" "$@" diff --git a/.github/workflows/security.yml b/.github/workflows/security.yml new file mode 100644 index 0000000..3641e96 --- /dev/null +++ b/.github/workflows/security.yml @@ -0,0 +1,74 @@ +name: Security Scan + +on: + pull_request: + branches: [main] + push: + branches: [main] + +permissions: + contents: read + security-events: write + +jobs: + scan: + runs-on: ubuntu-latest 
+ steps: + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install Cisco Skill Scanner + run: | + pip install cisco-ai-skill-scanner + + - name: Scan skills with Cisco Scanner + run: | + skill-scanner scan ./ \ + --format sarif \ + --output cisco-results.sarif \ + --policy medium \ + || true + continue-on-error: true + + - name: Upload SARIF results + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: cisco-results.sarif + category: cisco-skill-scanner + continue-on-error: true + + - name: Run local security scan + run: | + python3 scripts/security-scan.py $(find . -name "SKILL.md" | head -20) + continue-on-error: true + + - name: Run Unicode check + run: | + python3 scripts/unicode-check.py $(find . -name "*.md" | head -20) + continue-on-error: true + + - name: Run Gitleaks + uses: gitleaks/gitleaks-action@v2 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + continue-on-error: true + + # Summary job that fails if critical issues found + security-summary: + runs-on: ubuntu-latest + needs: scan + if: always() + steps: + - name: Security Check Summary + run: | + echo "✅ Security scan completed" + echo "Check the SARIF results in the Security tab for details" + echo "" + echo "If critical issues were found, they will be listed above" diff --git a/.gitignore b/.gitignore index 3f88806..449aa1d 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,7 @@ build/ # Letta local config .letta/ + +# Security scan results +*.sarif +results.sarif diff --git a/.gitleaks.toml b/.gitleaks.toml new file mode 100644 index 0000000..ced6553 --- /dev/null +++ b/.gitleaks.toml @@ -0,0 +1,38 @@ +# Gitleaks configuration for ledjay-skills +# https://github.com/gitleaks/gitleaks + +title = "ledjay-skills secrets detection" + +[extend] +# Use default rules plus custom ones +useDefault = true + +# Allowlist - files that might have false positives 
+[[allowlists]] +description = "Allow example files" +paths = [ + '''_template/.*''', + '''examples/.*''', + '''\.design-system/.*''', +] + +# Custom rules for skill-specific patterns +[[rules]] +id = "llm-api-key" +description = "LLM API Key" +regex = '''(?i)(openai|anthropic|claude|gemini|groq)[-_]?(api[_-]?key|key)\s*[=:]\s*['\"][a-zA-Z0-9_-]{20,}['\"]''' +tags = ["api", "llm", "key"] + +[[rules]] +id = "mcp-server-token" +description = "MCP Server Token" +regex = '''(?i)mcp[_-]?(token|secret|key)\s*[=:]\s*['\"][a-zA-Z0-9_-]{16,}['\"]''' +tags = ["mcp", "token"] + +# Entropy-based detection for random strings +[[rules]] +id = "high-entropy-string" +description = "High entropy string (potential secret)" +regex = '''['"]([a-zA-Z0-9_-]{32,})['"]''' +entropy = 4.5 +tags = ["entropy"] diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 887482f..6b6fde4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -2,23 +2,85 @@ Thanks for your interest in contributing to this repository of AI agent skills. +## Quick Start + +```bash +# 1. Fork and clone +git clone https://github.com/YOUR_USERNAME/ledjay-skills.git +cd ledjay-skills + +# 2. Install security tools (required) +brew install lefthook gitleaks + +# 3. Initialize git hooks +lefthook install + +# 4. Create your branch +git checkout -b feat/my-skill +``` + +## Security Requirements + +**All contributions are automatically scanned** for security issues. + +### Pre-commit Checks + +Before each commit, these checks run automatically: + +| Check | What it detects | +|-------|------------------| +| `security-scan.py` | Prompt injection, malicious code, data exfiltration | +| `gitleaks` | Hardcoded secrets, API keys, tokens | +| `unicode-check.py` | Unicode smuggling, invisible characters | + +### If a check fails + +1. **Prompt injection**: Remove patterns like "ignore previous instructions" +2. **Secrets detected**: Remove hardcoded credentials or add to `.gitleaks.toml` +3. 
**Unicode issues**: Remove invisible characters (zero-width, Unicode Tags) + +### CI/CD Scanning + +Every pull request is automatically scanned by: +- **Cisco Skill Scanner** - Multi-engine detection +- **Local security scripts** - Pattern matching +- **Gitleaks** - Secret detection + +Results appear in the **Security** tab. + ## How to Contribute 1. **Fork** the repository -2. **Create a branch** for your contribution +2. **Install security tools** (see above) +3. **Create a branch** for your contribution ```bash git checkout -b feat/my-skill ``` -3. **Commit** your changes -4. **Push** the branch +4. **Create your skill** using `_template/` as reference +5. **Test locally** + ```bash + # Run security scan manually + python3 scripts/security-scan.py my-skill/SKILL.md + + # Run unicode check + python3 scripts/unicode-check.py my-skill/SKILL.md + ``` +6. **Commit** your changes (hooks run automatically) +7. **Push** the branch ```bash git push origin feat/my-skill ``` -5. Open a **Pull Request** +8. Open a **Pull Request** ## Skill Structure -Use the `_template/` folder as reference. +Use the `_template/` folder as reference. 
Choose the right template: + +| Template | Type | Use For | +|----------|------|---------| +| `SKILL.md` | Rules | Best practices, patterns, guidelines | +| `SKILL-TOOL.md` | Tool | API documentation, tool usage | +| `SKILL-WORKFLOW.md` | Workflow | Questionnaires, onboarding, configuration | ``` my-skill/ @@ -27,6 +89,24 @@ my-skill/ └── references/ # Optional: reference files ``` +## Security Best Practices + +### ✅ DO + +- Keep SKILL.md under 500 lines +- Use clear, specific descriptions +- Document all scripts and their purpose +- Use relative paths, not absolute +- Validate all user inputs + +### ❌ DON'T + +- Include hardcoded credentials +- Use `eval()`, `exec()`, or dynamic code execution +- Make network requests to unknown domains +- Hide instructions in comments or encoded strings +- Use Unicode tricks or zero-width characters + ## Naming Conventions - **Directory name**: `kebab-case` (e.g., `design-tokens`, `react-patterns`) @@ -78,6 +158,12 @@ Before submitting, verify the skill works with: - [ ] Claude Code - [ ] Codex +## Need Help? + +- Check `_template/examples/` for working examples +- Read `SECURITY.md` for security details +- Open an issue for questions + ## License By contributing, you agree that your contributions will be licensed under the MIT License. diff --git a/SECURITY.md b/SECURITY.md index 0016f45..e2854ab 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -2,6 +2,131 @@ ## Reporting a Vulnerability -If you discover a security vulnerability, please report it privately to j@jeremie-gisserot.net. +If you discover a security vulnerability in this repository, please report it privately: -**Do not** open a public issue for security-related problems. +- **Email**: security@jeremie-gisserot.net +- **GitHub**: Use the private vulnerability reporting feature + +Do not open a public issue for security vulnerabilities. + +## Security Measures + +This repository implements multiple layers of security to protect against malicious skills: + +### 1. 
Pre-commit Hooks (Lefthook) + +```bash +# Install hooks +lefthook install + +# Run manually +lefthook run pre-commit +``` + +**Checks:** +- `security-scan.py` - Detects prompt injection, malicious code, data exfiltration +- `gitleaks` - Detects hardcoded secrets +- `unicode-check.py` - Detects Unicode smuggling attacks + +### 2. CI/CD Scanning (GitHub Actions) + +Every pull request is automatically scanned: +- **Cisco Skill Scanner** - Multi-engine detection (static + LLM + dataflow) +- **Local security scan** - Pattern-based detection +- **Unicode check** - Invisible character detection +- **Gitleaks** - Secret detection + +Results appear in the **Security** tab. + +### 3. Threat Detection + +We detect the following threat categories: + +| Category | Examples | Severity | +|----------|----------|----------| +| **Prompt Injection** | "ignore previous instructions", DAN, jailbreak | 🔴 Critical | +| **Data Exfiltration** | curl to unknown domains, file uploads | 🔴 Critical | +| **Malicious Code** | eval(), exec(), reverse shells | 🔴 Critical | +| **Unicode Smuggling** | Invisible Unicode Tags, zero-width chars | 🔴 Critical | +| **Obfuscation** | Base64, hex encoding, fromCharCode | 🟡 High | +| **Credentials** | API keys, tokens, passwords | 🟡 High | +| **Suspicious URLs** | Unknown domains, webhook endpoints | 🟢 Medium | + +### 4. 
Known Attack Patterns + +Based on Snyk's ToxicSkills research and Cisco's threat intelligence: + +**ClawHavoc Campaign Patterns:** +- Metadata poisoning with overbroad descriptions +- Credential theft via embedded scripts +- Atomic macOS Stealer (AMOS) payloads +- VMProtect-packed infostealers + +**Prompt Injection Variants:** +- Instruction hierarchy overrides +- Unicode Tag smuggling (U+E0000-U+E007F) +- Zero-width character injection +- Base64/hex encoded instructions + +**Data Exfiltration Methods:** +- Curl to attacker-controlled servers +- Multipart form uploads +- Webhook exfiltration +- Environment variable snooping + +## Installing Security Tools + +### Required (for contributors) + +```bash +# Install Lefthook and Gitleaks +brew install lefthook gitleaks + +# Initialize hooks +lefthook install +``` + +### Optional (enhanced scanning) + +```bash +# Cisco Skill Scanner (Python 3.10+) +pip install cisco-ai-skill-scanner + +# Run full scan +skill-scanner scan ./ --format text +``` + +## Security Best Practices for Skills + +When contributing skills, follow these guidelines: + +### ✅ DO + +- Keep SKILL.md under 500 lines +- Use clear, specific descriptions +- Document all scripts and their purpose +- Use relative paths, not absolute +- Validate all user inputs + +### ❌ DON'T + +- Don't include hardcoded credentials +- Don't use `eval()`, `exec()`, or dynamic code execution +- Don't make network requests to unknown domains +- Don't hide instructions in comments or encoded strings +- Don't use Unicode tricks or zero-width characters + +## False Positives + +If a security scan produces false positives: + +1. Check if the pattern is legitimate (e.g., example code) +2. Add to `.gitleaks.toml` allowlist if it's a secret false positive +3. 
Document in the skill's README if it's a known pattern + +## References + +- [Snyk ToxicSkills Research](https://snyk.io/blog/toxicskills-malicious-ai-agent-skills-clawhub/) +- [Cisco Skill Scanner](https://github.com/cisco-ai-defense/skill-scanner) +- [OWASP Top 10 for Agentic Applications](https://owasp.org/www-project-top-10-for-agentic-applications/) +- [Agent Skills Security Research](https://arxiv.org/abs/2601.10338) diff --git a/lefthook.yml b/lefthook.yml new file mode 100644 index 0000000..8d20539 --- /dev/null +++ b/lefthook.yml @@ -0,0 +1,56 @@ +# Lefthook configuration for ledjay-skills +# https://github.com/evilmartians/lefthook + +pre-commit: + parallel: true + commands: + # Security scan for SKILL.md files + security-scan-skills: + glob: "**/SKILL.md" + run: python3 scripts/security-scan.py {staged_files} + fail_text: "⚠️ Security issues detected in skill files" + + # Security scan for scripts (ts, js, cjs, mjs) + security-scan-scripts: + glob: "**/*.{ts,js,cjs,mjs,sh,py}" + run: python3 scripts/security-scan.py --scripts {staged_files} + fail_text: "⚠️ Security issues detected in scripts" + + # Check for hardcoded secrets + secrets-check: + run: gitleaks protect --staged + fail_text: "🔐 Potential secrets detected. Remove or use .gitleaks.toml to allowlist" + + # Unicode smuggling detection + unicode-check: + glob: "**/*.md" + run: python3 scripts/unicode-check.py {staged_files} + fail_text: "🔤 Suspicious Unicode characters detected (potential smuggling)" + +pre-push: + commands: + # Full Cisco Skill Scanner (if available) + cisco-scan: + run: | + if command -v skill-scanner &> /dev/null; then + skill-scanner scan ./ --format text + else + echo "ℹ️ Cisco Skill Scanner not installed. 
#!/usr/bin/env python3
"""
Security scanner for SKILL.md files and scripts.

Detects prompt injection, malicious code patterns, and data exfiltration by
matching every line of the given files against the regex tables below.

Usage:
    security-scan.py [--scripts] FILE [FILE ...]

Exit status is 0 when nothing was found and 1 when at least one issue
(including an unreadable file) was reported.
"""

import argparse
import re
import sys
from pathlib import Path

# Files to skip (self-referential): they contain the pattern definitions and
# would otherwise always flag themselves.
# NOTE(review): matching is by substring of the path, so any file whose path
# contains one of these fragments (e.g. "my-skill/scripts/security-scan.py")
# is skipped too -- consider anchoring this to the repository root.
SKIP_FILES = {
    "scripts/security-scan.py",
    "scripts/unicode-check.py",
}

# Categories reported with severity HIGH; every other category is MEDIUM.
_HIGH_SEVERITY_CATEGORIES = frozenset({
    "code_execution",
    "filesystem",
    "network",
    "exfiltration",
    "prompt_injection",
    "privilege",
})

# Patterns for SKILL.md files
SKILL_PATTERNS = {
    # Prompt injection patterns
    "prompt_injection": [
        r"ignore\s+(all\s+)?previous\s+instructions?",
        r"ignore\s+(the\s+)?above",
        r"disregard\s+(all\s+)?previous",
        r"forget\s+(all\s+)?previous",
        r"you\s+are\s+now\s+\w+",
        r"system\s*:\s*you\s+are",
        r"\[SYSTEM\]",
        r"<\|system\|>",
        r"###\s*SYSTEM",
        r"jailbreak",
        r"DAN\s*:",
        r"Do Anything Now",
    ],

    # Obfuscation patterns
    "obfuscation": [
        r"base64\.b64decode\s*\(",
        r"atob\s*\(['\"][A-Za-z0-9+/=]{20,}",
        r"btoa\s*\(",
        r"\\x[0-9a-fA-F]{2}\\x[0-9a-fA-F]{2}\\x[0-9a-fA-F]{2}",
        r"fromCharCode\s*\(\s*\d+\s*,",
    ],

    # Data exfiltration patterns (only suspicious endpoints)
    "exfiltration": [
        r"curl\s+['\"]https?://[^\s'\"]+\.(?:xyz|top|click|link|work)",
        r"fetch\s*\(\s*['\"]https?://[^\s'\"]*webhook",
        r"fetch\s*\(\s*['\"]https?://[^\s'\"]+\.xyz",
        r"axios\.(?:post|put)\s*\(\s*['\"]https?://[^\s'\"]+\.xyz",
        r"multipart/form-data.*fetch",
        r"upload.*to\s+['\"]https?://",
    ],

    # Suspicious URLs
    "suspicious_urls": [
        r"https?://[^\s'\"]+\.(?:xyz|top|click|link|work|party|space)",
        r"https?://[^\s'\"]*webhook[^/\s'\"]*['\"]",
        r"https?://[^\s'\"]*paste\.",
        r"ngrok-free\.app",
    ],

    # Credential patterns (actual hardcoded values)
    "credentials": [
        r"api[_-]?key\s*=\s*['\"][a-zA-Z0-9_-]{32,}['\"]",
        r"secret[_-]?key\s*=\s*['\"][a-zA-Z0-9_-]{32,}['\"]",
        r"private[_-]?key\s*=\s*['\"]-----BEGIN",
        r"Authorization\s*:\s*Bearer\s+[a-zA-Z0-9_-]{40,}",
    ],
}

# Patterns for scripts (JS/TS/Python/Shell)
SCRIPT_PATTERNS = {
    # Dangerous code execution
    "code_execution": [
        r"\beval\s*\(\s*['\"`]",
        r"\bnew\s+Function\s*\(\s*['\"`]",
        r"\bexec\s*\(\s*['\"`]",
        r"\bexecSync\s*\(\s*['\"`]",
        r"child_process.*exec\s*\(",
        r"\bspawn\s*\(\s*['\"`]/",
        r"\bspawnSync\s*\(\s*['\"`]/",
        r"subprocess\.(?:call|run|Popen)\s*\(\s*['\"`]",
        r"os\.system\s*\(\s*['\"`]",
    ],

    # File system manipulation (destructive)
    "filesystem": [
        r"rm\s+-rf\s+/(?:Users|home|etc|var)",
        r"rm\s+-rf\s+['\"]/",
        r"fs\.rm\s*\(\s*['\"]/",
        r"shutil\.rmtree\s*\(\s*['\"]/",
        r">\s*/dev/(?:sda|nvme|hda)",
        r"dd\s+if=/dev/zero",
    ],

    # Network operations (suspicious endpoints)
    "network": [
        r"curl\s+['\"]https?://[^\s'\"]+\.(?:xyz|top|click|link)",
        r"wget\s+['\"]https?://[^\s'\"]+\.(?:xyz|top|click)",
        r"nc\s+-[elp]+\s+['\"]?[^\s'\"]+\.(?:xyz|top)",
        r"fetch\s*\(\s*['\"]https?://[^\s'\"]*webhook",
        r"axios\.(?:post|put)\s*\(\s*['\"]https?://[^\s'\"]*webhook",
    ],

    # Obfuscation (suspicious)
    "obfuscation": [
        r"Buffer\.from\s*\([^)]*,\s*['\"]base64['\"]\)\.toString\s*\(",
        r"atob\s*\(\s*['\"][A-Za-z0-9+/=]{50,}",
        r"String\.fromCharCode\s*\(\s*\d+\s*,\s*\d+\s*,\s*\d+",
        r"\\x[0-9a-fA-F]{2}(?:\\x[0-9a-fA-F]{2}){5,}",
    ],

    # Privilege escalation
    "privilege": [
        r"\bsudo\s+chmod\s+[0-7]*[47]77",
        r"\bsudo\s+rm\s+-rf\s+/",
        r"setuid\s*\(\s*0\s*\)",
        r"chmod\s+4777",
        r"'/etc/passwd'.*write",
        r"'/etc/shadow'.*read",
    ],

    # Data exfiltration to suspicious endpoints
    "exfiltration": [
        r"fetch\s*\(\s*['\"]https?://[^\s'\"]+\.(?:xyz|top|click|link)",
        r"axios\.(?:post|put)\s*\(\s*['\"]https?://[^\s'\"]+\.(?:xyz|top)",
        r"curl\s+.*-d\s+.*['\"]https?://[^\s'\"]*webhook",
        r"multipart.*upload.*['\"]https?://[^\s'\"]+\.(?:xyz|top)",
    ],

    # Suspicious URLs
    "suspicious_urls": [
        r"https?://[^\s'\"]+\.(?:xyz|top|click|link|work|party|space)",
        r"https?://[^\s'\"]*ngrok-free\.app",
        r"https?://[^\s'\"]*paste\.[^\s'\"]*['\"]",
    ],

    # Credentials (hardcoded, high entropy)
    "credentials": [
        r"api[_-]?key\s*=\s*['\"][a-zA-Z0-9_-]{32,}['\"]",
        r"secret[_-]?key\s*=\s*['\"][a-zA-Z0-9_-]{32,}['\"]",
        r"private[_-]?key\s*=\s*['\"]-----BEGIN",
        r"access[_-]?token\s*=\s*['\"][a-zA-Z0-9_-]{40,}['\"]",
        r"Authorization\s*:\s*Bearer\s+[a-zA-Z0-9_-]{40,}",
    ],
}


def scan_file(filepath: str, patterns: dict) -> list[dict]:
    """Scan a single file for security issues.

    Args:
        filepath: Path of the file to scan.
        patterns: Mapping of category name to a list of regex strings
            (``SKILL_PATTERNS`` or ``SCRIPT_PATTERNS``). Patterns are matched
            case-insensitively against each line.

    Returns:
        A list of issue dicts ordered by category, then pattern, then line.
        Every dict has the keys ``type``, ``pattern``, ``line`` (1-based),
        ``content`` (stripped line, at most 100 characters) and ``severity``.
        A file that cannot be read yields a single entry with ``type`` set to
        ``"error"``, ``severity`` ``"ERROR"``, ``line`` 0 and an extra
        ``message`` key. A missing or self-referential file yields ``[]``.
    """
    path = Path(filepath)

    if not path.exists():
        return []

    # as_posix() keeps the skip list working when paths use backslashes.
    normalized = path.as_posix()
    if any(skip in normalized for skip in SKIP_FILES):
        return []

    try:
        content = path.read_text(encoding="utf-8", errors="ignore")
    except OSError as exc:
        # Carry the same keys as a regular finding so consumers that index
        # 'severity' / 'content' / 'pattern' do not crash on unreadable paths
        # (a directory passed on the command line, permission denied, ...).
        message = f"Could not read file: {exc}"
        return [{
            "type": "error",
            "message": message,
            "pattern": "",
            "line": 0,
            "content": message[:100],
            "severity": "ERROR",
        }]

    lines = content.split("\n")
    issues = []

    # Every line is checked. Lines that merely look like a regex definition
    # (starting with r" or r') are NOT exempted: that exemption let any file
    # hide a payload behind such a prefix, and the scanner's own source files
    # are already excluded through SKIP_FILES above.
    for category, pattern_list in patterns.items():
        severity = "HIGH" if category in _HIGH_SEVERITY_CATEGORIES else "MEDIUM"
        for pattern in pattern_list:
            regex = re.compile(pattern, re.IGNORECASE)  # compile once per pattern
            for number, line in enumerate(lines, 1):
                if regex.search(line):
                    issues.append({
                        "type": category,
                        "pattern": pattern,
                        "line": number,
                        "content": line.strip()[:100],
                        "severity": severity,
                    })

    return issues


def main():
    """Command-line entry point: scan the given files and exit 0 or 1."""
    parser = argparse.ArgumentParser(description="Security scanner for skills and scripts")
    parser.add_argument("--scripts", action="store_true", help="Scan scripts instead of SKILL.md")
    parser.add_argument("files", nargs="+", help="Files to scan")
    args = parser.parse_args()

    patterns = SCRIPT_PATTERNS if args.scripts else SKILL_PATTERNS
    scan_type = "scripts" if args.scripts else "skills"

    all_issues = []
    for filepath in args.files:
        issues = scan_file(filepath, patterns)
        if issues:
            all_issues.append((filepath, issues))

    if not all_issues:
        print(f"✅ No security issues detected in {scan_type}")
        sys.exit(0)

    print(f"\n🚨 Security issues detected in {scan_type}:\n")
    for filepath, issues in all_issues:
        print(f"📄 {filepath}")
        for issue in issues:
            if issue["type"] == "error":
                # Read failures have no matched line or pattern to show.
                print(f" [{issue['severity']}] {issue['type']}: {issue['message']}\n")
                continue
            print(f" [{issue['severity']}] {issue['type']}: {issue['content'][:60]}...")
            print(f" Line {issue['line']}: pattern '{issue['pattern']}'\n")

    # Fail closed: a finding or an unreadable file blocks the commit/CI step.
    sys.exit(1)


if __name__ == "__main__":
    main()
#!/usr/bin/env python3
"""
Unicode smuggling detector for markdown files.

Detects hidden Unicode characters that could be used for prompt injection:
Unicode Tag characters, zero-width characters, unexpected control characters
and concentrated homoglyphs.

Exit status is 0 when nothing was found and 1 when at least one issue
(including an unreadable file) was reported or no file was given.
"""

import sys
import unicodedata
from pathlib import Path

# Suspicious Unicode ranges
SUSPICIOUS_RANGES = {
    # Unicode Tags (used for invisible instructions); inclusive (low, high)
    "unicode_tags": (0xE0000, 0xE007F),
    # Zero-width characters
    "zero_width": {
        0x200B,  # Zero Width Space
        0x200C,  # Zero Width Non-Joiner
        0x200D,  # Zero Width Joiner
        0x200E,  # Left-to-Right Mark
        0x200F,  # Right-to-Left Mark
        0xFEFF,  # Zero Width No-Break Space (BOM)
    },
    # Homoglyphs (confusable characters)
    "homoglyphs": {
        # Cyrillic lookalikes
        0x0430, 0x0435, 0x043E, 0x0440, 0x0441, 0x0443, 0x0445,
        # Greek lookalikes
        0x03B1, 0x03B5, 0x03B9, 0x03BF, 0x03C1, 0x03C5,
    },
}

# Control characters that shouldn't appear in markdown
CONTROL_CHARS = set(range(0x00, 0x20)) - {0x09, 0x0A, 0x0D}  # Allow tab, LF, CR


def check_file(filepath: str) -> list[dict]:
    """Check a file for suspicious Unicode characters.

    Args:
        filepath: Path of the file to inspect. It is decoded as UTF-8 and
            undecodable bytes are ignored.

    Returns:
        A list of issue dicts, each with ``type``, ``severity`` and
        ``message``, in this order when present:

        * ``unicode_tags`` (CRITICAL) -- any character in U+E0000..U+E007F;
          also carries ``count``.
        * ``zero_width`` (HIGH) -- more than 5 zero-width characters; also
          carries ``positions`` (first 10 as ``(index, hex, name)``).
        * ``control_chars`` (MEDIUM) -- any control character other than
          tab/LF/CR; also carries ``positions`` (first 5 as ``(index, hex)``).
        * ``homoglyphs`` (LOW) -- more than 10 Cyrillic/Greek lookalikes.

        A file that cannot be read yields a single ``error`` entry with
        severity ``"ERROR"``. A missing file yields ``[]``.
    """
    path = Path(filepath)

    if not path.exists():
        return []

    try:
        content = path.read_text(encoding="utf-8", errors="ignore")
    except OSError as exc:
        # 'severity' is required by main(); without it an unreadable path
        # (e.g. a directory) crashed the report with KeyError.
        return [{
            "type": "error",
            "severity": "ERROR",
            "message": f"Could not read file: {exc}",
        }]

    tag_low, tag_high = SUSPICIOUS_RANGES["unicode_tags"]
    zero_width = SUSPICIOUS_RANGES["zero_width"]
    homoglyphs = SUSPICIOUS_RANGES["homoglyphs"]

    tag_count = 0
    zw_chars = []
    control_found = []
    homoglyph_count = 0

    # Single pass; the four character classes are disjoint, so each character
    # lands in at most one bucket.
    for index, char in enumerate(content):
        code = ord(char)
        if tag_low <= code <= tag_high:
            tag_count += 1
        elif code in zero_width:
            zw_chars.append((index, hex(code), unicodedata.name(char, "UNKNOWN")))
        elif code in CONTROL_CHARS:
            control_found.append((index, hex(code)))
        elif code in homoglyphs:
            homoglyph_count += 1

    issues = []

    if tag_count > 0:
        issues.append({
            "type": "unicode_tags",
            "severity": "CRITICAL",
            "message": f"Found {tag_count} Unicode Tag characters (invisible instructions)",
            "count": tag_count,
        })

    if len(zw_chars) > 5:  # Allow a few (emojis use them)
        issues.append({
            "type": "zero_width",
            "severity": "HIGH",
            "message": f"Found {len(zw_chars)} zero-width characters",
            "positions": zw_chars[:10],  # First 10
        })

    if control_found:
        issues.append({
            "type": "control_chars",
            "severity": "MEDIUM",
            "message": f"Found {len(control_found)} unexpected control characters",
            "positions": control_found[:5],
        })

    # Note: homoglyphs are common in some languages, so only flag if concentrated
    if homoglyph_count > 10:
        issues.append({
            "type": "homoglyphs",
            "severity": "LOW",
            "message": f"Found {homoglyph_count} potential homoglyph characters",
        })

    return issues


def main():
    """Command-line entry point: check every file named in ``sys.argv``."""
    if len(sys.argv) < 2:
        print("Usage: unicode-check.py <file1> [file2] ...")
        sys.exit(1)

    all_issues = []

    for filepath in sys.argv[1:]:
        issues = check_file(filepath)
        if issues:
            all_issues.append((filepath, issues))

    if not all_issues:
        print("✅ No Unicode issues detected")
        sys.exit(0)

    print("\n🔤 Unicode issues detected:\n")
    for filepath, issues in all_issues:
        print(f"📄 {filepath}")
        for issue in issues:
            print(f" [{issue['severity']}] {issue['type']}: {issue['message']}")

    sys.exit(1)


if __name__ == "__main__":
    main()