diff --git a/docs.json b/docs.json index 6cd73304..044b31d8 100644 --- a/docs.json +++ b/docs.json @@ -20,100 +20,50 @@ "navigation": { "tabs": [ { - "tab": "Overview", + "tab": "Documentation", "pages": [ { - "group": "Overview", + "group": "Get Started", "pages": [ "overview/introduction", "overview/quickstart", - "overview/first-projects", - "overview/faqs", - "overview/community", - "overview/contributing" + "openhands/usage/get-started/tutorials" ] }, { - "group": "Common Features", + "group": "Essential Guidelines", "pages": [ - "overview/model-context-protocol", - { - "group": "Skills", - "pages": [ - "overview/skills", - "overview/skills/repo", - "overview/skills/keyword", - "overview/skills/org", - "overview/skills/public" - ] - } + "openhands/usage/essential-guidelines/when-to-use-openhands", + "openhands/usage/tips/prompting-best-practices", + "openhands/usage/essential-guidelines/good-vs-bad-instructions", + "openhands/usage/essential-guidelines/sdlc-integration" ] }, { - "group": "Tips and Tricks", + "group": "Onboarding OpenHands", "pages": [ - "openhands/usage/tips/prompting-best-practices" + "openhands/usage/customization/repository", + "overview/skills/repo" ] }, { - "group": "Help", + "group": "Product Guides", "pages": [ - "openhands/usage/troubleshooting/troubleshooting", - "openhands/usage/troubleshooting/feedback" - ] - } - ] - }, - { - "tab": "Web", - "pages": [ - { - "group": "Key Features", - "pages": [ - "openhands/usage/key-features" - ] - }, - { - "group": "OpenHands Cloud", - "pages": [ - "openhands/usage/cloud/openhands-cloud", + "openhands/usage/key-features", + "overview/model-context-protocol", { - "group": "Integrations", + "group": "Skills", "pages": [ - "openhands/usage/cloud/bitbucket-installation", - "openhands/usage/cloud/github-installation", - "openhands/usage/cloud/gitlab-installation", - "openhands/usage/cloud/slack-installation", - { - "group": "Project Management Tools", - "pages": [ - 
"openhands/usage/cloud/project-management/jira-integration" - ] - } + "overview/skills", + "overview/skills/repo", + "overview/skills/keyword", + "overview/skills/org", + "overview/skills/public" ] }, - "openhands/usage/cloud/cloud-ui", - "openhands/usage/cloud/cloud-api" - ] - }, - { - "group": "Local GUI", - "pages": [ - "openhands/usage/run-openhands/local-setup", - "openhands/usage/run-openhands/gui-mode" - ] - }, - { - "group": "REST API", - "pages": [ - "openhands/usage/api/v1" - ] - }, - { - "group": "Customizations & Settings", - "pages": [ + "openhands/usage/settings/secrets-settings", { - "group": "OpenHands Settings", + "group": "Settings", "pages": [ "openhands/usage/settings/application-settings", "openhands/usage/settings/llm-settings", @@ -123,66 +73,99 @@ "openhands/usage/settings/mcp-settings" ] }, - "openhands/usage/customization/repository" + { + "group": "OpenHands Cloud", + "pages": [ + "openhands/usage/cloud/openhands-cloud", + "openhands/usage/cloud/cloud-ui" + ] + } ] }, { - "group": "Advanced Configuration", + "group": "Integrations", "pages": [ + "openhands/usage/settings/integrations-settings", + "openhands/usage/cloud/github-installation", + "openhands/usage/cloud/gitlab-installation", + "openhands/usage/cloud/bitbucket-installation", + "openhands/usage/cloud/slack-installation", + "openhands/usage/cloud/project-management/jira-integration", { - "group": "LLM Configuration", + "group": "Cloud API", "pages": [ - "openhands/usage/llms/llms", - { - "group": "Providers", - "pages": [ - "openhands/usage/llms/openhands-llms", - "openhands/usage/llms/azure-llms", - "openhands/usage/llms/google-llms", - "openhands/usage/llms/groq", - "openhands/usage/llms/local-llms", - "openhands/usage/llms/litellm-proxy", - "openhands/usage/llms/moonshot", - "openhands/usage/llms/openai-llms", - "openhands/usage/llms/openrouter" - ] - } + "openhands/usage/cloud/cloud-api", + "openhands/usage/api/v1" + ] + } + ] + }, + { + "group": "CLI", + "pages": [ + { + 
"group": "Getting Started", + "pages": [ + "openhands/usage/cli/installation", + "openhands/usage/cli/quick-start" ] }, { - "group": "Sandbox Configuration", + "group": "Ways to Run", "pages": [ - "openhands/usage/sandboxes/overview", + "openhands/usage/cli/terminal", + "openhands/usage/cli/headless", + "openhands/usage/cli/web-interface", + "openhands/usage/cli/gui-server", { - "group": "Providers", + "group": "IDE Integration (ACP)", "pages": [ - "openhands/usage/sandboxes/docker", - "openhands/usage/sandboxes/remote", - "openhands/usage/sandboxes/process" + "openhands/usage/cli/ide/overview", + "openhands/usage/cli/ide/zed", + "openhands/usage/cli/ide/toad", + "openhands/usage/cli/ide/vscode", + "openhands/usage/cli/ide/jetbrains" ] } ] }, - "openhands/usage/advanced/configuration-options", - "openhands/usage/advanced/custom-sandbox-guide", - "openhands/usage/advanced/search-engine-setup" + { + "group": "Cloud", + "pages": [ + "openhands/usage/cli/cloud" + ] + }, + { + "group": "Extensions", + "pages": [ + "openhands/usage/cli/mcp-servers", + "openhands/usage/cli/critic" + ] + }, + { + "group": "Reference", + "pages": [ + "openhands/usage/cli/command-reference", + "openhands/usage/cli/resume" + ] + } ] }, { - "group": "Legacy (V0)", + "group": "Additional Documentation", "pages": [ { - "group": "V0 Runtime Configuration", + "group": "V0 Reference", "pages": [ "openhands/usage/v0/runtimes/V0_overview", { - "group": "Providers", + "group": "V0 Runtime Providers", "pages": [ "openhands/usage/v0/runtimes/V0_docker", "openhands/usage/v0/runtimes/V0_remote", "openhands/usage/v0/runtimes/V0_local", { - "group": "Third-Party Providers", + "group": "V0 Third-Party Providers", "pages": [ "openhands/usage/v0/runtimes/V0_modal", "openhands/usage/v0/runtimes/V0_daytona", @@ -191,72 +174,101 @@ ] } ] + }, + "openhands/usage/v0/advanced/V0_configuration-options", + { + "group": "V0 REST API", + "openapi": "openapi/V0_openapi.json" } ] }, { - "group": "V0 Configuration 
Options", + "group": "Local GUI", "pages": [ - "openhands/usage/v0/advanced/V0_configuration-options" + "openhands/usage/run-openhands/local-setup", + "openhands/usage/run-openhands/gui-mode", + "openhands/usage/troubleshooting/troubleshooting", + { + "group": "Sandbox Configuration", + "pages": [ + "openhands/usage/sandboxes/overview", + { + "group": "Sandbox Providers", + "pages": [ + "openhands/usage/sandboxes/docker", + "openhands/usage/sandboxes/remote", + "openhands/usage/sandboxes/process" + ] + } + ] + }, + { + "group": "Advanced", + "pages": [ + "openhands/usage/advanced/configuration-options", + "openhands/usage/advanced/custom-sandbox-guide", + "openhands/usage/advanced/search-engine-setup" + ] + } ] }, { - "group": "V0 REST API", - "openapi": "openapi/V0_openapi.json" - } - ] - } - ] - }, - { - "tab": "CLI", - "pages": [ - { - "group": "Getting Started", - "pages": [ - "openhands/usage/cli/installation", - "openhands/usage/cli/quick-start" - ] - }, - { - "group": "Ways to Run", - "pages": [ - "openhands/usage/cli/terminal", - "openhands/usage/cli/headless", - "openhands/usage/cli/web-interface", - "openhands/usage/cli/gui-server", + "group": "LLM Configuration", + "pages": [ + "openhands/usage/llms/llms", + { + "group": "LLM Providers", + "pages": [ + "openhands/usage/llms/openhands-llms", + "openhands/usage/llms/azure-llms", + "openhands/usage/llms/google-llms", + "openhands/usage/llms/groq", + "openhands/usage/llms/local-llms", + "openhands/usage/llms/litellm-proxy", + "openhands/usage/llms/moonshot", + "openhands/usage/llms/openai-llms", + "openhands/usage/llms/openrouter" + ] + } + ] + }, { - "group": "IDE Integration (ACP)", + "group": "Developers", "pages": [ - "openhands/usage/cli/ide/overview", - "openhands/usage/cli/ide/zed", - "openhands/usage/cli/ide/toad", - "openhands/usage/cli/ide/vscode", - "openhands/usage/cli/ide/jetbrains" + "openhands/usage/developers/development-overview", + "openhands/usage/developers/debugging", + 
"openhands/usage/developers/websocket-connection", + "openhands/usage/developers/evaluation-harness" ] } ] }, { - "group": "Cloud", + "group": "OpenHands Community", "pages": [ - "openhands/usage/cli/cloud" + "overview/community", + "overview/contributing", + "overview/faqs", + "openhands/usage/troubleshooting/feedback" ] - }, - { - "group": "Extensions", - "pages": [ - "openhands/usage/cli/mcp-servers", - "openhands/usage/cli/critic" - ] - }, + } + ] + }, + { + "tab": "Use Cases", + "pages": [ { - "group": "Reference", + "group": "Use Cases", "pages": [ - "openhands/usage/cli/command-reference", - "openhands/usage/cli/resume" + "openhands/usage/use-cases/vulnerability-remediation", + "openhands/usage/use-cases/code-review", + "openhands/usage/use-cases/incident-triage", + "openhands/usage/use-cases/cobol-modernization", + "openhands/usage/use-cases/dependency-upgrades", + "openhands/usage/use-cases/spark-migrations" ] } + ] }, { diff --git a/openhands/usage/cli/quick-start.mdx b/openhands/usage/cli/quick-start.mdx index c86211d9..be5bf014 100644 --- a/openhands/usage/cli/quick-start.mdx +++ b/openhands/usage/cli/quick-start.mdx @@ -33,7 +33,7 @@ The OpenHands CLI provides multiple ways to interact with the OpenHands AI agent ```bash openhands login ``` - This authenticates with OpenHands Cloud and fetches your settings. First-time users get **$10 in free credits**. + This authenticates with OpenHands Cloud and fetches your settings. The CLI will prompt you to configure your LLM provider and API key on first run. diff --git a/openhands/usage/essential-guidelines/good-vs-bad-instructions.mdx b/openhands/usage/essential-guidelines/good-vs-bad-instructions.mdx new file mode 100644 index 00000000..2801866b --- /dev/null +++ b/openhands/usage/essential-guidelines/good-vs-bad-instructions.mdx @@ -0,0 +1,433 @@ +--- +title: Good vs. 
Bad Instructions +description: Learn how to write effective instructions for OpenHands +--- + +The quality of your instructions directly impacts the quality of OpenHands' output. This guide shows concrete examples of good and bad prompts, explains why some work better than others, and provides principles for writing effective instructions. + +## Concrete Examples of Good/Bad Prompts + +### Bug Fixing Examples + +#### Bad Example + +``` +Fix the bug in my code. +``` + +**Why it's bad:** +- No information about what the bug is +- No indication of where to look +- No description of expected vs. actual behavior +- OpenHands would have to guess what's wrong + +#### Good Example + +``` +Fix the TypeError in src/api/users.py line 45. + +Error message: +TypeError: 'NoneType' object has no attribute 'get' + +Expected behavior: The get_user_preferences() function should return +default preferences when the user has no saved preferences. + +Actual behavior: It crashes with the error above when user.preferences is None. + +The fix should handle the None case gracefully and return DEFAULT_PREFERENCES. +``` + +**Why it works:** +- Specific file and line number +- Exact error message +- Clear expected vs. actual behavior +- Suggested approach for the fix + +### Feature Development Examples + +#### Bad Example + +``` +Add user authentication to my app. +``` + +**Why it's bad:** +- Scope is too large and undefined +- No details about authentication requirements +- No mention of existing code or patterns +- Could mean many different things + +#### Good Example + +``` +Add email/password login to our Express.js API. + +Requirements: +1. POST /api/auth/login endpoint +2. Accept email and password in request body +3. Validate against users in PostgreSQL database +4. Return JWT token on success, 401 on failure +5. Use bcrypt for password comparison (already in dependencies) + +Follow the existing patterns in src/api/routes.js for route structure. 
+Use the existing db.query() helper in src/db/index.js for database access. + +Success criteria: I can call the endpoint with valid credentials +and receive a JWT token that works with our existing auth middleware. +``` + +**Why it works:** +- Specific, scoped feature +- Clear technical requirements +- Points to existing patterns to follow +- Defines what "done" looks like + +### Code Review Examples + +#### Bad Example + +``` +Review my code. +``` + +**Why it's bad:** +- No code provided or referenced +- No indication of what to look for +- No context about the code's purpose +- No criteria for the review + +#### Good Example + +``` +Review this pull request for our payment processing module: + +Focus areas: +1. Security - we're handling credit card data +2. Error handling - payments must never silently fail +3. Idempotency - duplicate requests should be safe + +Context: +- This integrates with Stripe API +- It's called from our checkout flow +- We have ~10,000 transactions/day + +Please flag any issues as Critical/Major/Minor with explanations. +``` + +**Why it works:** +- Clear scope and focus areas +- Important context provided +- Business implications explained +- Requested output format specified + +### Refactoring Examples + +#### Bad Example + +``` +Make the code better. +``` + +**Why it's bad:** +- "Better" is subjective and undefined +- No specific problems identified +- No goals for the refactoring +- No constraints or requirements + +#### Good Example + +``` +Refactor the UserService class in src/services/user.js: + +Problems to address: +1. The class is 500+ lines - split into smaller, focused services +2. Database queries are mixed with business logic - separate them +3. 
There's code duplication in the validation methods + +Constraints: +- Keep the public API unchanged (other code depends on it) +- Maintain test coverage (run npm test after changes) +- Follow our existing service patterns in src/services/ + +Goal: Improve maintainability while keeping the same functionality. +``` + +**Why it works:** +- Specific problems identified +- Clear constraints and requirements +- Points to patterns to follow +- Measurable success criteria + +## Key Principles for Effective Instructions + +### Be Specific + +Vague instructions produce vague results. Be concrete about: + +| Instead of... | Say... | +|---------------|--------| +| "Fix the error" | "Fix the TypeError on line 45 of api.py" | +| "Add tests" | "Add unit tests for the calculateTotal function covering edge cases" | +| "Improve performance" | "Reduce the database queries from N+1 to a single join query" | +| "Clean up the code" | "Extract the validation logic into a separate ValidatorService class" | + +### Provide Context + +Help OpenHands understand the bigger picture: + +``` +Context to include: +- What does this code do? (purpose) +- Who uses it? (users/systems) +- Why does this matter? (business impact) +- What constraints exist? (performance, compatibility) +- What patterns should be followed? (existing conventions) +``` + +**Example with context:** + +``` +Add rate limiting to our public API endpoints. + +Context: +- This is a REST API serving mobile apps and third-party integrations +- We've been seeing abuse from web scrapers hitting us 1000+ times/minute +- Our infrastructure can handle 100 req/sec per client sustainably +- We use Redis (already available in the project) +- Our API follows the controller pattern in src/controllers/ + +Requirement: Limit each API key to 100 requests per minute with +appropriate 429 responses and Retry-After headers. 
+``` + +### Set Clear Goals + +Define what success looks like: + +``` +Success criteria checklist: +✓ What specific outcome do you want? +✓ How will you verify it worked? +✓ What tests should pass? +✓ What should the user experience be? +``` + +**Example with clear goals:** + +``` +Implement password reset functionality. + +Success criteria: +1. User can request reset via POST /api/auth/forgot-password +2. System sends email with secure reset link +3. Link expires after 1 hour +4. User can set new password via POST /api/auth/reset-password +5. Old sessions are invalidated after password change +6. All edge cases return appropriate error messages +7. Existing tests still pass, new tests cover the feature +``` + +### Include Constraints + +Specify what you can't or won't change: + +``` +Constraints to specify: +- API compatibility (can't break existing clients) +- Technology restrictions (must use existing stack) +- Performance requirements (must respond in <100ms) +- Security requirements (must not log PII) +- Time/scope limits (just this one file) +``` + +## Common Pitfalls to Avoid + +### Vague Requirements + + + + ``` + Make the dashboard faster. + ``` + + + ``` + The dashboard takes 5 seconds to load. + + Profile it and optimize to load in under 1 second. + + Likely issues: + - N+1 queries in getWidgetData() + - Uncompressed images + - Missing database indexes + + Focus on the biggest wins first. + ``` + + + +### Missing Context + + + + ``` + Add caching to the API. + ``` + + + ``` + Add caching to the product catalog API. + + Context: + - 95% of requests are for the same 1000 products + - Product data changes only via admin panel (rare) + - We already have Redis running for sessions + - Current response time is 200ms, target is <50ms + + Cache strategy: Cache product data in Redis with 5-minute TTL, + invalidate on product update. + ``` + + + +### Unrealistic Expectations + + + + ``` + Rewrite our entire backend from PHP to Go. 
+ ``` + + + ``` + Create a Go microservice for the image processing currently in + src/php/ImageProcessor.php. + + This is the first step in our gradual migration. + The Go service should: + 1. Expose the same API endpoints + 2. Be deployable alongside the existing PHP app + 3. Include a feature flag to route traffic + + Start with just the resize and crop functions. + ``` + + + +### Incomplete Information + + + + ``` + The login is broken, fix it. + ``` + + + ``` + Users can't log in since yesterday's deployment. + + Symptoms: + - Login form submits but returns 500 error + - Server logs show: "Redis connection refused" + - Redis was moved to a new host yesterday + + The issue is likely in src/config/redis.js which may + have the old host hardcoded. + + Expected: Login should work with the new Redis at redis.internal:6380 + ``` + + + +## Best Practices + +### Structure Your Instructions + +Use clear structure for complex requests: + +``` +## Task +[One sentence describing what you want] + +## Background +[Context and why this matters] + +## Requirements +1. [Specific requirement] +2. [Specific requirement] +3. [Specific requirement] + +## Constraints +- [What you can't change] +- [What must be preserved] + +## Success Criteria +- [How to verify it works] +``` + +### Provide Examples + +Show what you want through examples: + +``` +Add input validation to the user registration endpoint. + +Example of what validation errors should look like: + +{ + "error": "validation_failed", + "details": [ + {"field": "email", "message": "Invalid email format"}, + {"field": "password", "message": "Must be at least 8 characters"} + ] +} + +Validate: +- email: valid format, not already registered +- password: min 8 chars, at least 1 number +- username: 3-20 chars, alphanumeric only +``` + +### Define Success Criteria + +Be explicit about what "done" means: + +``` +This task is complete when: +1. All existing tests pass (npm test) +2. New tests cover the added functionality +3. 
The feature works as described in the acceptance criteria +4. Code follows our style guide (npm run lint passes) +5. Documentation is updated if needed +``` + +### Iterate and Refine + +Build on previous work: + +``` +In our last session, you added the login endpoint. + +Now add the logout functionality: +1. POST /api/auth/logout endpoint +2. Invalidate the current session token +3. Clear any server-side session data +4. Follow the same patterns used in login + +The login implementation is in src/api/auth/login.js for reference. +``` + +## Quick Reference + +| Element | Bad | Good | +|---------|-----|------| +| Location | "in the code" | "in src/api/users.py line 45" | +| Problem | "it's broken" | "TypeError when user.preferences is None" | +| Scope | "add authentication" | "add JWT-based login endpoint" | +| Behavior | "make it work" | "return 200 with user data on success" | +| Patterns | (none) | "follow patterns in src/services/" | +| Success | (none) | "all tests pass, endpoint returns correct data" | + + +The investment you make in writing clear instructions pays off in fewer iterations, better results, and less time debugging miscommunication. Take the extra minute to be specific. + diff --git a/openhands/usage/essential-guidelines/sdlc-integration.mdx b/openhands/usage/essential-guidelines/sdlc-integration.mdx new file mode 100644 index 00000000..b56e3eb5 --- /dev/null +++ b/openhands/usage/essential-guidelines/sdlc-integration.mdx @@ -0,0 +1,372 @@ +--- +title: OpenHands in Your SDLC +description: How OpenHands fits into your software development lifecycle +--- + +OpenHands can enhance every phase of your software development lifecycle (SDLC), from planning through deployment. This guide shows some example prompts that you can use when you integrate OpenHands into your development workflow. 
+ +## Integration with Development Workflows + +### Planning Phase + +Use OpenHands during planning to accelerate technical decisions: + +**Technical specification assistance:** +``` +Create a technical specification for adding search functionality: + +Requirements from product: +- Full-text search across products and articles +- Filter by category, price range, and date +- Sub-200ms response time at 1000 QPS + +Provide: +1. Architecture options (Elasticsearch vs. PostgreSQL full-text) +2. Data model changes needed +3. API endpoint designs +4. Estimated implementation effort +5. Risks and mitigations +``` + +**Sprint planning support:** +``` +Review these user stories and create implementation tasks in our Linear task management software using the LINEAR_API_KEY environment variable: + +Story 1: As a user, I can reset my password via email +Story 2: As an admin, I can view user activity logs + +For each story, create: +- Technical subtasks +- Estimated effort (hours) +- Dependencies on other work +- Testing requirements +``` + +### Development Phase + +OpenHands excels during active development: + +**Feature implementation:** +- Write new features with clear specifications +- Follow existing code patterns automatically +- Generate tests alongside code +- Create documentation as you go + +**Bug fixing:** +- Analyze error logs and stack traces +- Identify root causes +- Implement fixes with regression tests +- Document the issue and solution + +**Code improvement:** +- Refactor for clarity and maintainability +- Optimize performance bottlenecks +- Update deprecated APIs +- Improve error handling + +### Testing Phase + +Automate test creation and improvement: + +``` +Add comprehensive tests for the UserService module: + +Current coverage: 45% +Target coverage: 85% + +1. Analyze uncovered code paths using the codecov module +2. Write unit tests for edge cases +3. Add integration tests for API endpoints +4. Create test data factories +5. 
Document test scenarios + +Each time you add new tests, re-run codecov to check the increased coverage. Continue until you have sufficient coverage, and all tests pass (by either fixing the tests, or fixing the code if your tests uncover bugs). +``` + +### Review Phase + +Accelerate code reviews: + +``` +Review this PR for our coding standards: + +Check for: +1. Security issues (SQL injection, XSS, etc.) +2. Performance concerns +3. Test coverage adequacy +4. Documentation completeness +5. Adherence to our style guide + +Provide actionable feedback with severity ratings. +``` + +### Deployment Phase + +Assist with deployment preparation: + +``` +Prepare for production deployment: + +1. Review all changes since last release +2. Check for breaking API changes +3. Verify database migrations are reversible +4. Update the changelog +5. Create release notes +6. Identify rollback steps if needed +``` + +## CI/CD Integration + +OpenHands can be integrated into your CI/CD pipelines through the [Software Agent SDK](/sdk/index). Rather than using hypothetical actions, you can build powerful, customized workflows using real, production-ready tools. + +### GitHub Actions Integration + +The Software Agent SDK provides composite GitHub Actions for common workflows: + +- **[Automated PR Review](/openhands/usage/use-cases/code-review)** - Automatically review pull requests with inline comments +- **[SDK GitHub Workflows Guide](/sdk/guides/github-workflows/pr-review)** - Build custom GitHub workflows with the SDK + +For example, to set up automated PR reviews, see the [Automated Code Review](/openhands/usage/use-cases/code-review) guide which uses the real `OpenHands/software-agent-sdk/.github/actions/pr-review` composite action. + +### What You Can Automate + +Using the SDK, you can create GitHub Actions workflows to: + +1. **Automatic code review** when a PR is opened +2. **Automatically update docs** weekly when new functionality is added +3. 
**Diagnose errors** that have appeared in monitoring software such as DataDog and automatically send analyses and improvements +4. **Manage TODO comments** and track technical debt +5. **Assign reviewers** based on code ownership patterns + +### Getting Started + +To integrate OpenHands into your CI/CD: + +1. Review the [SDK Getting Started guide](/sdk/getting-started) +2. Explore the [GitHub Workflows examples](/sdk/guides/github-workflows/pr-review) +3. Set up your `LLM_API_KEY` as a repository secret +4. Use the provided composite actions or build custom workflows + +See the [Use Cases](/openhands/usage/use-cases/code-review) section for complete examples of production-ready integrations. + +## Team Workflows + +### Solo Developer Workflows + +For individual developers: + +**Daily workflow:** +1. **Morning review**: Have OpenHands analyze overnight CI results +2. **Feature development**: Use OpenHands for implementation +3. **Pre-commit**: Request review before pushing +4. **Documentation**: Generate/update docs for changes + +**Best practices:** +- Set up automated reviews on all PRs +- Use OpenHands for boilerplate and repetitive tasks +- Keep AGENTS.md updated with project patterns + +### Small Team Workflows + +For teams of 2-10 developers: + +**Collaborative workflow:** +``` +Team Member A: Creates feature branch, writes initial implementation +OpenHands: Reviews code, suggests improvements +Team Member B: Reviews OpenHands suggestions, approves or modifies +OpenHands: Updates documentation, adds missing tests +Team: Merges after final human review +``` + +**Communication integration:** +- Slack notifications for OpenHands findings +- Automatic issue creation for bugs found +- Weekly summary reports + +### Enterprise Team Workflows + +For larger organizations: + +**Governance and oversight:** +- Configure approval requirements for OpenHands changes +- Set up audit logging for all AI-assisted changes +- Define scope limits for automated actions +- Establish 
human review requirements + +**Scale patterns:** +``` +Central Platform Team: +├── Defines OpenHands policies +├── Manages integrations +└── Monitors usage and quality + +Feature Teams: +├── Use OpenHands within policies +├── Customize for team needs +└── Report issues to platform team +``` + +## Best Practices + +### Code Review Integration + +Set up effective automated reviews: + +```yaml +# .openhands/review-config.yml +review: + focus_areas: + - security + - performance + - test_coverage + - documentation + + severity_levels: + block_merge: + - critical + - security + require_response: + - major + informational: + - minor + - suggestion + + ignore_patterns: + - "*.generated.*" + - "vendor/*" +``` + +### Pull Request Automation + +Automate common PR tasks: + +| Trigger | Action | +|---------|--------| +| PR opened | Auto-review, label by type | +| Tests fail | Analyze failures, suggest fixes | +| Coverage drops | Identify missing tests | +| PR approved | Update changelog, check docs | + +### Quality Gates + +Define automated quality gates: + +```yaml +quality_gates: + - name: test_coverage + threshold: 80% + action: block_merge + + - name: security_issues + threshold: 0 critical + action: block_merge + + - name: code_review_score + threshold: 7/10 + action: require_review + + - name: documentation + requirement: all_public_apis + action: warn +``` + +### Automated Testing + +Integrate OpenHands with your testing strategy: + +**Test generation triggers:** +- New code without tests +- Coverage below threshold +- Bug fix without regression test +- API changes without contract tests + +**Example workflow:** +```yaml +on: + push: + branches: [main] + +jobs: + ensure-coverage: + steps: + - name: Check coverage + run: | + COVERAGE=$(npm test -- --coverage | grep "All files" | awk '{print $10}') + if [ "$COVERAGE" -lt "80" ]; then + openhands generate-tests --target 80 + fi +``` + +## Common Integration Patterns + +### Pre-Commit Hooks + +Run OpenHands checks before 
commits: + +```bash +# .git/hooks/pre-commit +#!/bin/bash + +# Quick code review +openhands review --quick --staged-only + +if [ $? -ne 0 ]; then + echo "OpenHands found issues. Review and fix before committing." + exit 1 +fi +``` + +### Post-Commit Actions + +Automate tasks after commits: + +```yaml +# .github/workflows/post-commit.yml +on: + push: + branches: [main] + +jobs: + update-docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Update API docs + run: openhands update-docs --api + - name: Commit changes + run: | + git add docs/ + git commit -m "docs: auto-update API documentation" || true + git push +``` + +### Scheduled Tasks + +Run regular maintenance: + +```yaml +# Weekly dependency check +on: + schedule: + - cron: '0 9 * * 1' # Monday 9am + +jobs: + dependency-review: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Check dependencies + run: | + openhands check-dependencies --security --outdated + - name: Create issues + run: openhands create-issues --from-report deps.json +``` + +### Event-Triggered Workflows + +You can build custom event-triggered workflows using the Software Agent SDK. For example, the [Incident Triage](/openhands/usage/use-cases/incident-triage) use case shows how to automatically analyze and respond to issues. 
+ +For more event-driven automation patterns, see: +- [SDK GitHub Workflows Guide](/sdk/guides/github-workflows/pr-review) - Build custom workflows triggered by GitHub events +- [GitHub Action Integration](/openhands/usage/run-openhands/github-action) - Use the OpenHands resolver for issue triage diff --git a/openhands/usage/essential-guidelines/when-to-use-openhands.mdx b/openhands/usage/essential-guidelines/when-to-use-openhands.mdx new file mode 100644 index 00000000..23016039 --- /dev/null +++ b/openhands/usage/essential-guidelines/when-to-use-openhands.mdx @@ -0,0 +1,293 @@ +--- +title: When to Use OpenHands +description: Guidance on when OpenHands is the right tool for your task +--- + +OpenHands excels at many development tasks, but knowing when to use it—and when to handle things yourself—helps you get the best results. This guide helps you identify the right tasks for OpenHands and set yourself up for success. + +## Task Complexity Guidance + +### Simple Tasks + +**Ideal for OpenHands** — These tasks can often be completed in a single session with minimal guidance. + +- Adding a new function or method +- Writing unit tests for existing code +- Fixing simple bugs with clear error messages +- Code formatting and style fixes +- Adding documentation or comments +- Simple refactoring (rename, extract method) +- Configuration changes + +**Example prompt:** +``` +Add a calculateDiscount() function to src/utils/pricing.js that takes +a price and discount percentage, returns the discounted price. +Add unit tests. +``` + +### Medium Complexity Tasks + +**Good for OpenHands** — These tasks may need more context and possibly some iteration. 
+ +- Implementing a new API endpoint +- Adding a feature to an existing module +- Debugging issues that span multiple files +- Migrating code to a new pattern +- Writing integration tests +- Performance optimization with clear metrics +- Setting up CI/CD workflows + +**Example prompt:** +``` +Add a user profile endpoint to our API: +- GET /api/users/:id/profile +- Return user data with their recent activity +- Follow patterns in existing controllers +- Add integration tests +- Handle not-found and unauthorized cases +``` + +### Complex Tasks + +**May require iteration** — These benefit from breaking down into smaller pieces. + +- Large refactoring across many files +- Architectural changes +- Implementing complex business logic +- Multi-service integrations +- Performance optimization without clear cause +- Security audits +- Framework or major dependency upgrades + +**Recommended approach:** +``` +Break large tasks into phases: + +Phase 1: "Analyze the current authentication system and document +all touch points that need to change for OAuth2 migration." + +Phase 2: "Implement the OAuth2 provider configuration and basic +token flow, keeping existing auth working in parallel." + +Phase 3: "Migrate the user login flow to use OAuth2, maintaining +backwards compatibility." 
+``` + +## Best Use Cases + +### Ideal Scenarios + +OpenHands is **most effective** when: + +| Scenario | Why It Works | +|----------|--------------| +| Clear requirements | OpenHands can work independently | +| Well-defined scope | Less ambiguity, fewer iterations | +| Existing patterns to follow | Consistency with codebase | +| Good test coverage | Easy to verify changes | +| Isolated changes | Lower risk of side effects | + +**Perfect use cases:** + +- **Bug fixes with reproduction steps**: Clear problem, measurable solution +- **Test additions**: Existing code provides the specification +- **Documentation**: Code is the source of truth +- **Boilerplate generation**: Follows established patterns +- **Code review and analysis**: Read-only, analytical tasks + +### Good Fit Scenarios + +OpenHands works **well with some guidance** for: + +- **Feature implementation**: When requirements are documented +- **Refactoring**: When goals and constraints are clear +- **Debugging**: When you can provide logs and context +- **Code modernization**: When patterns are established +- **API development**: When specs exist + +**Tips for these scenarios:** + +1. Provide clear acceptance criteria +2. Point to examples of similar work in the codebase +3. Specify constraints and non-goals +4. 
Be ready to iterate and clarify + +### Poor Fit Scenarios + +**Consider alternatives** when: + +| Scenario | Challenge | Alternative | +|----------|-----------|-------------| +| Vague requirements | Unclear what "done" means | Define requirements first | +| Exploratory work | Need human creativity/intuition | Brainstorm first, then implement | +| Highly sensitive code | Risk tolerance is zero | Human review essential | +| Organizational knowledge | Needs tribal knowledge | Pair with domain expert | +| Visual design | Subjective aesthetic judgments | Use design tools | + +**Red flags that a task may not be suitable:** + +- "Make it look better" (subjective) +- "Figure out what's wrong" (too vague) +- "Rewrite everything" (too large) +- "Do what makes sense" (unclear requirements) +- Changes to production infrastructure without review + +## Limitations + +### Current Limitations + +Be aware of these constraints: + +- **Long-running processes**: Sessions have time limits +- **Interactive debugging**: Can't set breakpoints interactively +- **Visual verification**: Can't see rendered UI easily +- **External system access**: May need credentials configured +- **Large codebase analysis**: Memory and time constraints + +### Technical Constraints + +| Constraint | Impact | Workaround | +|------------|--------|------------| +| Session duration | Very long tasks may timeout | Break into smaller tasks | +| Context window | Can't see entire large codebase at once | Focus on relevant files | +| No persistent state | Previous sessions not remembered | Use AGENTS.md for context | +| Network access | Some external services may be blocked | Use local resources when possible | + +### Scope Boundaries + +OpenHands works within your codebase but has boundaries: + +**Can do:** +- Read and write files in the repository +- Run tests and commands +- Access configured services and APIs +- Browse documentation and reference material + +**Cannot do:** +- Access your local environment outside 
the sandbox +- Make decisions requiring business context it doesn't have +- Replace human judgment for critical decisions +- Guarantee production-safe changes without review + +## Pre-Task Checklist + +### Prerequisites + +Before starting a task, ensure: + +- [ ] Clear description of what you want +- [ ] Expected outcome is defined +- [ ] Relevant files are identified +- [ ] Dependencies are available +- [ ] Tests can be run + +### Environment Setup + +Prepare your repository: + +```markdown +## AGENTS.md Checklist + +- [ ] Build commands documented +- [ ] Test commands documented +- [ ] Code style guidelines noted +- [ ] Architecture overview included +- [ ] Common patterns described +``` + +See [Repository Setup](/openhands/usage/customization/repository) for details. + +### Repository Preparation + +Optimize for success: + +1. **Clean state**: Commit or stash uncommitted changes +2. **Working build**: Ensure the project builds +3. **Passing tests**: Start from a green state +4. **Updated dependencies**: Resolve any dependency issues +5. **Clear documentation**: Update AGENTS.md if needed + +## Post-Task Review + +### Quality Checks + +After OpenHands completes a task: + +- [ ] Review all changed files +- [ ] Understand each change made +- [ ] Check for unintended modifications +- [ ] Verify code style consistency +- [ ] Look for hardcoded values or credentials + +### Validation Steps + +1. **Run tests**: `npm test`, `pytest`, etc. +2. **Check linting**: Ensure style compliance +3. **Build the project**: Verify it still compiles +4. **Manual testing**: Test the feature yourself +5. 
**Edge cases**: Try unusual inputs + +### Learning from Results + +After each significant task: + +**What went well?** +- Note effective prompt patterns +- Document successful approaches +- Update AGENTS.md with learnings + +**What could improve?** +- Identify unclear instructions +- Note missing context +- Plan better for next time + +**Update your repository:** +```markdown +## Things OpenHands Should Know (add to AGENTS.md) + +- When adding API endpoints, always add to routes/index.js +- Our date format is ISO 8601 everywhere +- All database queries go through the repository pattern +``` + +## Decision Framework + +Use this framework to decide if a task is right for OpenHands: + +``` +Is the task well-defined? +├── No → Define it better first +└── Yes → Continue + +Do you have clear success criteria? +├── No → Define acceptance criteria +└── Yes → Continue + +Is the scope manageable (< 100 LOC)? +├── No → Break into smaller tasks +└── Yes → Continue + +Do examples exist in the codebase? +├── No → Provide examples or patterns +└── Yes → Continue + +Can you verify the result? +├── No → Add tests or verification steps +└── Yes → ✅ Good candidate for OpenHands +``` + +OpenHands can be used for most development tasks -- the developers of OpenHands write most of their code with OpenHands! + +But it can be particularly useful for certain types of tasks. For instance: + +- **Clearly Specified Tasks:** Generally, if the task has a very clear success criterion, OpenHands will do better. It is especially useful if you can define it in a way that can be verified programmatically, like making sure that all of the tests pass or test coverage gets above a certain value using a particular program. But even when you don't have something like that, you can just provide a checklist of things that need to be done. +- **Highly Repetitive Tasks:** These are tasks that need to be done over and over again, but nobody really wants to do them. 
Some good examples include code review, improving test coverage, upgrading dependency libraries. In addition to having clear success criteria, you can create "[skills](/overview/skills)" that clearly describe your policies about how to perform these tasks, and improve the skills over time.
+- **Helping Answer Questions:** OpenHands agents are generally pretty good at answering questions about code bases, so you can feel free to ask them when you don't understand how something works. They can explore the code base and understand it deeply before providing an answer.
+- **Checking the Correctness of Library/Backend Code:** When agents work, they can run code, and they are particularly good at checking whether libraries or backend code works well.
+- **Reading Logs and Understanding Errors:** Agents can read logs from GitHub or monitoring software and understand what is going wrong with your service in a live production setting. They're actually quite good at filtering through large amounts of data, especially if pushed in the correct direction.
+
+There are also some tasks where agents struggle a little more.
+
+- **Quality Assurance of Frontend Apps:** Agents can spin up a website and check whether it works by clicking through the buttons. But they are a little bit less good at visual understanding of frontends at the moment and can sometimes make mistakes if they don't understand the workflow very well.
+- **Implementing Code they Cannot Test Live:** If agents are not able to actually run and test the app, such as connecting to a live service that they do not have access to, often they will fail at performing tasks all the way to the end, unless they get some encouragement.
diff --git a/openhands/usage/get-started/tutorials.mdx b/openhands/usage/get-started/tutorials.mdx new file mode 100644 index 00000000..8e14fd52 --- /dev/null +++ b/openhands/usage/get-started/tutorials.mdx @@ -0,0 +1,438 @@ +--- +title: Tutorial Library +description: Centralized hub for OpenHands tutorials and examples +--- + +Welcome to the OpenHands tutorial library. These tutorials show you how to use OpenHands for common development tasks, from testing to feature development. Each tutorial includes example prompts, expected workflows, and tips for success. + +## Categories Overview + +| Category | Best For | Complexity | +|----------|----------|------------| +| [Testing](#testing) | Adding tests, improving coverage | Simple to Medium | +| [Data Analysis](#data-analysis) | Processing data, generating reports | Simple to Medium | +| [Web Scraping](#web-scraping) | Extracting data from websites | Medium | +| [Code Review](#code-review) | Analyzing PRs, finding issues | Simple | +| [Bug Fixing](#bug-fixing) | Diagnosing and fixing errors | Medium | +| [Feature Development](#feature-development) | Building new functionality | Medium to Complex | + + +For in-depth guidance on specific use cases, see our [Use Cases](/openhands/usage/use-cases/code-review) section which includes detailed workflows for Code Review, Incident Triage, and more. + + +## Task Complexity Guidance + +Before starting, assess your task's complexity: + +**Simple tasks** (5-15 minutes): +- Single file changes +- Clear, well-defined requirements +- Existing patterns to follow + +**Medium tasks** (15-45 minutes): +- Multiple file changes +- Some discovery required +- Integration with existing code + +**Complex tasks** (45+ minutes): +- Architectural changes +- Multiple components +- Requires iteration + + +Start with simpler tutorials to build familiarity with OpenHands before tackling complex tasks. 
+ + +## Best Use Cases + +OpenHands excels at: + +- **Repetitive tasks**: Boilerplate code, test generation +- **Pattern application**: Following established conventions +- **Analysis**: Code review, debugging, documentation +- **Exploration**: Understanding new codebases + +## Example Tutorials by Category + +### Testing + +#### Tutorial: Add Unit Tests for a Module + +**Goal**: Achieve 80%+ test coverage for a service module + +**Prompt**: +``` +Add unit tests for the UserService class in src/services/user.js. + +Current coverage: 35% +Target coverage: 80% + +Requirements: +1. Test all public methods +2. Cover edge cases (null inputs, empty arrays, etc.) +3. Mock external dependencies (database, API calls) +4. Follow our existing test patterns in tests/services/ +5. Use Jest as the testing framework + +Focus on these methods: +- createUser() +- updateUser() +- deleteUser() +- getUserById() +``` + +**What OpenHands does**: +1. Analyzes the UserService class +2. Identifies untested code paths +3. Creates test file with comprehensive tests +4. Mocks dependencies appropriately +5. Runs tests to verify they pass + +**Tips**: +- Provide existing test files as examples +- Specify the testing framework +- Mention any mocking conventions + +--- + +#### Tutorial: Add Integration Tests for an API + +**Goal**: Test API endpoints end-to-end + +**Prompt**: +``` +Add integration tests for the /api/products endpoints. + +Endpoints to test: +- GET /api/products (list all) +- GET /api/products/:id (get one) +- POST /api/products (create) +- PUT /api/products/:id (update) +- DELETE /api/products/:id (delete) + +Requirements: +1. Use our test database (configured in jest.config.js) +2. Set up and tear down test data properly +3. Test success cases and error cases +4. Verify response bodies and status codes +5. 
Follow patterns in tests/integration/ +``` + +--- + +### Data Analysis + +#### Tutorial: Create a Data Processing Script + +**Goal**: Process CSV data and generate a report + +**Prompt**: +``` +Create a Python script to analyze our sales data. + +Input: sales_data.csv with columns: date, product, quantity, price, region + +Requirements: +1. Load and validate the CSV data +2. Calculate: + - Total revenue by product + - Monthly sales trends + - Top 5 products by quantity + - Revenue by region +3. Generate a summary report (Markdown format) +4. Create visualizations (bar chart for top products, line chart for trends) +5. Save results to reports/ directory + +Use pandas for data processing and matplotlib for charts. +``` + +**What OpenHands does**: +1. Creates a Python script with proper structure +2. Implements data loading with validation +3. Calculates requested metrics +4. Generates formatted report +5. Creates and saves visualizations + +--- + +#### Tutorial: Database Query Analysis + +**Goal**: Analyze and optimize slow database queries + +**Prompt**: +``` +Analyze our slow query log and identify optimization opportunities. + +File: logs/slow_queries.log + +For each slow query: +1. Explain why it's slow +2. Suggest index additions if helpful +3. Rewrite the query if it can be optimized +4. Estimate the improvement + +Create a report in reports/query_optimization.md with: +- Summary of findings +- Prioritized recommendations +- SQL for suggested changes +``` + +--- + +### Web Scraping + +#### Tutorial: Build a Web Scraper + +**Goal**: Extract product data from a website + +**Prompt**: +``` +Create a web scraper to extract product information from our competitor's site. + +Target URL: https://example-store.com/products + +Extract for each product: +- Name +- Price +- Description +- Image URL +- SKU (if available) + +Requirements: +1. Use Python with BeautifulSoup or Scrapy +2. Handle pagination (site has 50 pages) +3. Respect rate limits (1 request/second) +4. 
Save results to products.json +5. Handle errors gracefully +6. Log progress to console + +Include a README with usage instructions. +``` + +**Tips**: +- Specify rate limiting requirements +- Mention error handling expectations +- Request logging for debugging + +--- + +### Code Review + + +For comprehensive code review guidance, see the [Code Review Use Case](/openhands/usage/use-cases/code-review) page. For automated PR reviews using GitHub Actions, see the [PR Review SDK Guide](/sdk/guides/github-workflows/pr-review). + + +#### Tutorial: Security-Focused Code Review + +**Goal**: Identify security vulnerabilities in a PR + +**Prompt**: +``` +Review this pull request for security issues: + +Focus areas: +1. Input validation - check all user inputs are sanitized +2. Authentication - verify auth checks are in place +3. SQL injection - check for parameterized queries +4. XSS - verify output encoding +5. Sensitive data - ensure no secrets in code + +For each issue found, provide: +- File and line number +- Severity (Critical/High/Medium/Low) +- Description of the vulnerability +- Suggested fix with code example + +Output format: Markdown suitable for PR comments +``` + +--- + +#### Tutorial: Performance Review + +**Goal**: Identify performance issues in code + +**Prompt**: +``` +Review the OrderService class for performance issues. + +File: src/services/order.js + +Check for: +1. N+1 database queries +2. Missing indexes (based on query patterns) +3. Inefficient loops or algorithms +4. Missing caching opportunities +5. Unnecessary data fetching + +For each issue: +- Explain the impact +- Show the problematic code +- Provide an optimized version +- Estimate the improvement +``` + +--- + +### Bug Fixing + + +For production incident investigation and automated error analysis, see the [Incident Triage Use Case](/openhands/usage/use-cases/incident-triage) which covers integration with monitoring tools like Datadog. 
+ + +#### Tutorial: Fix a Crash Bug + +**Goal**: Diagnose and fix an application crash + +**Prompt**: +``` +Fix the crash in the checkout process. + +Error: +TypeError: Cannot read property 'price' of undefined + at calculateTotal (src/checkout/calculator.js:45) + at processOrder (src/checkout/processor.js:23) + +Steps to reproduce: +1. Add item to cart +2. Apply discount code "SAVE20" +3. Click checkout +4. Crash occurs + +The bug was introduced in commit abc123 (yesterday's deployment). + +Requirements: +1. Identify the root cause +2. Fix the bug +3. Add a regression test +4. Verify the fix doesn't break other functionality +``` + +**What OpenHands does**: +1. Analyzes the stack trace +2. Reviews recent changes +3. Identifies the null reference issue +4. Implements a defensive fix +5. Creates test to prevent regression + +--- + +#### Tutorial: Fix a Memory Leak + +**Goal**: Identify and fix a memory leak + +**Prompt**: +``` +Investigate and fix the memory leak in our Node.js application. + +Symptoms: +- Memory usage grows 100MB/hour +- After 24 hours, app becomes unresponsive +- Restarting temporarily fixes the issue + +Suspected areas: +- Event listeners in src/events/ +- Cache implementation in src/cache/ +- WebSocket connections in src/ws/ + +Analyze these areas and: +1. Identify the leak source +2. Explain why it's leaking +3. Implement a fix +4. Add monitoring to detect future leaks +``` + +--- + +### Feature Development + +#### Tutorial: Add a REST API Endpoint + +**Goal**: Create a new API endpoint with full functionality + +**Prompt**: +``` +Add a user preferences API endpoint. + +Endpoint: /api/users/:id/preferences + +Operations: +- GET: Retrieve user preferences +- PUT: Update user preferences +- PATCH: Partially update preferences + +Preferences schema: +{ + theme: "light" | "dark", + notifications: { email: boolean, push: boolean }, + language: string, + timezone: string +} + +Requirements: +1. Follow patterns in src/api/routes/ +2. 
Add request validation with Joi +3. Use UserPreferencesService for business logic +4. Add appropriate error handling +5. Document the endpoint in OpenAPI format +6. Add unit and integration tests +``` + +**What OpenHands does**: +1. Creates route handler following existing patterns +2. Implements validation middleware +3. Creates or updates the service layer +4. Adds error handling +5. Generates API documentation +6. Creates comprehensive tests + +--- + +#### Tutorial: Implement a Feature Flag System + +**Goal**: Add feature flags to the application + +**Prompt**: +``` +Implement a feature flag system for our application. + +Requirements: +1. Create a FeatureFlags service +2. Support these flag types: + - Boolean (on/off) + - Percentage (gradual rollout) + - User-based (specific user IDs) +3. Load flags from environment variables initially +4. Add a React hook: useFeatureFlag(flagName) +5. Add middleware for API routes + +Initial flags to configure: +- new_checkout: boolean, default false +- dark_mode: percentage, default 10% +- beta_features: user-based + +Include documentation and tests. +``` + +--- + +## Contributing Tutorials + +Have a great use case? Share it with the community! + +**What makes a good tutorial:** +- Solves a common problem +- Has clear, reproducible steps +- Includes example prompts +- Explains expected outcomes +- Provides tips for success + +**How to contribute:** +1. Create a detailed example following this format +2. Test it with OpenHands to verify it works +3. Submit via GitHub pull request to the docs repository +4. Include any prerequisites or setup required + + +These tutorials are starting points. The best results come from adapting them to your specific codebase, conventions, and requirements. 
+ diff --git a/openhands/usage/use-cases/cobol-modernization.mdx b/openhands/usage/use-cases/cobol-modernization.mdx new file mode 100644 index 00000000..4044465d --- /dev/null +++ b/openhands/usage/use-cases/cobol-modernization.mdx @@ -0,0 +1,168 @@ +--- +title: COBOL Modernization +description: Modernizing legacy COBOL systems with OpenHands +--- + +Legacy COBOL systems power critical business operations across banking, insurance, government, and retail. OpenHands can help you understand, document, and modernize these systems while preserving their essential business logic. + + +This guide is based on our blog post [Refactoring COBOL to Java with AI Agents](https://openhands.dev/blog/20251218-cobol-to-java-refactoring). + + +## The COBOL Modernization Challenge + +[COBOL](https://en.wikipedia.org/wiki/COBOL) modernization is one of the most pressing challenges facing enterprises today. Gartner estimated there were over 200 billion lines of COBOL code in existence, running 80% of the world's business systems. As of 2020, COBOL was still running background processes for 95% of credit and debit card transactions. + +The challenge is acute: [47% of organizations](https://softwaremodernizationservices.com/mainframe-modernization) struggle to fill COBOL roles, with salaries rising 25% annually. By 2027, 92% of remaining COBOL developers will have retired. Traditional modernization approaches have seen high failure rates, with COBOL's specialized nature requiring a unique skill set that makes it difficult for human teams alone. + +## Overview + +COBOL modernization is a complex undertaking. Every modernization effort is unique and requires careful planning, execution, and validation to ensure the modernized code behaves identically to the original. The migration needs to be driven by an experienced team of developers and domain experts, but even that isn't sufficient to ensure the job is done quickly or cost-effectively. This is where OpenHands comes in. 
+ +OpenHands is a powerful agent that assists in modernizing COBOL code along every step of the process: + +1. **Understanding**: Analyze and document existing COBOL code +2. **Translation**: Convert COBOL to modern languages like Java, Python, or C# +3. **Validation**: Ensure the modernized code behaves identically to the original + +In this document, we will explore the different ways OpenHands contributes to COBOL modernization, with example prompts and techniques to use in your own efforts. While the examples are specific to COBOL, the principles laid out here can help with any legacy system modernization. + +## Understanding + +A significant challenge in modernization is understanding the business function of the code. Developers have practice determining the "how" of the code, even in legacy systems with unfamiliar syntax and keywords, but understanding the "why" is more important to ensure that business logic is preserved accurately. The difficulty then comes from the fact that business function is only implicitly represented in the code and requires external documentation or domain expertise to untangle. + +Fortunately, agents like OpenHands are able to understand source code _and_ process-oriented documentation, and this simultaneous view lets them link the two together in a way that makes every downstream process more transparent and predictable. Your COBOL source might already have some structure or comments that make this link clear, but if not OpenHands can help. If your COBOL source is in `/src` and your process-oriented documentation is in `/docs`, the following prompt will establish a link between the two and save it for future reference: + +``` +For each COBOL program in `/src`, identify which business functions it supports. Search through the documentation in `/docs` to find all relevant sections describing that business function, and generate a summary of how the program supports that function. 
+ +Save the results in `business_functions.json` in the following format: + +{ + ..., + "COBIL00C.cbl": { + "function": "Bill payment -- pay account balance in full and a transaction action for the online payment", + "references": [ + "docs/billing.md#bill-payment", + "docs/transactions.md#transaction-action" + ], + }, + ... +} +``` + +OpenHands uses tools like `grep`, `sed`, and `awk` to navigate files and pull in context. This is natural for source code and also works well for process-oriented documentation, but in some cases exposing the latter using a _semantic search engine_ instead will yield better results. Semantic search engines can understand the meaning behind words and phrases, making it easier to find relevant information. + +## Translation + +With a clear picture of what each program does and why, the next step is translating the COBOL source into your target language. The example prompts in this section target Java, but the same approach works for Python, C#, or any modern language. Just adjust for language-specific idioms and data types as needed. + +One thing to watch out for: COBOL keywords and data types do not always match one-to-one with their Java counterparts. For example, COBOL's decimal data type (`PIC S9(9)V9(9)`), which represents a fixed-point number with a scale of 9 digits, does not have a direct equivalent in Java. Instead, you might use `BigDecimal` with a scale of 9, but be aware of potential precision issues when converting between the two. A solid test suite will help catch these corner cases but including such _known problems_ in the translation prompt can help prevent such errors from being introduced at all. + +An example prompt is below: + +``` +Convert the COBOL files in `/src` to Java in `/src/java`. + +Requirements: +1. Create a Java class for each COBOL program +2. Preserve the business logic and data structures (see `business_functions.json`) +3. 
Use appropriate Java naming conventions (camelCase for methods, PascalCase for classes)
+4. Convert COBOL data types to appropriate Java types (use BigDecimal for decimal data types)
+5. Implement proper error handling with try-catch blocks
+6. Add JavaDoc comments explaining the purpose of each class and method
+7. In JavaDoc comments, include traceability to the original COBOL source using
+   the format: `@source <file>:<lines>` (e.g., @source CBACT01C.cbl:73-77)
+8. Create a clean, maintainable object-oriented design
+9. Each Java file should be compilable and follow Java best practices
+```
+
+Note the rule that introduces traceability comments to the resulting Java. These comments help agents understand the provenance of the code, but are also helpful for developers attempting to understand the migration process. They can be used, for example, to check how much COBOL code has been translated into Java or to identify areas where business logic has been distributed across multiple Java classes.
+
+## Validation
+
+Building confidence in the migrated code is crucial. Ideally, existing end-to-end tests can be reused to validate that business logic has been preserved. If you need to strengthen the testing setup, consider _golden file testing_. This involves capturing the COBOL program's outputs for a set of known inputs, then verifying the translated code produces identical results. When generating inputs, pay particular attention to decimal precision in monetary calculations (COBOL's fixed-point arithmetic doesn't always map cleanly to Java's BigDecimal) and date handling, where COBOL's conventions can diverge from modern defaults.
+
+Every modernization effort is unique, and developer experience is crucial to ensure the testing strategy covers your organization's requirements. Best practices still apply. A solid test suite will not only ensure the migrated code works as expected, but will also help the translation agent converge to a high-quality solution.
Of course, OpenHands can help migrate tests, ensure they run and test the migrated code correctly, and even generate new tests to cover edge cases. + +## Scaling Up + +The largest challenge in scaling modernization efforts is dealing with agents' limited attention span. Asking a single agent to handle the entire migration process in one go will almost certainly lead to errors and low-quality code as the context window is filled and flushed again and again. One way to address this is by tying translation and validation together in an iterative refinement loop. + +The idea is straightforward: one agent migrates some amount of code, and another agent critiques the migration. If the quality doesn't meet the standards of the critic, the first agent is given some actionable feedback and the process repeats. Here's what that looks like using the [OpenHands SDK](https://github.com/OpenHands/software-agent-sdk): + +```python +while current_score < QUALITY_THRESHOLD and iteration < MAX_ITERATIONS: + # Migrating agent converts COBOL to Java + migration_conversation.send_message(migration_prompt) + migration_conversation.run() + + # Critiquing agent evaluates the conversion + critique_conversation.send_message(critique_prompt) + critique_conversation.run() + + # Parse the score and decide whether to continue + current_score = parse_critique_score(critique_file) +``` + +By tweaking the critic's prompt and scoring rubric, you can fine-tune the evaluation process to better align with your needs. For example, you might have code quality standards that are difficult to detect with static analysis tools or architectural patterns that are unique to your organization. The following prompt can be easily modified to support a wide range of requirements: + +``` +Evaluate the quality of the COBOL to Java migration in `/src`. + +For each Java file, assess using the following criteria: +1. Correctness: Does the Java code preserve the original business logic (see `business_functions.json`)? 
+2. Code Quality: Is the code clean, readable, and following Java 17 conventions?
+3. Completeness: Are all COBOL features properly converted?
+4. Best Practices: Does it use proper OOP, error handling, and documentation?
+
+For each instance of a criterion not met, deduct a point.
+
+Then generate a report containing actionable feedback for each file. The feedback, if addressed, should improve the score.
+
+Save the results in `critique.json` in the following format:
+
+{
+  "total_score": -12,
+  "files": [
+    {
+      "cobol": "COBIL00C.cbl",
+      "java": "bill_payment.java",
+      "scores": {
+        "correctness": 0,
+        "code_quality": 0,
+        "completeness": -1,
+        "best_practices": -2
+      },
+      "feedback": [
+        "Rename single-letter variables to meaningful names.",
+        "Ensure all COBOL functionality is translated -- the transaction action for the bill payment is missing.",
+      ],
+    },
+    ...
+  ]
+}
+```
+
+In future iterations, the migration agent should be given the file `critique.json` and be prompted to act on the feedback.
+
+This iterative refinement pattern works well for medium-sized projects with a moderate level of complexity. For legacy systems that span hundreds of files, however, the migration and critique processes need to be further decomposed to prevent agents from being overwhelmed. A natural way to do so is to break the system into smaller components, each with its own migration and critique processes. This process can be automated by using the OpenHands large codebase SDK, which combines agentic intelligence with static analysis tools to decompose large projects and orchestrate parallel agents in a dependency-aware manner.
+ +## Try It Yourself + +The full iterative refinement example is available in the OpenHands SDK: + +```bash +export LLM_API_KEY="your-api-key" +cd software-agent-sdk +uv run python examples/01_standalone_sdk/31_iterative_refinement.py +``` + +For real-world COBOL files, you can use the [AWS CardDemo application](https://github.com/aws-samples/aws-mainframe-modernization-carddemo/tree/main/app/cbl), which provides a representative mainframe application for testing modernization approaches. + + +## Related Resources + +- [OpenHands SDK Repository](https://github.com/OpenHands/software-agent-sdk) - Build custom AI agents +- [AWS CardDemo Application](https://github.com/aws-samples/aws-mainframe-modernization-carddemo/tree/main/app/cbl) - Sample COBOL application for testing +- [Prompting Best Practices](/openhands/usage/tips/prompting-best-practices) - Write effective prompts diff --git a/openhands/usage/use-cases/dependency-upgrades.mdx b/openhands/usage/use-cases/dependency-upgrades.mdx new file mode 100644 index 00000000..7dad9000 --- /dev/null +++ b/openhands/usage/use-cases/dependency-upgrades.mdx @@ -0,0 +1,284 @@ +--- +title: Dependency Upgrades +description: Automating dependency updates and upgrades with OpenHands +--- + +Keeping dependencies up to date is essential for security, performance, and access to new features. OpenHands can help you identify outdated dependencies, plan upgrades, handle breaking changes, and validate that your application still works after updates. 
+ +## Overview + +OpenHands helps with dependency management by: + +- **Analyzing dependencies**: Identifying outdated packages and their versions +- **Planning upgrades**: Creating upgrade strategies and migration guides +- **Implementing changes**: Updating code to handle breaking changes +- **Validating results**: Running tests and verifying functionality + +## Dependency Analysis Examples + +### Identifying Outdated Dependencies + +Start by understanding your current dependency state: + +``` +Analyze the dependencies in this project and create a report: + +1. List all direct dependencies with current and latest versions +2. Identify dependencies more than 2 major versions behind +3. Flag any dependencies with known security vulnerabilities +4. Highlight dependencies that are deprecated or unmaintained +5. Prioritize which updates are most important +``` + +**Example output:** + +| Package | Current | Latest | Risk | Priority | +|---------|---------|--------|------|----------| +| lodash | 4.17.15 | 4.17.21 | Security (CVE) | High | +| react | 16.8.0 | 18.2.0 | Outdated | Medium | +| express | 4.17.1 | 4.18.2 | Minor update | Low | +| moment | 2.29.1 | 2.29.4 | Deprecated | Medium | + +### Security-Related Dependency Upgrades + +Dependency upgrades are often needed to fix security vulnerabilities in your dependencies. If you're upgrading dependencies specifically to address security issues, see our [Vulnerability Remediation](/openhands/usage/use-cases/vulnerability-remediation) guide for comprehensive guidance on: + +- Automating vulnerability detection and remediation +- Integrating with security scanners (Snyk, Dependabot, CodeQL) +- Building automated pipelines for security fixes +- Using OpenHands agents to create pull requests automatically + +### Compatibility Checking + +Check for compatibility issues before upgrading: + +``` +Check compatibility for upgrading React from 16 to 18: + +1. Review our codebase for deprecated React patterns +2. 
List all components using lifecycle methods +3. Identify usage of string refs or findDOMNode +4. Check third-party library compatibility with React 18 +5. Estimate the effort required for migration +``` + +**Compatibility matrix:** + +| Dependency | React 16 | React 17 | React 18 | Action Needed | +|------------|----------|----------|----------|---------------| +| react-router | v5 ✓ | v5 ✓ | v6 required | Major upgrade | +| styled-components | v5 ✓ | v5 ✓ | v5 ✓ | None | +| material-ui | v4 ✓ | v4 ✓ | v5 required | Major upgrade | + +## Automated Upgrade Examples + +### Version Updates + +Perform straightforward version updates: + + + + ``` + Update all patch and minor versions in package.json: + + 1. Review each update for changelog notes + 2. Update package.json with new versions + 3. Update package-lock.json + 4. Run the test suite + 5. List any deprecation warnings + ``` + + + ``` + Update dependencies in requirements.txt: + + 1. Check each package for updates + 2. Update requirements.txt with compatible versions + 3. Update requirements-dev.txt similarly + 4. Run tests and verify functionality + 5. Note any deprecation warnings + ``` + + + ``` + Update dependencies in pom.xml: + + 1. Check for newer versions of each dependency + 2. Update version numbers in pom.xml + 3. Run mvn dependency:tree to check conflicts + 4. Run the test suite + 5. Document any API changes encountered + ``` + + + +### Breaking Change Handling + +When major versions introduce breaking changes: + +``` +Upgrade axios from v0.x to v1.x and handle breaking changes: + +1. List all breaking changes in axios 1.0 changelog +2. Find all axios usages in our codebase +3. For each breaking change: + - Show current code + - Show updated code + - Explain the change +4. Create a git commit for each logical change +5. 
Verify all tests pass +``` + +**Example transformation:** + +```javascript +// Before (axios 0.x) +import axios from 'axios'; +axios.defaults.baseURL = 'https://api.example.com'; +const response = await axios.get('/users', { + cancelToken: source.token +}); + +// After (axios 1.x) +import axios from 'axios'; +axios.defaults.baseURL = 'https://api.example.com'; +const controller = new AbortController(); +const response = await axios.get('/users', { + signal: controller.signal +}); +``` + +### Code Adaptation + +Adapt code to new API patterns: + +``` +Migrate our codebase from moment.js to date-fns: + +1. List all moment.js usages in our code +2. Map moment methods to date-fns equivalents +3. Update imports throughout the codebase +4. Handle any edge cases where APIs differ +5. Remove moment.js from dependencies +6. Verify all date handling still works correctly +``` + +**Migration map:** + +| moment.js | date-fns | Notes | +|-----------|----------|-------| +| `moment()` | `new Date()` | Different return type | +| `moment().format('YYYY-MM-DD')` | `format(new Date(), 'yyyy-MM-dd')` | Different format tokens | +| `moment().add(1, 'days')` | `addDays(new Date(), 1)` | Function-based API | +| `moment().startOf('month')` | `startOfMonth(new Date())` | Separate function | + +## Testing and Validation Examples + +### Automated Test Execution + +Run comprehensive tests after upgrades: + +``` +After the dependency upgrades, validate the application: + +1. Run the full test suite (unit, integration, e2e) +2. Check test coverage hasn't decreased +3. Run type checking (if applicable) +4. Run linting with new lint rule versions +5. Build the application for production +6. Report any failures with analysis +``` + +### Integration Testing + +Verify integrations still work: + +``` +Test our integrations after upgrading the AWS SDK: + +1. Test S3 operations (upload, download, list) +2. Test DynamoDB operations (CRUD) +3. Test Lambda invocations +4. Test SQS send/receive +5. 
Compare behavior to before the upgrade +6. Note any subtle differences +``` + +### Regression Detection + +Detect regressions from upgrades: + +``` +Check for regressions after upgrading the ORM: + +1. Run database operation benchmarks +2. Compare query performance before and after +3. Verify all migrations still work +4. Check for any N+1 queries introduced +5. Validate data integrity in test database +6. Document any behavioral changes +``` + +## Additional Examples + +### Security-Driven Upgrade + +``` +We have a critical security vulnerability in jsonwebtoken. + +Current: jsonwebtoken@8.5.1 +Required: jsonwebtoken@9.0.0 + +Perform the upgrade: +1. Check for breaking changes in v9 +2. Find all usages of jsonwebtoken in our code +3. Update any deprecated methods +4. Update the package version +5. Verify all JWT operations work +6. Run security tests +``` + +### Framework Major Upgrade + +``` +Upgrade our Next.js application from 12 to 14: + +Key areas to address: +1. App Router migration (pages -> app) +2. New metadata API +3. Server Components by default +4. New Image component +5. Route handlers replacing API routes + +For each area: +- Show current implementation +- Show new implementation +- Test the changes +``` + +### Multi-Package Coordinated Upgrade + +``` +Upgrade our React ecosystem packages together: + +Current: +- react: 17.0.2 +- react-dom: 17.0.2 +- react-router-dom: 5.3.0 +- @testing-library/react: 12.1.2 + +Target: +- react: 18.2.0 +- react-dom: 18.2.0 +- react-router-dom: 6.x +- @testing-library/react: 14.x + +Create an upgrade plan that handles all these together, +addressing breaking changes in the correct order. 
+``` + +## Related Resources + +- [Vulnerability Remediation](/openhands/usage/use-cases/vulnerability-remediation) - Fix security vulnerabilities +- [Security Guide](/sdk/guides/security) - Security best practices for AI agents +- [Prompting Best Practices](/openhands/usage/tips/prompting-best-practices) - Write effective prompts diff --git a/openhands/usage/use-cases/incident-triage.mdx b/openhands/usage/use-cases/incident-triage.mdx new file mode 100644 index 00000000..a193aeb5 --- /dev/null +++ b/openhands/usage/use-cases/incident-triage.mdx @@ -0,0 +1,256 @@ +--- +title: Incident Triage +description: Using OpenHands to investigate and resolve production incidents +--- + +When production incidents occur, speed matters. OpenHands can help you quickly investigate issues, analyze logs and errors, identify root causes, and generate fixes—reducing your mean time to resolution (MTTR). + + +This guide is based on our blog post [Debugging Production Issues with AI Agents: Automating Datadog Error Analysis](https://openhands.dev/blog/debugging-production-issues-with-ai-agents-automating-datadog-error-analysis). + + +## Overview + +Running a production service is **hard**. Errors and bugs crop up due to product updates, infrastructure changes, or unexpected user behavior. When these issues arise, it's critical to identify and fix them quickly to minimize downtime and maintain user trust—but this is challenging, especially at scale. + +What if AI agents could handle the initial investigation automatically? This allows engineers to start with a detailed report of the issue, including root cause analysis and specific recommendations for fixes, dramatically speeding up the debugging process. 
+ +OpenHands accelerates incident response by: + +- **Automated error analysis**: AI agents investigate errors and provide detailed reports +- **Root cause identification**: Connect symptoms to underlying issues in your codebase +- **Fix recommendations**: Generate specific, actionable recommendations for resolving issues +- **Integration with monitoring tools**: Work directly with platforms like Datadog + +## Automated Datadog Error Analysis + +The [OpenHands Software Agent SDK](https://github.com/OpenHands/software-agent-sdk) provides powerful capabilities for building autonomous AI agents that can integrate with monitoring platforms like Datadog. A ready-to-use [GitHub Actions workflow](https://github.com/OpenHands/software-agent-sdk/tree/main/examples/03_github_workflows/04_datadog_debugging) demonstrates how to automate error analysis. + +### How It Works + +[Datadog](https://www.datadoghq.com/) is a popular monitoring and analytics platform that provides comprehensive error tracking capabilities. It aggregates logs, metrics, and traces from your applications, making it easier to identify and investigate issues in production. + +[Datadog's Error Tracking](https://www.datadoghq.com/error-tracking/) groups similar errors together and provides detailed insights into their occurrences, stack traces, and affected services. OpenHands can automatically analyze these errors and provide detailed investigation reports. + +### Triggering Automated Debugging + +The GitHub Actions workflow can be triggered in two ways: + +1. **Search Query**: Provide a search query (e.g., "JSONDecodeError") to find all recent errors matching that pattern. This is useful for investigating categories of errors. + +2. **Specific Error ID**: Provide a specific Datadog error tracking ID to deep-dive into a known issue. You can copy the error ID from Datadog's error tracking UI using the "Actions" button. 
+ +### Automated Investigation Process + +When the workflow runs, it automatically performs the following steps: + +1. Get detailed info from the Datadog API +2. Create or find an existing GitHub issue to track the error +3. Clone all relevant repositories to get full code context +4. Run an OpenHands agent to analyze the error and investigate the code +5. Post the findings as a comment on the GitHub issue + +The agent identifies the exact file and line number where errors originate, determines root causes, and provides specific recommendations for fixes. + + +The workflow posts findings to GitHub issues for human review before any code changes are made. If you want the agent to create a fix, you can follow up using the [OpenHands GitHub integration](https://docs.openhands.dev/openhands/usage/cloud/github-installation#github-integration) and say `@openhands go ahead and create a pull request to fix this issue based on your analysis`. + + +## Setting Up the Workflow + +To set up automated Datadog debugging in your own repository: + +1. Copy the workflow file to `.github/workflows/` in your repository +2. Configure the required secrets (Datadog API keys, LLM API key) +3. Customize the default queries and repository lists for your needs +4. Run the workflow manually or set up scheduled runs + +The workflow is fully customizable. You can modify the prompts to focus on specific types of analysis, adjust the agent's tools to fit your workflow, or extend it to integrate with other services beyond GitHub and Datadog. + +Find the [full implementation on GitHub](https://github.com/OpenHands/software-agent-sdk/tree/main/examples/03_github_workflows/04_datadog_debugging), including the workflow YAML file, Python script, and prompt template. + +## Manual Incident Investigation + +You can also use OpenHands directly to investigate incidents without the automated workflow. 
+ +### Log Analysis + +OpenHands can analyze logs to identify patterns and anomalies: + +``` +Analyze these application logs for the incident that occurred at 14:32 UTC: + +1. Identify the first error or warning that appeared +2. Trace the sequence of events leading to the failure +3. Find any correlated errors across services +4. Identify the user or request that triggered the issue +5. Summarize the timeline of events +``` + +**Log analysis capabilities:** + +| Log Type | Analysis Capabilities | +|----------|----------------------| +| Application logs | Error patterns, exception traces, timing anomalies | +| Access logs | Traffic patterns, slow requests, error responses | +| System logs | Resource exhaustion, process crashes, system errors | +| Database logs | Slow queries, deadlocks, connection issues | + +### Stack Trace Analysis + +Deep dive into stack traces: + +``` +Analyze this stack trace from our production error: + +[paste full stack trace] + +1. Identify the exception type and message +2. Trace back to our code (not framework code) +3. Identify the likely cause +4. Check if this code path has changed recently +5. Suggest a fix +``` + +**Multi-language support:** + + + + ``` + Analyze this Java exception: + + java.lang.OutOfMemoryError: Java heap space + at java.util.Arrays.copyOf(Arrays.java:3210) + at java.util.ArrayList.grow(ArrayList.java:265) + at com.myapp.DataProcessor.loadAllRecords(DataProcessor.java:142) + + Identify: + 1. What operation is consuming memory? + 2. Is there a memory leak or just too much data? + 3. What's the fix? + ``` + + + ``` + Analyze this Python traceback: + + Traceback (most recent call last): + File "app/api/orders.py", line 45, in create_order + order = OrderService.create(data) + File "app/services/order.py", line 89, in create + inventory.reserve(item_id, quantity) + AttributeError: 'NoneType' object has no attribute 'reserve' + + What's None and why? 
+ ``` + + + ``` + Analyze this Node.js error: + + TypeError: Cannot read property 'map' of undefined + at processItems (/app/src/handlers/items.js:23:15) + at async handleRequest (/app/src/api/router.js:45:12) + + What's undefined and how should we handle it? + ``` + + + +### Root Cause Analysis + +Identify the underlying cause of an incident: + +``` +Perform root cause analysis for this incident: + +Symptoms: +- API response times increased 5x at 14:00 +- Error rate jumped from 0.1% to 15% +- Database CPU spiked to 100% + +Available data: +- Application metrics (Grafana dashboard attached) +- Recent deployments: v2.3.1 deployed at 13:45 +- Database slow query log (attached) + +Identify the root cause using the 5 Whys technique. +``` + +## Common Incident Patterns + +OpenHands can recognize and help diagnose these common patterns: + +- **Connection pool exhaustion**: Increasing connection errors followed by complete failure +- **Memory leaks**: Gradual memory increase leading to OOM +- **Cascading failures**: One service failure triggering others +- **Thundering herd**: Simultaneous requests overwhelming a service +- **Split brain**: Inconsistent state across distributed components + +## Quick Fix Generation + +Once the root cause is identified, generate fixes: + +``` +We've identified the root cause: a missing null check in OrderProcessor.java line 156. + +Generate a fix that: +1. Adds proper null checking +2. Logs when null is encountered +3. Returns an appropriate error response +4. Includes a unit test for the edge case +5. Is minimally invasive for a hotfix +``` + +## Best Practices + +### Investigation Checklist + +Use this checklist when investigating: + +1. **Scope the impact** + - How many users affected? + - What functionality is broken? + - What's the business impact? + +2. **Establish timeline** + - When did it start? + - What changed around that time? + - Is it getting worse or stable? + +3. 
**Gather data** + - Application logs + - Infrastructure metrics + - Recent deployments + - Configuration changes + +4. **Form hypotheses** + - List possible causes + - Rank by likelihood + - Test systematically + +5. **Implement fix** + - Choose safest fix + - Test before deploying + - Monitor after deployment + +### Common Pitfalls + + +Avoid these common incident response mistakes: + +- **Jumping to conclusions**: Gather data before assuming the cause +- **Changing multiple things**: Make one change at a time to isolate effects +- **Not documenting**: Record all actions for the post-mortem +- **Ignoring rollback**: Always have a rollback plan before deploying fixes + + + +For production incidents, always follow your organization's incident response procedures. OpenHands is a tool to assist your investigation, not a replacement for proper incident management. + + +## Related Resources + +- [OpenHands SDK Repository](https://github.com/OpenHands/software-agent-sdk) - Build custom AI agents +- [Datadog Debugging Workflow](https://github.com/OpenHands/software-agent-sdk/tree/main/examples/03_github_workflows/04_datadog_debugging) - Ready-to-use GitHub Actions workflow +- [Prompting Best Practices](/openhands/usage/tips/prompting-best-practices) - Write effective prompts diff --git a/openhands/usage/use-cases/spark-migrations.mdx b/openhands/usage/use-cases/spark-migrations.mdx new file mode 100644 index 00000000..b14ecc7e --- /dev/null +++ b/openhands/usage/use-cases/spark-migrations.mdx @@ -0,0 +1,148 @@ +--- +title: Spark Migrations +description: Migrating Apache Spark applications with OpenHands +--- + +Apache Spark is constantly evolving, and keeping your data pipelines up to date is essential for performance, security, and access to new features. OpenHands can help you analyze, migrate, and validate Spark applications. + +## Overview + +Spark version upgrades are deceptively difficult. 
The [Spark 3.0 migration guide](https://spark.apache.org/docs/latest/migration-guide.html) alone documents hundreds of behavioral changes, deprecated APIs, and removed features, and many of these changes are _semantic_. That means the same code compiles and runs but produces different results across different Spark versions: for example, a date parsing expression that worked correctly in Spark 2.4 may silently return different values in Spark 3.x due to the switch from the Julian calendar to the Gregorian calendar. + +Version upgrades are also made difficult due to the scale of typical enterprise Spark codebases. When you have dozens of jobs across ETL, reporting, and ML pipelines, each with its own combination of DataFrame operations, UDFs, and configuration, manual migration stops scaling well and becomes prone to subtle regressions. + +Spark migration requires careful analysis, targeted code changes, and thorough validation to ensure that migrated pipelines produce identical results. + +Such migrations need to be driven by experienced data engineering teams that understand how your Spark pipelines interact, but even that isn't sufficient to ensure the job is done quickly or without regressions. This is where OpenHands comes in. OpenHands assists in migrating Spark applications along every step of the process: + +1. **Understanding**: Analyze the existing codebase to identify what needs to change and why +2. **Migration**: Apply targeted code transformations that address API changes and behavioral differences +3. **Validation**: Verify that migrated pipelines produce identical results to the originals + +In this document, we will explore how OpenHands contributes to Spark migrations, with example prompts and techniques to use in your own efforts. 
While the examples focus on Spark 2.x to 3.x upgrades, the same principles apply to cloud platform migrations, framework conversions (MapReduce, Hive, Pig to Spark), and upgrades between Spark 3.x minor versions. + +## Understanding + +Before changing any code, it helps to build a clear picture of what is affected and where the risk is concentrated. Spark migrations touch a large surface area — API deprecations, behavioral changes, configuration defaults, and dependency versions — and the interactions between them are hard to reason about manually. + +Apache releases detailed lists of changes between each major and minor version of Spark. OpenHands can utilize this list of changes while scanning your codebase to produce a structured inventory of everything that needs attention. This inventory becomes the foundation for the migration itself, helping you prioritize work and track progress. + +If your Spark project is in `/src` and you're migrating from 2.4 to 3.0, the following prompt will generate this inventory: + +``` +Analyze the Spark application in `/src` for a migration from Spark 2.4 to Spark 3.0. + +Examine the migration guidelines at https://spark.apache.org/docs/latest/migration-guide.html. + +Then, for each source file, identify: + +1. Deprecated or removed API usages (e.g., `registerTempTable`, `unionAll`, `SQLContext`) +2. Behavioral changes that could affect output (e.g., date/time parsing, CSV parsing, CAST semantics) +3. Configuration properties that have changed defaults or been renamed +4. 
Dependencies that need version updates + +Save the results in `migration_inventory.json` in the following format: + +{ + ..., + "src/main/scala/etl/TransformJob.scala": { + "deprecated_apis": [ + {"line": 42, "current": "df.registerTempTable(\"temp\")", "replacement": "df.createOrReplaceTempView(\"temp\")"} + ], + "behavioral_changes": [ + {"line": 78, "description": "to_date() uses proleptic Gregorian calendar in Spark 3.x; verify date handling with test data"} + ], + "config_changes": [], + "risk": "medium" + }, + ... +} +``` + +Tools like `grep` and `find` (both used by OpenHands) are helpful for identifying where APIs are used, but the real value comes from OpenHands' ability to understand the _context_ around each usage. A simple `registerTempTable` call is migrated via a rename, but a date parsing expression requires understanding how the surrounding pipeline uses the result. This contextual analysis helps developers distinguish between mechanical fixes and changes that need careful testing. + +## Migration + +With a clear inventory of what needs to change, the next step is applying the transformations. Spark migrations involve a mix of straightforward API renames and subtler behavioral adjustments, and it's important to handle them differently. + +To handle simple renames, we prompt OpenHands to use tools like `grep` and `ast-grep` instead of manually manipulating source code. This saves tokens and also simplifies future migrations, as agents can reliably re-run the tools via a script. + +The main risk in migration is that many Spark 3.x behavioral changes are _silent_. The migrated code will compile and run without errors, but may produce different results. Date and timestamp handling is the most common source of these silent failures: Spark 3.x switched to the Gregorian calendar by default, which changes how dates before 1582-10-15 are interpreted. 
CSV and JSON parsing also became stricter in Spark 3.x, rejecting malformed inputs that Spark 2.x would silently accept. + +An example prompt is below: + +``` +Migrate the Spark application in `/src` from Spark 2.4 to Spark 3.0. + +Use `migration_inventory.json` to guide the changes. + +For all low-risk changes (minor syntax changes, updated APIs, etc.), use tools like `grep` or `ast-grep`. Make sure you write the invocations to a `migration.sh` script for future use. + +Requirements: +1. Replace all deprecated APIs with their Spark 3.0 equivalents +2. For behavioral changes (especially date handling and CSV parsing), add explicit configuration to preserve Spark 2.4 behavior where needed (e.g., spark.sql.legacy.timeParserPolicy=LEGACY) +3. Update build.sbt / pom.xml dependencies to Spark 3.0 compatible versions +4. Replace RDD-based operations with DataFrame/Dataset equivalents where practical +5. Replace UDFs with built-in Spark SQL functions where a direct equivalent exists +6. Update import statements for any relocated classes +7. Preserve all existing business logic and output schemas +``` + +Note the inclusion of the _known problems_ in requirement 2. We plan to catch the silent failures associated with these systems in the validation step, but including them explicitly while migrating helps avoid them altogether. + +## Validation + +Spark migrations are particularly prone to silent regressions: jobs appear to run successfully but produce subtly different output. Jobs dealing with dates, CSVs, or using CAST semantics are all vulnerable, especially when migrating between major versions of Spark. + +The most reliable way to ensure silent regressions do not exist is by _data-level comparison_, where both the new and old pipelines are run on the same input data and their outputs directly compared. 
This catches subtle errors that unit tests might miss, especially in complex pipelines where a behavioral change in one stage propagates through downstream transformations. + +An example prompt for data-level comparison: + +``` +Validate the migrated Spark application in `/src` against the original. + +1. For each job, run both the Spark 2.4 and 3.0 versions on the test data in `/test_data` +2. Compare outputs: + - Row counts must match exactly + - Perform column-level comparison using checksums for numeric columns and exact match for string/date columns + - Flag any NULL handling differences +3. For any discrepancies, trace them back to the specific changes recorded in `migration_inventory.json` and the `migration.sh` script +4. Generate a performance comparison: job duration, shuffle bytes, and peak executor memory + +Save the results in `validation_report.json` in the following format: + +{ + "jobs": [ + { + "name": "daily_etl", + "data_match": true, + "row_count": {"v2": 1000000, "v3": 1000000}, + "column_diffs": [], + "performance": { + "duration_seconds": {"v2": 340, "v3": 285}, + "shuffle_bytes": {"v2": "2.1GB", "v3": "1.8GB"} + } + }, + ... + ] +} +``` + +Note this prompt relies on existing data in `/test_data`. This can be generated by standard fuzzing tools, but in a pinch OpenHands can also help construct synthetic data that stresses the potential corner cases in the relevant systems. + +Every migration is unique, and developer experience is crucial to ensure the testing strategy covers your organization's requirements. Pay particular attention to jobs that involve date arithmetic, decimal precision in financial calculations, or custom UDFs that may depend on Spark internals. A solid validation suite not only ensures the migrated code works as expected, but also builds the organizational confidence needed to deploy the new version to production. 
+ +## Beyond Version Upgrades + +While this document focuses on Spark version upgrades, the same Understanding → Migration → Validation workflow applies to other Spark migration scenarios: + +- **Cloud platform migrations** (e.g., EMR to Databricks, on-premises to Dataproc): The "understanding" step inventories platform-specific code (S3 paths, IAM roles, EMR bootstrap scripts), the migration step converts them to the target platform's equivalents, and validation confirms that jobs produce identical output in the new environment. +- **Framework migrations** (MapReduce, Hive, or Pig to Spark): The "understanding" step maps the existing framework's operations to Spark equivalents, the migration step performs the conversion, and validation compares outputs between the old and new frameworks. + +In each case, the key principle is the same: build a structured inventory of what needs to change, apply targeted transformations, and validate rigorously before deploying. + +## Related Resources + +- [OpenHands SDK Repository](https://github.com/OpenHands/software-agent-sdk) - Build custom AI agents +- [Spark 3.x Migration Guide](https://spark.apache.org/docs/latest/migration-guide.html) - Official Spark migration documentation +- [Prompting Best Practices](/openhands/usage/tips/prompting-best-practices) - Write effective prompts diff --git a/openhands/usage/use-cases/vulnerability-remediation.mdx b/openhands/usage/use-cases/vulnerability-remediation.mdx new file mode 100644 index 00000000..39813dfd --- /dev/null +++ b/openhands/usage/use-cases/vulnerability-remediation.mdx @@ -0,0 +1,276 @@ +--- +title: Vulnerability Remediation +description: Using OpenHands to identify and fix security vulnerabilities in your codebase +--- + +Security vulnerabilities are a constant challenge for software teams. Every day, new security issues are discovered—from vulnerabilities in dependencies to code security flaws detected by static analysis tools. 
The National Vulnerability Database (NVD) reports thousands of new vulnerabilities annually, and organizations struggle to keep up with this constant influx. + +## The Challenge + +The traditional approach to vulnerability remediation is manual and time-consuming: + +1. Scan repositories for vulnerabilities +2. Review each vulnerability and its impact +3. Research the fix (usually a version upgrade) +4. Update dependency files +5. Test the changes +6. Create pull requests +7. Get reviews and merge + +This process can take hours per vulnerability, and with hundreds or thousands of vulnerabilities across multiple repositories, it becomes an overwhelming task. Security debt accumulates faster than teams can address it. + +**What if we could automate this entire process using AI agents?** + +## Automated Vulnerability Remediation with OpenHands + +The [OpenHands Software Agents SDK](https://docs.openhands.dev/sdk) provides powerful capabilities for building autonomous AI agents capable of interacting with codebases. These agents can tackle one of the most tedious tasks in software maintenance: **security vulnerability remediation**. + +OpenHands assists with vulnerability remediation by: + +- **Identifying vulnerabilities**: Analyzing code for common security issues +- **Understanding impact**: Explaining the risk and exploitation potential +- **Implementing fixes**: Generating secure code to address vulnerabilities +- **Validating remediation**: Verifying fixes are effective and complete + +## Two Approaches to Vulnerability Fixing + +### 1. Point to a GitHub Repository + +Build a workflow where users can point to a GitHub repository, scan it for vulnerabilities, and have OpenHands AI agents automatically create pull requests with fixes—all with minimal human intervention. + +### 2. 
Upload Security Scanner Reports + +Enable users to upload reports from security scanners such as Snyk (as well as other third-party security scanners) where OpenHands agents automatically detect the report format, identify the issues, and apply fixes. + +This solution goes beyond automation—it focuses on making security remediation accessible, fast, and scalable. + +## Architecture Overview + +A vulnerability remediation agent can be built as a web application that orchestrates agents using the [OpenHands Software Agents SDK](https://docs.openhands.dev/sdk) and [OpenHands Cloud](https://docs.openhands.dev/openhands/usage/key-features) to perform security scans and automate remediation fixes. + +The key architectural components include: + +- **Frontend**: Communicates directly with the OpenHands Agent Server through the [TypeScript Client](https://github.com/OpenHands/typescript-client) +- **WebSocket interface**: Enables real-time status updates on agent actions and operations +- **LLM flexibility**: OpenHands supports multiple LLMs, minimizing dependency on any single provider +- **Scalable execution**: The Agent Server can be hosted locally, with self-hosted models, or integrated with OpenHands Cloud + +This architecture allows the frontend to remain lightweight while heavy lifting happens in the agent's execution environment. + +## Example: Vulnerability Fixer Application + +An example implementation is available at [github.com/OpenHands/vulnerability-fixer](https://github.com/OpenHands/vulnerability-fixer). This React web application demonstrates the full workflow: + +1. User points to a repository or uploads a security scan report +2. Agent analyzes the vulnerabilities +3. Agent creates fixes and pull requests automatically +4. User reviews and merges the changes + +## Security Scanning Integration + +Use OpenHands to analyze security scanner output: + +``` +We ran a security scan and found these issues. Analyze each one: + +1. 
SQL Injection in src/api/users.py:45 +2. XSS in src/templates/profile.html:23 +3. Hardcoded credential in src/config/database.py:12 +4. Path traversal in src/handlers/files.py:67 + +For each vulnerability: +- Explain what the vulnerability is +- Show how it could be exploited +- Rate the severity (Critical/High/Medium/Low) +- Suggest a fix +``` + +## Common Vulnerability Patterns + +OpenHands can detect these common vulnerability patterns: + +| Vulnerability | Pattern | Example | +|--------------|---------|---------| +| SQL Injection | String concatenation in queries | `query = "SELECT * FROM users WHERE id=" + user_id` | +| XSS | Unescaped user input in HTML | `
<div>${user_comment}</div>
` | +| Path Traversal | Unvalidated file paths | `open(user_supplied_path)` | +| Command Injection | Shell commands with user input | `os.system("ping " + hostname)` | +| Hardcoded Secrets | Credentials in source code | `password = "admin123"` | + +## Automated Remediation + +### Applying Security Patches + +Fix identified vulnerabilities: + + + + ``` + Fix the SQL injection vulnerability in src/api/users.py: + + Current code: + query = f"SELECT * FROM users WHERE id = {user_id}" + cursor.execute(query) + + Requirements: + 1. Use parameterized queries + 2. Add input validation + 3. Maintain the same functionality + 4. Add a test case for the fix + ``` + + **Fixed code:** + ```python + # Using parameterized query + query = "SELECT * FROM users WHERE id = %s" + cursor.execute(query, (user_id,)) + ``` + + + ``` + Fix the XSS vulnerability in src/templates/profile.html: + + Current code: +
<div>${user.bio}</div>
+ + Requirements: + 1. Properly escape user content + 2. Consider Content Security Policy + 3. Handle rich text if needed + 4. Test with malicious input + ``` + + **Fixed code:** + ```html + +
<div>{{ user.bio | escape }}</div>
+ ``` +
+ + ``` + Fix the command injection in src/utils/network.py: + + Current code: + def ping_host(hostname): + os.system(f"ping -c 1 {hostname}") + + Requirements: + 1. Use safe subprocess calls + 2. Validate input format + 3. Avoid shell=True + 4. Handle errors properly + ``` + + **Fixed code:** + ```python + import subprocess + import re + + def ping_host(hostname): + # Validate hostname format + if not re.match(r'^[a-zA-Z0-9.-]+$', hostname): + raise ValueError("Invalid hostname") + + # Use subprocess without shell + result = subprocess.run( + ["ping", "-c", "1", hostname], + capture_output=True, + text=True + ) + return result.returncode == 0 + ``` + +
+ +### Code-Level Vulnerability Fixes + +Fix application-level security issues: + +``` +Fix the broken access control in our API: + +Issue: Users can access other users' data by changing the ID in the URL. + +Current code: +@app.get("/api/users/{user_id}/documents") +def get_documents(user_id: int): + return db.get_documents(user_id) + +Requirements: +1. Add authorization check +2. Verify requesting user matches or is admin +3. Return 403 for unauthorized access +4. Log access attempts +5. Add tests for authorization +``` + +**Fixed code:** + +```python +@app.get("/api/users/{user_id}/documents") +def get_documents(user_id: int, current_user: User = Depends(get_current_user)): + # Check authorization + if current_user.id != user_id and not current_user.is_admin: + logger.warning(f"Unauthorized access attempt: user {current_user.id} tried to access user {user_id}'s documents") + raise HTTPException(status_code=403, detail="Not authorized") + + return db.get_documents(user_id) +``` + +## Security Testing + +Test your fixes thoroughly: + +``` +Create security tests for the SQL injection fix: + +1. Test with normal input +2. Test with SQL injection payloads: + - ' OR '1'='1 + - '; DROP TABLE users; -- + - UNION SELECT * FROM passwords +3. Test with special characters +4. Test with null/empty input +5. Verify error handling doesn't leak information +``` + +## Automated Remediation Pipeline + +Create an end-to-end automated pipeline: + +``` +Create an automated vulnerability remediation pipeline: + +1. Parse Snyk/Dependabot/CodeQL alerts +2. Categorize by severity and type +3. For each vulnerability: + - Create a branch + - Apply the fix + - Run tests + - Create a PR with: + - Description of vulnerability + - Fix applied + - Test results +4. Request review from security team +5. 
Auto-merge low-risk fixes after tests pass
+```
+
+## Building Your Own Vulnerability Fixer
+
+The example application demonstrates that AI agents can effectively automate security maintenance at scale. Tasks that required hours of manual effort per vulnerability can now be completed in minutes with minimal human intervention.
+
+To build your own vulnerability remediation agent:
+
+1. Use the [OpenHands Software Agent SDK](https://github.com/OpenHands/software-agent-sdk) to create your agent
+2. Integrate with your security scanning tools (Snyk, Dependabot, CodeQL, etc.)
+3. Configure the agent to create pull requests automatically
+4. Set up human review workflows for critical fixes
+
+As agent capabilities continue to evolve, an increasing number of repetitive and time-consuming security tasks can be automated, enabling developers to focus on higher-level design, innovation, and problem-solving rather than routine maintenance.
+
+## Related Resources
+
+- [Vulnerability Fixer Example](https://github.com/OpenHands/vulnerability-fixer) - Full implementation example
+- [OpenHands SDK Documentation](https://docs.openhands.dev/sdk) - Build custom AI agents
+- [Dependency Upgrades](/openhands/usage/use-cases/dependency-upgrades) - Updating vulnerable dependencies
+- [Prompting Best Practices](/openhands/usage/tips/prompting-best-practices) - Write effective prompts
diff --git a/overview/introduction.mdx b/overview/introduction.mdx
index 8f57b201..9f510db4 100644
--- a/overview/introduction.mdx
+++ b/overview/introduction.mdx
@@ -29,7 +29,7 @@ The experience will be familiar to anyone who has used Devin or Jules.

## OpenHands Cloud

This is a commercial deployment of OpenHands GUI, running on hosted infrastructure.
-You can try it with a free $10 credit by [signing in with your GitHub account](https://app.all-hands.dev).
+You can try it for free by [signing in with your GitHub account](https://app.all-hands.dev).
OpenHands Cloud comes with source-available features and integrations: - Deeper integrations with GitHub, GitLab, and Bitbucket diff --git a/overview/quickstart.mdx b/overview/quickstart.mdx index 54bfb9fd..843e997f 100644 --- a/overview/quickstart.mdx +++ b/overview/quickstart.mdx @@ -1,19 +1,32 @@ --- title: Quick Start -description: Running OpenHands Cloud or running on your own. +description: Choose how you want to run OpenHands --- - - - The easiest way to get started with OpenHands is on OpenHands Cloud, which comes with $10 in free credits for new users. +Get started with OpenHands in minutes. Choose the option that works best for you. - To get started with OpenHands Cloud, visit [app.all-hands.dev](https://app.all-hands.dev). + + + **Recommended** - For more information see [getting started with OpenHands Cloud.](/openhands/usage/cloud/openhands-cloud) - - - Run OpenHands on your local system and bring your own LLM and API key. + The fastest way to get started. No setup required—just sign in and start coding. - For more information see [running OpenHands on your own.](/openhands/usage/run-openhands/local-setup) - - + - Free usage of MiniMax M2.5 for a limited time + - No installation needed + - Managed infrastructure + + + Use OpenHands from your terminal. Perfect for automation and scripting. + + - IDE integrations available + - Headless mode for CI/CD + - Lightweight installation + + + Run OpenHands locally with a web-based interface. Bring your own LLM and API key. + + - Full control over your environment + - Works offline + - Docker-based setup + +