Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
## Architecture

- **Core Package**: `code_review_graph/` (Python 3.10+)
- `parser.py` — Tree-sitter multi-language AST parser (14 languages including Vue SFC and Solidity)
- `parser.py` — Tree-sitter multi-language AST parser (15 languages including Vue SFC and Solidity)
- `graph.py` — SQLite-backed graph store (nodes, edges, BFS impact analysis)
- `tools.py` — 9 MCP tool implementations
- `incremental.py` — Git-based change detection, file watching
Expand Down Expand Up @@ -64,7 +64,7 @@ uv run code-review-graph serve # Start MCP server
- `tests/test_tools.py` — MCP tool integration tests
- `tests/test_visualization.py` — Export, HTML generation, C++ resolution
- `tests/test_incremental.py` — Build, update, migration, git ops
- `tests/test_multilang.py` — 14 language parsing tests (including Vue and Solidity)
- `tests/test_multilang.py` — 15 language parsing tests (including Vue and Solidity)
- `tests/test_embeddings.py` — Vector encode/decode, similarity, store
- `tests/fixtures/` — Sample files for each supported language

Expand Down
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ On every git commit or file save, a hook fires. The graph diffs changed files, f
</details>

<details>
<summary><strong>14 supported languages</strong></summary>
<summary><strong>15 supported languages</strong></summary>
<br>

Python, TypeScript, JavaScript, Vue, Go, Rust, Java, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++
Python, TypeScript, JavaScript, Vue, Go, Rust, Java, Scala, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++

Each language has full Tree-sitter grammar support for functions, classes, imports, call sites, inheritance, and test detection.

Expand Down Expand Up @@ -210,7 +210,7 @@ Claude uses these automatically once the graph is built.
| Feature | Details |
|---------|---------|
| **Incremental updates** | Re-parses only changed files. Subsequent updates complete in under 2 seconds. |
| **14 languages** | Python, TypeScript, JavaScript, Vue, Go, Rust, Java, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++ |
| **15 languages** | Python, TypeScript, JavaScript, Vue, Go, Rust, Java, Scala, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++ |
| **Blast-radius analysis** | Shows exactly which functions, classes, and files are affected by any change |
| **Auto-update hooks** | Graph updates on every file edit and git commit without manual intervention |
| **Semantic search** | Optional vector embeddings via sentence-transformers |
Expand Down
48 changes: 48 additions & 0 deletions code_review_graph/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ class EdgeInfo:
".kt": "kotlin",
".swift": "swift",
".php": "php",
".scala": "scala",
".sol": "solidity",
".vue": "vue",
}
Expand All @@ -96,6 +97,9 @@ class EdgeInfo:
"kotlin": ["class_declaration", "object_declaration"],
"swift": ["class_declaration", "struct_declaration", "protocol_declaration"],
"php": ["class_declaration", "interface_declaration"],
"scala": [
"class_definition", "trait_definition", "object_definition", "enum_definition",
],
"solidity": [
"contract_declaration", "interface_declaration", "library_declaration",
"struct_declaration", "enum_declaration", "error_declaration",
Expand All @@ -118,6 +122,7 @@ class EdgeInfo:
"kotlin": ["function_declaration"],
"swift": ["function_declaration"],
"php": ["function_definition", "method_declaration"],
"scala": ["function_definition", "function_declaration"],
# Solidity: events and modifiers use kind="Function" because the graph
# schema has no dedicated kind for them. State variables are also modeled
# as Function nodes (public ones auto-generate getters) and distinguished
Expand All @@ -143,6 +148,7 @@ class EdgeInfo:
"kotlin": ["import_header"],
"swift": ["import_declaration"],
"php": ["namespace_use_declaration"],
"scala": ["import_declaration"],
"solidity": ["import_directive"],
}

Expand All @@ -161,6 +167,7 @@ class EdgeInfo:
"kotlin": ["call_expression"],
"swift": ["call_expression"],
"php": ["function_call_expression", "member_call_expression"],
"scala": ["call_expression", "instance_expression", "generic_function"],
"solidity": ["call_expression"],
}

Expand Down Expand Up @@ -1030,6 +1037,19 @@ def _get_bases(self, node, language: str, source: bytes) -> list[str]:
):
text = child.text.decode("utf-8", errors="replace")
bases.append(text)
elif language == "scala":
for child in node.children:
if child.type == "extends_clause":
for sub in child.children:
if sub.type == "type_identifier":
bases.append(sub.text.decode("utf-8", errors="replace"))
elif sub.type == "generic_type":
for ident in sub.children:
if ident.type == "type_identifier":
bases.append(
ident.text.decode("utf-8", errors="replace")
)
break
elif language == "cpp":
# C++: base_class_clause contains type_identifiers
for child in node.children:
Expand Down Expand Up @@ -1123,6 +1143,27 @@ def _extract_import(self, node, language: str, source: bytes) -> list[str]:
val = child.text.decode("utf-8", errors="replace").strip('"')
if val:
imports.append(val)
elif language == "scala":
parts = []
selectors = []
is_wildcard = False
for child in node.children:
if child.type == "identifier":
parts.append(child.text.decode("utf-8", errors="replace"))
elif child.type == "namespace_selectors":
for sub in child.children:
if sub.type == "identifier":
selectors.append(sub.text.decode("utf-8", errors="replace"))
elif child.type == "namespace_wildcard":
is_wildcard = True
base = ".".join(parts)
if selectors:
for name in selectors:
imports.append(f"{base}.{name}")
elif is_wildcard:
imports.append(f"{base}.*")
elif base:
imports.append(base)
elif language == "ruby":
# require 'module' or require_relative 'path'
if "require" in text:
Expand All @@ -1142,6 +1183,13 @@ def _get_call_name(self, node, language: str, source: bytes) -> Optional[str]:

first = node.children[0]

# Scala: instance_expression (new Foo(...)) – extract the type name
if node.type == "instance_expression":
for child in node.children:
if child.type in ("type_identifier", "identifier"):
return child.text.decode("utf-8", errors="replace")
return None

# Solidity wraps call targets in an 'expression' node – unwrap it
if language == "solidity" and first.type == "expression" and first.children:
first = first.children[0]
Expand Down
2 changes: 1 addition & 1 deletion docs/FEATURES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
- **Call target resolution**: Bare call targets are resolved to qualified names using same-file definitions, improving `callers_of`/`callees_of` accuracy.
- **Impact radius pagination**: `get_impact_radius` returns `truncated` flag and `total_impacted` count; `max_results` parameter controls output size.
- **`find_large_functions_tool`**: New MCP tool to find functions, classes, or files exceeding a line-count threshold.
- **14 languages**: Added Vue SFC and Solidity support.
- **15 languages**: Added Vue SFC, Solidity, and Scala support.
- **Documentation overhaul**: All docs updated with accurate language/tool counts, version references, and VS Code extension parity.

## v1.8.3
Expand Down
2 changes: 1 addition & 1 deletion docs/LLM-OPTIMIZED-REFERENCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ Model: all-MiniLM-L6-v2 (384-dim, fast).
</section>

<section name="languages">
Supported: Python, TypeScript, JavaScript, Vue, Go, Rust, Java, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++
Supported: Python, TypeScript, JavaScript, Vue, Go, Rust, Java, Scala, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++
Parser: Tree-sitter via tree-sitter-language-pack
</section>

Expand Down
2 changes: 1 addition & 1 deletion docs/USAGE.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ Then use `embed_graph_tool` to compute vectors. `semantic_search_nodes_tool` aut

## Supported Languages

Python, TypeScript, JavaScript, Vue, Go, Rust, Java, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++
Python, TypeScript, JavaScript, Vue, Go, Rust, Java, Scala, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++

## What Gets Indexed

Expand Down
2 changes: 1 addition & 1 deletion skills/build-graph/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,4 @@ Build or incrementally update the persistent code knowledge graph for this repos

- The graph is stored as a SQLite database (`.code-review-graph/graph.db`) in the repo root
- Binary files, generated files, and patterns in `.code-review-graphignore` are skipped
- Supported languages: Python, TypeScript/JavaScript, Vue, Go, Rust, Java, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++
- Supported languages: Python, TypeScript/JavaScript, Vue, Go, Rust, Java, Scala, C#, Ruby, Kotlin, Swift, PHP, Solidity, C/C++
37 changes: 37 additions & 0 deletions tests/fixtures/sample.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.example.auth

import scala.collection.mutable
import scala.collection.mutable.{HashMap, ListBuffer}
import scala.util.Try
import scala.concurrent._

// Generic repository contract, parameterised by the entity type T.
// Both members are abstract declarations (no bodies).
trait Repository[T]:
// Look an entity up by its integer id; the result is Option-typed.
def findById(id: Int): Option[T]
// Persist the given entity; returns Unit.
def save(entity: T): Unit

// Data record with an integer id plus name and email strings; used as the
// entity type by InMemoryRepo and UserService in this fixture.
case class User(id: Int, name: String, email: String)

// Repository[User] implementation that keeps users in a mutable HashMap keyed
// by user id. The extends clause combines a generic base type
// (Repository[User]) with a second, plain base (Serializable).
class InMemoryRepo extends Repository[User] with Serializable:
// Backing store: user id -> User.
private val users = mutable.HashMap[Int, User]()

// Delegates to HashMap.get, so an unknown id yields None.
override def findById(id: Int): Option[User] =
users.get(id)

// Stores the user under its own id, then prints a confirmation line.
override def save(user: User): Unit =
users.put(user.id, user)
println(s"Saved user ${user.id}")

// Service layered over a Repository[User] supplied as a constructor parameter.
class UserService(repo: Repository[User]):
// Builds a User with the fixed id 1, saves it through the repository and
// returns the new instance.
def createUser(name: String, email: String): User =
val user = User(1, name, email)
repo.save(user)
user

// Thin pass-through to the repository lookup.
def getUser(id: Int): Option[User] =
repo.findById(id)

// Companion object for UserService: `apply` is a factory that constructs the
// service with an explicit `new UserService(repo)` expression.
object UserService:
def apply(repo: Repository[User]): UserService = new UserService(repo)

// Enum with three cases, declared together in a single `case` clause.
enum Color:
case Red, Green, Blue
47 changes: 47 additions & 0 deletions tests/test_multilang.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,53 @@ def test_finds_functions(self):
assert "createUser" in names or "findById" in names or "save" in names


class TestScalaParsing:
    """Parse the ``sample.scala`` fixture and check the extracted nodes and edges."""

    def setup_method(self):
        # A fresh parser and a fresh parse of the fixture for every test.
        self.parser = CodeParser()
        parsed = self.parser.parse_file(FIXTURES / "sample.scala")
        self.nodes, self.edges = parsed

    def _node_names(self, kind):
        """Return the set of names of parsed nodes whose ``kind`` matches."""
        return {node.name for node in self.nodes if node.kind == kind}

    def _edges_of(self, kind):
        """Return the list of parsed edges whose ``kind`` matches."""
        return [edge for edge in self.edges if edge.kind == kind]

    def test_detects_language(self):
        detected = self.parser.detect_language(Path("Main.scala"))
        assert detected == "scala"

    def test_finds_classes_traits_objects(self):
        # Traits, case classes, classes, objects and enums are all "Class" nodes.
        found = self._node_names("Class")
        expected_names = (
            "Repository",
            "User",
            "InMemoryRepo",
            "UserService",
            "Color",
        )
        for expected in expected_names:
            assert expected in found

    def test_finds_functions(self):
        found = self._node_names("Function")
        expected_names = (
            "findById",
            "save",
            "createUser",
            "getUser",
            "apply",
        )
        for expected in expected_names:
            assert expected in found

    def test_finds_imports(self):
        import_edges = self._edges_of("IMPORTS_FROM")
        found = {edge.target for edge in import_edges}
        # Covers a plain import, a package import, brace selectors and a wildcard.
        expected_targets = (
            "scala.util.Try",
            "scala.collection.mutable",
            "scala.collection.mutable.HashMap",
            "scala.collection.mutable.ListBuffer",
            "scala.concurrent.*",
        )
        for expected in expected_targets:
            assert expected in found
        assert len(import_edges) >= 3

    def test_finds_inheritance(self):
        found = {edge.target for edge in self._edges_of("INHERITS")}
        for expected in ("Repository", "Serializable"):
            assert expected in found

    def test_finds_calls(self):
        call_edges = self._edges_of("CALLS")
        assert len(call_edges) >= 3


class TestSolidityParsing:
def setup_method(self):
self.parser = CodeParser()
Expand Down