diff --git a/docs/guides/model_selection.md b/docs/guides/model_selection.md
index 9cc0a4358a..e6178246d6 100644
--- a/docs/guides/model_selection.md
+++ b/docs/guides/model_selection.md
@@ -242,8 +242,9 @@ Models:
 #### Select with git changes
 
 The git-based selector allows you to select models whose files have changed compared to a target branch (default: main). This includes:
+
 - Untracked files (new files not in git)
-- Uncommitted changes in working directory
+- Uncommitted changes in working directory (both staged and unstaged)
 - Committed changes different from the target branch
 
 For example:
diff --git a/sqlmesh/utils/git.py b/sqlmesh/utils/git.py
index 00410e776c..cdb9d4e2d5 100644
--- a/sqlmesh/utils/git.py
+++ b/sqlmesh/utils/git.py
@@ -16,7 +16,11 @@ def list_untracked_files(self) -> t.List[Path]:
         )
 
     def list_uncommitted_changed_files(self) -> t.List[Path]:
-        return self._execute_list_output(["diff", "--name-only", "--diff-filter=d"], self._git_root)
+        # Compare against HEAD so that staged changes are reported as well;
+        # a plain `git diff` only compares the working tree with the index.
+        return self._execute_list_output(
+            ["diff", "--name-only", "--diff-filter=d", "HEAD"], self._git_root
+        )
 
     def list_committed_changed_files(self, target_branch: str = "main") -> t.List[Path]:
         return self._execute_list_output(
diff --git a/tests/core/test_selector_native.py b/tests/core/test_selector_native.py
index 46d666db64..5889efadda 100644
--- a/tests/core/test_selector_native.py
+++ b/tests/core/test_selector_native.py
@@ -6,6 +6,7 @@
 
 import pytest
 from pytest_mock.plugin import MockerFixture
+import subprocess
 
 from sqlmesh.core import dialect as d
 from sqlmesh.core.audit import StandaloneAudit
@@ -16,6 +17,7 @@
 from sqlmesh.core.snapshot import SnapshotChangeCategory
 from sqlmesh.utils import UniqueKeyDict
 from sqlmesh.utils.date import now_timestamp
+from sqlmesh.utils.git import GitClient
 
 
 @pytest.mark.parametrize(
@@ -634,6 +636,93 @@ def test_expand_git_selection(
     git_client_mock.list_untracked_files.assert_called_once()
 
 
+def test_expand_git_selection_integration(tmp_path: Path, mocker: MockerFixture):
+    """Select models via a real git repo across unstaged, staged and committed changes."""
+    repo_path = tmp_path / "test_repo"
+    repo_path.mkdir()
+    subprocess.run(["git", "init", "-b", "main"], cwd=repo_path, check=True, capture_output=True)
+
+    models: UniqueKeyDict[str, Model] = UniqueKeyDict("models")
+    model_a_path = repo_path / "model_a.sql"
+    model_a_path.write_text("SELECT 1 AS a")
+    model_a = SqlModel(name="test_model_a", query=d.parse_one("SELECT 1 AS a"))
+    model_a._path = model_a_path
+    models[model_a.fqn] = model_a
+
+    model_b_path = repo_path / "model_b.sql"
+    model_b_path.write_text("SELECT 2 AS b")
+    model_b = SqlModel(name="test_model_b", query=d.parse_one("SELECT 2 AS b"))
+    model_b._path = model_b_path
+    models[model_b.fqn] = model_b
+
+    subprocess.run(["git", "add", "."], cwd=repo_path, check=True, capture_output=True)
+    subprocess.run(
+        [
+            "git",
+            "-c",
+            "user.name=Max",
+            "-c",
+            "user.email=max@rb.com",
+            "commit",
+            "-m",
+            "Initial commit",
+        ],
+        cwd=repo_path,
+        check=True,
+        capture_output=True,
+    )
+
+    # no changes should select nothing
+    git_client = GitClient(repo_path)
+    selector = NativeSelector(mocker.Mock(), models)
+    selector._git_client = git_client
+    assert selector.expand_model_selections(["git:main"]) == set()
+
+    # modify A but don't stage it; only A should be selected
+    model_a_path.write_text("SELECT 10 AS a")
+    assert selector.expand_model_selections(["git:main"]) == {'"test_model_a"'}
+
+    # stage model A, should still select it
+    subprocess.run(["git", "add", "model_a.sql"], cwd=repo_path, check=True, capture_output=True)
+    assert selector.expand_model_selections(["git:main"]) == {'"test_model_a"'}
+
+    # now add unstaged change to B and both should be selected
+    model_b_path.write_text("SELECT 20 AS b")
+    assert selector.expand_model_selections(["git:main"]) == {
+        '"test_model_a"',
+        '"test_model_b"',
+    }
+
+    subprocess.run(
+        ["git", "checkout", "-b", "dev"],
+        cwd=repo_path,
+        check=True,
+        capture_output=True,
+    )
+
+    subprocess.run(
+        [
+            "git",
+            "-c",
+            "user.name=Max",
+            "-c",
+            "user.email=max@rb.com",
+            "commit",
+            "-m",
+            "Update model_a",
+        ],
+        cwd=repo_path,
+        check=True,
+        capture_output=True,
+    )
+
+    # now A is committed in the dev branch and B unstaged but should both be selected
+    assert selector.expand_model_selections(["git:main"]) == {
+        '"test_model_a"',
+        '"test_model_b"',
+    }
+
+
 def test_select_models_with_external_parent(mocker: MockerFixture):
     default_catalog = "test_catalog"
     added_model = SqlModel(
diff --git a/tests/utils/test_git_client.py b/tests/utils/test_git_client.py
new file mode 100644
index 0000000000..13eecf294b
--- /dev/null
+++ b/tests/utils/test_git_client.py
@@ -0,0 +1,178 @@
+import subprocess
+from pathlib import Path
+import pytest
+from sqlmesh.utils.git import GitClient
+
+
+@pytest.fixture
+def git_repo(tmp_path: Path) -> Path:
+    """Create an empty git repository on branch ``main`` and return its path."""
+    repo_path = tmp_path / "test_repo"
+    repo_path.mkdir()
+    subprocess.run(["git", "init", "-b", "main"], cwd=repo_path, check=True, capture_output=True)
+    return repo_path
+
+
+def test_git_uncommitted_changes(git_repo: Path):
+    """A modified tracked file is reported both before and after it is staged."""
+    git_client = GitClient(git_repo)
+
+    test_file = git_repo / "model.sql"
+    test_file.write_text("SELECT 1 AS a")
+    subprocess.run(["git", "add", "model.sql"], cwd=git_repo, check=True, capture_output=True)
+    subprocess.run(
+        [
+            "git",
+            "-c",
+            "user.name=Max",
+            "-c",
+            "user.email=max@rb.com",
+            "commit",
+            "-m",
+            "Initial commit",
+        ],
+        cwd=git_repo,
+        check=True,
+        capture_output=True,
+    )
+    assert git_client.list_uncommitted_changed_files() == []
+
+    # make an unstaged change and see that it is listed
+    test_file.write_text("SELECT 2 AS a")
+    uncommitted = git_client.list_uncommitted_changed_files()
+    assert len(uncommitted) == 1
+    assert uncommitted[0].name == "model.sql"
+
+    # stage the change and test that it is still detected
+    subprocess.run(["git", "add", "model.sql"], cwd=git_repo, check=True, capture_output=True)
+    uncommitted = git_client.list_uncommitted_changed_files()
+    assert len(uncommitted) == 1
+    assert uncommitted[0].name == "model.sql"
+
+
+def test_git_both_staged_and_unstaged_changes(git_repo: Path):
+    """A staged file and an unstaged file are both reported as uncommitted."""
+    git_client = GitClient(git_repo)
+
+    file1 = git_repo / "model1.sql"
+    file2 = git_repo / "model2.sql"
+    file1.write_text("SELECT 1")
+    file2.write_text("SELECT 2")
+    subprocess.run(["git", "add", "."], cwd=git_repo, check=True, capture_output=True)
+    subprocess.run(
+        [
+            "git",
+            "-c",
+            "user.name=Max",
+            "-c",
+            "user.email=max@rb.com",
+            "commit",
+            "-m",
+            "Initial commit",
+        ],
+        cwd=git_repo,
+        check=True,
+        capture_output=True,
+    )
+
+    # stage file1
+    file1.write_text("SELECT 10")
+    subprocess.run(["git", "add", "model1.sql"], cwd=git_repo, check=True, capture_output=True)
+
+    # modify file2 but don't stage it!
+    file2.write_text("SELECT 20")
+
+    # both should be detected
+    uncommitted = git_client.list_uncommitted_changed_files()
+    assert len(uncommitted) == 2
+    file_names = {f.name for f in uncommitted}
+    assert file_names == {"model1.sql", "model2.sql"}
+
+
+def test_git_untracked_files(git_repo: Path):
+    """An untracked file is listed as untracked but not as an uncommitted change."""
+    git_client = GitClient(git_repo)
+    initial_file = git_repo / "initial.sql"
+    initial_file.write_text("SELECT 0")
+    subprocess.run(["git", "add", "initial.sql"], cwd=git_repo, check=True, capture_output=True)
+    subprocess.run(
+        [
+            "git",
+            "-c",
+            "user.name=Max",
+            "-c",
+            "user.email=max@rb.com",
+            "commit",
+            "-m",
+            "Initial commit",
+        ],
+        cwd=git_repo,
+        check=True,
+        capture_output=True,
+    )
+
+    new_file = git_repo / "new_model.sql"
+    new_file.write_text("SELECT 1")
+
+    # untracked file should not appear in uncommitted changes
+    assert git_client.list_uncommitted_changed_files() == []
+
+    # but in untracked
+    untracked = git_client.list_untracked_files()
+    assert len(untracked) == 1
+    assert untracked[0].name == "new_model.sql"
+
+
+def test_git_committed_changes(git_repo: Path):
+    """A change committed on a feature branch is listed as committed, not as uncommitted."""
+    git_client = GitClient(git_repo)
+
+    test_file = git_repo / "model.sql"
+    test_file.write_text("SELECT 1")
+    subprocess.run(["git", "add", "model.sql"], cwd=git_repo, check=True, capture_output=True)
+    subprocess.run(
+        [
+            "git",
+            "-c",
+            "user.name=Max",
+            "-c",
+            "user.email=max@rb.com",
+            "commit",
+            "-m",
+            "Initial commit",
+        ],
+        cwd=git_repo,
+        check=True,
+        capture_output=True,
+    )
+
+    subprocess.run(
+        ["git", "checkout", "-b", "feature"],
+        cwd=git_repo,
+        check=True,
+        capture_output=True,
+    )
+
+    test_file.write_text("SELECT 2")
+    subprocess.run(["git", "add", "model.sql"], cwd=git_repo, check=True, capture_output=True)
+    subprocess.run(
+        [
+            "git",
+            "-c",
+            "user.name=Max",
+            "-c",
+            "user.email=max@rb.com",
+            "commit",
+            "-m",
+            "Update on feature branch",
+        ],
+        cwd=git_repo,
+        check=True,
+        capture_output=True,
+    )
+
+    committed = git_client.list_committed_changed_files(target_branch="main")
+    assert len(committed) == 1
+    assert committed[0].name == "model.sql"
+
+    assert git_client.list_uncommitted_changed_files() == []