diff --git a/docs/guides/linter.md b/docs/guides/linter.md index 023d6bdee9..c5b6a00019 100644 --- a/docs/guides/linter.md +++ b/docs/guides/linter.md @@ -1,5 +1,7 @@ # Linter guide +![Linter](linter_example.png) + Linting is a powerful tool for improving code quality and consistency. It enables you to automatically validate model definition, ensuring they adhere to your team's best practices. When a SQLMesh command is executed and the project is loaded, each model's code is checked for compliance with a set of rules you choose. @@ -68,10 +70,10 @@ Here are all of SQLMesh's built-in linting rules: | Name | Check type | Explanation | | -------------------------- | ----------- | ------------------------------------------------------------------------------------------------------------------------ | -| ambiguousorinvalidcolumn | Correctness | SQLMesh found duplicate columns or was unable to determine whether a column is duplicated or not | -| invalidselectstarexpansion | Correctness | The query's top-level selection may be `SELECT *`, but only if SQLMesh can expand the `SELECT *` into individual columns | -| noselectstar | Stylistic | The query's top-level selection may not be `SELECT *`, even if SQLMesh can expand the `SELECT *` into individual columns | - +| `ambiguousorinvalidcolumn` | Correctness | SQLMesh found duplicate columns or was unable to determine whether a column is duplicated or not | +| `invalidselectstarexpansion` | Correctness | The query's top-level selection may be `SELECT *`, but only if SQLMesh can expand the `SELECT *` into individual columns | +| `noselectstar` | Stylistic | The query's top-level selection may not be `SELECT *`, even if SQLMesh can expand the `SELECT *` into individual columns | +| `nomissingaudits` | Governance | SQLMesh did not find any `audits` in the model's configuration to test data quality. 
| ### User-defined rules @@ -211,7 +213,7 @@ MODEL( Linting rule violations raise an error by default, preventing the project from running until the violation is addressed. -You may specify that a rule's violation should not error and only log a warning by specifying it in the `warning_rules` key instead of the `rules` key. +You may specify that a rule's violation should not error and only log a warning by specifying it in the `warn_rules` key instead of the `rules` key. === "YAML" @@ -221,7 +223,7 @@ You may specify that a rule's violation should not error and only log a warning # error if `ambiguousorinvalidcolumn` rule violated rules: ["ambiguousorinvalidcolumn"] # but only warn if "invalidselectstarexpansion" is violated - warning_rules: ["invalidselectstarexpansion"] + warn_rules: ["invalidselectstarexpansion"] ``` === "Python" @@ -235,9 +237,9 @@ You may specify that a rule's violation should not error and only log a warning # error if `ambiguousorinvalidcolumn` rule violated rules=["ambiguousorinvalidcolumn"], # but only warn if "invalidselectstarexpansion" is violated - warning_rules=["invalidselectstarexpansion"], + warn_rules=["invalidselectstarexpansion"], ) ) ``` -SQLMesh will raise an error if the same rule is included in more than one of the `rules`, `warning_rules`, and `ignored_rules` keys since they should be mutually exclusive. \ No newline at end of file +SQLMesh will raise an error if the same rule is included in more than one of the `rules`, `warn_rules`, and `ignored_rules` keys since they should be mutually exclusive. 
class NoMissingAudits(Rule):
    """Model `audits` must be configured to test data quality."""

    # NOTE(review): the class docstring above appears to double as the
    # user-facing violation message (the accompanying test asserts on that
    # exact text in the logged warning) -- keep it verbatim.

    def check_model(self, model: Model) -> t.Optional[RuleViolation]:
        # A model that declares at least one audit satisfies the rule.
        if model.audits:
            return None

        # No audits configured: report a violation with the default message.
        return self.violation()
@use_terminal_console
def test_model_missing_audits(tmp_path: Path):
    """Check that a model without `audits` triggers the `nomissingaudits` warning.

    The rule is listed under `warn_rules`, so the violation is expected to be
    reported through the console's `log_warning` instead of raising an error.
    """
    expected_msg = """Model `audits` must be configured to test data quality."""

    with patch.object(get_console(), "log_warning") as mock_logger:
        # Minimal model definition that deliberately omits `audits`.
        expressions = d.parse(
            """
            MODEL (
                name db.table,
                kind FULL,
            );

            SELECT a
            """
        )

        linter_config = LinterConfig(enabled=True, warn_rules=["nomissingaudits"])
        ctx = Context(config=Config(linter=linter_config), paths=tmp_path)
        ctx.upsert_model(load_sql_based_model(expressions))

        # Fail with a clear message if nothing was logged; otherwise
        # `call_args` would be None and the subscript below would raise an
        # unhelpful TypeError instead of an assertion failure.
        mock_logger.assert_called()

        # First positional argument of the most recent `log_warning` call.
        assert expected_msg in mock_logger.call_args[0][0]