From 9fbbffe52a6012dae6eaab2dae13b11fec5a5012 Mon Sep 17 00:00:00 2001 From: Erin Drummond Date: Wed, 5 Mar 2025 03:51:09 +0000 Subject: [PATCH] Feat: Enable INCREMENTAL_UNMANAGED models in native projects --- docs/concepts/models/model_kinds.md | 29 ++++++++++++++++++++++- sqlmesh/core/dialect.py | 1 + tests/core/test_model.py | 36 +++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+), 1 deletion(-) diff --git a/docs/concepts/models/model_kinds.md b/docs/concepts/models/model_kinds.md index c9d658d94b..7efc415c2c 100644 --- a/docs/concepts/models/model_kinds.md +++ b/docs/concepts/models/model_kinds.md @@ -1552,4 +1552,31 @@ Depending on the target engine, models of the `INCREMENTAL_BY_PARTITION` kind ar | BigQuery | DELETE by partitioning key, then INSERT | | Redshift | DELETE by partitioning key, then INSERT | | Postgres | DELETE by partitioning key, then INSERT | -| DuckDB | DELETE by partitioning key, then INSERT | \ No newline at end of file +| DuckDB | DELETE by partitioning key, then INSERT | + +## INCREMENTAL_UNMANAGED + +The `INCREMENTAL_UNMANAGED` model kind exists to support append-only tables. It's "unmanaged" in the sense that SQLMesh doesn't try to manage how the data is loaded. SQLMesh will just run your query on the configured cadence and append whatever it gets into the table. + +!!! question "Should you use this model kind?" + + Some patterns for data management, such as Data Vault, may rely on append-only tables. In this situation, `INCREMENTAL_UNMANAGED` is the correct type to use. + + In most other situations, you probably want `INCREMENTAL_BY_TIME_RANGE` or `INCREMENTAL_BY_UNIQUE_KEY` because they give you much more control over how the data is loaded. 
+ +Usage of the `INCREMENTAL_UNMANAGED` model kind is straightforward: + +```sql linenums="1" hl_lines="3" +MODEL ( + name db.events, + kind INCREMENTAL_UNMANAGED, +); +``` + +Since it's unmanaged, it doesn't support the `batch_size` and `batch_concurrency` properties to control how data is loaded like the other incremental model types do. + +!!! warning "Only full restatements supported" + + Similar to `INCREMENTAL_BY_PARTITION`, attempting to [restate](../plans.md#restatement-plans) an `INCREMENTAL_UNMANAGED` model will trigger a full restatement. That is, the model will be rebuilt from scratch rather than from a time slice you specify. + + This is because an append-only table is inherently non-idempotent. Restating `INCREMENTAL_UNMANAGED` models may lead to data loss and should be performed with care. \ No newline at end of file diff --git a/sqlmesh/core/dialect.py b/sqlmesh/core/dialect.py index 7a80a0f37f..92300ee82d 100644 --- a/sqlmesh/core/dialect.py +++ b/sqlmesh/core/dialect.py @@ -594,6 +594,7 @@ def parse(self: Parser) -> t.Optional[exp.Expression]: ModelKindName.INCREMENTAL_BY_TIME_RANGE, ModelKindName.INCREMENTAL_BY_UNIQUE_KEY, ModelKindName.INCREMENTAL_BY_PARTITION, + ModelKindName.INCREMENTAL_UNMANAGED, ModelKindName.SEED, ModelKindName.VIEW, ModelKindName.SCD_TYPE_2, diff --git a/tests/core/test_model.py b/tests/core/test_model.py index 933e2fc4fb..c2347d469b 100644 --- a/tests/core/test_model.py +++ b/tests/core/test_model.py @@ -3003,6 +3003,42 @@ def test_incremental_unmanaged_validation(): model.validate_definition() +def test_incremental_unmanaged(): + expr = d.parse( + """ + MODEL ( + name foo, + kind INCREMENTAL_UNMANAGED + ); + + SELECT x.a AS a FROM test.x AS x + """ + ) + + model = load_sql_based_model(expressions=expr) + + assert isinstance(model.kind, IncrementalUnmanagedKind) + assert not model.kind.insert_overwrite + + expr = d.parse( + """ + MODEL ( + name foo, + kind INCREMENTAL_UNMANAGED ( + insert_overwrite true + ), 
partitioned_by a + ); + + SELECT x.a AS a FROM test.x AS x + """ + ) + + model = load_sql_based_model(expressions=expr) + assert isinstance(model.kind, IncrementalUnmanagedKind) + assert model.kind.insert_overwrite + + def test_custom_interval_unit(): assert ( load_sql_based_model(