From 9a477aa3854c3fb60b2173e9d0e4e23a34be401b Mon Sep 17 00:00:00 2001 From: "arun.mohanaselvam" Date: Mon, 4 May 2026 18:27:20 +0530 Subject: [PATCH 1/3] Added GHA WF to support vscode marketplace publish --- .github/workflows/cd-publish.yml | 197 ++++++++++++++++++++++++++++++ .github/workflows/create-tags.yml | 187 ++++++++++++++++++++++++++++ 2 files changed, 384 insertions(+) create mode 100644 .github/workflows/cd-publish.yml create mode 100644 .github/workflows/create-tags.yml diff --git a/.github/workflows/cd-publish.yml b/.github/workflows/cd-publish.yml new file mode 100644 index 0000000..ba5240b --- /dev/null +++ b/.github/workflows/cd-publish.yml @@ -0,0 +1,197 @@ +name: Publish Extension to VS Code Marketplace +run-name: Publish v${{ inputs.version }} + +on: + workflow_dispatch: + inputs: + version: + description: "Version to publish (e.g. 0.1.0) - do NOT include 'v' prefix" + required: true + type: string + +concurrency: + group: publish + cancel-in-progress: false + +permissions: {} + +jobs: + publish: + name: Publish v${{ inputs.version }} + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + contents: write + steps: + - name: Validate version input format + env: + VERSION: ${{ inputs.version }} + run: | + set -euo pipefail + if ! echo "$VERSION" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+([-+].+)?$'; then + echo "::error::Invalid version format '$VERSION'. Expected 'X.Y.Z' (no 'v' prefix)." + exit 1 + fi + + - name: Checkout tag + uses: actions/checkout@v4 + with: + ref: refs/tags/v${{ inputs.version }} + fetch-depth: 0 + + - name: Setup Node.js + uses: actions/setup-node@v4 + with: + node-version: '24' + cache: 'npm' + + - name: Install vsce + run: npm install -g @vscode/vsce@^3.6 --silent --no-fund --no-audit + + - name: Resolve package metadata + id: pkg + env: + VERSION: ${{ inputs.version }} + run: | + set -euo pipefail + if [ ! 
-f package.json ]; then + echo "::error::package.json not found at tag v${VERSION}" + exit 1 + fi + PACKAGE_NAME=$(node -p "require('./package.json').name || ''" 2>/dev/null || true) + PACKAGE_VERSION=$(node -p "require('./package.json').version || ''" 2>/dev/null || true) + PUBLISHER=$(node -p "require('./package.json').publisher || ''" 2>/dev/null || true) + if [ -z "$PACKAGE_NAME" ] || [ -z "$PACKAGE_VERSION" ] || [ -z "$PUBLISHER" ]; then + echo "::error::Failed to read name/version/publisher from package.json" + exit 1 + fi + echo "name=$PACKAGE_NAME" >> "$GITHUB_OUTPUT" + echo "version=$PACKAGE_VERSION" >> "$GITHUB_OUTPUT" + echo "publisher=$PUBLISHER" >> "$GITHUB_OUTPUT" + echo "extension_id=${PUBLISHER}.${PACKAGE_NAME}" >> "$GITHUB_OUTPUT" + echo "vsix=${PACKAGE_NAME}-${PACKAGE_VERSION}.vsix" >> "$GITHUB_OUTPUT" + + - name: Verify version matches package.json + env: + INPUT_VERSION: ${{ inputs.version }} + PKG_VERSION: ${{ steps.pkg.outputs.version }} + run: | + set -euo pipefail + if [ "$INPUT_VERSION" != "$PKG_VERSION" ]; then + echo "::error::Input version '$INPUT_VERSION' does not match package.json version '$PKG_VERSION'" + exit 1 + fi + + - name: Check VS Code Marketplace for existing version + env: + EXTENSION_ID: ${{ steps.pkg.outputs.extension_id }} + PACKAGE_VERSION: ${{ steps.pkg.outputs.version }} + run: | + set -euo pipefail + TMP_OUT=$(mktemp) + TMP_ERR=$(mktemp) + set +e + vsce show "$EXTENSION_ID" --json >"$TMP_OUT" 2>"$TMP_ERR" + RC=$? + set -e + ERR=$(cat "$TMP_ERR"); rm -f "$TMP_ERR" + RAW=$(sed -n '/^[[:space:]]*[{[]/,$p' "$TMP_OUT") + rm -f "$TMP_OUT" + + if [ $RC -ne 0 ] || [ -z "$RAW" ]; then + if echo "$ERR" | grep -qiE "not found|could not be found|no extension"; then + echo "Extension '$EXTENSION_ID' not on marketplace yet (first publish)." + exit 0 + fi + echo "::error::vsce show failed for '$EXTENSION_ID': $ERR" + exit 1 + fi + + if ! echo "$RAW" | jq -e . 
>/dev/null 2>&1; then + echo "::error::vsce show returned non-JSON output for '$EXTENSION_ID': $RAW" + exit 1 + fi + + PUBLISHED=$(echo "$RAW" | jq -r '.versions[].version') + if echo "$PUBLISHED" | grep -Fxq "$PACKAGE_VERSION"; then + echo "::error::Version $PACKAGE_VERSION already published to marketplace for $EXTENSION_ID" + exit 1 + fi + + - name: Install dependencies + run: | + set -euo pipefail + if [ -f package-lock.json ]; then + npm ci + else + echo "::warning::package-lock.json not found; falling back to npm install" + npm install + fi + + - name: Package extension + env: + VSIX: ${{ steps.pkg.outputs.vsix }} + run: | + set -euo pipefail + npm run package + if [ ! -f "$VSIX" ]; then + echo "::error::Expected VSIX '$VSIX' not produced by 'npm run package'" + ls -1 *.vsix 2>/dev/null || true + exit 1 + fi + + - name: Upload VSIX artifact + uses: actions/upload-artifact@v4 + with: + name: ${{ steps.pkg.outputs.vsix }} + path: ./${{ steps.pkg.outputs.vsix }} + if-no-files-found: error + retention-days: 30 + + - name: Publish to VS Code Marketplace + env: + VSCE_PAT: ${{ secrets.VSCODE_MARKETPLACE_PAT }} + VSIX: ${{ steps.pkg.outputs.vsix }} + run: | + set -euo pipefail + if [ -z "${VSCE_PAT:-}" ]; then + echo "::error::Marketplace PAT secret is missing" + exit 1 + fi + vsce publish --no-dependencies --packagePath "./${VSIX}" + + - name: Read changelog entry + id: changelog + continue-on-error: true + uses: mindsers/changelog-reader-action@32aa5b4c155d76c94e4ec883a223c947b2f02656 # v2.2.3 + with: + version: ${{ steps.pkg.outputs.version }} + path: ./CHANGELOG.md + + - name: Resolve release body + id: body + env: + CHANGES: ${{ steps.changelog.outputs.changes }} + TAG: v${{ inputs.version }} + run: | + set -euo pipefail + { + echo "body<<BODY_EOF" + if [ -n "${CHANGES:-}" ]; then + printf '%s\n' "$CHANGES" + else + echo "Release ${TAG}" + fi + echo "BODY_EOF" + } >> "$GITHUB_OUTPUT" + + - name: Create GitHub Release + uses: softprops/action-gh-release@3bb12739c298aeb8a4eeaf626c5b8d85266b0e65 # v2.6.2 + with: + tag_name: v${{ inputs.version }} + name: v${{ inputs.version }} + body: ${{ 
steps.body.outputs.body }} + draft: false + prerelease: false + fail_on_unmatched_files: true + files: ./${{ steps.pkg.outputs.vsix }} diff --git a/.github/workflows/create-tags.yml b/.github/workflows/create-tags.yml new file mode 100644 index 0000000..67d777b --- /dev/null +++ b/.github/workflows/create-tags.yml @@ -0,0 +1,187 @@ +name: Create Tag for new dj version + +on: + workflow_dispatch: + inputs: + ignore_changes: + description: "Skip source code change detection and force tag creation" + required: false + type: boolean + default: false + +concurrency: + group: create-tag + cancel-in-progress: false + +permissions: {} + +jobs: + create-tag: + runs-on: ubuntu-latest + timeout-minutes: 15 + permissions: + contents: write + outputs: + version_tag: ${{ steps.version.outputs.version_tag }} + + if: github.ref == 'refs/heads/main' + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup node + uses: actions/setup-node@v4 + with: + node-version: '24' + + - name: Check for source code changes + if: inputs.ignore_changes == false + id: check_changes + run: | + set -euo pipefail + + if git rev-parse HEAD^ >/dev/null 2>&1; then + CHANGED_FILES=$(git diff --name-only HEAD^ HEAD) + else + echo "Initial commit detected; treating all tracked files as changed." 
+ CHANGED_FILES=$(git ls-tree -r --name-only HEAD) + fi + + echo "Changed files:" + echo "$CHANGED_FILES" + + SOURCE_CHANGES="$CHANGED_FILES" + + if [ -f ".tagignore" ] && [ -s ".tagignore" ]; then + PATTERNS=$(grep -vE '^[[:space:]]*(#|$)' .tagignore || true) + if [ -n "$PATTERNS" ]; then + ESC() { sed -E 's:[][/.^$*+?(){}|\\]:\\&:g'; } + DIRS=$(printf '%s\n' "$PATTERNS" | grep '/$' | sed 's:/$::' | ESC | paste -sd '|' -) + FILES=$(printf '%s\n' "$PATTERNS" | grep -v '/$' | ESC | paste -sd '|' -) + REGEX="" + [ -n "$FILES" ] && REGEX="^(${FILES})$" + [ -n "$DIRS" ] && REGEX="${REGEX:+$REGEX|}^(${DIRS})/" + if [ -n "$REGEX" ]; then + SOURCE_CHANGES=$(echo "$CHANGED_FILES" | grep -vE "$REGEX" || true) + fi + fi + fi + + if [ -z "$SOURCE_CHANGES" ]; then + echo "No source code changes detected. Skipping tag creation." + echo "should_tag=false" >> "$GITHUB_OUTPUT" + else + echo "Source code changes detected:" + echo "$SOURCE_CHANGES" + echo "should_tag=true" >> "$GITHUB_OUTPUT" + fi + + - name: Get version from package.json + if: inputs.ignore_changes == true || steps.check_changes.outputs.should_tag == 'true' + id: version + run: | + set -euo pipefail + if [ ! -f package.json ]; then + echo "::error::package.json not found" + exit 1 + fi + VERSION=$(node -p "require('./package.json').version" 2>/dev/null || true) + if [ -z "$VERSION" ] || [ "$VERSION" = "undefined" ]; then + echo "::error::Could not read version from package.json" + exit 1 + fi + if ! 
echo "$VERSION" | grep -Eq '^[0-9]+\.[0-9]+\.[0-9]+([-+].+)?$'; then + echo "::error::Invalid semver '$VERSION' in package.json" + exit 1 + fi + echo "version_tag=v${VERSION}" >> "$GITHUB_OUTPUT" + + - name: Check if tag already exists + if: inputs.ignore_changes == true || steps.check_changes.outputs.should_tag == 'true' + env: + TAG: ${{ steps.version.outputs.version_tag }} + run: | + set -euo pipefail + if git rev-parse -q --verify "refs/tags/${TAG}" >/dev/null; then + echo "::error::Tag ${TAG} already exists locally" + exit 1 + fi + if git ls-remote --tags --exit-code origin "refs/tags/${TAG}" >/dev/null 2>&1; then + echo "::error::Tag ${TAG} already exists on remote" + exit 1 + fi + echo "Tag ${TAG} does not exist. Proceeding." + + - name: Install vsce + if: inputs.ignore_changes == true || steps.check_changes.outputs.should_tag == 'true' + run: npm install -g @vscode/vsce@^3.6 --silent --no-fund --no-audit + + - name: Check marketplace version + if: inputs.ignore_changes == true || steps.check_changes.outputs.should_tag == 'true' + env: + TAG: ${{ steps.version.outputs.version_tag }} + run: | + set -euo pipefail + PACKAGE_NAME=$(node -p "require('./package.json').name || ''" 2>/dev/null || true) + PUBLISHER=$(node -p "require('./package.json').publisher || ''" 2>/dev/null || true) + if [ -z "$PACKAGE_NAME" ] || [ -z "$PUBLISHER" ]; then + echo "::error::Failed to read name/publisher from package.json" + exit 1 + fi + EXTENSION_ID="${PUBLISHER}.${PACKAGE_NAME}" + PACKAGE_VERSION="${TAG#v}" + + TMP_OUT=$(mktemp) + TMP_ERR=$(mktemp) + set +e + vsce show "$EXTENSION_ID" --json >"$TMP_OUT" 2>"$TMP_ERR" + RC=$? + set -e + ERR=$(cat "$TMP_ERR"); rm -f "$TMP_ERR" + RAW=$(sed -n '/^[[:space:]]*[{[]/,$p' "$TMP_OUT") + rm -f "$TMP_OUT" + + if [ $RC -ne 0 ] || [ -z "$RAW" ]; then + if echo "$ERR" | grep -qiE "not found|could not be found|no extension"; then + echo "Extension '$EXTENSION_ID' not on marketplace yet (first publish). OK." 
+ exit 0 + fi + echo "::error::vsce show failed for '$EXTENSION_ID': $ERR" + exit 1 + fi + + if ! echo "$RAW" | jq -e . >/dev/null 2>&1; then + echo "::error::vsce show returned non-JSON output for '$EXTENSION_ID': $RAW" + exit 1 + fi + + PUBLISHED=$(echo "$RAW" | jq -r '.versions[].version') + if echo "$PUBLISHED" | grep -Fxq "$PACKAGE_VERSION"; then + echo "::error::Version $PACKAGE_VERSION already exists on marketplace for $EXTENSION_ID" + exit 1 + fi + + LATEST=$(echo "$PUBLISHED" | sort -V | tail -n1) + if [ -n "$LATEST" ]; then + HIGHER=$(printf '%s\n%s\n' "$LATEST" "$PACKAGE_VERSION" | sort -V | tail -n1) + if [ "$HIGHER" != "$PACKAGE_VERSION" ] || [ "$LATEST" = "$PACKAGE_VERSION" ]; then + echo "::error::Version $PACKAGE_VERSION must be greater than latest published $LATEST for $EXTENSION_ID" + exit 1 + fi + fi + echo "Version $PACKAGE_VERSION is available to publish for $EXTENSION_ID (latest published: ${LATEST:-none})." + + - name: Tag new version + if: inputs.ignore_changes == true || steps.check_changes.outputs.should_tag == 'true' + env: + TAG: ${{ steps.version.outputs.version_tag }} + ACTOR: ${{ github.actor }} + ACTOR_ID: ${{ github.actor_id }} + run: | + set -euo pipefail + git config user.name "$ACTOR" + git config user.email "${ACTOR_ID}+${ACTOR}@users.noreply.github.com" + git tag -a "$TAG" -m "Release $TAG (triggered by $ACTOR)" + git push origin "refs/tags/${TAG}" From 8d7fb0757caf864dd9a3385ef1d52a091f4fc761 Mon Sep 17 00:00:00 2001 From: "arun.mohanaselvam" Date: Mon, 4 May 2026 18:28:53 +0530 Subject: [PATCH 2/3] Added GHA WF to support vscode marketplace publish --- .tagignore | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .tagignore diff --git a/.tagignore b/.tagignore new file mode 100644 index 0000000..8419a51 --- /dev/null +++ b/.tagignore @@ -0,0 +1,16 @@ +# Files/directories to ignore when determining if a tag should be created +docs/ +.github/ +.prettierignore +.prettierrc +.eslintrc.json +.eslintcache 
+.gitignore +.vscodeignore +DEVELOPMENT_SETUP.md +CONTRIBUTING.md +CODE_OF_CONDUCT.md +LICENSE.md +NOTICE +Makefile +.tagignore From 30d583d7602853477fa16217edd849834935ba77 Mon Sep 17 00:00:00 2001 From: gowtham raj j Date: Mon, 4 May 2026 19:28:43 +0530 Subject: [PATCH 3/3] v1.3.6: (#31) --- AGENTS.md | 4 +- CHANGELOG.md | 5 + docs/models/CTE_PATTERNS.md | 31 ++++ docs/models/README.md | 3 +- macros/strategies.sql | 71 +++++++++ package-lock.json | 6 +- package.json | 5 +- schemas/model.cte.schema.json | 12 ++ .../model.incremental_strategy.schema.json | 17 +- src/services/columnLineage.ts | 6 + .../cte-framework-dims-autoinject.test.ts | 145 ++++++++++++++++++ .../__tests__/cte-partition-filters.test.ts | 130 ++++++++++++++++ ...cte-portal-source-count-autoinject.test.ts | 80 ++++++++++ .../__tests__/cte-validation.test.ts | 100 ++++++++++++ .../framework/__tests__/index.test.ts | 75 +++++++++ .../framework/handlers/model-crud-handlers.ts | 1 + src/services/framework/index.ts | 11 +- src/services/framework/utils/column-utils.ts | 73 ++++++++- src/services/framework/utils/sql-utils.ts | 66 ++++++-- src/services/modelValidation.ts | 61 ++++++++ src/services/sync/ModelProcessor.ts | 18 +++ src/services/sync/types.ts | 8 + src/shared/dbt/types.ts | 1 + src/shared/framework/types.ts | 1 + src/shared/schema/types/model.cte.schema.d.ts | 20 +++ .../schema/types/model.ctes.schema.d.ts | 20 +++ .../model.incremental_strategy.schema.d.ts | 8 +- .../types/model.materialization.schema.d.ts | 8 +- src/shared/schema/types/model.schema.d.ts | 12 +- .../model.type.int_join_column.schema.d.ts | 8 +- .../model.type.int_join_models.schema.d.ts | 16 +- .../model.type.int_lookback_model.schema.d.ts | 8 +- .../model.type.int_rollup_model.schema.d.ts | 8 +- .../model.type.int_select_model.schema.d.ts | 16 +- .../model.type.int_union_models.schema.d.ts | 16 +- .../model.type.mart_join_models.schema.d.ts | 12 ++ .../model.type.mart_select_model.schema.d.ts | 12 ++ 
.../model.type.stg_select_model.schema.d.ts | 8 +- .../model.type.stg_select_source.schema.d.ts | 8 +- .../model.type.stg_union_sources.schema.d.ts | 8 +- templates/_AGENTS.md | 8 +- .../skills/dj-create-new-model/_SKILL.md | 3 +- .../features/ModelWizard/AdditionalFields.tsx | 13 +- 43 files changed, 1092 insertions(+), 50 deletions(-) create mode 100644 macros/strategies.sql diff --git a/AGENTS.md b/AGENTS.md index 350e35c..17d0c0b 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -618,7 +618,7 @@ To add/modify JSON schemas: ### Storage-Type Branching - Partitioning keyword depends on storage format: **Iceberg uses `partitioning: ARRAY[...]`** while **Delta Lake / Hive uses `partitioned_by: ARRAY[...]`**. The switch happens in `frameworkGenerateModelOutput` (`sql-utils.ts`) based on `materialization.format` or the project var `storage_type`. -- Incremental strategy resolution (`frameworkGenerateModelOutput` in `sql-utils.ts`): per-model `materialization.strategy.type` → legacy top-level `incremental_strategy` → extension default via `dj.config.materializationDefaultIncrementalStrategy` → shared constant `DEFAULT_INCREMENTAL_STRATEGY` in `src/shared/framework/constants.ts` (currently `overwrite_existing_partitions`; planned to switch to `delete+insert` in a future release). To change the factory default, update the shared constant **and** the `default` field for `dj.materialization.defaultIncrementalStrategy` in `package.json` in lockstep. All other fallback sites (`config.ts`, `preferences-handler.ts`, `sql-utils.ts`, web store, web mock api) already route through the shared constant. 
+- Incremental strategy resolution (`frameworkGenerateModelOutput` in `sql-utils.ts`): per-model `materialization.strategy.type` → legacy top-level `incremental_strategy` → extension default via `dj.config.materializationDefaultIncrementalStrategy` → shared constant `DEFAULT_INCREMENTAL_STRATEGY` in `src/shared/framework/constants.ts` (currently `overwrite_existing_partitions`; planned to switch to `delete+insert` in a future release). Five strategy types are supported: `append`, `delete+insert`, `merge` (Iceberg-only in dbt-trino), `overwrite_existing_partitions` (consumer macro required), and `dj_iceberg_partition_overwrite` (Iceberg-only; DJ ships the dispatch macro `get_incremental_dj_iceberg_partition_overwrite_sql` via `macros/strategies.sql`, auto-copied to `/macros/_ext_/strategies.sql` by `writeMacroFiles` in `dbt.ts`). The Iceberg requirement for `dj_iceberg_partition_overwrite` is enforced by `validateDjIcebergPartitionOverwrite` in `src/services/modelValidation.ts`, surfaced as a Problems-tab error via `ModelProcessor`. To change the factory default, update the shared constant **and** the `default` field for `dj.materialization.defaultIncrementalStrategy` in `package.json` in lockstep. All other fallback sites (`config.ts`, `preferences-handler.ts`, `sql-utils.ts`, web store, web mock api) already route through the shared constant. - When touching `getMaterializationProp`, `getDefaultUniqueKey`, or the strategy switch in `sql-utils.ts`, run the materialization shorthand tests in `src/services/framework/__tests__/index.test.ts` against both Iceberg and Delta/Hive paths. 
## Configuration @@ -643,7 +643,7 @@ To add/modify JSON schemas: | `dj.lightdashProjectPath` | string | — | Custom path to dbt project for Lightdash | | `dj.lightdashProfilesPath` | string | — | Custom path to dbt profiles for Lightdash | | `dj.lightdash.defaultPartitionColumnCaseSensitive` | boolean | `false` | Auto-emit `meta.dimension.case_sensitive: true` on partition columns in generated YAML (stops Lightdash from wrapping partition columns in `UPPER()`, preserving Trino predicate pushdown). Requires `DJ: Sync to SQL and YML` to apply. | -| `dj.materialization.defaultIncrementalStrategy` | string | `overwrite_existing_partitions` | Default incremental strategy: `append`, `delete+insert`, `merge`, or `overwrite_existing_partitions` | +| `dj.materialization.defaultIncrementalStrategy` | string | `overwrite_existing_partitions` | Default incremental strategy: `append`, `delete+insert`, `merge`, `overwrite_existing_partitions`, or `dj_iceberg_partition_overwrite` | | `dj.autoGenerateTests` | object | — | (Experimental) Auto-generate tests on models | ### Environment Variables diff --git a/CHANGELOG.md b/CHANGELOG.md index 9356526..169555d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log +## 1.3.6 + +- **CTE exclude/include flags now mirror their main-model counterparts and inherit from the model** — a CTE accepts `exclude_date_filter`, `exclude_daily_filter`, `exclude_portal_partition_columns`, `exclude_portal_source_count`, and `include_full_month` with the same semantics as the corresponding main-model flags. Resolution is uniform: CTE override > model value > false. Set `exclude_portal_partition_columns: true` on the model to skip partition auto-injection in every CTE without per-CTE repetition; set it on a single CTE to override only that CTE. +- **New `dj_iceberg_partition_overwrite` incremental strategy** — drops and rewrites only the partitions present in the new slice on Iceberg tables. 
Shipped by DJ (no consumer macro required) and selectable from the Model Wizard. Requires Iceberg format on the target table; DJ flags non-Iceberg use directly in the Problems tab and points you to `delete+insert` instead. + ## 1.3.5 - **`unique_key` no longer emitted for `overwrite_existing_partitions`** — this strategy requires a custom dbt macro in your project (typically `get_incremental_overwrite_existing_partitions_sql`); the DJ extension does not ship it and dbt-trino does not provide it natively. If your project does not define the macro, switch to `{ "type": "delete+insert" }` — it auto-derives `unique_key` from partition columns. diff --git a/docs/models/CTE_PATTERNS.md b/docs/models/CTE_PATTERNS.md index d7097fb..de0216e 100644 --- a/docs/models/CTE_PATTERNS.md +++ b/docs/models/CTE_PATTERNS.md @@ -237,6 +237,37 @@ and `dims_from_cte` see on the main model, ensures downstream passthroughs never silently drop audit or partition columns, and prevents materialization errors where `partitioned_by` cannot find the partition columns. +### Opting out of auto-injection + +CTE-level exclude/include flags mirror their main-model counterparts: they +take the same names and the same boolean semantics, and they **inherit** +from the model when omitted on the CTE. Resolution is uniform across the +set: **CTE override > model value > false**. Set a flag once on the model to +have every CTE honor it, or set it on a single CTE to override just that CTE +(including setting `false` on a CTE to opt back in when the model excluded). + +The full set: + +- `"exclude_portal_partition_columns": true` — drops `portal_partition_*` + injection. +- `"exclude_portal_source_count": true` — drops `portal_source_count` + injection. +- `"exclude_date_filter": true` — drops the auto `_ext_event_date_filter` + WHERE-clause macros entirely (model-level OR CTE-level: either side + triggers suppression). 
+- `"exclude_daily_filter": true` — drops just the daily-grain + `_ext_event_date_filter`; the monthly-grain filter is preserved. +- `"include_full_month": true` — same effect as `exclude_daily_filter` for + partition pruning; emits the full-month range filter only. + +`datetime` itself has no CTE-level opt-out (the main model has none either, +so the two stay symmetric). If you don't want `datetime` in a CTE's output, +chain through another CTE (`from: { cte: ... }`) — auto-injection is skipped +for non-`from: { model }` shapes. + +These flags have no effect on CTEs whose `from` is another CTE, a source, or +a union, since those shapes never auto-inject in the first place. + --- ## 6. Dead Outer Layer Warning diff --git a/docs/models/README.md b/docs/models/README.md index c07245e..e3a2268 100644 --- a/docs/models/README.md +++ b/docs/models/README.md @@ -433,7 +433,8 @@ Set `materialization.strategy.type` to one of the following (or rely on the exte | `append` | Inserts new rows with no de-duplication. Fastest. | Upstream must guarantee no duplicates in the new slice. | | `delete+insert` | Partition-safe upsert. Safe default. | `unique_key` auto-derived from partition columns when omitted. Works on Delta Lake, Hive, and Iceberg. | | `merge` | Row-level upsert on `unique_key`. | **dbt-trino requires Iceberg format** on the target table. On Delta Lake / Hive use `delete+insert` instead. | -| `overwrite_existing_partitions` | Drops and rewrites only the partitions present in the new slice. `unique_key` is not applicable, the consumer macro derives the partition list from the new slice itself, so the schema rejects `unique_key` on this strategy. | **Requires a custom dbt macro in your project** (e.g. `get_incremental_overwrite_existing_partitions_sql`). The DJ extension does NOT ship this macro and dbt-trino does NOT provide it natively. 
If your project does not define it, use `delete+insert` with a partition column as `unique_key` \u2014 it produces equivalent behavior for daily/monthly partitioned models. | +| `overwrite_existing_partitions` | Drops and rewrites only the partitions present in the new slice. `unique_key` is not applicable, the consumer macro derives the partition list from the new slice itself, so the schema rejects `unique_key` on this strategy. | **Requires a custom dbt macro in your project** (e.g. `get_incremental_overwrite_existing_partitions_sql`). The DJ extension does NOT ship this macro and dbt-trino does NOT provide it natively. If your project does not define it, use `delete+insert` with a partition column as `unique_key`, it produces equivalent behavior for daily/monthly partitioned models. | +| `dj_iceberg_partition_overwrite` | Drops and rewrites only the partitions present in the new slice on **Iceberg** tables. `unique_key` is not applicable, the macro derives the partition list from the new slice itself by reading `properties.partitioning`, so the schema rejects `unique_key`. | **Shipped by DJ.** The dispatch macro `get_incremental_dj_iceberg_partition_overwrite_sql` lives in `macros/strategies.sql` and is auto-copied to `/macros/_ext_/strategies.sql` on **DJ: Refresh Projects**. **Requires Iceberg format** on the target table, set `materialization.format: "iceberg"` or the project var `storage_type: iceberg`. DJ flags non-Iceberg use in the Problems tab. On Delta Lake / Hive use `delete+insert` instead. 
| ### Data Quality diff --git a/macros/strategies.sql b/macros/strategies.sql new file mode 100644 index 0000000..729fdc6 --- /dev/null +++ b/macros/strategies.sql @@ -0,0 +1,71 @@ +{% macro get_incremental_dj_iceberg_partition_overwrite_sql(arg_dict) %} + {{ return(adapter.dispatch('get_incremental_dj_iceberg_partition_overwrite_sql', 'dbt')(arg_dict)) }} +{% endmacro %} + +{% macro default__get_incremental_dj_iceberg_partition_overwrite_sql(arg_dict) %} + {%- set target_relation = arg_dict["target_relation"] -%} + {%- set temp_relation = arg_dict["temp_relation"] -%} + {%- set config_properties = config.get("properties", {}) -%} + {%- set dest_columns = arg_dict["dest_columns"] -%} + {%- set dest_cols_csv = get_quoted_csv(dest_columns | map(attribute="name")) -%} + + {%- if "partitioning" in config_properties -%} + {%- set raw_partitioning = config_properties["partitioning"] | string -%} + {%- set partitioned_by = (raw_partitioning | replace("ARRAY['", "") | replace("']", "") | replace("'", "")).split(", ") -%} + {%- else -%} + {%- set partitioned_by = [] -%} + {%- endif -%} + + {%- set partitioned_by = partitioned_by | reject('==', '') | list -%} + {%- set mat_relation = temp_relation.incorporate(path={"identifier": temp_relation.identifier ~ "_mat"}) -%} + + {% if execute %} + {# 1. 
Create the materialized table once #} + {%- do run_query("create or replace table " ~ mat_relation ~ " as (select " ~ dest_cols_csv ~ " from " ~ temp_relation ~ ")") -%} + {% endif %} + + {% if is_incremental() and partitioned_by | length > 0 %} + {%- set target_columns = adapter.get_columns_in_relation(target_relation) -%} + {%- set col_types = {} -%} + {%- for col in target_columns -%} + {%- do col_types.update({col.name | lower: col.data_type}) -%} + {%- endfor -%} + + {%- set get_partitions_sql -%} + select distinct {{ partitioned_by | join(", ") }} from {{ mat_relation }} + {%- endset -%} + {%- set partition_results = run_query(get_partitions_sql) -%} + + {% if execute and partition_results.rows | length > 0 %} + {# 3. Run individual DELETEs. + Trino treats these as simple metadata drops. No OR-complexity issues. #} + {%- for row in partition_results.rows -%} + {%- set row_conditions = [] -%} + {%- for val in row.values() -%} + {%- set col_name = partitioned_by[loop.index0] | replace('"', '') | replace('`', '') | lower -%} + {%- set col_type = col_types.get(col_name, 'varchar') | lower -%} + + {%- if val is none -%} + {%- do row_conditions.append(partitioned_by[loop.index0] ~ " IS NULL") -%} + {%- elif 'date' in col_type -%} + {%- do row_conditions.append(partitioned_by[loop.index0] ~ " = DATE '" ~ val ~ "'") -%} + {%- elif 'timestamp' in col_type -%} + {%- do row_conditions.append(partitioned_by[loop.index0] ~ " = CAST('" ~ val ~ "' AS " ~ col_type ~ ")") -%} + {%- else -%} + {%- do row_conditions.append(partitioned_by[loop.index0] ~ " = '" ~ (val | string | replace("'", "''")) ~ "'") -%} + {%- endif -%} + {%- endfor -%} + + delete from {{ target_relation }} where {{ row_conditions | join(" AND ") }}; + {%- endfor -%} + {% endif %} + {% elif is_incremental() %} + delete from {{ target_relation }}; + {% endif %} + + {# 4. 
Finally, insert the new data #} + insert into {{ target_relation }} ({{ dest_cols_csv }}) + select {{ dest_cols_csv }} from {{ mat_relation }}; + + drop table if exists {{ mat_relation }}; +{% endmacro %} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 02b8c57..a28fe1d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "dj-framework", - "version": "1.3.5", + "version": "1.3.6", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "dj-framework", - "version": "1.3.5", + "version": "1.3.6", "license": "Apache-2.0", "workspaces": [ "web" @@ -15396,4 +15396,4 @@ } } } -} +} \ No newline at end of file diff --git a/package.json b/package.json index cc44c40..60f8d75 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,7 @@ "type": "git", "url": "https://github.com/Workday/vscode-dbt-json.git" }, - "version": "1.3.5", + "version": "1.3.6", "workspaces": [ "web" ], @@ -359,11 +359,12 @@ "enum": [ "append", "delete+insert", + "dj_iceberg_partition_overwrite", "merge", "overwrite_existing_partitions" ], "default": "overwrite_existing_partitions", - "markdownDescription": "Default incremental strategy applied when `materialization` is set to `incremental` without an explicit `strategy`. Can be overridden per model via `materialization.strategy`.\n\n**Strategy notes (dbt-trino):**\n- `append` — fastest; inserts without de-dup. Upstream must guarantee no duplicates in the new slice.\n- `delete+insert` — partition-safe upsert; `unique_key` is auto-derived from partitions when omitted. Works on Delta Lake, Hive, and Iceberg. _Planned to become the factory default in a future release._\n- `merge` — row-level upsert on `unique_key`. Requires the target table to use **Iceberg** format in dbt-trino.\n- `overwrite_existing_partitions` — **current default.** Drops & rewrites only partitions in the new slice. **Requires a custom dbt macro in your project** (e.g. 
`get_incremental_overwrite_existing_partitions_sql`); if your project does not define it, use `delete+insert` instead.\n\nTo change this default globally for a workspace, set this value here. To change the factory default across the extension, update `DEFAULT_INCREMENTAL_STRATEGY` in `src/shared/framework/constants.ts` and this `default` in lockstep.\n\nChanges apply to newly generated SQL immediately; to regenerate SQL/YML for existing models, run **DJ: Sync to SQL and YML**." + "markdownDescription": "Default incremental strategy applied when `materialization` is set to `incremental` without an explicit `strategy`. Can be overridden per model via `materialization.strategy`.\n\n**Strategy notes (dbt-trino):**\n- `append` — fastest; inserts without de-dup. Upstream must guarantee no duplicates in the new slice.\n- `delete+insert` — partition-safe upsert; `unique_key` is auto-derived from partitions when omitted. Works on Delta Lake, Hive, and Iceberg. _Planned to become the factory default in a future release._\n- `merge` — row-level upsert on `unique_key`. Requires the target table to use **Iceberg** format in dbt-trino.\n- `overwrite_existing_partitions` — **current default.** Drops & rewrites only partitions in the new slice. **Requires a custom dbt macro in your project** (e.g. `get_incremental_overwrite_existing_partitions_sql`); if your project does not define it, use `delete+insert` instead.\n- `dj_iceberg_partition_overwrite` — Drops & rewrites only partitions in the new slice on **Iceberg** tables. **Shipped by DJ** (`macros/strategies.sql` is auto-copied to `/macros/_ext_/`). On Delta Lake / Hive use `delete+insert` instead.\n\nTo change this default globally for a workspace, set this value here. 
To change the factory default across the extension, update `DEFAULT_INCREMENTAL_STRATEGY` in `src/shared/framework/constants.ts` and this `default` in lockstep.\n\nChanges apply to newly generated SQL immediately; to regenerate SQL/YML for existing models, run **DJ: Sync to SQL and YML**." }, "dj.autoGenerateTests": { "type": "object", diff --git a/schemas/model.cte.schema.json b/schemas/model.cte.schema.json index b493c66..9f6bb7a 100644 --- a/schemas/model.cte.schema.json +++ b/schemas/model.cte.schema.json @@ -144,6 +144,18 @@ "exclude_date_filter": { "$ref": "model.exclude_date_filter.schema.json" }, + "exclude_daily_filter": { + "$ref": "model.exclude_daily_filter.schema.json" + }, + "exclude_portal_partition_columns": { + "$ref": "model.exclude_portal_partition_columns.schema.json" + }, + "exclude_portal_source_count": { + "$ref": "model.exclude_portal_source_count.schema.json" + }, + "include_full_month": { + "$ref": "model.include_full_month.schema.json" + }, "where": { "$ref": "model.where.schema.json" }, diff --git a/schemas/model.incremental_strategy.schema.json b/schemas/model.incremental_strategy.schema.json index 458b80b..efb0f57 100644 --- a/schemas/model.incremental_strategy.schema.json +++ b/schemas/model.incremental_strategy.schema.json @@ -1,8 +1,8 @@ { "$id": "model.incremental_strategy.schema.json", "title": "Incremental Strategy", - "description": "Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. 
When in doubt, use 'delete+insert' with a partition column as unique_key.", - "markdownDescription": "**Incremental Strategy (dbt-trino)**\n\n| Strategy | When to use | Caveat |\n|---|---|---|\n| `append` | Fast insert-only, no dedup | Upstream must guarantee no duplicates |\n| `delete+insert` | Partition-safe upsert, **safe default** | Requires `unique_key` (auto-derived from partitions) |\n| `merge` | Row-level upsert on a primary key | **dbt-trino requires Iceberg format** |\n| `overwrite_existing_partitions` | Re-drop & rewrite only touched partitions | **Requires a custom dbt macro in your project.** The DJ extension does NOT ship this strategy. If your project does not define `get_incremental_overwrite_existing_partitions_sql` (or the equivalent adapter dispatch macro), use `delete+insert` instead. |", + "description": "Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key.", + "markdownDescription": "**Incremental Strategy (dbt-trino)**\n\n| Strategy | When to use | Caveat |\n|---|---|---|\n| `append` | Fast insert-only, no dedup | Upstream must guarantee no duplicates |\n| `delete+insert` | Partition-safe upsert, **safe default** | Requires `unique_key` (auto-derived from partitions) |\n| `merge` | Row-level upsert on a primary key | **dbt-trino requires Iceberg format** |\n| `overwrite_existing_partitions` | Re-drop & rewrite only touched partitions | **Requires a custom dbt macro in your project.** The DJ extension does NOT ship this strategy. 
If your project does not define `get_incremental_overwrite_existing_partitions_sql` (or the equivalent adapter dispatch macro), use `delete+insert` instead. |\n| `dj_iceberg_partition_overwrite` | Re-drop & rewrite only touched partitions on Iceberg tables | **Shipped by DJ** (`macros/strategies.sql` -> `/macros/_ext_/`). **Requires Iceberg format** (`materialization.format: \"iceberg\"` or project var `storage_type: iceberg`). On Delta Lake / Hive use `delete+insert` instead. |", "anyOf": [ { "description": "Append: insert new rows without de-duplication. Fastest strategy, but upstream must guarantee no duplicates in the new slice. No unique_key needed.", @@ -87,6 +87,19 @@ "description": "Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one." } } + }, + { + "description": "DJ Iceberg partition overwrite: drop and rewrite only the partitions present in the new slice on an Iceberg target. SHIPPED by the DJ extension via macros/strategies.sql (auto-copied to /macros/_ext_/strategies.sql on project refresh). REQUIRES Iceberg format on the target table — set materialization.format='iceberg' or the project var storage_type='iceberg'; otherwise the macro silently degrades to a full-table refresh. The macro derives the partition list from the new slice itself by reading properties.partitioning, so unique_key is not applicable and is rejected by the schema. On Delta Lake / Hive use 'delete+insert' instead.", + "markdownDescription": "**`dj_iceberg_partition_overwrite`** — drop & rewrite only partitions in the new slice on Iceberg tables.\n\n> **Shipped by DJ.** No consumer setup required — `macros/strategies.sql` is auto-copied to `/macros/_ext_/strategies.sql` when you run **DJ: Refresh Projects**. 
The dispatch macro is `get_incremental_dj_iceberg_partition_overwrite_sql`.\n>\n> **WARNING — Iceberg format required.** This strategy reads the Iceberg-only `properties.partitioning` config. Set `materialization.format: \"iceberg\"` or the project var `storage_type: iceberg`. On Delta Lake / Hive the macro silently degrades to a full-table refresh — DJ surfaces this as a Problems-tab error.\n>\n> **`unique_key` is not applicable** — the macro derives partitions from the new slice itself, so this strategy ignores `unique_key` and the schema rejects it.\n>\n> **Not on Iceberg?** Use `{ \"type\": \"delete+insert\" }` instead — DJ auto-fills `unique_key` from the partition column.", + "type": "object", + "required": ["type"], + "additionalProperties": false, + "properties": { + "type": { + "const": "dj_iceberg_partition_overwrite", + "description": "Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format." + } + } } ] } diff --git a/src/services/columnLineage.ts b/src/services/columnLineage.ts index 7c4a25f..efb9095 100644 --- a/src/services/columnLineage.ts +++ b/src/services/columnLineage.ts @@ -1238,6 +1238,9 @@ export class ColumnLineageService implements DJService { ctes: ctes as Parameters< typeof frameworkBuildCteColumnRegistry >[0]['ctes'], + modelJson: modelJson as Parameters< + typeof frameworkBuildCteColumnRegistry + >[0]['modelJson'], project, }); const cteCols = registry.get(cteName) || []; @@ -1792,6 +1795,9 @@ export class ColumnLineageService implements DJService { ctes: modelJson.ctes as Parameters< typeof frameworkBuildCteColumnRegistry >[0]['ctes'], + modelJson: modelJson as Parameters< + typeof frameworkBuildCteColumnRegistry + >[0]['modelJson'], project, }); const cteCols = registry.get(selected.cte) || []; diff --git a/src/services/framework/__tests__/cte-framework-dims-autoinject.test.ts b/src/services/framework/__tests__/cte-framework-dims-autoinject.test.ts index df4fc65..2b74495 100644 --- 
a/src/services/framework/__tests__/cte-framework-dims-autoinject.test.ts +++ b/src/services/framework/__tests__/cte-framework-dims-autoinject.test.ts @@ -284,4 +284,149 @@ describe('CTE datetime + portal_partition_* auto-injection', () => { expect(sql).toContain('portal_partition_daily'); expect(sql).toContain('portal_partition_hourly'); }); + + // Per-CTE opt-out via `exclude_portal_partition_columns: true` (mirrors the + // main-model flag with the same name). `datetime` has no per-CTE opt-out -- + // the main model has none either, so the two stay symmetric. + describe('exclude_portal_partition_columns', () => { + test('registry suppresses partitions but keeps datetime', () => { + const project = projectWithDatetimeAndPartitions('hour'); + const cte: FrameworkCTE = { + name: 'pre_agg', + from: { model: 'stg_events' }, + select: [ + { name: 'region', type: 'dim' }, + { name: 'amount', type: 'fct', agg: 'sum' }, + ], + group_by: 'dims', + exclude_portal_partition_columns: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + project, + }); + const names = registry.get('pre_agg')!.map((c) => c.name); + + expect(names).toContain('datetime'); + expect(names).not.toContain('portal_partition_monthly'); + expect(names).not.toContain('portal_partition_daily'); + expect(names).not.toContain('portal_partition_hourly'); + }); + + test('SQL emitter omits partitions but keeps datetime', () => { + const project = projectWithDatetimeAndPartitions('hour'); + const cte: FrameworkCTE = { + name: 'pre_agg', + from: { model: 'stg_events' }, + select: [ + { name: 'region', type: 'dim' }, + { name: 'amount', type: 'fct', agg: 'sum' }, + ], + group_by: 'dims', + exclude_portal_partition_columns: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + project, + }); + const sql = frameworkGenerateCteSql({ + cte, + cteRegistry: registry, + project, + partitionColumnNames: [ + 'portal_partition_monthly', + 
'portal_partition_daily', + 'portal_partition_hourly', + ], + }); + + expect(sql).toMatch(/\bdatetime\b/); + expect(sql).not.toContain('-- partition columns'); + expect(sql).not.toContain('portal_partition_monthly'); + expect(sql).not.toContain('portal_partition_daily'); + expect(sql).not.toContain('portal_partition_hourly'); + }); + + // Inheritance: when the CTE omits the flag, the gate falls back to the + // model-level value. CTE override takes precedence (CTE > model > false). + test('inherits from main-model exclude_portal_partition_columns when CTE omits the flag', () => { + const project = projectWithDatetimeAndPartitions('hour'); + const cte: FrameworkCTE = { + name: 'pre_agg', + from: { model: 'stg_events' }, + select: [ + { name: 'region', type: 'dim' }, + { name: 'amount', type: 'fct', agg: 'sum' }, + ], + group_by: 'dims', + } as any; + const modelJson = { + type: 'int_select_model', + exclude_portal_partition_columns: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + modelJson, + project, + }); + const names = registry.get('pre_agg')!.map((c) => c.name); + + expect(names).toContain('datetime'); + expect(names).not.toContain('portal_partition_monthly'); + expect(names).not.toContain('portal_partition_daily'); + expect(names).not.toContain('portal_partition_hourly'); + }); + + test('CTE flag overrides model: cte=false beats model=true', () => { + const project = projectWithDatetimeAndPartitions('hour'); + const cte: FrameworkCTE = { + name: 'opt_back_in', + from: { model: 'stg_events' }, + select: [{ name: 'region', type: 'dim' }], + exclude_portal_partition_columns: false, + } as any; + const modelJson = { + type: 'int_select_model', + exclude_portal_partition_columns: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + modelJson, + project, + }); + const names = registry.get('opt_back_in')!.map((c) => c.name); + + expect(names).toContain('portal_partition_monthly'); + 
expect(names).toContain('portal_partition_daily'); + expect(names).toContain('portal_partition_hourly'); + }); + + // Flag is inert on chained CTEs because the gate already returns null for + // those shapes -- setting it should not throw or change behavior. + test('flag is inert on from: { cte } CTEs', () => { + const project = projectWithDatetimeAndPartitions('hour'); + const base: FrameworkCTE = { + name: 'base', + from: { model: 'stg_events' }, + select: [{ name: 'region', type: 'dim' }], + } as any; + const chained: FrameworkCTE = { + name: 'chained', + from: { cte: 'base' }, + select: [{ name: 'region', type: 'dim' }], + exclude_portal_partition_columns: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [base, chained], + project, + }); + const names = registry.get('chained')!.map((c) => c.name); + expect(names).toEqual(['region']); + }); + }); }); diff --git a/src/services/framework/__tests__/cte-partition-filters.test.ts b/src/services/framework/__tests__/cte-partition-filters.test.ts index 791f5c2..88bb972 100644 --- a/src/services/framework/__tests__/cte-partition-filters.test.ts +++ b/src/services/framework/__tests__/cte-partition-filters.test.ts @@ -382,6 +382,136 @@ describe('CTE Partition Filters', () => { }); }); + // CTE-level `exclude_daily_filter` and `include_full_month` mirror the + // main-model flags with the same names. Both suppress just the daily-grain + // _ext_event_date_filter while leaving the monthly-grain filter in place. + // CTE values take precedence over model values via `??` in + // `frameworkBuildFilters`. 
+ describe('CTE-level exclude_daily_filter / include_full_month', () => { + test('CTE exclude_daily_filter=true drops daily filter, keeps monthly', () => { + const cte: FrameworkCTE = { + name: 'no_daily', + from: { model: 'partitioned_model' }, + select: [{ name: 'col_a', type: 'dim' as const }], + exclude_daily_filter: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + project: partitionedProject, + }); + + const sql = frameworkGenerateCteSql({ + cte, + cteRegistry: registry, + datetimeInterval: null, + dj, + modelJson: intModelJson, + project: partitionedProject, + }); + + // Monthly filter still emitted with `interval="month"`; daily filter + // (no interval arg) is suppressed. + expect(sql).toMatch(/_ext_event_date_filter\([^)]*interval="month"/); + expect(sql).not.toMatch( + /_ext_event_date_filter\("portal_partition_daily"[^)]*\)/, + ); + }); + + test('CTE include_full_month=true drops daily filter, keeps monthly', () => { + const cte: FrameworkCTE = { + name: 'full_month', + from: { model: 'partitioned_model' }, + select: [{ name: 'col_a', type: 'dim' as const }], + include_full_month: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + project: partitionedProject, + }); + + const sql = frameworkGenerateCteSql({ + cte, + cteRegistry: registry, + datetimeInterval: null, + dj, + modelJson: intModelJson, + project: partitionedProject, + }); + + expect(sql).toMatch(/_ext_event_date_filter\([^)]*interval="month"/); + expect(sql).not.toMatch( + /_ext_event_date_filter\("portal_partition_daily"[^)]*\)/, + ); + }); + + test('CTE flag overrides the model-level value', () => { + // Model says exclude daily; CTE explicitly opts back in (`false`). 
+ const modelOptOut: FrameworkModel = { + ...intModelJson, + exclude_daily_filter: true, + } as any; + const cte: FrameworkCTE = { + name: 'opt_back_in', + from: { model: 'partitioned_model' }, + select: [{ name: 'col_a', type: 'dim' as const }], + exclude_daily_filter: false, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + project: partitionedProject, + }); + + const sql = frameworkGenerateCteSql({ + cte, + cteRegistry: registry, + datetimeInterval: null, + dj, + modelJson: modelOptOut, + project: partitionedProject, + }); + + // Daily filter should appear because the CTE override (`false`) wins + // over the model-level `true`. + expect(sql).toMatch( + /_ext_event_date_filter\("portal_partition_daily"[^)]*\)/, + ); + }); + + test('omitting CTE override falls through to model-level value', () => { + const modelOptOut: FrameworkModel = { + ...intModelJson, + exclude_daily_filter: true, + } as any; + const cte: FrameworkCTE = { + name: 'inherits', + from: { model: 'partitioned_model' }, + select: [{ name: 'col_a', type: 'dim' as const }], + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + project: partitionedProject, + }); + + const sql = frameworkGenerateCteSql({ + cte, + cteRegistry: registry, + datetimeInterval: null, + dj, + modelJson: modelOptOut, + project: partitionedProject, + }); + + // No CTE override -> falls through to model value (true) -> no daily. 
+ expect(sql).not.toMatch( + /_ext_event_date_filter\("portal_partition_daily"[^)]*\)/, + ); + }); + }); + describe('Full model integration', () => { test('model with CTEs generates partition filters in CTE SQL', () => { const modelJson: FrameworkModel = { diff --git a/src/services/framework/__tests__/cte-portal-source-count-autoinject.test.ts b/src/services/framework/__tests__/cte-portal-source-count-autoinject.test.ts index ccc3d20..600c345 100644 --- a/src/services/framework/__tests__/cte-portal-source-count-autoinject.test.ts +++ b/src/services/framework/__tests__/cte-portal-source-count-autoinject.test.ts @@ -184,6 +184,86 @@ describe('CTE portal_source_count auto-injection (Gap 3)', () => { // `portal_source_count_count`. The registry tests above already cover the // no-agg passthrough branch on the SQL side indirectly (SQL emitter is // driven by the registry). + // Inheritance: CTE inherits `exclude_portal_source_count` from the parent + // model when the CTE omits the flag (CTE override > model > false). 
+ test('inherits from main-model exclude_portal_source_count when CTE omits the flag', () => { + const project = projectWithPortalSourceCount(); + const cte: FrameworkCTE = { + name: 'pre_agg', + from: { model: 'stg_events' }, + select: [ + { name: 'region', type: 'dim' }, + { name: 'amount', type: 'fct', agg: 'sum' }, + ], + group_by: 'dims', + } as any; + const modelJson = { + type: 'int_select_model', + exclude_portal_source_count: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + modelJson, + project, + }); + const cols = registry.get('pre_agg')!; + expect(cols.find((c) => c.name === 'portal_source_count')).toBeUndefined(); + }); + + test('CTE flag overrides model: cte=false beats model=true', () => { + const project = projectWithPortalSourceCount(); + const cte: FrameworkCTE = { + name: 'opt_back_in', + from: { model: 'stg_events' }, + select: [{ name: 'region', type: 'dim' }], + exclude_portal_source_count: false, + } as any; + const modelJson = { + type: 'int_select_model', + exclude_portal_source_count: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + modelJson, + project, + }); + const cols = registry.get('opt_back_in')!; + expect(cols.find((c) => c.name === 'portal_source_count')).toBeDefined(); + }); + + // Per-CTE opt-out via `exclude_portal_source_count: true` (mirrors the + // main-model flag with the same name). Both the registry and the SQL emitter + // must drop the auto-injected column when the flag is set. 
+ test('exclude_portal_source_count: true suppresses auto-injection (registry + SQL)', () => { + const project = projectWithPortalSourceCount(); + const cte: FrameworkCTE = { + name: 'pre_agg', + from: { model: 'stg_events' }, + select: [ + { name: 'region', type: 'dim' }, + { name: 'amount', type: 'fct', agg: 'sum' }, + ], + group_by: 'dims', + exclude_portal_source_count: true, + } as any; + + const registry = frameworkBuildCteColumnRegistry({ + ctes: [cte], + project, + }); + const cols = registry.get('pre_agg')!; + expect(cols.find((c) => c.name === 'portal_source_count')).toBeUndefined(); + + const sql = frameworkGenerateCteSql({ + cte, + cteRegistry: registry, + project, + }); + expect(sql).not.toMatch(/portal_source_count/); + }); + test('SQL emitter injects portal_source_count in aggregating CTE (sum-kernel after collision)', () => { const project = projectWithPortalSourceCount(); const cte: FrameworkCTE = { diff --git a/src/services/framework/__tests__/cte-validation.test.ts b/src/services/framework/__tests__/cte-validation.test.ts index ca2292f..16e36c1 100644 --- a/src/services/framework/__tests__/cte-validation.test.ts +++ b/src/services/framework/__tests__/cte-validation.test.ts @@ -6,6 +6,7 @@ import { validateCteLightdashMetrics, validateCtes, validateDeadOuterLayer, + validateDjIcebergPartitionOverwrite, validateMainModelAggregation, } from '@services/modelValidation'; @@ -1097,3 +1098,102 @@ describe('validateDeadOuterLayer (Gap 5)', () => { expect(validateDeadOuterLayer(m)).toHaveLength(0); }); }); + +// dj_iceberg_partition_overwrite is the DJ-shipped Iceberg-only variant of +// overwrite_existing_partitions. The validator must surface a Problems-tab +// error when the strategy is used without Iceberg format -- otherwise the +// shipped macro silently degrades to a full-table refresh on Delta/Hive. 
+describe('validateDjIcebergPartitionOverwrite', () => { + test('emits an error when strategy is set without Iceberg format', () => { + const modelJson = { + type: 'int_select_model', + materialization: { + type: 'incremental', + strategy: { type: 'dj_iceberg_partition_overwrite' }, + }, + }; + + const errors = validateDjIcebergPartitionOverwrite(modelJson, undefined); + expect(errors).toHaveLength(1); + expect(errors[0].severity).toBe('error'); + expect(errors[0].instancePath).toBe('/materialization/strategy/type'); + expect(errors[0].message).toContain('requires Iceberg format'); + }); + + test('emits an error when project storage_type is delta_lake', () => { + const modelJson = { + type: 'int_select_model', + materialization: { + type: 'incremental', + strategy: { type: 'dj_iceberg_partition_overwrite' }, + }, + }; + + const errors = validateDjIcebergPartitionOverwrite(modelJson, 'delta_lake'); + expect(errors).toHaveLength(1); + expect(errors[0].severity).toBe('error'); + }); + + test('passes when project storage_type is iceberg', () => { + const modelJson = { + type: 'int_select_model', + materialization: { + type: 'incremental', + strategy: { type: 'dj_iceberg_partition_overwrite' }, + }, + }; + + const errors = validateDjIcebergPartitionOverwrite(modelJson, 'iceberg'); + expect(errors).toHaveLength(0); + }); + + test('passes when model-level format overrides to iceberg', () => { + const modelJson = { + type: 'int_select_model', + materialization: { + type: 'incremental', + format: 'iceberg', + strategy: { type: 'dj_iceberg_partition_overwrite' }, + }, + }; + + const errors = validateDjIcebergPartitionOverwrite(modelJson, undefined); + expect(errors).toHaveLength(0); + }); + + test('returns no errors for unrelated strategies', () => { + for (const strategyType of [ + 'append', + 'delete+insert', + 'merge', + 'overwrite_existing_partitions', + ]) { + const modelJson = { + type: 'int_select_model', + materialization: { + type: 'incremental', + strategy: { type: 
strategyType }, + }, + }; + expect( + validateDjIcebergPartitionOverwrite(modelJson, undefined), + ).toHaveLength(0); + } + }); + + test('returns no errors when materialization is absent or shorthand string', () => { + expect( + validateDjIcebergPartitionOverwrite( + { type: 'int_select_model' }, + undefined, + ), + ).toHaveLength(0); + + expect( + validateDjIcebergPartitionOverwrite( + { type: 'int_select_model', materialization: 'incremental' }, + undefined, + ), + ).toHaveLength(0); + }); +}); diff --git a/src/services/framework/__tests__/index.test.ts b/src/services/framework/__tests__/index.test.ts index 3e04677..250f5c0 100644 --- a/src/services/framework/__tests__/index.test.ts +++ b/src/services/framework/__tests__/index.test.ts @@ -1131,6 +1131,81 @@ describe('incremental strategy variants', () => { expect(properties?.partitioning).toBe("ARRAY['portal_partition_daily']"); expect(properties?.partitioned_by).toBeUndefined(); }); + + test('strategy: dj_iceberg_partition_overwrite emits Iceberg partitioning + omits unique_key', () => { + // dj_iceberg_partition_overwrite is the DJ-shipped variant of + // overwrite_existing_partitions. Like its consumer-supplied cousin it + // never emits unique_key (the macro derives partitions from the new + // slice itself), but it requires Iceberg format -- so the partition + // keyword must be `partitioning` (not `partitioned_by`). 
+ const icebergProject: DbtProject = { + ...partitionedProject, + variables: { storage_type: 'iceberg' }, + }; + + const modelJson: FrameworkModel = { + type: 'int_select_model', + group: 'swh', + topic: 'misc', + name: 'daily_dj_iceberg', + materialization: { + type: 'incremental', + strategy: { type: 'dj_iceberg_partition_overwrite' }, + }, + select: [ + 'portal_partition_daily', + { type: 'dim', name: 'dim_a' } as never, + ], + from: { model: 'parent_daily' }, + } as unknown as FrameworkModel; + + const { config } = frameworkGenerateModelOutput({ + dj: createTestDJ(), + modelJson, + project: icebergProject, + }); + + expect(config.materialized).toBe('incremental'); + expect(config.incremental_strategy).toBe('dj_iceberg_partition_overwrite'); + expect(config.unique_key).toBeUndefined(); + const properties = config.properties as Record | undefined; + expect(properties?.partitioning).toBe("ARRAY['portal_partition_daily']"); + expect(properties?.partitioned_by).toBeUndefined(); + }); + + test('strategy: dj_iceberg_partition_overwrite honors model-level format override', () => { + // No project-level storage_type, but the model overrides format to + // iceberg -- the SQL generator must still emit `partitioning` and the + // strategy must still be wired through cleanly. 
+ const modelJson: FrameworkModel = { + type: 'int_select_model', + group: 'swh', + topic: 'misc', + name: 'daily_dj_iceberg_override', + materialization: { + type: 'incremental', + format: 'iceberg', + strategy: { type: 'dj_iceberg_partition_overwrite' }, + }, + select: [ + 'portal_partition_daily', + { type: 'dim', name: 'dim_a' } as never, + ], + from: { model: 'parent_daily' }, + } as unknown as FrameworkModel; + + const { config } = frameworkGenerateModelOutput({ + dj: createTestDJ(), + modelJson, + project: partitionedProject, + }); + + expect(config.incremental_strategy).toBe('dj_iceberg_partition_overwrite'); + expect(config.unique_key).toBeUndefined(); + const properties = config.properties as Record | undefined; + expect(properties?.partitioning).toBe("ARRAY['portal_partition_daily']"); + expect(properties?.partitioned_by).toBeUndefined(); + }); }); describe('incremental unique_key defaulting', () => { diff --git a/src/services/framework/handlers/model-crud-handlers.ts b/src/services/framework/handlers/model-crud-handlers.ts index 99cec6a..b32eddc 100644 --- a/src/services/framework/handlers/model-crud-handlers.ts +++ b/src/services/framework/handlers/model-crud-handlers.ts @@ -259,6 +259,7 @@ export class ModelCrudHandlers { ) { const cteColumnRegistry = frameworkBuildCteColumnRegistry({ ctes: modelJsonForValidation.ctes, + modelJson: modelJsonForValidation, project, }); const colRefErrors = validateCteColumnReferences( diff --git a/src/services/framework/index.ts b/src/services/framework/index.ts index 642fa41..d8f3eb2 100644 --- a/src/services/framework/index.ts +++ b/src/services/framework/index.ts @@ -1875,6 +1875,15 @@ function resolveValidationDiagnostics( } } - return new vscode.Diagnostic(range, err.message, severity); + // Per-detail severity wins so a single batched callback can carry a + // mix of errors and warnings (e.g. validateDjIcebergPartitionOverwrite + // emits an Error alongside other post-generation warnings). 
+ const resolvedSeverity = + err.severity === 'error' + ? vscode.DiagnosticSeverity.Error + : err.severity === 'warning' + ? vscode.DiagnosticSeverity.Warning + : severity; + return new vscode.Diagnostic(range, err.message, resolvedSeverity); }); } diff --git a/src/services/framework/utils/column-utils.ts b/src/services/framework/utils/column-utils.ts index 67b6f72..2f8aef7 100644 --- a/src/services/framework/utils/column-utils.ts +++ b/src/services/framework/utils/column-utils.ts @@ -976,11 +976,19 @@ export function frameworkInferCteColumns({ cte, cteRegistry, modelId, + modelJson, project, }: { cte: FrameworkCTE; cteRegistry: CteColumnRegistry; modelId?: string | null; + /** + * Parent model JSON. When provided, CTE-level `exclude_portal_partition_columns` + * and `exclude_portal_source_count` inherit from the model when omitted on + * the CTE itself (CTE override > model > false). Optional so test fixtures + * and lineage previews can build a registry without a full model. + */ + modelJson?: FrameworkModel; project: DbtProject; }): FrameworkColumn[] { const columns: FrameworkColumn[] = []; @@ -1214,6 +1222,7 @@ export function frameworkInferCteColumns({ const autoDims = frameworkShouldAutoInjectCteFrameworkDims({ cte, alreadyPresentNames: columns.map((c) => c.name), + modelJson, project, }); if (autoDims) { @@ -1241,6 +1250,7 @@ export function frameworkInferCteColumns({ const autoPsc = frameworkShouldAutoInjectCtePortalSourceCount({ cte, alreadyPresentNames: columns.map((c) => c.name), + modelJson, project, }); if (autoPsc) { @@ -1283,6 +1293,10 @@ export function frameworkInferCteColumns({ * - The CTE's FROM is a plain `{ model }` ref (no union, no cte chaining). * - The upstream model actually has `portal_source_count` in its catalog. * - The CTE's own select / bulk expansions haven't already pulled it in. + * - The CTE has not opted out via `exclude_portal_source_count: true`, + * matching the main-model flag with the same name. 
The CTE flag overrides + * the model-level value; when omitted on the CTE the model-level value is + * inherited (CTE override > model > false). * * `applyAgg` is true when the CTE aggregates (non-empty `group_by`), mirroring * the main-model `frameworkModelHasAgg` rule. The caller is responsible for @@ -1292,12 +1306,28 @@ export function frameworkInferCteColumns({ export function frameworkShouldAutoInjectCtePortalSourceCount({ cte, alreadyPresentNames, + modelJson, project, }: { cte: FrameworkCTE; alreadyPresentNames: string[]; + /** + * When provided, the model's own `exclude_portal_source_count` is used as + * a fallback when the CTE does not declare the flag. CTE-only call sites + * (tests, lineage previews) may omit this; behavior reduces to "no model + * inheritance" in that case. + */ + modelJson?: FrameworkModel; project: DbtProject; }): { baseModel: string; applyAgg: boolean } | null { + const effectiveExclude = + cte.exclude_portal_source_count ?? + (modelJson && 'exclude_portal_source_count' in modelJson + ? modelJson.exclude_portal_source_count + : undefined); + if (effectiveExclude) { + return null; + } if (!('model' in cte.from) || !cte.from.model) { return null; } @@ -1344,6 +1374,12 @@ export function frameworkShouldAutoInjectCtePortalSourceCount({ * - The CTE's own select hasn't already pulled the column in. * - The candidate is not excluded by the effective datetime interval * (day → hourly dropped; month → daily+hourly dropped; year → all three). + * - The CTE has not opted out via `exclude_portal_partition_columns: true` + * (suppresses all `portal_partition_*`), matching the main-model flag with + * the same name. The CTE flag overrides the model-level value; when omitted + * on the CTE the model-level value is inherited (CTE override > model > + * false). `datetime` itself has no per-CTE opt-out; the main model has none + * either, so the two stay symmetric. 
* * The effective datetime interval is determined from either an explicit * `{ name: 'datetime', interval: X }` entry in the CTE select, or by @@ -1352,10 +1388,18 @@ export function frameworkShouldAutoInjectCtePortalSourceCount({ export function frameworkShouldAutoInjectCteFrameworkDims({ cte, alreadyPresentNames, + modelJson, project, }: { cte: FrameworkCTE; alreadyPresentNames: string[]; + /** + * When provided, the model's own `exclude_portal_partition_columns` is used + * as a fallback when the CTE does not declare the flag. CTE-only call sites + * (tests, lineage previews) may omit this; behavior reduces to "no model + * inheritance" in that case. + */ + modelJson?: FrameworkModel; project: DbtProject; }): { baseModel: string; @@ -1419,12 +1463,22 @@ export function frameworkShouldAutoInjectCteFrameworkDims({ break; } - const candidates: ('datetime' | FrameworkPartitionName)[] = [ - 'datetime', - PARTITION_MONTHLY, - PARTITION_DAILY, - PARTITION_HOURLY, - ]; + // Per-CTE opt-out: `exclude_portal_partition_columns` suppresses the + // partition columns, mirroring the main-model flag with the same name. + // CTE override > model > false: a CTE that omits the flag inherits the + // model-level value, so users can set it once on the model and have all + // CTEs honor it without per-CTE repetition. There is no `exclude_datetime` + // on either schema -- if you don't want `datetime` in a CTE, source from + // another CTE or use a lookback model. + const effectiveExcludePartitions = + cte.exclude_portal_partition_columns ?? + (modelJson && 'exclude_portal_partition_columns' in modelJson + ? 
modelJson.exclude_portal_partition_columns + : undefined); + const candidates: ('datetime' | FrameworkPartitionName)[] = ['datetime']; + if (!effectiveExcludePartitions) { + candidates.push(PARTITION_MONTHLY, PARTITION_DAILY, PARTITION_HOURLY); + } const alreadyPresent = new Set(alreadyPresentNames); const missing = candidates.filter( (c) => !alreadyPresent.has(c) && !excluded.has(c), @@ -1456,11 +1510,17 @@ export function frameworkShouldAutoInjectCteFrameworkDims({ export function frameworkBuildCteColumnRegistry({ ctes, modelId, + modelJson, partitionColumnNames, project, }: { ctes: FrameworkCTE[]; modelId?: string | null; + /** + * Parent model JSON. Forwarded to `frameworkInferCteColumns` so that CTE + * exclude flags inherit from the model when omitted on the CTE. + */ + modelJson?: FrameworkModel; partitionColumnNames?: string[]; project: DbtProject; }): CteColumnRegistry { @@ -1470,6 +1530,7 @@ export function frameworkBuildCteColumnRegistry({ cte, cteRegistry: registry, modelId, + modelJson, project, }); // Sort alphabetically with partition columns at the end, matching the diff --git a/src/services/framework/utils/sql-utils.ts b/src/services/framework/utils/sql-utils.ts index ca97627..edfa172 100644 --- a/src/services/framework/utils/sql-utils.ts +++ b/src/services/framework/utils/sql-utils.ts @@ -996,6 +996,8 @@ export function frameworkBuildFilters({ modelJson, prefix, project, + excludeDailyFilterOverride, + includeFullMonthOverride, }: { datetimeInterval: 'hour' | 'day' | 'month' | 'year' | null; dj: DJ; @@ -1003,6 +1005,16 @@ export function frameworkBuildFilters({ modelJson: FrameworkModel; prefix?: string; project: DbtProject; + /** + * Per-CTE override for `exclude_daily_filter`. When set, takes precedence + * over the model-level value; otherwise the model-level flag is used. + */ + excludeDailyFilterOverride?: boolean; + /** + * Per-CTE override for `include_full_month`. 
When set, takes precedence + * over the model-level value; otherwise the model-level flag is used. + */ + includeFullMonthOverride?: boolean; }): string[] { const sqlLines: string[] = []; // If exclude_date_filter set to true, we return no framework date filters @@ -1012,10 +1024,13 @@ export function frameworkBuildFilters({ // const modelLayer = frameworkGetModelLayer(modelJson); - // Model level inputs - const includeFullMonth = !!( - 'include_full_month' in modelJson && modelJson.include_full_month - ); + // Effective flag values: CTE override > model-level > default false. + const includeFullMonth = + includeFullMonthOverride ?? + !!('include_full_month' in modelJson && modelJson.include_full_month); + const excludeDailyFilter = + excludeDailyFilterOverride ?? + !!('exclude_daily_filter' in modelJson && modelJson.exclude_daily_filter); if ( 'model' in from && @@ -1038,13 +1053,7 @@ export function frameworkBuildFilters({ break; } case PARTITION_DAILY: { - if ( - !( - 'exclude_daily_filter' in modelJson && - modelJson.exclude_daily_filter - ) && - !includeFullMonth - ) { + if (!excludeDailyFilter && !includeFullMonth) { sqlLines.push(`{{ _ext_event_date_filter(${args.join(', ')}) }}`); } break; @@ -1267,6 +1276,15 @@ export function frameworkGenerateCteSql({ 'exclude_date_filter' in modelJson && modelJson.exclude_date_filter); + // Per-CTE overrides for the daily-grain date filter and the include-full-month + // shape. `undefined` falls through to the model-level value inside + // `frameworkBuildFilters` via `??`, so omitting the flag on a CTE preserves + // the existing model-level behavior. + const cteExcludeDailyFilterOverride = + 'exclude_daily_filter' in cte ? cte.exclude_daily_filter : undefined; + const cteIncludeFullMonthOverride = + 'include_full_month' in cte ? 
cte.include_full_month : undefined; + // UNION CTE if ('union' in from && from.union) { const unionSpec = from.union; @@ -1305,6 +1323,8 @@ export function frameworkGenerateCteSql({ from: { model: modelRef }, modelJson, project, + excludeDailyFilterOverride: cteExcludeDailyFilterOverride, + includeFullMonthOverride: cteIncludeFullMonthOverride, }); if (filters.length) { sqlLine += ` where ${filters.join(' and ')}`; @@ -1443,6 +1463,7 @@ export function frameworkGenerateCteSql({ const autoDims = frameworkShouldAutoInjectCteFrameworkDims({ cte, alreadyPresentNames: selectParts.map((p) => p.name), + modelJson, project, }); if (autoDims) { @@ -1460,6 +1481,7 @@ export function frameworkGenerateCteSql({ const autoPsc = frameworkShouldAutoInjectCtePortalSourceCount({ cte, alreadyPresentNames: selectParts.map((p) => p.name), + modelJson, project, }); if (autoPsc) { @@ -1580,6 +1602,8 @@ export function frameworkGenerateCteSql({ modelJson, project, prefix: 'join' in from && from.join ? from.model : undefined, + excludeDailyFilterOverride: cteExcludeDailyFilterOverride, + includeFullMonthOverride: cteIncludeFullMonthOverride, }), ); } @@ -1818,6 +1842,7 @@ export function frameworkGenerateModelOutput({ ? frameworkBuildCteColumnRegistry({ ctes: modelJson.ctes, modelId, + modelJson, partitionColumnNames, project, }) @@ -1993,6 +2018,18 @@ export function frameworkGenerateModelOutput({ modelConfig.incremental_strategy = 'overwrite_existing_partitions'; break; } + case 'dj_iceberg_partition_overwrite': { + // DJ-shipped strategy (macros/strategies.sql -> the dispatch macro + // get_incremental_dj_iceberg_partition_overwrite_sql is auto-copied + // to /macros/_ext_/strategies.sql by writeMacroFiles). + // Requires Iceberg format on the target table; the macro reads + // properties.partitioning (Iceberg-only) to derive partitions + // from the new slice itself, so unique_key is not applicable + // (the JSON schema rejects it). 
Format is enforced at validation + // time via validateDjIcebergPartitionOverwrite, not here. + modelConfig.incremental_strategy = 'dj_iceberg_partition_overwrite'; + break; + } default: { const defaultStrategy = dj.config.materializationDefaultIncrementalStrategy ?? @@ -2000,9 +2037,9 @@ export function frameworkGenerateModelOutput({ modelConfig.incremental_strategy = defaultStrategy; // Only delete+insert auto-derives unique_key from partitions. // Append never needs one; merge requires a user-supplied key - // (and is not a valid default); overwrite_existing_partitions - // ignores unique_key entirely (the consumer macro derives - // partitions from the new slice itself). + // (and is not a valid default); overwrite_existing_partitions and + // dj_iceberg_partition_overwrite ignore unique_key entirely (the + // macros derive partitions from the new slice itself). if (defaultStrategy === 'delete+insert' && partitions.length) { modelConfig.unique_key = getDefaultUniqueKey(partitions); } @@ -2580,6 +2617,7 @@ export function frameworkModelManifestMerge({ ? frameworkBuildCteColumnRegistry({ ctes: modelJson.ctes, modelId: mergeModelId, + modelJson, partitionColumnNames: frameworkGetPartitionColumnNames({ modelJson, project, diff --git a/src/services/modelValidation.ts b/src/services/modelValidation.ts index b7214ef..d4638f1 100644 --- a/src/services/modelValidation.ts +++ b/src/services/modelValidation.ts @@ -768,6 +768,67 @@ function normalizeGroupBy(groupBy: any): string { return 'none'; } +/** + * Validates that the `dj_iceberg_partition_overwrite` incremental strategy + * is only used on Iceberg tables. + * + * The DJ-shipped macro `get_incremental_dj_iceberg_partition_overwrite_sql` + * (in `macros/strategies.sql`) reads `properties.partitioning`, which + * `frameworkGenerateModelOutput` only emits on Iceberg-format models. 
On + * Delta Lake / Hive (where only `partitioned_by` is emitted), the macro + * silently degrades to a full-table refresh -- almost certainly not what + * the author intended. We surface this as a Problems-tab error so users + * notice immediately. + * + * Format resolution mirrors the SQL generator: model-level + * `materialization.format` wins, then project-level `storage_type`, then + * neither (Delta/Hive default). + * + * Emits the detail with `severity: 'error'` so it can ride the existing + * post-generation warning channel without overwriting other warnings on + * the same URI. + */ +export function validateDjIcebergPartitionOverwrite( + modelJson: any, + storageType?: string | null, +): ValidationErrorDetail[] { + const errors: ValidationErrorDetail[] = []; + if (!modelJson || typeof modelJson !== 'object') { + return errors; + } + + const materialization = modelJson.materialization; + if (!materialization || typeof materialization !== 'object') { + return errors; + } + const strategy = materialization.strategy; + if ( + !strategy || + typeof strategy !== 'object' || + strategy.type !== 'dj_iceberg_partition_overwrite' + ) { + return errors; + } + + const modelFormat = + typeof materialization.format === 'string' ? materialization.format : null; + const resolvedFormat = + modelFormat || (storageType === 'iceberg' ? 'iceberg' : null); + + if (resolvedFormat !== 'iceberg') { + errors.push({ + message: + "incremental_strategy 'dj_iceberg_partition_overwrite' requires Iceberg format. " + + "Set materialization.format to 'iceberg' or the project var storage_type to 'iceberg'. 
" + + "On Delta Lake / Hive use 'delete+insert' instead -- DJ auto-derives unique_key from the partition column.", + instancePath: '/materialization/strategy/type', + severity: 'error', + }); + } + + return errors; +} + const EXISTS_OPERATORS = new Set(['exists', 'not_exists']); /** diff --git a/src/services/sync/ModelProcessor.ts b/src/services/sync/ModelProcessor.ts index f834cfd..3e96ed7 100644 --- a/src/services/sync/ModelProcessor.ts +++ b/src/services/sync/ModelProcessor.ts @@ -21,6 +21,7 @@ import { import { validateCteColumnReferences, validateDeadOuterLayer, + validateDjIcebergPartitionOverwrite, validateMainModelAggregation, } from '@services/modelValidation'; import { jsonParse } from '@shared'; @@ -130,6 +131,7 @@ export class ModelProcessor { const cteColumnRegistry = hasCtes ? frameworkBuildCteColumnRegistry({ ctes: modelJson.ctes!, + modelJson, project, }) : undefined; @@ -189,6 +191,22 @@ export class ModelProcessor { } } + // dj_iceberg_partition_overwrite requires Iceberg format on the target + // table; emit a hard Error (red squiggle) when the resolved format is + // not Iceberg so users notice immediately. Each detail carries + // severity: 'error' so it renders correctly even when batched + // alongside warnings on the same URI. + const icebergStrategyErrors = validateDjIcebergPartitionOverwrite( + modelJson, + project.variables?.storage_type, + ); + if (icebergStrategyErrors.length > 0) { + for (const e of icebergStrategyErrors) { + this.config.logger.error(`${modelName}: ${e.message}`); + validationWarnings.push(e); + } + } + if (validationWarnings.length > 0) { const summary = `Model validation warnings:\n${validationWarnings .map((w) => w.message) diff --git a/src/services/sync/types.ts b/src/services/sync/types.ts index 0107659..9d69f68 100644 --- a/src/services/sync/types.ts +++ b/src/services/sync/types.ts @@ -406,6 +406,14 @@ export interface ValidationErrorDetail { message: string; /** AJV JSON pointer path, e.g. 
"/tables/0/freshness" */ instancePath: string; + /** + * Optional per-detail severity override. When omitted, the consumer + * (e.g. `resolveValidationDiagnostics`) applies its default severity for + * the channel (Error for AJV failures, Warning for post-generation + * checks). Set to 'error' on a detail emitted via the warning channel + * to render it as a red squiggle without overwriting sibling warnings. + */ + severity?: 'error' | 'warning'; } /** diff --git a/src/shared/dbt/types.ts b/src/shared/dbt/types.ts index 4a1f74b..1c3c2eb 100644 --- a/src/shared/dbt/types.ts +++ b/src/shared/dbt/types.ts @@ -201,6 +201,7 @@ export type DbtModelConfig = { incremental_strategy?: | 'append' | 'delete+insert' + | 'dj_iceberg_partition_overwrite' | 'merge' | 'overwrite_existing_partitions'; materialized?: diff --git a/src/shared/framework/types.ts b/src/shared/framework/types.ts index 1016829..02c7636 100644 --- a/src/shared/framework/types.ts +++ b/src/shared/framework/types.ts @@ -572,5 +572,6 @@ export type FrameworkSyncOp = export type DefaultIncrementalStrategy = | 'append' | 'delete+insert' + | 'dj_iceberg_partition_overwrite' | 'merge' | 'overwrite_existing_partitions'; diff --git a/src/shared/schema/types/model.cte.schema.d.ts b/src/shared/schema/types/model.cte.schema.d.ts index bb461e1..831ab89 100644 --- a/src/shared/schema/types/model.cte.schema.d.ts +++ b/src/shared/schema/types/model.cte.schema.d.ts @@ -490,6 +490,22 @@ export type SchemaModelSelectCTE = * Will prevent the automatic date filters from getting added */ export type SchemaModelExcludeDateFilter = boolean; +/** + * Exclude Daily Filter + */ +export type ModelExcludeDailyFilterSchemaJson = boolean; +/** + * Will prevent the automatic portal partition date columns from getting added + */ +export type SchemaModelExcludePortalPartitionColumns = boolean; +/** + * Will prevent the automatic portal source count column from getting added + */ +export type SchemaModelExcludePortalSourceCount = boolean; 
+/** + * Includes the full month when running any given event date + */ +export type ModelIncludeFullMonthSchemaJson = boolean; /** * GROUP BY clause for the CTE. Use "dims" (shorthand) or [{ "type": "dims" }] to automatically group by all dimension column expressions. Avoid bare string aliases when the CTE select contains computed expressions (expr), as they reference the alias rather than the underlying expression and will fail at query runtime. */ @@ -623,6 +639,10 @@ export interface SchemaModelCTE { )[], ]; exclude_date_filter?: SchemaModelExcludeDateFilter; + exclude_daily_filter?: ModelExcludeDailyFilterSchemaJson; + exclude_portal_partition_columns?: SchemaModelExcludePortalPartitionColumns; + exclude_portal_source_count?: SchemaModelExcludePortalSourceCount; + include_full_month?: ModelIncludeFullMonthSchemaJson; where?: SchemaModelWhere; group_by?: SchemaModelGroupBy; having?: SchemaModelHaving; diff --git a/src/shared/schema/types/model.ctes.schema.d.ts b/src/shared/schema/types/model.ctes.schema.d.ts index 668ceae..9a44299 100644 --- a/src/shared/schema/types/model.ctes.schema.d.ts +++ b/src/shared/schema/types/model.ctes.schema.d.ts @@ -496,6 +496,22 @@ export type SchemaModelSelectCTE = * Will prevent the automatic date filters from getting added */ export type SchemaModelExcludeDateFilter = boolean; +/** + * Exclude Daily Filter + */ +export type ModelExcludeDailyFilterSchemaJson = boolean; +/** + * Will prevent the automatic portal partition date columns from getting added + */ +export type SchemaModelExcludePortalPartitionColumns = boolean; +/** + * Will prevent the automatic portal source count column from getting added + */ +export type SchemaModelExcludePortalSourceCount = boolean; +/** + * Includes the full month when running any given event date + */ +export type ModelIncludeFullMonthSchemaJson = boolean; /** * GROUP BY clause for the CTE. 
Use "dims" (shorthand) or [{ "type": "dims" }] to automatically group by all dimension column expressions. Avoid bare string aliases when the CTE select contains computed expressions (expr), as they reference the alias rather than the underlying expression and will fail at query runtime. */ @@ -629,6 +645,10 @@ export interface SchemaModelCTE { )[], ]; exclude_date_filter?: SchemaModelExcludeDateFilter; + exclude_daily_filter?: ModelExcludeDailyFilterSchemaJson; + exclude_portal_partition_columns?: SchemaModelExcludePortalPartitionColumns; + exclude_portal_source_count?: SchemaModelExcludePortalSourceCount; + include_full_month?: ModelIncludeFullMonthSchemaJson; where?: SchemaModelWhere; group_by?: SchemaModelGroupBy; having?: SchemaModelHaving; diff --git a/src/shared/schema/types/model.incremental_strategy.schema.d.ts b/src/shared/schema/types/model.incremental_strategy.schema.d.ts index 4d0ae7a..12ff0c0 100644 --- a/src/shared/schema/types/model.incremental_strategy.schema.d.ts +++ b/src/shared/schema/types/model.incremental_strategy.schema.d.ts @@ -6,7 +6,7 @@ */ /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. 
*/ export type IncrementalStrategy = | { @@ -48,4 +48,10 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; diff --git a/src/shared/schema/types/model.materialization.schema.d.ts b/src/shared/schema/types/model.materialization.schema.d.ts index f9832ee..ca1c1ea 100644 --- a/src/shared/schema/types/model.materialization.schema.d.ts +++ b/src/shared/schema/types/model.materialization.schema.d.ts @@ -38,7 +38,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. */ export type IncrementalStrategy = | { @@ -80,4 +80,10 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. 
*/ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; diff --git a/src/shared/schema/types/model.schema.d.ts b/src/shared/schema/types/model.schema.d.ts index c5a69ac..cba2f67 100644 --- a/src/shared/schema/types/model.schema.d.ts +++ b/src/shared/schema/types/model.schema.d.ts @@ -79,7 +79,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. */ export type IncrementalStrategy = | { @@ -121,6 +121,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. 
+ */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization @@ -1412,6 +1418,10 @@ export interface SchemaModelCTE { )[], ]; exclude_date_filter?: SchemaModelExcludeDateFilter; + exclude_daily_filter?: ModelExcludeDailyFilterSchemaJson; + exclude_portal_partition_columns?: SchemaModelExcludePortalPartitionColumns; + exclude_portal_source_count?: SchemaModelExcludePortalSourceCount; + include_full_month?: ModelIncludeFullMonthSchemaJson; where?: SchemaModelWhere; group_by?: SchemaModelGroupBy1; having?: SchemaModelHaving; diff --git a/src/shared/schema/types/model.type.int_join_column.schema.d.ts b/src/shared/schema/types/model.type.int_join_column.schema.d.ts index 7b7fadf..79d3e60 100644 --- a/src/shared/schema/types/model.type.int_join_column.schema.d.ts +++ b/src/shared/schema/types/model.type.int_join_column.schema.d.ts @@ -147,7 +147,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. */ export type IncrementalStrategy = | { @@ -189,6 +189,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. 
REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization diff --git a/src/shared/schema/types/model.type.int_join_models.schema.d.ts b/src/shared/schema/types/model.type.int_join_models.schema.d.ts index 73608f8..299530b 100644 --- a/src/shared/schema/types/model.type.int_join_models.schema.d.ts +++ b/src/shared/schema/types/model.type.int_join_models.schema.d.ts @@ -147,7 +147,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. */ export type IncrementalStrategy = | { @@ -189,6 +189,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. 
Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization @@ -763,6 +769,10 @@ export type SchemaModelSelectCTE = */ include?: [SchemaColumnName, ...SchemaColumnName[]]; }; +/** + * Includes the full month when running any given event date + */ +export type ModelIncludeFullMonthSchemaJson = boolean; /** * Validate model group by */ @@ -1088,6 +1098,10 @@ export interface SchemaModelCTE { )[], ]; exclude_date_filter?: SchemaModelExcludeDateFilter; + exclude_daily_filter?: ModelExcludeDailyFilterSchemaJson; + exclude_portal_partition_columns?: SchemaModelExcludePortalPartitionColumns; + exclude_portal_source_count?: SchemaModelExcludePortalSourceCount; + include_full_month?: ModelIncludeFullMonthSchemaJson; where?: SchemaModelWhere; group_by?: SchemaModelGroupBy1; having?: SchemaModelHaving; diff --git a/src/shared/schema/types/model.type.int_lookback_model.schema.d.ts b/src/shared/schema/types/model.type.int_lookback_model.schema.d.ts index 032ea5f..e5364d8 100644 --- a/src/shared/schema/types/model.type.int_lookback_model.schema.d.ts +++ b/src/shared/schema/types/model.type.int_lookback_model.schema.d.ts @@ -147,7 +147,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 
'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. */ export type IncrementalStrategy = | { @@ -189,6 +189,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization diff --git a/src/shared/schema/types/model.type.int_rollup_model.schema.d.ts b/src/shared/schema/types/model.type.int_rollup_model.schema.d.ts index 5bddeb5..35972fd 100644 --- a/src/shared/schema/types/model.type.int_rollup_model.schema.d.ts +++ b/src/shared/schema/types/model.type.int_rollup_model.schema.d.ts @@ -147,7 +147,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. 
*/ export type IncrementalStrategy = | { @@ -189,6 +189,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization diff --git a/src/shared/schema/types/model.type.int_select_model.schema.d.ts b/src/shared/schema/types/model.type.int_select_model.schema.d.ts index cd1d2c3..adf5f06 100644 --- a/src/shared/schema/types/model.type.int_select_model.schema.d.ts +++ b/src/shared/schema/types/model.type.int_select_model.schema.d.ts @@ -147,7 +147,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. */ export type IncrementalStrategy = | { @@ -189,6 +189,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. 
REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization @@ -763,6 +769,10 @@ export type SchemaModelSelectCTE = */ include?: [SchemaColumnName, ...SchemaColumnName[]]; }; +/** + * Includes the full month when running any given event date + */ +export type ModelIncludeFullMonthSchemaJson = boolean; /** * Validate model group by */ @@ -1090,6 +1100,10 @@ export interface SchemaModelCTE { )[], ]; exclude_date_filter?: SchemaModelExcludeDateFilter; + exclude_daily_filter?: ModelExcludeDailyFilterSchemaJson; + exclude_portal_partition_columns?: SchemaModelExcludePortalPartitionColumns; + exclude_portal_source_count?: SchemaModelExcludePortalSourceCount; + include_full_month?: ModelIncludeFullMonthSchemaJson; where?: SchemaModelWhere; group_by?: SchemaModelGroupBy1; having?: SchemaModelHaving; diff --git a/src/shared/schema/types/model.type.int_union_models.schema.d.ts b/src/shared/schema/types/model.type.int_union_models.schema.d.ts index a3256bd..6d5d0f9 100644 --- a/src/shared/schema/types/model.type.int_union_models.schema.d.ts +++ b/src/shared/schema/types/model.type.int_union_models.schema.d.ts @@ -147,7 +147,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. 
Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. */ export type IncrementalStrategy = | { @@ -189,6 +189,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization @@ -680,6 +686,10 @@ export type SchemaModelSelectCTE = */ include?: [SchemaColumnName, ...SchemaColumnName[]]; }; +/** + * Includes the full month when running any given event date + */ +export type ModelIncludeFullMonthSchemaJson = boolean; /** * GROUP BY clause for the CTE. Use "dims" (shorthand) or [{ "type": "dims" }] to automatically group by all dimension column expressions. Avoid bare string aliases when the CTE select contains computed expressions (expr), as they reference the alias rather than the underlying expression and will fail at query runtime. 
*/ @@ -981,6 +991,10 @@ export interface SchemaModelCTE { )[], ]; exclude_date_filter?: SchemaModelExcludeDateFilter; + exclude_daily_filter?: ModelExcludeDailyFilterSchemaJson; + exclude_portal_partition_columns?: SchemaModelExcludePortalPartitionColumns; + exclude_portal_source_count?: SchemaModelExcludePortalSourceCount; + include_full_month?: ModelIncludeFullMonthSchemaJson; where?: SchemaModelWhere; group_by?: SchemaModelGroupBy; having?: SchemaModelHaving; diff --git a/src/shared/schema/types/model.type.mart_join_models.schema.d.ts b/src/shared/schema/types/model.type.mart_join_models.schema.d.ts index 08c51e9..b17db50 100644 --- a/src/shared/schema/types/model.type.mart_join_models.schema.d.ts +++ b/src/shared/schema/types/model.type.mart_join_models.schema.d.ts @@ -584,6 +584,14 @@ export type SchemaModelSelectCTE = * Will prevent the automatic date filters from getting added */ export type SchemaModelExcludeDateFilter = boolean; +/** + * Exclude Daily Filter + */ +export type ModelExcludeDailyFilterSchemaJson = boolean; +/** + * Includes the full month when running any given event date + */ +export type ModelIncludeFullMonthSchemaJson = boolean; /** * Validate model group by */ @@ -884,6 +892,10 @@ export interface SchemaModelCTE { )[], ]; exclude_date_filter?: SchemaModelExcludeDateFilter; + exclude_daily_filter?: ModelExcludeDailyFilterSchemaJson; + exclude_portal_partition_columns?: SchemaModelExcludePortalPartitionColumns; + exclude_portal_source_count?: SchemaModelExcludePortalSourceCount; + include_full_month?: ModelIncludeFullMonthSchemaJson; where?: SchemaModelWhere; group_by?: SchemaModelGroupBy1; having?: SchemaModelHaving; diff --git a/src/shared/schema/types/model.type.mart_select_model.schema.d.ts b/src/shared/schema/types/model.type.mart_select_model.schema.d.ts index c5e9214..1bce80b 100644 --- a/src/shared/schema/types/model.type.mart_select_model.schema.d.ts +++ b/src/shared/schema/types/model.type.mart_select_model.schema.d.ts @@ -555,6 
+555,14 @@ export type SchemaModelSelectCTE = * Will prevent the automatic date filters from getting added */ export type SchemaModelExcludeDateFilter = boolean; +/** + * Exclude Daily Filter + */ +export type ModelExcludeDailyFilterSchemaJson = boolean; +/** + * Includes the full month when running any given event date + */ +export type ModelIncludeFullMonthSchemaJson = boolean; /** * Validate model group by */ @@ -832,6 +840,10 @@ export interface SchemaModelCTE { )[], ]; exclude_date_filter?: SchemaModelExcludeDateFilter; + exclude_daily_filter?: ModelExcludeDailyFilterSchemaJson; + exclude_portal_partition_columns?: SchemaModelExcludePortalPartitionColumns; + exclude_portal_source_count?: SchemaModelExcludePortalSourceCount; + include_full_month?: ModelIncludeFullMonthSchemaJson; where?: SchemaModelWhere; group_by?: SchemaModelGroupBy1; having?: SchemaModelHaving; diff --git a/src/shared/schema/types/model.type.stg_select_model.schema.d.ts b/src/shared/schema/types/model.type.stg_select_model.schema.d.ts index 8b829af..28b0f20 100644 --- a/src/shared/schema/types/model.type.stg_select_model.schema.d.ts +++ b/src/shared/schema/types/model.type.stg_select_model.schema.d.ts @@ -64,7 +64,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 
'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. */ export type IncrementalStrategy = | { @@ -106,6 +106,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization diff --git a/src/shared/schema/types/model.type.stg_select_source.schema.d.ts b/src/shared/schema/types/model.type.stg_select_source.schema.d.ts index 3cabd09..7a9b8ec 100644 --- a/src/shared/schema/types/model.type.stg_select_source.schema.d.ts +++ b/src/shared/schema/types/model.type.stg_select_source.schema.d.ts @@ -64,7 +64,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. 
*/ export type IncrementalStrategy = | { @@ -106,6 +106,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization diff --git a/src/shared/schema/types/model.type.stg_union_sources.schema.d.ts b/src/shared/schema/types/model.type.stg_union_sources.schema.d.ts index 74d37d8..83583c5 100644 --- a/src/shared/schema/types/model.type.stg_union_sources.schema.d.ts +++ b/src/shared/schema/types/model.type.stg_union_sources.schema.d.ts @@ -64,7 +64,7 @@ export type SchemaColumnName = string; */ export type SchemaModelPartitions = SchemaColumnName[]; /** - * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' requires the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. + * Incremental Strategy for dbt-trino. Pick one of: 'append', 'delete+insert', 'merge', 'overwrite_existing_partitions', 'dj_iceberg_partition_overwrite'. NOTE: 'overwrite_existing_partitions' requires a custom dbt macro in your project and is not shipped by the DJ extension. 'merge' and 'dj_iceberg_partition_overwrite' require the target table to use Iceberg format in dbt-trino. When in doubt, use 'delete+insert' with a partition column as unique_key. */ export type IncrementalStrategy = | { @@ -106,6 +106,12 @@ export type IncrementalStrategy = * Overwrite only the partitions in the new slice. 
REQUIRES a custom macro in your dbt project — prefer 'delete+insert' if you do not have one. */ type: 'overwrite_existing_partitions'; + } + | { + /** + * Overwrite only the partitions in the new slice on an Iceberg table. Shipped by DJ; requires Iceberg format. + */ + type: 'dj_iceberg_partition_overwrite'; }; /** * Type of materialization diff --git a/templates/_AGENTS.md b/templates/_AGENTS.md index c7a2eaf..836c0e9 100644 --- a/templates/_AGENTS.md +++ b/templates/_AGENTS.md @@ -699,7 +699,8 @@ Use the `materialization` field instead of the legacy `materialized` + `incremen | `append` | `{ "type": "append" }` | Fast insert-only; no de-dup | Upstream must guarantee no duplicates in the new slice | | `delete+insert` | `{ "type": "delete+insert", "unique_key": "..." }` | Partition-safe upsert (**safe default**) | `unique_key` is auto-derived from partitions when omitted | | `merge` | `{ "type": "merge", "unique_key": "id", "merge_update_columns": [...], "merge_exclude_columns": [...] }` | Row-level upsert on a primary key | **dbt-trino requires Iceberg format.** Set `materialization.format: "iceberg"` or the project var `storage_type: iceberg` | -| `overwrite_existing_partitions` | `{ "type": "overwrite_existing_partitions" }` | Drop & rewrite only partitions present in the new slice | **Requires a custom dbt macro in your project** (e.g. `get_incremental_overwrite_existing_partitions_sql`). The DJ extension does NOT ship this macro and dbt-trino does NOT provide it natively. `unique_key` is **not applicable** for this strategy \u2014 the macro derives partitions from the new slice itself, and the schema rejects `unique_key`. If your project does not define the macro, use `{ "type": "delete+insert" }` instead \u2014 behavior is equivalent for partition-aligned daily/monthly incrementals when `unique_key` is the partition column. 
| +| `overwrite_existing_partitions` | `{ "type": "overwrite_existing_partitions" }` | Drop & rewrite only partitions present in the new slice | **Requires a custom dbt macro in your project** (e.g. `get_incremental_overwrite_existing_partitions_sql`). The DJ extension does NOT ship this macro and dbt-trino does NOT provide it natively. `unique_key` is **not applicable** for this strategy — the macro derives partitions from the new slice itself, and the schema rejects `unique_key`. If your project does not define the macro, use `{ "type": "delete+insert" }` instead — behavior is equivalent for partition-aligned daily/monthly incrementals when `unique_key` is the partition column. | +| `dj_iceberg_partition_overwrite` | `{ "type": "dj_iceberg_partition_overwrite" }` | Drop & rewrite only partitions present in the new slice on **Iceberg** tables | **Shipped by DJ.** No consumer macro required — `macros/strategies.sql` is auto-copied to `/macros/_ext_/strategies.sql` on **DJ: Refresh Projects**. The dispatch macro is `get_incremental_dj_iceberg_partition_overwrite_sql`. **Requires Iceberg format**: set `materialization.format: "iceberg"` or project var `storage_type: iceberg`; otherwise DJ flags it in the Problems tab. `unique_key` is **not applicable** — the macro derives partitions from the new slice itself. On Delta Lake / Hive use `{ "type": "delete+insert" }` instead. | ### Legacy Incremental Configuration @@ -958,7 +959,8 @@ Use `"group_by": "dims"` or `"group_by": [{ "type": "dims" }]` inside CTEs. Avoi - **Lightdash metrics belong on the main-model `select`.** `lightdash.metrics` / `lightdash.metrics_merge` on a CTE `select` item is rejected (only the main-model select feeds Lightdash metric generation). Keep the pre-aggregated column in the CTE and re-declare it on the main-model `select` with the metric block. `lightdash.dimension` on CTE selects is still supported.
- **`portal_source_count` auto-injects in CTEs whose `from` is `{ model }`.** It's aggregated with `count` when the CTE has a `group_by`; otherwise it passes through. Don't add it manually. Set `override_suffix_agg: true` on the CTE select item only when you need a differently-aggregated variant alongside the audit column. -- **`datetime` and `portal_partition_*` auto-inject in CTEs whose `from` is `{ model }`.** Mirrors the main-model behavior: if the upstream has them and the CTE's select did not include them (even through a narrow `dims_from_model.include` list), they're appended automatically. `datetime` emits as a bare passthrough unless the CTE sets `{ "name": "datetime", "interval": "..." }`; in that case the interval drives partition exclusion (`day` drops hourly, `month` drops hourly+daily, `year` drops all three). Only fires for plain `from: { model }` — not for `from: { cte }`, source, or union shapes. +- **`datetime` and `portal_partition_*` auto-inject in CTEs whose `from` is `{ model }`.** Mirrors the main-model behavior: if the upstream has them and the CTE's select did not include them (even through a narrow `dims_from_model.include` list), they're appended automatically. `datetime` emits as a bare passthrough unless the CTE sets `{ "name": "datetime", "interval": "..." }`; in that case the interval drives partition exclusion (`day` drops hourly, `month` drops hourly+daily, `year` drops all three). Only fires for plain `from: { model }` — not for `from: { cte }`, source, or union shapes. Opt out via `"exclude_portal_partition_columns": true` on the CTE or the model (see flag inheritance below). +- **CTE-level exclude/include flags mirror the main-model flags and inherit from the model.** A CTE accepts `exclude_date_filter`, `exclude_daily_filter`, `exclude_portal_partition_columns`, `exclude_portal_source_count`, and `include_full_month` with the same semantics as the corresponding main-model flags. 
Resolution is uniform: CTE override > model value > false. Set the flag on the model to apply it to every CTE, on a single CTE to override only that CTE, or set `false` on a CTE to opt back in when the model excluded. - **Every `fct` column in the main-model `select` must be aggregated when the main model has a `group_by`.** Set `agg` / `aggs`, wrap an aggregate in `expr` (e.g. `sum(x)`, `avg(x)`, `any_value(x)`, `merge(cast(x as hyperloglog))`, `cast(tdigest_agg(x) as varbinary)`), or set `exclude_from_group_by: true`. This is enforced for scalar selects, CTE scalar refs, and bulk `all_from_cte` / `fcts_from_cte` carriers. - **Avoid dead outer layers.** A main `select` that's a single `all_from_cte` / `dims_from_cte` passthrough of one CTE with identical `group_by` and no extra filter / limit / projection is flagged as a no-op warning — drop the wrapper (move the CTE's select into the main model) or add new work to the outer layer. @@ -1031,7 +1033,7 @@ Subqueries can appear in `where`, `having`, and join `on` conditions via the `su | **Un-aggregated `fct` + group_by** | Every `fct` in the main `select` must set `agg`/`aggs`, wrap an aggregate in `expr` (e.g. `sum(x)`, `merge(cast(x as hyperloglog))`), or `exclude_from_group_by: true`. Applies to scalar and bulk CTE carriers. | | **Lightdash metrics on a CTE select**| Not supported; only the main-model `select` feeds Lightdash metric generation. Keep the pre-aggregated column in the CTE and declare `lightdash.metrics` / `lightdash.metrics_merge` on the main-model `select`. `lightdash.dimension` on CTE selects is still supported. | | **Duplicated `portal_source_count` in CTE** | When a CTE's `from` is `{ model }`, `portal_source_count` auto-injects (aggregated with `count` when the CTE has `group_by`). Don't add it manually; set `override_suffix_agg: true` only for a differently-aggregated variant alongside the audit column. 
| -| **Missing `portal_partition_*` / `datetime` in CTE** | When a CTE's `from` is `{ model }`, `datetime` and `portal_partition_*` auto-inject from the upstream even if a narrow `dims_from_model.include` list omitted them — do not add them by hand. Explicit `{ "name": "datetime", "interval": X }` drives partition exclusion (`day` drops hourly, `month` drops hourly+daily, `year` drops all three). | +| **Missing `portal_partition_*` / `datetime` in CTE** | When a CTE's `from` is `{ model }`, `datetime` and `portal_partition_*` auto-inject from the upstream even if a narrow `dims_from_model.include` list omitted them — do not add them by hand. Explicit `{ "name": "datetime", "interval": X }` drives partition exclusion (`day` drops hourly, `month` drops hourly+daily, `year` drops all three). Opt out per CTE with `"exclude_portal_partition_columns": true` (mirrors the main-model flag); `datetime` itself has no opt-out. | | **Invalid Source Reference** | Use format `__.` (double underscore, then dot). | | **Lightdash Case Sensitivity** | Optionally set `"case_sensitive": true/false` at model or column level to override the Lightdash global default. | | **CTE group_by with computed cols** | Don't use bare string aliases (e.g., `["month"]`) for columns defined with `expr`. Use `"dims"` or `{ "expr": "..." }`. | diff --git a/templates/skills/dj-create-new-model/_SKILL.md b/templates/skills/dj-create-new-model/_SKILL.md index b406cdc..699d3c5 100644 --- a/templates/skills/dj-create-new-model/_SKILL.md +++ b/templates/skills/dj-create-new-model/_SKILL.md @@ -78,7 +78,7 @@ The layer directory is derived from the type prefix: `stg_*` -> `staging`, `int_ - For joins, verify upstream columns exist by reading the upstream model's `.model.json` or source `.source.json` - Rename models by changing JSON fields (type/group/topic/name), never by renaming the file on disk - Prefer `"materialization": "incremental"` over legacy `"materialized": "incremental"`. 
For full control, use the structured form: `{ "type": "incremental", "format"?: "iceberg"|"delta_lake"|"hive", "partitions"?: [...], "strategy"?: {...} }`. See `model.materialization.schema.json` -- **Incremental strategies** (`materialization.strategy.type`): `append` (insert-only, no dedup), `delete+insert` (partition-safe upsert; `unique_key` auto-derived from partitions), `merge` (row-level upsert on `unique_key` \u2014 **requires Iceberg format in dbt-trino**), `overwrite_existing_partitions` (**requires a custom dbt macro in the consumer project**; if not available, use `delete+insert` instead). If omitted, the extension default applies (`dj.materialization.defaultIncrementalStrategy`). See `model.incremental_strategy.schema.json` +- **Incremental strategies** (`materialization.strategy.type`): `append` (insert-only, no dedup), `delete+insert` (partition-safe upsert; `unique_key` auto-derived from partitions), `merge` (row-level upsert on `unique_key` — **requires Iceberg format in dbt-trino**), `overwrite_existing_partitions` (**requires a custom dbt macro in the consumer project**; if not available, use `delete+insert` instead), `dj_iceberg_partition_overwrite` (**shipped by DJ** via `macros/_ext_/strategies.sql`; **requires Iceberg format** — on Delta Lake / Hive use `delete+insert` instead). If omitted, the extension default applies (`dj.materialization.defaultIncrementalStrategy`). See `model.incremental_strategy.schema.json` - `int_select_model` and `int_join_models` support `from.rollup` for time-grain re-aggregation without needing a separate `int_rollup_model`. See AGENTS.md "Model Types" and `model.from.rollup.schema.json` - Use the `ctes` array for inline CTEs on `int_select_model`, `int_join_models`, `int_union_models`, `mart_select_model`, `mart_join_models`. CTE bulk selects support `exclude`/`include` filters.
See AGENTS.md "Inline CTEs" and `model.cte.schema.json` - WHERE, HAVING, and JOIN ON conditions support inline subqueries via the `subquery` key. See AGENTS.md "Inline Subqueries" and `model.subquery.schema.json` @@ -95,6 +95,7 @@ The layer directory is derived from the type prefix: `stg_*` -> `staging`, `int_ - **Un-aggregated `fct` + main-model `group_by` is an error** — every `fct` in the main `select` must set `agg` / `aggs`, wrap an aggregate in `expr` (`sum(x)`, `avg(x)`, `merge(cast(x as hyperloglog))`, `cast(tdigest_agg(x) as varbinary)`, `any_value(x)`, …), or `exclude_from_group_by: true`. Applies to scalar selects, CTE scalar refs, and bulk `all_from_cte` / `fcts_from_cte` carriers. - **`portal_source_count` auto-injects in CTEs whose `from` is a model** — don't duplicate it in the CTE `select`; it's appended automatically (aggregated with `count` when the CTE has a `group_by`). Set `override_suffix_agg: true` only when you need a differently-aggregated variant alongside the audit column. - **`datetime` and `portal_partition_*` auto-inject in CTEs whose `from` is a model** — mirrors the main-model behavior. If the upstream has them and the CTE's select (or `dims_from_model.include`) did not list them, they're appended automatically. An explicit `{ "name": "datetime", "interval": X }` drives partition exclusion: `day` drops hourly, `month` drops hourly+daily, `year` drops all three. Only fires for plain `from: { model }` — not `from: { cte }`, source, or union shapes. +- **CTE exclude/include flags mirror the main-model flags and inherit from the model** — a CTE accepts `exclude_date_filter`, `exclude_daily_filter`, `exclude_portal_partition_columns`, `exclude_portal_source_count`, and `include_full_month` with the same semantics as their main-model counterparts. Resolution is uniform: **CTE override > model value > false**. 
Set a flag on the model to apply it to every CTE, on a single CTE to override only that CTE, or set `false` on a CTE to opt back in when the model excluded. - **Dead outer-layer warning** — if the main `select` is a single `all_from_cte` / `dims_from_cte` passthrough of one CTE with identical `group_by` and no extra filter / limit / projection, drop the wrapper or add new work to it. See `docs/models/CTE_PATTERNS.md`. - `from.rollup` requires the upstream model to have a select column with an `"interval"` field (e.g., `{ "name": "datetime", "interval": "day" }`) - Cross joins have no `on` property -- do not include `on: {}` or `on: null` diff --git a/web/src/features/ModelWizard/AdditionalFields.tsx b/web/src/features/ModelWizard/AdditionalFields.tsx index 54c72cc..88315dd 100644 --- a/web/src/features/ModelWizard/AdditionalFields.tsx +++ b/web/src/features/ModelWizard/AdditionalFields.tsx @@ -35,6 +35,10 @@ const INCREMENTAL_STRATEGY_OPTIONS = [ label: 'Overwrite Existing Partitions', value: 'overwrite_existing_partitions', }, + { + label: 'DJ Iceberg Partition Overwrite', + value: 'dj_iceberg_partition_overwrite', + }, ] as const; export function AdditionalFields({ @@ -228,16 +232,19 @@ export function AdditionalFields({ '\u2022 append: inserts new rows without dedup (upstream must guarantee no duplicates).\n' + '\u2022 delete+insert: partition-safe upsert; unique_key auto-derived from partitions.\n' + '\u2022 merge: row-level upsert on unique_key (requires Iceberg format in dbt-trino).\n' + - '\u2022 overwrite_existing_partitions: drops & rewrites partitions in the new slice; no unique_key needed (the macro derives partitions from the new slice). Requires a custom dbt macro in your project \u2014 if unavailable, use delete+insert instead.\n' + + '\u2022 overwrite_existing_partitions: drops & rewrites partitions in the new slice; no unique_key needed (the macro derives partitions from the new slice). 
Requires a custom dbt macro in your project, if unavailable, use delete+insert instead.\n' + + '\u2022 dj_iceberg_partition_overwrite: drops & rewrites partitions in the new slice on Iceberg tables; shipped by DJ (no consumer macro required). Requires Iceberg format on Delta Lake / Hive use delete+insert instead.\n' + 'The default can be configured via dj.materialization.defaultIncrementalStrategy.' } /> )} /> - {/* Unique Key - not applicable for 'append' or 'overwrite_existing_partitions' */} + {/* Unique Key - not applicable for 'append', 'overwrite_existing_partitions', or 'dj_iceberg_partition_overwrite' */} {incrementalStrategy?.type !== 'append' && - incrementalStrategy?.type !== 'overwrite_existing_partitions' && ( + incrementalStrategy?.type !== 'overwrite_existing_partitions' && + incrementalStrategy?.type !== + 'dj_iceberg_partition_overwrite' && (