From 12dbdbe10fbed14f80489d1ce0ff0b9224c737a4 Mon Sep 17 00:00:00 2001
From: "turbolytics.io"
Date: Sun, 13 Apr 2025 14:47:56 -0400
Subject: [PATCH] debug: Looking into Azure Integration

refs #120
---
 dev/config/examples/azure-debug.yml | 46 +++++++++++++++++++++++++++++
 dev/fixtures/azure.jsonl            |  1 +
 2 files changed, 47 insertions(+)
 create mode 100644 dev/config/examples/azure-debug.yml
 create mode 100644 dev/fixtures/azure.jsonl

diff --git a/dev/config/examples/azure-debug.yml b/dev/config/examples/azure-debug.yml
new file mode 100644
index 0000000..b422aa5
--- /dev/null
+++ b/dev/config/examples/azure-debug.yml
@@ -0,0 +1,46 @@
+commands:
+  - name: load extensions
+    sql: |
+      INSTALL azure;
+      LOAD azure;
+  - name: create adls secret
+    sql: |
+      CREATE SECRET adls_conn (
+          TYPE azure,
+          PROVIDER credential_chain,
+          CHAIN 'default;cli;env',
+          ACCOUNT_NAME '{{ SQLFLOW_AZURE_ACCOUNT_NAME }}'
+      );
+
+pipeline:
+  name: kafka-azure-duckdb-sink
+  description: "Writes Kafka stream into Azure Data Lake using DuckDB Azure extension"
+  batch_size: 50
+
+  source:
+    type: kafka
+    kafka:
+      brokers: [{{ SQLFLOW_KAFKA_BROKERS|default('localhost:9092') }}]
+      group_id: sql-flow-consumer-001
+      auto_offset_reset: earliest
+      topics:
+        - "input-azure-pipeline-dev"
+
+  handler:
+    type: 'handlers.InferredMemBatch'
+    sql: |
+      SELECT
+        *,
+        CAST(TO_TIMESTAMP(payload.source.ts_ms / 1000) AS DATE) AS SourceDatePartition
+      FROM batch;
+
+  sink:
+    type: sqlcommand
+    sqlcommand:
+      substitutions:
+        - var: $sqlflow_uuid
+          type: uuid4
+      sql: |
+        COPY sqlflow_sink_batch
+        TO 'abfss://somecontainer@someaccount.dfs.core.windows.net/path/$sqlflow_uuid.parquet'
+        (FORMAT 'parquet', PARTITION_BY (SourceDatePartition), OVERWRITE_OR_IGNORE 1);
\ No newline at end of file
diff --git a/dev/fixtures/azure.jsonl b/dev/fixtures/azure.jsonl
new file mode 100644
index 0000000..4bd3880
--- /dev/null
+++ b/dev/fixtures/azure.jsonl
@@ -0,0 +1 @@
+{"payload":{"source":{"ts_ms":1744569606922}}}
\ No newline at end of file