From 191de419686364fd1764678b31e2501ab722a8fe Mon Sep 17 00:00:00 2001 From: Anders Westrheim Date: Fri, 11 Apr 2025 10:22:34 +0200 Subject: [PATCH 1/3] rename secret scope --- src/dataworkbench/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dataworkbench/utils.py b/src/dataworkbench/utils.py index b4975ab..5670a7a 100644 --- a/src/dataworkbench/utils.py +++ b/src/dataworkbench/utils.py @@ -23,7 +23,7 @@ def is_databricks(): return os.getenv("DATABRICKS_RUNTIME_VERSION") is not None -def get_secret(key: str, scope: str = "secrets") -> str: +def get_secret(key: str, scope: str = "dwsecrets") -> str: """ Retrieve a secret from dbutils if running on Databricks, otherwise fallback to env variables. """ From ba57f0538241546e8e90d55f6c790e552d2f3aef Mon Sep 17 00:00:00 2001 From: Anders Westrheim Date: Fri, 11 Apr 2025 10:28:16 +0200 Subject: [PATCH 2/3] updated readme with schema example --- README.md | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index dc3cae4..7b5e9aa 100644 --- a/README.md +++ b/README.md @@ -51,28 +51,39 @@ import dataworkbench To use it on your local machine, it requires you to set a set of variables to connect to the Veracity Dataworkbench API. -### Basic Example + +## Examples + +### Saving a Spark DataFrame to the Data Catalogue ```python from dataworkbench import DataCatalogue df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["letter", "number"]) -datacatalogue = DataCatalogue() # Naming subject to change -datacatalogue.save(df, "Dataset Name", "Description", tags={"environment": ["test"]}) +datacatalogue = DataCatalogue() +datacatalogue.save( + df, + "Dataset Name", + "Description", + tags={"environment": ["test"]} +) # schema_id is optional - if not provided, schema will be inferred from the dataframe ``` - -## Examples - -### Saving a Spark DataFrame to the Data Catalogue - +#### Using an existing schema +When you have an existing schema that you want to reuse: ```python from dataworkbench import DataCatalogue df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["letter", "number"]) -datacatalogue = DataCatalogue() # Naming subject to change -datacatalogue.save(df, "Dataset Name", "Description", tags={"environment": ["test"]}) +datacatalogue = DataCatalogue() +datacatalogue.save( + df, + "Dataset Name", + "Description", + tags={"environment": ["test"]}, + schema_id="abada0f7-acb4-43cf-8f54-b51abd7ba8b1" # Using an existing schema ID +) ``` ## API Reference From 2a0e2685314b4c7f2b17b0d0cc721319ae7dde06 Mon Sep 17 00:00:00 2001 From: Anders Westrheim Date: Fri, 11 Apr 2025 10:34:08 +0200 Subject: [PATCH 3/3] fixed readme missing schema_id --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7b5e9aa..3269962 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ datacatalogue.save( ### DataCatalogue -- `save(df, name, description=None, tags=None)`: Save a Spark DataFrame to the Data Workbench Data Catalogue +- `save(df, name, description, schema_id=None, tags=None)`: Save a Spark DataFrame to the Data Workbench Data Catalogue ## License