From bea8104f84f18fa22a2a7d077c07f612f276db59 Mon Sep 17 00:00:00 2001
From: Anders Westrheim <anders.westrheim@dnv.com>
Date: Thu, 10 Apr 2025 14:46:30 +0200
Subject: [PATCH 1/4] added readme, changed version number of whl file

---
 .github/workflows/ci.yml |  2 +-
 README.md                | 92 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 91 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 72f7f6b..ca762c7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -84,7 +84,7 @@ jobs:
       run: |
         # Find the .whl file using a wildcard and rename it
         WHL_FILE=$(find . -type f -name "*.whl" -print -quit)
-        mv "$WHL_FILE" dist/dataworkbench-latest-py3-none-any.whl
+        mv "$WHL_FILE" dist/dataworkbench-1.0-py3-none-any.whl
 
     - name: Log package version number
       if: github.event_name == 'push'
diff --git a/README.md b/README.md
index 9506790..bf1d4d3 100644
--- a/README.md
+++ b/README.md
@@ -10,12 +10,100 @@
 | | |
 | --- | --- |
 | Testing | [![CI](https://github.com/veracity/DataWorkbench/actions/workflows/ci.yml/badge.svg)](https://github.com/veracity/DataWorkbench/actions/workflows/ci.yml) |
-|
+
+
+# DataWorkbench
 
 ## What is it?
+Veracity DataWorkbench is a Python SDK designed to bridge your Python environment with Veracity DataWorkbench services. It simplifies access to data cataloging, lineage tracking, and APIs — supporting efficient data workflows across local and cloud environments such as Databricks.
 
-## Table of Contents
 
+## Table of Contents
 - [Features](#features)
+- [Installation](#installation)
+- [How to use it](#how-to-use-it)
+- [Configuration](#configuration)
+- [Examples](#examples)
+- [API Reference](#api-reference)
+- [Contributing](#contributing)
+- [License](#license)
 
 ## Features
+- **DataCatalogue**: Register and manage datasets in the Veracity Data Workbench Data Catalogue.
+
+## Installation
+This package is pre-installed in Veracity-hosted Databricks environments (if analytics features are enabled).
+
+To install the latest version locally:
+
+```sh
+pip install https://github.com/veracity/DataWorkbench/releases/latest/download/dataworkbench-1.0-py3-none-any.whl
+```
+Make sure you have the required credentials and environment variables set when running outside Databricks.
+
+
+## How to use it
+In Veracity-hosted Databricks, the SDK is ready to use:
+
+```python
+import dataworkbench
+```
+
+To use it on your local machine, you must first set the environment variables required to connect to the Veracity Dataworkbench API (see [Configuration](#configuration)).
+
+### Basic Example
+
+```python
+from dataworkbench import DataCatalogue
+
+df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["letter", "number"])
+
+datacatalogue = DataCatalogue()  # Naming subject to change
+datacatalogue.save(df, "Dataset Name", "Description", tags={"environment": ["test"]})
+```
+
+## Configuration
+
+When using Dataworkbench locally, you need to configure the following environment variables:
+
+```python
+# Required for local machine setup
+import os
+
+os.environ["ApimClientId"] = "your-apim-client-id"
+os.environ["ApimClientSecret"] = "your-apim-client-secret"
+os.environ["ApimScope"] = "your-apim-scope"
+```
+
+Alternatively, create a `.env` file or use a configuration file:
+
+```
+# .env file example
+ApimClientId=your-apim-client-id
+ApimClientSecret=your-apim-client-secret
+ApimScope=your-apim-scope
+```
+
+## Examples
+
+### Saving a Spark DataFrame to the Data Catalogue
+
+```python
+from dataworkbench import DataCatalogue
+
+df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["letter", "number"])
+
+datacatalogue = DataCatalogue()  # Naming subject to change
+datacatalogue.save(df, "Dataset Name", "Description", tags={"environment": ["test"]})
+```
+
+## API Reference
+
+### DataCatalogue
+
+- `save(df, name, description=None, tags=None)`: Save a Spark DataFrame to the Data Workbench Data Catalogue
+
+
+## License
+
+See the [LICENSE](LICENSE) file for the license terms that apply to Dataworkbench.

From a71fd05ba06601944c258d78dac5532c0702232e Mon Sep 17 00:00:00 2001
From: Anders Westrheim <anders.westrheim@dnv.com>
Date: Thu, 10 Apr 2025 14:55:11 +0200
Subject: [PATCH 2/4] testing coverage report

---
 .github/workflows/ci.yml | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ca762c7..b1c95d7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -41,6 +41,13 @@ jobs:
       run: |
         pytest --junitxml=test-results-${{ matrix.python-version }}.xml --cov=dataworkbench --cov-report=xml tests/
 
+    - name: Get Coverage report
+      uses: orgoro/coverage@v3.2
+      with:
+        coverageFile: coverage.xml
+        token: ${{ secrets.GITHUB_TOKEN }}
+      if: ${{ always() }}
+
     # Step to upload the test results as an artifact
     - name: Upload test results as artifact
       uses: actions/upload-artifact@v4
@@ -49,13 +56,6 @@ jobs:
         path: test-results-${{ matrix.python-version }}.xml
       if: ${{ always() }}
 
-    # Step to upload the coverage report as an artifact
-    - name: Upload coverage report as artifact
-      uses: actions/upload-artifact@v4
-      with:
-        name: coverage-report-${{ matrix.python-version }}
-        path: coverage.xml
-      if: ${{ always() }}
 
 
 

From b4751d943a050506ae1868c481993190ae59a51c Mon Sep 17 00:00:00 2001
From: Anders Westrheim <anders.westrheim@dnv.com>
Date: Thu, 10 Apr 2025 15:01:32 +0200
Subject: [PATCH 3/4] comment out the coverage report for now

---
 .github/workflows/ci.yml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index b1c95d7..e436cfb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -41,12 +41,12 @@ jobs:
       run: |
         pytest --junitxml=test-results-${{ matrix.python-version }}.xml --cov=dataworkbench --cov-report=xml tests/
 
-    - name: Get Coverage report
-      uses: orgoro/coverage@v3.2
-      with:
-        coverageFile: coverage.xml
-        token: ${{ secrets.GITHUB_TOKEN }}
-      if: ${{ always() }}
+    # - name: Get Coverage report
+    #   uses: orgoro/coverage@v3.2
+    #   with:
+    #     coverageFile: coverage.xml
+    #     token: ${{ secrets.GITHUB_TOKEN }}
+    #   if: ${{ always() }}
 
     # Step to upload the test results as an artifact
     - name: Upload test results as artifact

From b05c88299560374fc0c8795e337fb1d9a90f8b98 Mon Sep 17 00:00:00 2001
From: Anders Westrheim <anders.westrheim@dnv.com>
Date: Thu, 10 Apr 2025 15:53:24 +0200
Subject: [PATCH 4/4] Fix typing

---
 src/dataworkbench/datacatalogue.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/dataworkbench/datacatalogue.py b/src/dataworkbench/datacatalogue.py
index 5cb729f..befcc6a 100644
--- a/src/dataworkbench/datacatalogue.py
+++ b/src/dataworkbench/datacatalogue.py
@@ -39,7 +39,7 @@ def __init__(self) -> None:
         self.gateway: Gateway = Gateway()
         self.storage_base_url: str = get_secret("StorageBaseUrl")
 
-    def __build_storage_url(self, folder_id: str) -> str:
+    def __build_storage_url(self, folder_id: uuid.UUID) -> str:
         """
         Build the ABFSS URL for the target storage location.
 
@@ -53,8 +53,8 @@ def __build_storage_url(self, folder_id: str) -> str:
             >>> catalogue = DataCatalogue()
             >>> catalogue._build_storage_url("abc123")
         """
-        if not isinstance(folder_id, str):
-            raise TypeError("folder_id must be a string")
+        if not isinstance(folder_id, uuid.UUID):
+            raise TypeError("folder_id must be uuid")
 
         if not folder_id:
             raise ValueError("folder_id cannot be empty")
@@ -116,7 +116,7 @@ def save(
             raise TypeError("tags must be a dictionary or None")
 
         # Generate folder_id
-        folder_id = str(uuid.uuid4())
+        folder_id = uuid.uuid4()
 
         target_path = self.__build_storage_url(folder_id)