Merged
16 changes: 8 additions & 8 deletions .github/workflows/ci.yml
@@ -41,6 +41,13 @@ jobs:
run: |
pytest --junitxml=test-results-${{ matrix.python-version }}.xml --cov=dataworkbench --cov-report=xml tests/

# - name: Get Coverage report
# uses: orgoro/coverage@v3.2
# with:
# coverageFile: coverage.xml
# token: ${{ secrets.GITHUB_TOKEN }}
# if: ${{ always() }}

# Step to upload the test results as an artifact
- name: Upload test results as artifact
uses: actions/upload-artifact@v4
@@ -49,13 +56,6 @@ jobs:
path: test-results-${{ matrix.python-version }}.xml
if: ${{ always() }}

# Step to upload the coverage report as an artifact
- name: Upload coverage report as artifact
uses: actions/upload-artifact@v4
with:
name: coverage-report-${{ matrix.python-version }}
path: coverage.xml
if: ${{ always() }}



@@ -84,7 +84,7 @@ jobs:
run: |
# Find the .whl file using a wildcard and rename it
WHL_FILE=$(find . -type f -name "*.whl" -print -quit)
mv "$WHL_FILE" dist/dataworkbench-latest-py3-none-any.whl
mv "$WHL_FILE" dist/dataworkbench-1.0-py3-none-any.whl

- name: Log package version number
if: github.event_name == 'push'
92 changes: 90 additions & 2 deletions README.md
@@ -10,12 +10,100 @@
| | |
| --- | --- |
| Testing | [![CI](https://github.com/veracity/DataWorkbench/actions/workflows/ci.yml/badge.svg)](https://github.com/veracity/DataWorkbench/actions/workflows/ci.yml) |


# DataWorkbench

## What is it?
Veracity DataWorkbench is a Python SDK designed to bridge your Python environment with Veracity DataWorkbench services. It simplifies access to data cataloging, lineage tracking, and APIs, supporting efficient data workflows across local and cloud environments such as Databricks.

## Table of Contents
- [Features](#features)
- [Installation](#installation)
- [How to use it](#how-to-use-it)
- [Configuration](#configuration)
- [Examples](#examples)
- [API Reference](#api-reference)
- [Contributing](#contributing)
- [License](#license)

## Features
- **DataCatalogue**: Register and manage datasets in the Veracity Data Workbench Data Catalogue.

## Installation
This package is pre-installed in Veracity-hosted Databricks environments (if analytics features are enabled).

To install the latest version locally:

```sh
pip install https://github.com/veracity/DataWorkbench/releases/latest/download/dataworkbench-1.0-py3-none-any.whl
```
Make sure you have the required credentials and environment variables set when running outside Databricks.


## How to use it
In Veracity-hosted Databricks, the SDK is ready to use:

```python
import dataworkbench
```

To use it on your local machine, you must set several environment variables so the SDK can connect to the Veracity DataWorkbench API (see [Configuration](#configuration)).

### Basic Example

```python
from dataworkbench import DataCatalogue

df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["letter", "number"])

datacatalogue = DataCatalogue() # Naming subject to change
datacatalogue.save(df, "Dataset Name", "Description", tags={"environment": ["test"]})
```

## Configuration

When using DataWorkbench locally, you need to configure the following environment variables:

```python
# Required for local machine setup
import os

os.environ["ApimClientId"] = "your-apim-client-id"
os.environ["ApimClientSecret"] = "your-apim-client-secret"
os.environ["ApimScope"] = "your-apim-scope"
```

Alternatively, create a `.env` file or use a configuration file:

```
# .env file example
ApimClientId=your-apim-client-id
ApimClientSecret=your-apim-client-secret
ApimScope=your-apim-scope
```
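If you go the `.env` route, something has to read that file into the process environment before the SDK is imported. A common choice is the `python-dotenv` package; the stdlib-only sketch below illustrates the same idea for this example's `ApimClientId` variable (the loader function here is hand-rolled for illustration, not part of the DataWorkbench SDK):

```python
import os
import tempfile
from pathlib import Path

def load_env_file(path: str) -> None:
    """Read KEY=value lines from a .env-style file and export them."""
    for line in Path(path).read_text().splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue  # skip blank lines and comments
        key, _, value = line.partition("=")
        # Don't clobber variables already set in the real environment
        os.environ.setdefault(key.strip(), value.strip())

# Demonstrate with a temporary file standing in for your real .env
with tempfile.NamedTemporaryFile("w", suffix=".env", delete=False) as f:
    f.write("ApimClientId=your-apim-client-id\n")
load_env_file(f.name)
```

In practice you would call `load_env_file(".env")` (or `dotenv.load_dotenv()`) once at the top of your script, before constructing `DataCatalogue`.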

## Examples

### Saving a Spark DataFrame to the Data Catalogue

```python
from dataworkbench import DataCatalogue

df = spark.createDataFrame([("a", 1), ("b", 2), ("c", 3)], ["letter", "number"])

datacatalogue = DataCatalogue() # Naming subject to change
datacatalogue.save(df, "Dataset Name", "Description", tags={"environment": ["test"]})
```

## API Reference

### DataCatalogue

- `save(df, name, description=None, tags=None)`: Save a Spark DataFrame to the Data Workbench Data Catalogue


## License

DataWorkbench is licensed under [WHICH LICENSE](LICENSE).
8 changes: 4 additions & 4 deletions src/dataworkbench/datacatalogue.py
@@ -39,7 +39,7 @@ def __init__(self) -> None:
self.gateway: Gateway = Gateway()
self.storage_base_url: str = get_secret("StorageBaseUrl")

def __build_storage_url(self, folder_id: str) -> str:
def __build_storage_url(self, folder_id: uuid.UUID) -> str:
"""
Build the ABFSS URL for the target storage location.

@@ -53,8 +53,8 @@ def __build_storage_url(self, folder_id: str) -> str:
>>> catalogue = DataCatalogue()
>>> catalogue._build_storage_url("abc123")
"""
if not isinstance(folder_id, str):
raise TypeError("folder_id must be a string")
if not isinstance(folder_id, uuid.UUID):
raise TypeError("folder_id must be a uuid.UUID")

if not folder_id:
raise ValueError("folder_id cannot be empty")
@@ -116,7 +116,7 @@ def save(
raise TypeError("tags must be a dictionary or None")

# Generate folder_id
folder_id = str(uuid.uuid4())
folder_id = uuid.uuid4()

target_path = self.__build_storage_url(folder_id)

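The change above switches folder IDs from `str` to `uuid.UUID` end to end: `save()` now keeps the raw `uuid.uuid4()` result and the private URL builder type-checks it. A standalone sketch of the new validation pattern (the base URL and function name here are placeholders, not the SDK's actual values):

```python
import uuid

STORAGE_BASE_URL = "abfss://container@account.dfs.core.windows.net"  # placeholder

def build_storage_url(folder_id: uuid.UUID) -> str:
    """Mirror of the PR's check: folder IDs are now uuid.UUID, not str."""
    if not isinstance(folder_id, uuid.UUID):
        raise TypeError("folder_id must be a uuid.UUID")
    return f"{STORAGE_BASE_URL}/{folder_id}"

folder_id = uuid.uuid4()  # generated the same way save() now does
url = build_storage_url(folder_id)
```

One side effect worth noting: the existing `if not folder_id` emptiness check becomes dead code after this change, since `uuid.UUID` instances are always truthy.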