From cf24732787a78aed4702f13959ee48e53fe7b678 Mon Sep 17 00:00:00 2001
From: royendo <67675319+royendo@users.noreply.github.com>
Date: Tue, 16 Dec 2025 20:09:01 -0500
Subject: [PATCH 1/4] update to explicit cloud object

---
 .../build/connectors/data-source/azure.md     | 311 +++++++++++++++---
 docs/docs/build/connectors/data-source/gcs.md | 108 +++---
 docs/docs/build/connectors/data-source/s3.md  | 250 +++++++++++---
 3 files changed, 550 insertions(+), 119 deletions(-)

diff --git a/docs/docs/build/connectors/data-source/azure.md b/docs/docs/build/connectors/data-source/azure.md
index a77163256b8..66699683622 100644
--- a/docs/docs/build/connectors/data-source/azure.md
+++ b/docs/docs/build/connectors/data-source/azure.md
@@ -14,90 +14,327 @@ sidebar_position: 05
 azure://<account>.blob.core.windows.net/<container>/path/file.csv
 ```
 
-## Connect to Azure Blob Storage
+## Authentication Methods
 
-To connect to Azure Blob Storage, you need to provide authentication credentials. You have four options:
+To connect to Azure Blob Storage, you can choose from five authentication options:
 
-1. **Use Storage Account Key** (recommended for cloud deployment)
-2. **Use Connection String** (alternative for cloud deployment)
-3. **Use Shared Access Signature (SAS) Token** (most secure, fine-grained control)
-4. **Use Azure CLI authentication** (local development only - not recommended for production)
+1. **Storage Account Key** (recommended for cloud deployment)
+2. **Connection String** (alternative for cloud deployment)
+3. **Shared Access Signature (SAS) Token** (most secure, fine-grained control)
+4. **Public** (for publicly accessible containers - no authentication required)
+5. **Azure CLI authentication** (local development only - not recommended for production)
 
-Choose the method that best fits your setup. For production deployments to Rill Cloud, use Storage Account Key, Connection String, or SAS tokens. Azure CLI authentication only works for local development and will cause deployment failures.
+:::tip Authentication Methods
+Choose the method that best fits your setup. For production deployments to Rill Cloud, use Storage Account Key, Connection String, or SAS tokens. Public containers don't require authentication and skip connector creation. Azure CLI authentication only works for local development and will cause deployment failures.
+:::
+
+## Using the Add Data UI
+
+When you add an Azure Blob Storage data model through the Rill UI, you'll see four authentication options:
+
+- **Storage Account Key**, **Connection String**, or **SAS Token**: The process follows two steps:
+  1. **Configure Authentication** - Set up your Azure connector with credentials
+  2. **Configure Data Model** - Define which container and objects to ingest
+  The UI will automatically create both the connector file and model file for you.
+
+- **Public**: For publicly accessible containers, you skip the connector creation step and go directly to:
+  1. **Configure Data Model** - Define which container and objects to ingest
+  The UI will only create the model file (no connector file is needed).
+
+:::note Manual Configuration Only
+Azure CLI authentication is only available through manual configuration. See [Method 5: Azure CLI Authentication](#method-5-azure-cli-authentication-local-development-only) for setup instructions.
+:::
+
+---
+
+## Method 1: Storage Account Key (Recommended)
+
+Storage Account Key credentials provide reliable authentication for Azure Blob Storage. This method works for both local development and Rill Cloud deployments.
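+
+If you manage your storage account with the Azure CLI, one quick way to look up your keys is a command along these lines (a sketch: `az storage account keys list` is the relevant CLI command, but the resource group and account names below are placeholders):
+
+```bash
+az storage account keys list --resource-group my-resource-group --account-name rilltest
+```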
+ +### Using the UI + +1. Click **Add Data** in your Rill project +2. Select **Azure Blob Storage** as the data model type +3. In the authentication step: + - Choose **Storage Account Key** + - Enter your Storage Account name + - Enter your Storage Account Key + - Name your connector (e.g., `my_azure`) +4. In the data model configuration step: + - Enter your container name and object path + - Configure other model settings as needed +5. Click **Create** to finalize + +The UI will automatically create both the connector file and model file for you. -### Storage Account Key +### Manual Configuration -To ensure seamless deployment to Rill Cloud, configure your Azure Storage Account Key directly in your project's `.env` file instead of relying solely on Azure CLI authentication (which only works locally). +If you prefer to configure manually, create two files: + +**Step 1: Create connector configuration** + +Create `connectors/my_azure.yaml`: ```yaml type: connector - driver: azure azure_storage_account: rilltest azure_storage_key: "{{ .env.connector.azure.azure_storage_key }}" ``` -This approach ensures your Azure Blob Storage sources authenticate consistently across both local development and cloud deployment. Follow the [Azure Documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal) to retrieve your storage account keys. +**Step 2: Create model configuration** + +Create `models/my_azure_data.yaml`: + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('azure://rilltest.blob.core.windows.net/my-container/path/to/data/*.parquet') + +refresh: + cron: "0 */6 * * *" +``` + +**Step 3: Add credentials to `.env`** + +```bash +connector.azure.azure_storage_key=your_storage_account_key +``` + +Follow the [Azure Documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal) to retrieve your storage account keys. + +--- + +## Method 2: Connection String + +Connection String provides an alternative authentication method for Azure Blob Storage. + +### Using the UI + +1. Click **Add Data** in your Rill project +2. Select **Azure Blob Storage** as the data model type +3. In the authentication step: + - Choose **Connection String** + - Enter your Connection String + - Name your connector (e.g., `my_azure_conn`) +4. In the data model configuration step: + - Enter your container name and object path + - Configure other model settings as needed +5. Click **Create** to finalize -### Connection String +### Manual Configuration -To ensure seamless deployment to Rill Cloud, configure your Azure Blob Storage credentials using a connection string directly in your project's `.env` file instead of relying solely on Azure CLI authentication (which only works locally). +**Step 1: Create connector configuration** + +Create `connectors/my_azure_conn.yaml`: ```yaml type: connector - driver: azure azure_storage_connection_string: "{{ .env.connector.azure.azure_storage_connection_string }}" ``` -This approach ensures your Azure Blob Storage sources authenticate consistently across both local development and cloud deployment. Follow the [Azure Documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal) to retrieve your connection string. 
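+
+For reference, an Azure connection string generally takes this shape (a sketch with placeholder values rather than real credentials):
+
+```text
+DefaultEndpointsProtocol=https;AccountName=<account>;AccountKey=<key>;EndpointSuffix=core.windows.net
+```
+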
+**Step 2: Create model configuration** + +Create `models/my_azure_data.yaml`: + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('azure://rilltest.blob.core.windows.net/my-container/path/to/data/*.parquet') + +refresh: + cron: "0 */6 * * *" +``` + +**Step 3: Add credentials to `.env`** + +```bash +connector.azure.azure_storage_connection_string=your_connection_string +``` + +Follow the [Azure Documentation](https://learn.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal) to retrieve your connection string. + +--- + +## Method 3: Shared Access Signature (SAS) Token + +SAS tokens provide fine-grained access control with specific permissions and expiration times for secure access to your storage resources. + +### Using the UI + +1. Click **Add Data** in your Rill project +2. Select **Azure Blob Storage** as the data model type +3. In the authentication step: + - Choose **SAS Token** + - Enter your Storage Account name + - Enter your SAS Token + - Name your connector (e.g., `my_azure_sas`) +4. In the data model configuration step: + - Enter your container name and object path + - Configure other model settings as needed +5. Click **Create** to finalize + +### Manual Configuration -### Shared Access Signature (SAS) Token +**Step 1: Create connector configuration** -Use Shared Access Signature (SAS) tokens as an alternative authentication method for Azure Blob Storage. SAS tokens provide fine-grained access control with specific permissions and expiration times for secure access to your storage resources. +Create `connectors/my_azure_sas.yaml`: ```yaml type: connector - driver: azure azure_storage_account: rilltest azure_storage_sas_token: "{{ .env.connector.azure.azure_storage_sas_token }}" ``` -This method provides fine-grained access control and enhanced security for your Azure Blob Storage connections. Follow the [Azure Documentation](https://learn.microsoft.com/en-us/azure/ai-services/translator/document-translation/how-to-guides/create-sas-tokens?tabs=Containers) to create your Azure SAS token. +**Step 2: Create model configuration** -### Azure CLI Authentication (Local Development Only) +Create `models/my_azure_data.yaml`: -:::warning Not recommended for production -Azure CLI authentication only works for local development. If you deploy to Rill Cloud using this method, your dashboards will fail. Use one of the methods above for production deployments. -::: +```yaml +type: model +connector: duckdb -1. Install the [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) if not already installed. -2. Open a terminal window and run the following command to log in to your Azure account: `az login` -3. Verify your authentication status: `az account show` +sql: SELECT * FROM read_parquet('azure://rilltest.blob.core.windows.net/my-container/path/to/data/*.parquet') -You've now configured Azure access from your local environment. Rill will automatically detect and use these credentials when you connect to Azure Blob Storage sources. +refresh: + cron: "0 */6 * * *" +``` -:::tip Cloud Credentials Management +**Step 3: Add credentials to `.env`** -If your project is already deployed to Rill Cloud with configured credentials, use `rill env pull` to [retrieve and sync these cloud credentials](/build/connectors/credentials/#rill-env-pull) to your local `.env` file. **Warning**: This operation will overwrite any existing local credentials for this source. 
+```bash +connector.azure.azure_storage_sas_token=your_sas_token +``` -::: +Follow the [Azure Documentation](https://learn.microsoft.com/en-us/azure/ai-services/translator/document-translation/how-to-guides/create-sas-tokens?tabs=Containers) to create your Azure SAS token. -## Deploy to Rill Cloud +--- -When deploying your project to Rill Cloud, you must provide either an Azure Blob Storage connection string, Azure Storage Key, or Azure Storage SAS token for the containers used in your project. If these credentials exist in your `.env` file, they'll be pushed with your project automatically. If you're using inferred credentials only, your deployment will result in errored dashboards. +## Method 4: Public Containers -To manually configure your environment variables, run: -```bash -rill env configure +For publicly accessible Azure Blob Storage containers, you don't need to create a connector. Simply use the Azure URI directly in your model configuration. + +### Using the UI + +1. Click **Add Data** in your Rill project +2. Select **Azure Blob Storage** as the data model type +3. In the authentication step: + - Choose **Public** + - The UI will skip connector creation and proceed directly to data model configuration +4. In the data model configuration step: + - Enter your container name and object path + - Configure other model settings as needed +5. Click **Create** to finalize + +The UI will only create the model file (no connector file is created). + +### Manual Configuration + +For public containers, you only need to create a model file. No connector configuration is required. + +Create `models/my_azure_data.yaml`: + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('azure://publicaccount.blob.core.windows.net/my-public-container/path/to/data/*.parquet') + +refresh: + cron: "0 */6 * * *" ``` -:::tip Did you know? +--- + +## Method 5: Azure CLI Authentication (Local Development Only) + +For local development, you can use credentials from the Azure CLI. This method is **not suitable for production** or Rill Cloud deployments. This method is only available through manual configuration, and you don't need to create a connector file. + +### Setup + +1. Install the [Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli) if not already installed +2. Authenticate with your Azure account: + ```bash + az login + ``` +3. Verify your authentication status: + ```bash + az account show + ``` +4. Create your model file (no connector needed) + +### Model Configuration + +Create `models/my_azure_data.yaml`: + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('azure://rilltest.blob.core.windows.net/my-container/path/to/data/*.parquet') + +refresh: + cron: "0 */6 * * *" +``` -If you've already configured credentials locally (in your `/.env` file), use `rill env push` to [push these credentials](/build/connectors/credentials#rill-env-push) to your Rill Cloud project. This allows other users to retrieve and reuse the same credentials automatically by running `rill env pull`. +Rill will automatically detect and use your local Azure CLI credentials when no connector is specified. +:::warning +This method only works for local development. Deploying to Rill Cloud with this configuration will fail because the cloud environment doesn't have access to your local credentials. Always use Storage Account Key, Connection String, or SAS tokens for production deployments. 
:::
+
+## Using Azure Blob Storage Data in Models
+
+Once your connector is configured (or for public containers, no connector needed), you can reference Azure Blob Storage paths in your model SQL queries using DuckDB's Azure functions.
+
+### Basic Example
+
+**With a connector (authenticated):**
+
+```yaml
+type: model
+connector: duckdb
+
+sql: SELECT * FROM read_parquet('azure://rilltest.blob.core.windows.net/my-container/data/*.parquet')
+
+refresh:
+  cron: "0 */6 * * *"
+```
+
+**Public container (no connector needed):**
+
+```yaml
+type: model
+connector: duckdb
+
+sql: SELECT * FROM read_parquet('azure://publicaccount.blob.core.windows.net/my-public-container/data/*.parquet')
+
+refresh:
+  cron: "0 */6 * * *"
+```
+
+### Path Patterns
+
+You can use wildcards to read multiple files:
+
+```sql
+-- Single file
+SELECT * FROM read_parquet('azure://account.blob.core.windows.net/container/data/file.parquet')
+
+-- All files in a directory
+SELECT * FROM read_parquet('azure://account.blob.core.windows.net/container/data/*.parquet')
+
+-- All files in nested directories
+SELECT * FROM read_parquet('azure://account.blob.core.windows.net/container/data/**/*.parquet')
+
+-- Files matching a pattern
+SELECT * FROM read_parquet('azure://account.blob.core.windows.net/container/data/2024-*.parquet')
+```
+
diff --git a/docs/docs/build/connectors/data-source/gcs.md b/docs/docs/build/connectors/data-source/gcs.md
index 85acc655b1e..3dbc693bced 100644
--- a/docs/docs/build/connectors/data-source/gcs.md
+++ b/docs/docs/build/connectors/data-source/gcs.md
@@ -13,24 +13,28 @@ sidebar_position: 15
 
 ## Authentication Methods
 
-To connect to Google Cloud Storage, you need to provide authentication credentials (or skip for public buckets). Rill supports three methods:
+To connect to Google Cloud Storage, you can choose from four authentication options:
 
-1. **Use Service Account JSON** (recommended for production)
-2. **Use HMAC Keys** (alternative authentication method)
-3. **Use Local Google Cloud CLI credentials** (local development only - not recommended for production)
+1. **Service Account JSON** (recommended for production)
+2. **HMAC Keys** (alternative authentication method)
+3. **Public** (for publicly accessible buckets - no authentication required)
+4. **Local Google Cloud CLI credentials** (local development only - not recommended for production)
 
 :::tip Authentication Methods
-Choose the method that best fits your setup. For production deployments to Rill Cloud, use Service Account JSON or HMAC Keys. Local Google Cloud CLI credentials only work for local development and will cause deployment failures.
+Choose the method that best fits your setup. For production deployments to Rill Cloud, use Service Account JSON or HMAC Keys. Public buckets don't require authentication and skip connector creation. Local Google Cloud CLI credentials only work for local development and will cause deployment failures.
 :::
 
 ## Using the Add Data UI
 
-When you add a GCS data model through the Rill UI, the process follows two steps:
+When you add a GCS data model through the Rill UI, you'll see three authentication options:
 
-1. **Configure Authentication** - Set up your GCS connector with credentials (Service Account JSON or HMAC keys)
-2. **Configure Data Model** - Define which bucket and objects to ingest
+- **Service Account JSON** or **HMAC Keys**: The process follows two steps:
+  1. **Configure Authentication** - Set up your GCS connector with credentials
+  2. **Configure Data Model** - Define which bucket and objects to ingest
+  The UI will automatically create both the connector file and model file for you.
-This two-step flow ensures your credentials are securely stored in the connector configuration, while your data model references remain clean and portable. +- **Public**: For publicly accessible buckets, you skip the connector creation step and go directly to: + 1. **Configure Data Model** - Define which bucket and objects to ingest + The UI will only create the model file (no connector file is needed). --- @@ -151,9 +155,46 @@ Notice that the connector uses `key_id` and `secret`. HMAC keys use S3-compatibl --- -## Method 3: Local Google Cloud CLI Credentials +## Method 3: Public Buckets -For local development, you can use credentials from the Google Cloud CLI. This method is **not suitable for production** or Rill Cloud deployments. +For publicly accessible GCS buckets, you don't need to create a connector. Simply use the GCS URI directly in your model configuration. + +### Using the UI + +1. Click **Add Data** in your Rill project +2. Select **Google Cloud Storage (GCS)** as the data model type +3. In the authentication step: + - Choose **Public** + - The UI will skip connector creation and proceed directly to data model configuration +4. In the data model configuration step: + - Enter your bucket name and object path + - Configure other model settings as needed +5. Click **Create** to finalize + +The UI will only create the model file (no connector file is created). + +### Manual Configuration + +For public buckets, you only need to create a model file. No connector configuration is required. + +Create `models/my_gcs_data.yaml`: + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('gs://my-public-bucket/path/to/data/*.parquet') + +# Add a refresh schedule +refresh: + cron: "0 */6 * * *" +``` + +--- + +## Method 4: Local Google Cloud CLI Credentials + +For local development, you can use credentials from the Google Cloud CLI. This method is **not suitable for production** or Rill Cloud deployments. This method is only available through manual configuration, and you don't need to create a connector file. ### Setup @@ -162,16 +203,7 @@ For local development, you can use credentials from the Google Cloud CLI. This m ```bash gcloud auth application-default login ``` -3. Create your connector and model files - -### Connector Configuration - -Create `connectors/my_gcs.yaml`: - -```yaml -type: connector -driver: gcs -``` +3. Create your model file (no connector needed) ### Model Configuration @@ -188,7 +220,7 @@ refresh: cron: "0 */6 * * *" ``` -When no explicit credentials are provided in the connector, Rill will automatically use your local Google Cloud CLI credentials. +Rill will automatically detect and use your local Google Cloud CLI credentials when no connector is specified. :::warning This method only works for local development. Deploying to Rill Cloud with this configuration will fail because the cloud environment doesn't have access to your local credentials. Always use Service Account JSON or HMAC keys for production deployments. @@ -198,10 +230,12 @@ This method only works for local development. Deploying to Rill Cloud with this ## Using GCS Data in Models -Once your connector is configured, you can reference GCS paths in your model SQL queries using DuckDB's GCS functions. +Once your connector is configured (or for public buckets, no connector needed), you can reference GCS paths in your model SQL queries using DuckDB's GCS functions. 
### Basic Example +**With a connector (authenticated):** + ```yaml type: model connector: duckdb @@ -212,6 +246,18 @@ refresh: cron: "0 */6 * * *" ``` +**Public bucket (no connector needed):** + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('gs://my-public-bucket/data/*.parquet') + +refresh: + cron: "0 */6 * * *" +``` + ### Reading Multiple File Types ```yaml @@ -249,22 +295,6 @@ SELECT * FROM read_parquet('gs://my-bucket/data/**/*.parquet') SELECT * FROM read_parquet('gs://my-bucket/data/2024-*.parquet') ``` ---- - -## Deploying to Rill Cloud - -When deploying your project to Rill Cloud, you must use either Service Account JSON or HMAC Keys. Local Google Cloud CLI credentials will not work in the cloud environment. - -To manually configure your environment variables, run: - -```bash -rill env configure -``` - -The CLI will interactively walk you through configuring all required credentials for your connectors. - ---- - ## Appendix ### How to create a service account using the Google Cloud Console diff --git a/docs/docs/build/connectors/data-source/s3.md b/docs/docs/build/connectors/data-source/s3.md index 1c490ca6fef..c23754fb00c 100644 --- a/docs/docs/build/connectors/data-source/s3.md +++ b/docs/docs/build/connectors/data-source/s3.md @@ -10,25 +10,67 @@ sidebar_position: 60 ## Overview [Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html) is a scalable, fully managed, and highly reliable object storage solution offered by AWS, designed to store and access data from anywhere in the world. It provides a secure and cost-effective way to store data, including common storage formats such as CSV and Parquet. Rill natively supports connecting to S3 using the provided [S3 URI](https://repost.aws/questions/QUFXlwQxxJQQyg9PMn2b6nTg/what-is-s3-uri-in-simple-storage-service) of your bucket to retrieve and read files. -## Connect to S3 +## Authentication Methods -To connect to Amazon S3, you need to provide authentication credentials. You have four options: +To connect to Amazon S3, you can choose from four authentication options: + +1. **Access Key/Secret Key** (recommended for cloud deployment) +2. **IAM Role Assumption** (enhanced security with temporary credentials) +3. **Public** (for publicly accessible buckets - no authentication required) +4. **Local AWS credentials** (local development only - not recommended for production) -1. **Use Access Key/Secret Key** (recommended for cloud deployment) -2. **Use IAM Role Assumption** (enhanced security with temporary credentials) -3. **Use Anonymous Access** (for publicly accessible buckets only) -4. **Use Local AWS credentials** (local development only - not recommended for production) -Choose the method that best fits your setup. For production deployments to Rill Cloud, use Access Key/Secret Key or IAM Role Assumption. Local AWS credentials only work for local development and will cause deployment failures. :::info S3-Compatible Storage You can also connect to S3-compatible storage services by specifying a custom endpoint in your connector configuration. +::: + +## Using the Add Data UI + +When you add an S3 data model through the Rill UI, you'll see two authentication options: + +- **Access Key/Secret Key**: The process follows two steps: + 1. **Configure Authentication** - Set up your S3 connector with credentials + 2. **Configure Data Model** - Define which bucket and objects to ingest + The UI will automatically create both the connector file and model file for you. 
+ +- **Public**: For publicly accessible buckets, you skip the connector creation step and go directly to: + 1. **Configure Data Model** - Define which bucket and objects to ingest + The UI will only create the model file (no connector file is needed). + +:::note Manual Configuration Only +IAM Role Assumption and Local AWS credentials are only available through manual configuration. See [Method 2: IAM Role Assumption](#method-2-iam-role-assumption) and [Method 4: Local AWS Credentials](#method-4-local-aws-credentials-local-development-only) for setup instructions. ::: -### Access Key and Secret Key +--- + +## Method 1: Access Key/Secret Key (Recommended) + +Access Key/Secret Key credentials provide reliable authentication for S3. This method works for both local development and Rill Cloud deployments. + +### Using the UI -Create a connector with your credentials to connect to S3. Here's an example connector configuration file you can copy into your `connectors` directory to get started: +1. Click **Add Data** in your Rill project +2. Select **Amazon S3** as the data model type +3. In the authentication step: + - Choose **Access Key/Secret Key** + - Enter your Access Key ID + - Enter your Secret Access Key + - Name your connector (e.g., `my_s3`) +4. In the data model configuration step: + - Enter your bucket name and object path + - Configure other model settings as needed +5. Click **Create** to finalize +The UI will automatically create both the connector file and model file for you. + +### Manual Configuration + +If you prefer to configure manually, create two files: + +**Step 1: Create connector configuration** + +Create `connectors/my_s3.yaml`: ```yaml type: connector @@ -38,18 +80,37 @@ aws_access_key_id: "{{ .env.connector.s3.aws_access_key_id }}" aws_secret_access_key: "{{ .env.connector.s3.aws_secret_access_key }}" ``` -This approach ensures your AWS sources authenticate consistently across both local development and cloud deployment environments. +**Step 2: Create model configuration** -:::tip Did you know? +Create `models/my_s3_data.yaml`: -If this project has already been deployed to Rill Cloud and credentials have been set for this connector, you can use `rill env pull` to [pull these cloud credentials](/build/connectors/credentials/#rill-env-pull) locally (into your local `.env` file). Please note that this may override any credentials that you have set locally for this source. +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('s3://my-bucket/path/to/data/*.parquet') + +# Add a refresh schedule +refresh: + cron: "0 */6 * * *" +``` + +**Step 3: Add credentials to `.env`** +```bash +connector.s3.aws_access_key_id=your_access_key_id +connector.s3.aws_secret_access_key=your_secret_access_key +``` + +:::tip Did you know? +If this project has already been deployed to Rill Cloud and credentials have been set for this connector, you can use `rill env pull` to [pull these cloud credentials](/build/connectors/credentials/#rill-env-pull) locally (into your local `.env` file). Please note that this may override any credentials that you have set locally for this source. ::: +--- -### IAM Role-Based Authentication +## Method 2: IAM Role Assumption -Rill supports AWS IAM role assumption for enhanced security. This method allows Rill to temporarily assume an IAM role to access S3 resources. +Rill supports AWS IAM role assumption for enhanced security. This method allows Rill to temporarily assume an IAM role to access S3 resources. 
This method is only available through manual configuration. #### Benefits of Using IAM Roles @@ -58,59 +119,162 @@ Rill supports AWS IAM role assumption for enhanced security. This method allows - **Cross-Account Access**: Access S3 resources in different AWS accounts. - **Centralized Control**: Manage permissions through IAM roles and policies. -### Anonymous Access +### Manual Configuration -For publicly accessible S3 buckets, you can connect without authentication: +**Step 1: Create connector configuration** + +Create `connectors/my_s3_role.yaml`: ```yaml type: connector driver: s3 -# No authentication credentials needed for public buckets + +aws_role_arn: "{{ .env.connector.s3.aws_role_arn }}" +aws_external_id: "{{ .env.connector.s3.aws_external_id }}" +``` + +**Step 2: Create model configuration** + +Create `models/my_s3_data.yaml`: + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('s3://my-bucket/path/to/data/*.parquet') + +refresh: + cron: "0 */6 * * *" +``` + +**Step 3: Add credentials to `.env`** + +```bash +connector.s3.aws_role_arn=arn:aws:iam::123456789012:role/RillDataAccess +connector.s3.aws_external_id=your_external_id +``` + +--- + +## Method 3: Public Buckets + +For publicly accessible S3 buckets, you don't need to create a connector. Simply use the S3 URI directly in your model configuration. + +### Using the UI + +1. Click **Add Data** in your Rill project +2. Select **Amazon S3** as the data model type +3. In the authentication step: + - Choose **Public** + - The UI will skip connector creation and proceed directly to data model configuration +4. In the data model configuration step: + - Enter your bucket name and object path + - Configure other model settings as needed +5. Click **Create** to finalize + +The UI will only create the model file (no connector file is created). + +### Manual Configuration + +For public buckets, you only need to create a model file. No connector configuration is required. + +Create `models/my_s3_data.yaml`: + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('s3://my-public-bucket/path/to/data/*.parquet') + +refresh: + cron: "0 */6 * * *" ``` :::warning Public Access Only This method only works with publicly accessible buckets. Most production S3 buckets are private and require authentication. ::: -### Local AWS Credentials (Local Development Only) +--- + +## Method 4: Local AWS Credentials (Local Development Only) -:::warning Not recommended for production -Local AWS credentials only work for local development. If you deploy to Rill Cloud using this method, your dashboards will fail. Use one of the methods above for production deployments. -::: +For local development, you can use credentials from the AWS CLI. This method is **not suitable for production** or Rill Cloud deployments. This method is only available through manual configuration, and you don't need to create a connector file. -To check if you already have the AWS CLI installed and authenticated, open a terminal window and run: -```bash -aws iam get-user --no-cli-pager +### Setup + +1. Install the [AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) if not already installed +2. 
Authenticate with your AWS account: + - If your organization has SSO configured, reach out to your admin for instructions on how to authenticate using `aws sso login` + - If your organization does not have SSO configured, follow the steps described under [How to create an AWS service account using the AWS Management Console](#how-to-create-an-aws-service-account-using-the-aws-management-console), then run `aws configure` +3. Create your model file (no connector needed) + +### Model Configuration + +Create `models/my_s3_data.yaml`: + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('s3://my-bucket/path/to/data/*.parquet') + +refresh: + cron: "0 */6 * * *" ``` -:::note -The above command only works with AWS CLI version 2 and above. + +Rill will automatically detect and use your local AWS CLI credentials when no connector is specified. + +:::warning +This method only works for local development. Deploying to Rill Cloud with this configuration will fail because the cloud environment doesn't have access to your local credentials. Always use Access Key/Secret Key or IAM Role Assumption for production deployments. ::: -If it prints information about your user, there is nothing more to do. Rill will be able to connect to any data in S3 that you have access to. -If you do not have the AWS CLI installed and authenticated, follow these steps: +## Using S3 Data in Models -1. Open a terminal window and [install the AWS CLI](https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html) if it is not already installed on your system. -2. If your organization has SSO configured, reach out to your admin for instructions on how to authenticate using `aws sso login`. -3. If your organization does not have SSO configured: - - a. Follow the steps described under [How to create an AWS service account using the AWS Management Console](#how-to-create-an-aws-service-account-using-the-aws-management-console), which you will find below on this page. +Once your connector is configured (or for public buckets, no connector needed), you can reference S3 paths in your model SQL queries using DuckDB's S3 functions. - b. Run the following command and provide the access key, access secret, and default region when prompted (you can leave the "Default output format" blank): - ``` - aws configure - ``` +### Basic Example -You have now configured AWS access from your local environment. Rill will detect and use your credentials the next time you try to ingest a source. +**With a connector (authenticated):** +```yaml +type: model +connector: duckdb -## Deploy to Rill Cloud +sql: SELECT * FROM read_parquet('s3://my-bucket/data/*.parquet') -When deploying your project to Rill Cloud, you must provide an access key and secret key for an AWS service account with appropriate read access/permissions to the S3 buckets used in your project. If these credentials exist in your `.env` file, they'll be pushed with your project automatically. If you're using inferred credentials only, your deployment will result in errored dashboards. 
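+# Add a refresh schedule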
+refresh: + cron: "0 */6 * * *" +``` -To manually configure your environment variables, run: -```bash -rill env configure +**Public bucket (no connector needed):** + +```yaml +type: model +connector: duckdb + +sql: SELECT * FROM read_parquet('s3://my-public-bucket/data/*.parquet') + +refresh: + cron: "0 */6 * * *" +``` + +### Path Patterns + +You can use wildcards to read multiple files: + +```sql +-- Single file +SELECT * FROM read_parquet('s3://my-bucket/data/file.parquet') + +-- All files in a directory +SELECT * FROM read_parquet('s3://my-bucket/data/*.parquet') + +-- All files in nested directories +SELECT * FROM read_parquet('s3://my-bucket/data/**/*.parquet') + +-- Files matching a pattern +SELECT * FROM read_parquet('s3://my-bucket/data/2024-*.parquet') ``` ## Appendix From 6374dbfb93aef8404516e0eeac0aa7a5a007f3fe Mon Sep 17 00:00:00 2001 From: royendo <67675319+royendo@users.noreply.github.com> Date: Tue, 16 Dec 2025 20:13:55 -0500 Subject: [PATCH 2/4] links --- docs/blog/0.64.md | 2 +- docs/docs/build/connectors/credentials.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/blog/0.64.md b/docs/blog/0.64.md index 71200ccca16..d531fe7c60f 100644 --- a/docs/blog/0.64.md +++ b/docs/blog/0.64.md @@ -28,7 +28,7 @@ You can now search directly within the row and column add dialogs. It was alread When you add a local filter to a widget in a Canvas dashboard, it will now be represented as an inline filter pill inside the widget. This helps end users understand which filters are active for that specific widget. ## Support for AWS STS Authentication in Connectors -We now support AWS STS authentication for our DuckDB and S3 connectors. You can find more details in our [documentation](/build/connectors/data-source/s3#iam-role-based-authentication). +We now support AWS STS authentication for our DuckDB and S3 connectors. You can find more details in our [documentation](/build/connectors/data-source/s3#method-2-iam-role-assumption). ## Cell Inspector for Long Dimension Values Sometimes, long dimension values are truncated in leaderboards and tables. You can now press the spacebar to toggle the Cell Inspector, which reveals the full value of the cell. This is especially useful when dealing with JSON values or log messages. diff --git a/docs/docs/build/connectors/credentials.md b/docs/docs/build/connectors/credentials.md index a58349e0403..85f5cb24370 100644 --- a/docs/docs/build/connectors/credentials.md +++ b/docs/docs/build/connectors/credentials.md @@ -24,7 +24,7 @@ While Rill **can** infer credentials from your local environment (AWS CLI, Azure 1. **Credentials referenced in connection strings or DSN within YAML files (RECOMMENDED)** - The UI creates YAML configurations that reference credentials from your `.env` file using templating (see [Connector YAML](/reference/project-files/connectors) for more details) 2. **Credentials passed in as variables** - When starting Rill Developer via `rill start --env key=value` (see [templating](/build/connectors/templating) for more details) -3. **Credentials configured via CLI** - For [AWS](/build/connectors/data-source/s3#local-aws-credentials-local-development-only) / [Azure](/build/connectors/data-source/azure#azure-cli-authentication-local-development-only) / [Google Cloud](/build/connectors/data-source/gcs#method-3-local-google-cloud-cli-credentials) - **NOT RECOMMENDED for production use** +3. 
**Credentials configured via CLI** - For [AWS](/build/connectors/data-source/s3#method-4-local-aws-credentials-local-development-only) / [Azure](/build/connectors/data-source/azure#method-5-azure-cli-authentication-local-development-only) / [Google Cloud](/build/connectors/data-source/gcs#method-4-local-google-cloud-cli-credentials) - **NOT RECOMMENDED for production use** For more details, please refer to the corresponding [connector](/build/connectors) or [OLAP engine](/build/connectors/olap) page. From f0624f7f18ed4b8935df7667b2793000bfe34d1e Mon Sep 17 00:00:00 2001 From: royendo <67675319+royendo@users.noreply.github.com> Date: Thu, 18 Dec 2025 17:08:51 -0500 Subject: [PATCH 3/4] deploy to RC --- docs/docs/build/connectors/data-source/azure.md | 11 +++++++++++ docs/docs/build/connectors/data-source/s3.md | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/docs/docs/build/connectors/data-source/azure.md b/docs/docs/build/connectors/data-source/azure.md index 66699683622..de4bd94656b 100644 --- a/docs/docs/build/connectors/data-source/azure.md +++ b/docs/docs/build/connectors/data-source/azure.md @@ -338,3 +338,14 @@ SELECT * FROM read_parquet('azure://account.blob.core.windows.net/container/data SELECT * FROM read_parquet('azure://account.blob.core.windows.net/container/data/2024-*.parquet') ``` +--- + +## Deploy to Rill Cloud + +When deploying a project to Rill Cloud, Rill requires you to explicitly provide either an Azure Blob Storage connection string, Azure Storage Key, or Azure Storage SAS token for the containers used in your project. Please refer to our [connector YAML reference docs](/reference/project-files/connectors#azure) for more information. + +If you subsequently add sources that require new credentials (or if you simply entered the wrong credentials during the initial deploy), you can update the credentials by pushing the `Deploy` button to update your project or by running the following command in the CLI: +``` +rill env push +``` + diff --git a/docs/docs/build/connectors/data-source/s3.md b/docs/docs/build/connectors/data-source/s3.md index c23754fb00c..8889a6e4cd3 100644 --- a/docs/docs/build/connectors/data-source/s3.md +++ b/docs/docs/build/connectors/data-source/s3.md @@ -277,6 +277,17 @@ SELECT * FROM read_parquet('s3://my-bucket/data/**/*.parquet') SELECT * FROM read_parquet('s3://my-bucket/data/2024-*.parquet') ``` +--- + +## Deploy to Rill Cloud + +When deploying a project to Rill Cloud, Rill requires you to explicitly provide an access key and secret for an AWS service account with access to S3 used in your project. Please refer to our [connector YAML reference docs](/reference/project-files/connectors#s3) for more information. 
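+
+For reference, a deployable S3 setup keeps the secrets templated in the connector file (a sketch mirroring Method 1 above; the env keys are the ones shown earlier in this page):
+
+```yaml
+type: connector
+driver: s3
+
+aws_access_key_id: "{{ .env.connector.s3.aws_access_key_id }}"
+aws_secret_access_key: "{{ .env.connector.s3.aws_secret_access_key }}"
+```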
+ +If you subsequently add sources that require new credentials (or if you simply entered the wrong credentials during the initial deploy), you can update the credentials by pushing the `Deploy` button to update your project or by running the following command in the CLI: +``` +rill env push +``` + ## Appendix ### How to create an AWS service account using the AWS Management Console From c68740adb43c862db469352db32e6483c00508e5 Mon Sep 17 00:00:00 2001 From: royendo <67675319+royendo@users.noreply.github.com> Date: Wed, 7 Jan 2026 10:11:50 -0500 Subject: [PATCH 4/4] schema updates --- .../reference/project-files/connectors.md | 17 +++++++++++ docs/docs/reference/project-files/models.md | 18 ++++++++++++ runtime/parser/schema/project.schema.yaml | 28 +++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/docs/docs/reference/project-files/connectors.md b/docs/docs/reference/project-files/connectors.md index dd6992c5ffa..669771d9576 100644 --- a/docs/docs/reference/project-files/connectors.md +++ b/docs/docs/reference/project-files/connectors.md @@ -712,6 +712,10 @@ _[boolean]_ - Log raw SQL queries executed through Pinot _[integer]_ - Maximum number of open connections to the Pinot database +### `timeout_ms` + +_[integer]_ - Query timeout in milliseconds + ```yaml # Example: Pinot connector configuration type: connector # Must be `connector` (required) @@ -725,6 +729,7 @@ controller_port: 9000 # Port number for the Pinot controller ssl: true # Enable SSL connection to Pinot log_queries: true # Log raw SQL queries executed through Pinot max_open_conns: 100 # Maximum number of open connections to the Pinot database +timeout_ms: 30000 # Query timeout in milliseconds ``` ## Postgres @@ -886,6 +891,18 @@ _[string]_ - Optional custom endpoint URL for S3-compatible storage _[string]_ - AWS region of the S3 bucket +### `aws_role_arn` + +_[string]_ - ARN of the IAM role to assume for accessing S3 resources + +### `aws_role_session_name` + +_[string]_ - Session name to use when assuming the IAM role + +### `aws_external_id` + +_[string]_ - External ID for cross-account role assumption + ### `path_prefixes` _[string, array]_ - A list of bucket path prefixes that this connector is allowed to access. diff --git a/docs/docs/reference/project-files/models.md b/docs/docs/reference/project-files/models.md index 397fe848101..c07379e9299 100644 --- a/docs/docs/reference/project-files/models.md +++ b/docs/docs/reference/project-files/models.md @@ -71,6 +71,20 @@ _[string]_ - Refers to a SQL query that is run after the main query, available f post_exec: DETACH DATABASE IF EXISTS postgres_db ``` +### `create_secrets_from_connectors` + +_[string, array]_ - List of connector names for which temporary secrets should be created before executing the SQL. This allows DuckDB-based models to access cloud storage (S3, GCS, Azure) using credentials from named connectors. + +```yaml +create_secrets_from_connectors: my_s3_connector +``` + +```yaml +create_secrets_from_connectors: + - my_s3_connector + - my_other_s3_connector +``` + ### `retry` _[object]_ - Refers to the retry configuration for the model. (optional) @@ -488,6 +502,10 @@ _[string]_ - Path to the data source. _[string]_ - Format of the data source (e.g., csv, json, parquet). +### `invalidate_on_change` + +_[boolean]_ - When true, the model will be invalidated and re-processed if the source file changes. 
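+
+A minimal example of how this flag might appear in a model file (the flag itself is the only addition; surrounding model properties follow the examples above):
+
+```yaml
+invalidate_on_change: true
+```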
+ ## Additional properties when `connector` is `redshift` or [named connector](./connectors#redshift) of redshift ### `output_location` diff --git a/runtime/parser/schema/project.schema.yaml b/runtime/parser/schema/project.schema.yaml index 8051d0770a5..09efcdac23b 100644 --- a/runtime/parser/schema/project.schema.yaml +++ b/runtime/parser/schema/project.schema.yaml @@ -685,6 +685,9 @@ definitions: max_open_conns: type: integer description: Maximum number of open connections to the Pinot database + timeout_ms: + type: integer + description: Query timeout in milliseconds examples: - # Example: Pinot connector configuration type: connector # Must be `connector` (required) @@ -699,6 +702,7 @@ definitions: ssl: true # Enable SSL connection to Pinot log_queries: true # Log raw SQL queries executed through Pinot max_open_conns: 100 # Maximum number of open connections to the Pinot database + timeout_ms: 30000 # Query timeout in milliseconds required: - driver - dsn @@ -842,6 +846,15 @@ definitions: region: type: string description: AWS region of the S3 bucket + aws_role_arn: + type: string + description: ARN of the IAM role to assume for accessing S3 resources + aws_role_session_name: + type: string + description: Session name to use when assuming the IAM role + aws_external_id: + type: string + description: External ID for cross-account role assumption path_prefixes: type: [string, array] description: | @@ -1182,6 +1195,18 @@ definitions: description: Refers to a SQL query that is run after the main query, available for DuckDB-based models. (optional). Ensure post_exec queries are idempotent. Use IF EXISTS statements when applicable. examples: - post_exec: DETACH DATABASE IF EXISTS postgres_db + create_secrets_from_connectors: + type: + - string + - array + items: + type: string + description: List of connector names for which temporary secrets should be created before executing the SQL. This allows DuckDB-based models to access cloud storage (S3, GCS, Azure) using credentials from named connectors. + examples: + - create_secrets_from_connectors: my_s3_connector + - create_secrets_from_connectors: + - my_s3_connector + - my_other_s3_connector retry: type: object description: Refers to the retry configuration for the model. (optional) @@ -1622,6 +1647,9 @@ definitions: format: type: string description: 'Format of the data source (e.g., csv, json, parquet).' + invalidate_on_change: + type: boolean + description: When true, the model will be invalidated and re-processed if the source file changes. redshift: type: object properties: