diff --git a/.github/workflows/oracle_testing.yml b/.github/workflows/oracle_testing.yml
new file mode 100644
index 000000000..53e9d488a
--- /dev/null
+++ b/.github/workflows/oracle_testing.yml
@@ -0,0 +1,69 @@
+name: Run Oracle Tests
+
+on:
+ workflow_call:
+ inputs:
+ python-versions:
+ description: "JSON string of Python versions"
+ type: string
+ required: true
+ secrets:
+ ORACLE_PASSWORD:
+ required: true
+
+jobs:
+ oracle-tests:
+ name: Oracle Tests (Python ${{ matrix.python-version }})
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ python-version: ${{ fromJSON(inputs.python-versions) }}
+
+ # Define services here to run Docker containers alongside your job
+ services:
+ oracle:
+ image: bodoai1/pydough-oracle-tpch:latest
+ env:
+ # Set environment variables for Oracle container
+ ORACLE_PWD: ${{ secrets.ORACLE_PASSWORD }}
+ ports:
+ - 1521:1521
+ env:
+ ORACLE_PASSWORD: ${{ secrets.ORACLE_PASSWORD }}
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Python ${{ matrix.python-version }}
+ id: setup-python
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install uv
+ uses: astral-sh/setup-uv@v3
+ with:
+ version: "0.4.23"
+
+ - name: Create virtual environment
+ run: uv venv
+
+ - name: Install dependencies
+ run: uv pip install -e ".[oracle]"
+
+ - name: Wait for TPCH data to load
+   run: |
+     # Poll the Oracle container logs (up to 600 tries, 5s apart); fail the job on timeout.
+     for i in {1..600}; do
+       if docker logs ${{ job.services.oracle.id }} 2>&1 | grep -q "02_tpch_data.sh"; then
+         echo "TPCH data loaded"; exit 0
+       fi
+       echo "Waiting for TPCH load... ($i/600)"; sleep 5
+     done
+     echo "Timed out waiting for TPCH data to load" >&2; exit 1
+
+ - name: Confirm Oracle connector is installed
+ run: uv run python -c "import oracledb; print('Oracle connector installed')"
+
+ - name: Run Oracle Tests
+ run: uv run pytest -m oracle tests/ -rs
diff --git a/.github/workflows/pr_testing.yml b/.github/workflows/pr_testing.yml
index 3ca39ea72..1db734a6d 100644
--- a/.github/workflows/pr_testing.yml
+++ b/.github/workflows/pr_testing.yml
@@ -49,6 +49,11 @@ on:
description: "Run Postgres Tests"
type: boolean
required: false
+ default: false
+ run-oracle:
+ description: "Run Oracle Tests"
+ type: boolean
+ required: false
default: false
run-sf_masked:
description: "Run Snowflake Masked Tests"
@@ -168,7 +173,7 @@ jobs:
run: uv run ruff check .
- name: Run Tests
- run: uv run pytest tests/ -m "not (snowflake or mysql or postgres or sf_masked or s3 or bodosql)" -rs
+ run: uv run pytest tests/ -m "not (snowflake or mysql or postgres or sf_masked or s3 or bodosql or oracle)" -rs
run-defog-daily-update:
name: Run DEFOG Daily Update
@@ -286,3 +291,19 @@ jobs:
python-versions: ${{ github.event_name == 'workflow_dispatch'
&& needs.get-py-ver-matrix.outputs.matrix
|| '["3.10", "3.11", "3.12", "3.13"]' }}
+
+ run-oracle-tests:
+ name: Oracle Tests
+ needs: [get-msg, get-py-ver-matrix]
+ if: |
+ (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run all]')) ||
+ (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run dialects]')) ||
+ (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run oracle]')) ||
+ (github.event_name == 'workflow_dispatch' && (inputs.run-all || inputs.run-dialects || inputs.run-oracle))
+ uses: ./.github/workflows/oracle_testing.yml
+ secrets:
+ ORACLE_PASSWORD: ${{ secrets.ORACLE_PASSWORD }}
+ with:
+ python-versions: ${{ github.event_name == 'workflow_dispatch'
+ && needs.get-py-ver-matrix.outputs.matrix
+ || '["3.10", "3.11", "3.12", "3.13"]' }}
diff --git a/demos/README.md b/demos/README.md
index aea4e263a..3453f9105 100644
--- a/demos/README.md
+++ b/demos/README.md
@@ -27,4 +27,5 @@ Once the introduction notebook is complete, you can explore the other notebooks:
- [SF_TPCH_q1.ipynb](notebooks/SF_TPCH_q1.ipynb) demonstrates how to connect a Snowflake database with PyDough.
- [MySQL_TPCH.ipynb](notebooks/MySQL_TPCH.ipynb) demonstrates how to connect a MySQL database with PyDough.
- [PG_TPCH.ipynb](notebooks/PG_TPCH.ipynb) demonstrates how to connect a Postgres database with PyDough.
+- [Oracle_TPCH.ipynb](notebooks/Oracle_TPCH.ipynb) demonstrates how to connect an Oracle database with PyDough.
diff --git a/demos/notebooks/Oracle_TPCH.ipynb b/demos/notebooks/Oracle_TPCH.ipynb
new file mode 100644
index 000000000..dde15416a
--- /dev/null
+++ b/demos/notebooks/Oracle_TPCH.ipynb
@@ -0,0 +1,317 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "d1cd6a33",
+ "metadata": {},
+ "source": [
+ "# Oracle PyDough Database connector"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b190b0ef",
+ "metadata": {},
+ "source": [
+ "> ## π Initial Setup\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> ### 1οΈβ£ Oracle Database\n",
+ ">\n",
+ "> You can connect to your **own Oracle database** using your credentials — for example, if you have **Oracle Database Software** or another local server running.\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> ### 2οΈβ£ Docker Image (TPC-H Database)\n",
+ ">\n",
+ "> You can also test with our **pre-built Oracle TPC-H database** available on **Docker Hub**.\n",
+ ">\n",
+ "> #### π Requirements\n",
+ "> - Make sure you have **Docker Desktop** installed and running.\n",
+ ">\n",
+ "> #### π¦ Pull and Run the Container\n",
+ "> ```bash\n",
+ "> docker run -d --name [CONTAINER_NAME] \\\n",
+ "> --platform linux/amd64 \\\n",
+ "> -e ORACLE_PWD=[PASSWORD] \\\n",
+ "> -p 1521:1521 \\\n",
+ "> bodoai1/pydough-oracle-tpch:latest\n",
+ "> ```\n",
+ "> - Replace `[CONTAINER_NAME]` with your preferred container name. \n",
+ "> - Replace `[PASSWORD]` with your preferred password.\n",
+ ">\n",
+ "> *(Make sure the `1521` port is available and not being used by another container.)* \n",
+ "> \n",
+ "> ---\n",
+ ">\n",
+ "> #### π Environment Variables\n",
+ "> To connect to this database, use:\n",
+ "> ```env\n",
+ "> ORACLE_USERNAME=tpch\n",
+ "> ORACLE_PASSWORD=[PASSWORD]\n",
+ "> ```\n",
+ "> *(Make sure `[PASSWORD]` matches the one you used when running the container.)*\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> π‘ **Tip:** \n",
+ "> Store these credentials in a `.env` file in your project directory for easy access and security.\n",
+ ">\n",
+ "> Example `.env` file:\n",
+ "> ```env\n",
+ "> ORACLE_USERNAME=tpch\n",
+ "> ORACLE_PASSWORD=mysecretpassword\n",
+ "> ```\n",
+ ">\n",
+ ">\n",
+ "> #### Deleting the container and image\n",
+ "> Once the tests have finished you can stop the container and delete it with the image using the following docker commands:\n",
+ ">```bash\n",
+ "> docker stop [CONTAINER_NAME]\n",
+ "> docker rm [CONTAINER_NAME]\n",
+ "> docker rmi bodoai1/pydough-oracle-tpch\n",
+ ">```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "097cba60",
+ "metadata": {},
+ "source": [
+ "> ## π Installing Oracle Connector\n",
+ ">\n",
+ "> Make sure to have the **`python-oracledb`** package installed:\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> - **If you're working inside the repo**:\n",
+ "> ```bash\n",
+ "> pip install -e \".[oracle]\"\n",
+ "> ```\n",
+ ">\n",
+ "> - **Or install the connector directly**:\n",
+ "> ```bash\n",
+ "> python -m pip install oracledb --upgrade\n",
+ "> ```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1f39b2af",
+ "metadata": {},
+ "source": [
+ "> ## Importing Required Libraries\n",
+ ">\n",
+ "> ---"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b473d180",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pydough\n",
+ "import datetime\n",
+ "import os"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6c595441",
+ "metadata": {},
+ "source": [
+ "> ## π Loading Credentials and connecting to Oracle with PyDough\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> ### 1οΈβ£ Load Credentials from a Local `.env` File\n",
+ "> - The `.env` file contains your Oracle login details like:\n",
+ "> ```env\n",
+ "> ORACLE_PASSWORD=mypassword\n",
+ "> ```\n",
+ "> - These are read in Python using:\n",
+ "> ```python\n",
+ "> import os\n",
+ "> password = os.getenv(\"ORACLE_PASSWORD\")\n",
+ "> ```\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> ### 2οΈβ£ Oracle-PyDough `connect_database()` Parameters\n",
+ "> - **`user`** *(required)*: Username for Oracle connection. \n",
+ "> - **`password`** *(required)*: Password used for Oracle connection. \n",
+ "> - **`service_name`** *(required)*: Oracle database service name. \n",
+ "> - **`host`** *(optional)*: IP to access Oracle server. Default: `\"localhost\"` or `\"127.0.0.1\"`. \n",
+ "> - **`port`** *(optional)*: Port number to access Oracle server. Default: `1521`. \n",
+ "> - **`tcp_connect_timeout`** *(optional)*: Timeout in seconds for Oracle connection. Default: `3`. \n",
+ "> - **`attempts`** *(optional)*: Number of times the connection is attempted. Default: `1`. \n",
+ "> - **`delay`** *(optional)*: Seconds to wait before another connection attempt. Default: `2`. \n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> ### 3οΈβ£ Connect to Oracle Using PyDough\n",
+ "> - `pydough.active_session.load_metadata_graph(...)` \n",
+ "> Loads a metadata graph mapping your Oracle schema (used for query planning/optimizations). \n",
+ "> - `connect_database(...)` \n",
+ "> Uses the loaded credentials to establish a live connection to your Oracle database.\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> **β οΈ Notes:** \n",
+ "> - Ensure the `.env` exists and contains **all required keys**. \n",
+ "> - The metadata graph path must point to a **valid JSON file** representing your schema."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7487b588",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "oracle_user = \"tpch\"\n",
+ "oracle_password = os.getenv(\"ORACLE_PASSWORD\")\n",
+ "oracle_service_name = \"FREEPDB1\"\n",
+ "oracle_host = \"127.0.0.1\"\n",
+ "oracle_port = 1521\n",
+ "connection_timeout = 2\n",
+ "attempts = 2 \n",
+ "delay = 5.0 \n",
+ "\n",
+ "pydough.active_session.load_metadata_graph(\"../../tests/test_metadata/sample_graphs.json\", \"TPCH\")\n",
+ "pydough.active_session.connect_database(\"oracle\", \n",
+ " user=oracle_user,\n",
+ " password=oracle_password,\n",
+ " service_name=oracle_service_name,\n",
+ " host=oracle_host,\n",
+ " port=oracle_port,\n",
+ " tcp_connect_timeout=connection_timeout,\n",
+ " attempts=attempts,\n",
+ " delay=delay\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "305e11ec",
+ "metadata": {},
+ "source": [
+ "> ## β¨ Enabling PyDough's Jupyter Magic Commands\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> This step loads the **`pydough.jupyter_extensions`** module, which adds custom magic commands (like `%%pydough`) to your notebook.\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> ### π What These Magic Commands Do\n",
+ "> - **Write PyDough directly** in notebook cells using:\n",
+ "> ```python\n",
+ "> %%pydough\n",
+ "> ```\n",
+ "> - **Automatically render** query results inside the notebook.\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> ### π» How It Works\n",
+ "> This is a **Jupyter-specific feature** — the `%load_ext` command dynamically loads these extensions into your **current notebook session**:\n",
+ "> ```python\n",
+ "> %load_ext pydough.jupyter_extensions\n",
+ "> ```"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "93dde776",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%load_ext pydough.jupyter_extensions"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d9b9d04a",
+ "metadata": {},
+ "source": [
+ "> ## π Running TPC-H Query 1 with PyDough in Oracle\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> This cell runs **TPC-H Query 1** using **PyDough**.\n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> ### π What the Query Does\n",
+ "> - **Computes summary statistics**: sums, averages, and counts for orders. \n",
+ "> - **Groups by**: `return_flag` and `line_status`. \n",
+ "> - **Filters by**: a shipping date cutoff. \n",
+ ">\n",
+ "> ---\n",
+ ">\n",
+ "> ### π€ Output\n",
+ "> - `pydough.to_df(output)` converts the result to a **Pandas DataFrame**. \n",
+ "> - This makes it easy to inspect and analyze results directly in Python. \n",
+ ">\n",
+ "> ---\n",
+ ">"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "86b45425",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%pydough\n",
+ "# TPCH Q1\n",
+ "output = (lines.WHERE((ship_date <= datetime.date(1998, 12, 1)))\n",
+ " .PARTITION(name=\"groups\", by=(return_flag, status))\n",
+ " .CALCULATE(\n",
+ " L_RETURNFLAG=return_flag,\n",
+ " L_LINESTATUS=status,\n",
+ " SUM_QTY=SUM(lines.quantity),\n",
+ " SUM_BASE_PRICE=SUM(lines.extended_price),\n",
+ " SUM_DISC_PRICE=SUM(lines.extended_price * (1 - lines.discount)),\n",
+ " SUM_CHARGE=SUM(\n",
+ " lines.extended_price * (1 - lines.discount) * (1 + lines.tax)\n",
+ " ),\n",
+ " AVG_QTY=AVG(lines.quantity),\n",
+ " AVG_PRICE=AVG(lines.extended_price),\n",
+ " AVG_DISC=AVG(lines.discount),\n",
+ " COUNT_ORDER=COUNT(lines),\n",
+ " )\n",
+ " .ORDER_BY(L_RETURNFLAG.ASC(), L_LINESTATUS.ASC())\n",
+ ")\n",
+ "\n",
+ "pydough.to_df(output)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "PyDough",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.13.5"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/documentation/usage.md b/documentation/usage.md
index 33b97f291..2295e8d8e 100644
--- a/documentation/usage.md
+++ b/documentation/usage.md
@@ -345,7 +345,9 @@ Below is a list of all supported values for the database name:
- `snowflake`: uses a Snowflake database. [See here](https://docs.snowflake.com/en/user-guide/python-connector.html#connecting-to-snowflake) for details on the connection API and what keyword arguments can be passed in.
-- `postgres` or `postgres`: uses a Postgres database. [See here](https://www.psycopg.org/docs/) for details on the connection API and what keyword arguments can be passed in.
+- `postgres`: uses a Postgres database. [See here](https://www.psycopg.org/docs/) for details on the connection API and what keyword arguments can be passed in.
+
+- `oracle`: uses an Oracle database. [See here](https://python-oracledb.readthedocs.io/en/latest/user_guide/connection_handling.html) for details on the connection API and what keyword arguments can be passed in.
- `bodosql`: uses a BodoSQL context. [See here](https://docs.bodo.ai/latest/api_docs/sql/bodosqlcontext/) for details on the BodoSQL context and [here](https://docs.bodo.ai/latest/api_docs/sql/database_catalogs/) for details on the various kinds of catalogs that can be connected to a BodoSQL context.
@@ -368,6 +370,7 @@ Hereβs a quick reference table showing which connector is needed for each dial
| `mysql` | `mysql-connector-python` |
| `snowflake` | `snowflake-connector-python[pandas]` |
| `postgres` | `psycopg2-binary` |
+| `oracle` | `oracledb` |
| `bodosql` | Depends on the catalog being used |
Below are examples of how to access the context and switch it out for a newly created one, either by manually setting it or by using `session.load_database`. These examples assume that there are two different sqlite database files located at `db_files/education.db` and `db_files/shakespeare.db`.
@@ -444,6 +447,34 @@ You can find a full example of using MySQL database with PyDough in [this usage
```
You can find a full example of using Postgres database with PyDough in [this usage guide](./../demos/notebooks/PG_TPCH.ipynb).
+- Oracle: You can connect to an Oracle database using `load_metadata_graph` and `connect_database` APIs. For example:
+ ```py
+ pydough.active_session.load_metadata_graph("../../tests/test_metadata/sample_graphs.json", "TPCH")
+ pydough.active_session.connect_database("oracle",
+ user=oracle_user,
+ password=oracle_password,
+ host=oracle_host,
+ port=oracle_port,
+ service_name=oracle_service_name,
+ )
+ ```
+ You can also pass `dsn` instead of `host`, `port` and `service_name`.
+
+ Example with a connection object:
+ ```py
+ pydough.active_session.load_metadata_graph("../../tests/test_metadata/sample_graphs.json", "TPCH")
+ # `dsn` may be passed instead of `host`, `port` and `service_name`.
+ oracle_conn: oracledb.Connection = oracledb.connect(
+ user=oracle_user,
+ password=oracle_password,
+ host=oracle_host,
+ port=oracle_port,
+ service_name=oracle_service_name,
+ )
+ pydough.active_session.connect_database("oracle", connection=oracle_conn)
+ ```
+You can find a full example of using an Oracle database with PyDough in [this usage guide](./../demos/notebooks/Oracle_TPCH.ipynb).
+
## Evaluation APIs
diff --git a/pydough/database_connectors/README.md b/pydough/database_connectors/README.md
index 0ef17e5b5..b19e27071 100644
--- a/pydough/database_connectors/README.md
+++ b/pydough/database_connectors/README.md
@@ -19,6 +19,7 @@ The database connectors module provides functionality to manage database connect
- `SNOWFLAKE`: Represents the Snowflake SQL dialect.
- `MYSQL`: Represents the MySQL dialect.
- `POSTGRES`: Represents the Postgres dialect
+ - `ORACLE`: Represents the Oracle dialect
- `DatabaseContext`: Dataclass that manages the database connection and the corresponding dialect.
- Fields:
- `connection`: The `DatabaseConnection` object.
@@ -35,6 +36,7 @@ The database connectors module provides functionality to manage database connect
- `load_snowflake_connection`: Loads a Snowflake connection.
- `load_mysql_connection`: Loads a MySQL database connection.
- `load_postgres_connection`: Loads a Postgres database connection.
+- `load_oracle_connection`: Loads an Oracle database connection.
## Usage
diff --git a/pydough/database_connectors/__init__.py b/pydough/database_connectors/__init__.py
index e9e71103f..7664b381d 100644
--- a/pydough/database_connectors/__init__.py
+++ b/pydough/database_connectors/__init__.py
@@ -5,6 +5,7 @@
"empty_connection",
"load_database_context",
"load_mysql_connection",
+ "load_oracle_connection",
"load_postgres_connection",
"load_snowflake_connection",
"load_sqlite_connection",
@@ -13,6 +14,7 @@
from .builtin_databases import (
load_database_context,
load_mysql_connection,
+ load_oracle_connection,
load_postgres_connection,
load_snowflake_connection,
load_sqlite_connection,
diff --git a/pydough/database_connectors/builtin_databases.py b/pydough/database_connectors/builtin_databases.py
index 1746eb2c1..034585788 100644
--- a/pydough/database_connectors/builtin_databases.py
+++ b/pydough/database_connectors/builtin_databases.py
@@ -18,6 +18,7 @@
"load_bodosql_context",
"load_database_context",
"load_mysql_connection",
+ "load_oracle_connection",
"load_postgres_connection",
"load_snowflake_connection",
"load_sqlite_connection",
@@ -37,7 +38,7 @@ def load_database_context(database_name: str, **kwargs) -> DatabaseContext:
Returns:
The database context object.
"""
- supported_databases = {"postgres", "mysql", "sqlite", "snowflake"}
+ supported_databases = {"postgres", "mysql", "sqlite", "snowflake", "oracle"}
connection: DatabaseConnection | BodoSQLContext
dialect: DatabaseDialect
match database_name.lower():
@@ -53,6 +54,9 @@ def load_database_context(database_name: str, **kwargs) -> DatabaseContext:
case "postgres":
connection = load_postgres_connection(**kwargs)
dialect = DatabaseDialect.POSTGRES
+ case "oracle":
+ connection = load_oracle_connection(**kwargs)
+ dialect = DatabaseDialect.ORACLE
case "bodosql":
connection = load_bodosql_context(**kwargs)
dialect = DatabaseDialect.BODOSQL
@@ -306,6 +310,98 @@ def load_postgres_connection(**kwargs) -> DatabaseConnection:
raise ValueError(f"Failed to connect to Postgres after {attempts} attempts")
+def load_oracle_connection(**kwargs) -> DatabaseConnection:
+ """
+ Loads an Oracle database connection. This is done by providing a wrapper
+ around the DB 2.0 connect API.
+
+ Args:
+ **kwargs: Either an Oracle connection object (as `connection=