Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 68 additions & 22 deletions .github/workflows/run_benchmark.yml
Original file line number Diff line number Diff line change
@@ -1,31 +1,77 @@
# Benchmark workflow. After this change it is invoked by per-modality wrapper
# workflows through `workflow_call`.
name: Run SDGym Benchmark

on:
# NOTE(review): this hunk shows both sides of a diff without +/- markers. The
# next three lines (manual dispatch plus a cron at 05:00 on the 1st of each
# month) look like the pre-change triggers - confirm against the real file.
workflow_dispatch:
schedule:
- cron: '0 5 1 * *'
# Reusable-workflow entry point.
workflow_call:
inputs:
# Forwarded to `invoke run-sdgym-benchmark --modality ...`.
modality:
required: true
type: string
# Git ref of SDGym used in the `pip install ... git+https://...@<ref>` line.
# NOTE(review): the default is a feature branch - presumably temporary;
# confirm it is switched to a long-lived ref before merge.
sdgym_ref:
required: false
type: string
default: issue-516-add-workflows
# NOTE(review): the job also reads secrets.GCP_PROJECT_ID and secrets.GCP_ZONE,
# which are not declared here; the visible callers use `secrets: inherit`.
secrets:
SDV_ENTERPRISE_USERNAME:
required: true
SDV_ENTERPRISE_LICENSE_KEY:
required: true
GCP_SERVICE_ACCOUNT_JSON:
required: true
AWS_ACCESS_KEY_ID:
required: true
AWS_SECRET_ACCESS_KEY:
required: true
SLACK_TOKEN:
required: true

# Single job on a GitHub-hosted Ubuntu runner.
jobs:
run-sdgym-benchmark:
runs-on: ubuntu-latest

# NOTE(review): the step list below interleaves both sides of a diff with no
# +/- markers. The first checkout / "Set up latest Python" / "Install
# dependencies" group and the later one-line "Run SDGym Benchmark" step (the
# one that sets AWS_DEFAULT_REGION) look like the removed version, and they
# split the new "Install dependencies" script in two - confirm against the
# real file before relying on this ordering.
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- name: Set up latest Python
uses: actions/setup-python@v5
with:
python-version-file: 'pyproject.toml'
- name: Install dependencies
run: |
- uses: actions/checkout@v4
with:
fetch-depth: 0

# The Python version is read from pyproject.toml.
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version-file: "pyproject.toml"

# Creates ./venv, installs this project with its dev extras, sdv-enterprise
# (through sdv-installer) and SDGym at `inputs.sdgym_ref`, then exposes the
# venv to later steps by appending to $GITHUB_ENV and $GITHUB_PATH.
# NOTE(review): ${USERNAME} and ${LICENSE_KEY} are expanded by the shell into
# the Python source passed to `python -c`; a quote character in either value
# would break that script - consider reading them from os.environ instead.
- name: Install dependencies
env:
USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
run: |
python -m venv venv
source venv/bin/activate

python -m pip install --upgrade pip
python -m pip install --no-cache-dir -e .[dev]

- name: Run SDGym Benchmark
env:
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}

run: invoke run-sdgym-benchmark
python -m pip install sdv-installer
python -c "
from sdv_installer.installation.installer import install_packages
install_packages(
username='${USERNAME}',
license_key='${LICENSE_KEY}',
package='sdv-enterprise',
)
"
python -m pip install "sdgym[all] @ git+https://github.com/sdv-dev/SDGym.git@${{ inputs.sdgym_ref }}"

echo "VIRTUAL_ENV=$(pwd)/venv" >> $GITHUB_ENV
echo "$(pwd)/venv/bin" >> $GITHUB_PATH

# Writes a temporary credentials file, launches the benchmark for the requested
# modality, and removes the credentials file afterwards.
- name: Run SDGym Benchmark
  env:
    GCP_SERVICE_ACCOUNT_JSON: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON }}
    GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
    GCP_ZONE: ${{ secrets.GCP_ZONE }}
    SDV_ENTERPRISE_USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
    SDV_ENTERPRISE_LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
    # Handed over as an environment variable so the value is never spliced
    # into the script text before the shell parses it.
    MODALITY: ${{ inputs.modality }}
  run: |
    set -euo pipefail

    # Assign first, export second: `export VAR=$(cmd)` hides a failure of cmd.
    CREDENTIALS_FILEPATH=$(python -c "from sdgym._benchmark.credentials_utils import create_credentials_file; print(create_credentials_file())")
    export CREDENTIALS_FILEPATH

    # Remove the credentials file even when the benchmark command fails.
    trap 'rm -f "$CREDENTIALS_FILEPATH"' EXIT

    invoke run-sdgym-benchmark --modality "$MODALITY"
16 changes: 16 additions & 0 deletions .github/workflows/run_benchmark_multi_table.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Wrapper that runs the reusable benchmark workflow for the multi-table modality.
name: Run SDGym Benchmark Multi-Table

on:
  workflow_dispatch:
  schedule:
    # 05:00 on the 1st day of every month.
    - cron: "0 5 1 * *"
  # NOTE(review): feature-branch trigger, presumably only for testing this
  # change - confirm it is removed before merge.
  push:
    branches:
      - issue-516-add-workflows

jobs:
  call-run-sdgym-benchmark:
    uses: ./.github/workflows/run_benchmark.yml
    with:
      modality: multi_table
    # Forward all of the caller's secrets to the reusable workflow.
    secrets: inherit
16 changes: 16 additions & 0 deletions .github/workflows/run_benchmark_single_table.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Wrapper that runs the reusable benchmark workflow for the single-table modality.
name: Run SDGym Benchmark Single-Table

on:
  workflow_dispatch:
  schedule:
    # 05:00 on the 1st day of every month.
    - cron: "0 5 1 * *"
  # NOTE(review): feature-branch trigger, presumably only for testing this
  # change - confirm it is removed before merge.
  push:
    branches:
      - issue-516-add-workflows

jobs:
  call-run-sdgym-benchmark:
    uses: ./.github/workflows/run_benchmark.yml
    with:
      modality: single_table
    # Forward all of the caller's secrets to the reusable workflow.
    secrets: inherit
182 changes: 99 additions & 83 deletions .github/workflows/upload_benchmark_results.yml
Original file line number Diff line number Diff line change
@@ -1,91 +1,107 @@
# NOTE(review): this hunk shows both sides of a diff without +/- markers. The
# two `name:` lines differ only in the capitalisation of "Results"; the first
# looks like the removed line - confirm against the real file.
name: Upload SDGym Benchmark results
name: Upload SDGym Benchmark Results

on:
# NOTE(review): the workflow_run / workflow_dispatch / schedule triggers below
# (daily cron at 06:00) look like the pre-change triggers; the per-modality
# wrapper workflows declare equivalent triggers themselves.
workflow_run:
workflows: ["Run SDGym Benchmark"]
types:
- completed
workflow_dispatch:
schedule:
- cron: '0 6 * * *'
# Reusable-workflow entry point.
workflow_call:
inputs:
# Forwarded to the `invoke` upload and notification tasks and added to the
# commit message.
modality:
description: "Benchmark modality to upload"
required: true
type: string
secrets:
PYDRIVE_TOKEN:
required: true
AWS_ACCESS_KEY_ID:
required: true
AWS_SECRET_ACCESS_KEY:
required: true
GH_TOKEN:
required: true
SLACK_TOKEN:
required: true

# Single job on a GitHub-hosted Ubuntu runner.
jobs:
upload-sdgym-benchmark:
runs-on: ubuntu-latest

steps:
# NOTE(review): this first copy of the step list has no --modality arguments
# and no `set -euo pipefail`; it looks like the removed side of the diff, with
# the replacement steps following it - confirm against the real file.
- uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Set up latest Python
uses: actions/setup-python@v5
with:
python-version-file: 'pyproject.toml'

- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install --no-cache-dir -e .[dev]

# Runs the upload task, then re-exports the results directory for later steps.
- name: Upload SDGym Benchmark
env:
PYDRIVE_TOKEN: ${{ secrets.PYDRIVE_TOKEN }}
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
GITHUB_LOCAL_RESULTS_DIR: ${{ runner.temp }}/sdgym-leaderboard-files
run: |
invoke upload-benchmark-results
echo "GITHUB_LOCAL_RESULTS_DIR=$GITHUB_LOCAL_RESULTS_DIR" >> $GITHUB_ENV

# Copies every regular file from the results directory into ./pr-staging.
# SKIP_UPLOAD is not set anywhere in this view; presumably the upload task
# writes it to $GITHUB_ENV - verify.
- name: Prepare files for commit
if: env.SKIP_UPLOAD != 'true'
run: |
mkdir pr-staging
echo "Looking for files in: $GITHUB_LOCAL_RESULTS_DIR"
ls -l "$GITHUB_LOCAL_RESULTS_DIR"
for f in "$GITHUB_LOCAL_RESULTS_DIR"/*; do
if [ -f "$f" ]; then
base=$(basename "$f")
cp "$f" "pr-staging/${base}"
fi
done

echo "Files staged for PR:"
ls -l pr-staging

- name: Checkout target repo (sdv-dev.github.io)
if: env.SKIP_UPLOAD != 'true'
run: |
git clone https://github.com/sdv-dev/sdv-dev.github.io.git target-repo
cd target-repo
git checkout gatsby-home

# Commits the staged files to the gatsby-home branch of the website repo and
# records the resulting commit URL in $GITHUB_ENV for the Slack step.
- name: Copy results and commit
if: env.SKIP_UPLOAD != 'true'
env:
GH_TOKEN: ${{ secrets.GH_TOKEN }}
FOLDER_NAME: ${{ env.FOLDER_NAME }}
run: |
cp pr-staging/* target-repo/assets/sdgym-leaderboard-files/
cd target-repo
git checkout gatsby-home
git config --local user.name "github-actions[bot]"
git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
git add assets/
git commit -m "Upload SDGym Benchmark Results ($FOLDER_NAME)" || echo "No changes to commit"
git remote set-url origin https://x-access-token:${GH_TOKEN}@github.com/sdv-dev/sdv-dev.github.io.git
git push origin gatsby-home
COMMIT_HASH=$(git rev-parse HEAD)
COMMIT_URL="https://github.com/sdv-dev/sdv-dev.github.io/commit/${COMMIT_HASH}"
echo "Commit URL: $COMMIT_URL"
echo "COMMIT_URL=$COMMIT_URL" >> $GITHUB_ENV

- name: Send Slack notification
if: env.SKIP_UPLOAD != 'true'
env:
SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
run: |
invoke notify-sdgym-benchmark-uploaded \
--folder-name "$FOLDER_NAME" \
--commit-url "$COMMIT_URL"
# Check out this repository with its full history.
- uses: actions/checkout@v4
  with:
    fetch-depth: 0

# The interpreter version is taken from pyproject.toml.
- name: Set up latest Python
  uses: actions/setup-python@v5
  with:
    python-version-file: 'pyproject.toml'

# Editable install of this project together with its dev extras.
- name: Install dependencies
  run: |
    python -m pip install --upgrade pip
    python -m pip install --no-cache-dir -e .[dev]

# Runs the upload task for the requested modality, then re-exports the local
# results directory so the following steps can read it.
- name: Upload SDGym Benchmark
  env:
    PYDRIVE_TOKEN: ${{ secrets.PYDRIVE_TOKEN }}
    AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
    AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    GITHUB_LOCAL_RESULTS_DIR: ${{ runner.temp }}/sdgym-leaderboard-files
    # Handed over as an environment variable so the value is never spliced
    # into the script text before the shell parses it.
    MODALITY: ${{ inputs.modality }}
  run: |
    invoke upload-benchmark-results --modality "$MODALITY"
    echo "GITHUB_LOCAL_RESULTS_DIR=$GITHUB_LOCAL_RESULTS_DIR" >> "$GITHUB_ENV"

# Stages every regular file from the results directory in ./pr-staging.
# A missing or empty results directory is tolerated: both listings are
# best-effort and nullglob makes the loop a no-op when nothing matches.
- name: Prepare files for commit
  if: env.SKIP_UPLOAD != 'true'
  run: |
    set -euo pipefail
    mkdir -p pr-staging

    echo "Looking for files in: $GITHUB_LOCAL_RESULTS_DIR"
    ls -l "$GITHUB_LOCAL_RESULTS_DIR" || true

    shopt -s nullglob
    for candidate in "$GITHUB_LOCAL_RESULTS_DIR"/*; do
      if [ -f "$candidate" ]; then
        cp "$candidate" "pr-staging/$(basename "$candidate")"
      fi
    done

    echo "Files staged for PR:"
    ls -l pr-staging || true

# Clones the website repository into ./target-repo and switches it to the
# gatsby-home branch, which the commit step pushes to.
- name: Checkout target repo (sdv-dev.github.io)
  if: env.SKIP_UPLOAD != 'true'
  run: |
    git clone https://github.com/sdv-dev/sdv-dev.github.io.git target-repo
    git -C target-repo checkout gatsby-home

# Copies the staged files into the website checkout, commits and pushes them to
# gatsby-home, and records the commit URL in $GITHUB_ENV for the Slack step.
# NOTE(review): when there is nothing to commit the step still pushes and
# reports the current HEAD as COMMIT_URL - confirm that this is intended.
- name: Copy results and commit
  if: env.SKIP_UPLOAD != 'true'
  env:
    GH_TOKEN: ${{ secrets.GH_TOKEN }}
    FOLDER_NAME: ${{ env.FOLDER_NAME }}
    # Handed over as an environment variable so the value is never spliced
    # into the script text before the shell parses it.
    MODALITY: ${{ inputs.modality }}
  run: |
    set -euo pipefail

    # Best-effort copy: an empty staging directory must not fail the step.
    cp -f pr-staging/* target-repo/assets/sdgym-leaderboard-files/ || true
    cd target-repo

    git config --local user.name "github-actions[bot]"
    git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"

    git add assets/
    git commit -m "Upload SDGym Benchmark Results ($FOLDER_NAME) - Modality: $MODALITY" || echo "No changes to commit"

    git remote set-url origin "https://x-access-token:${GH_TOKEN}@github.com/sdv-dev/sdv-dev.github.io.git"
    git push origin gatsby-home

    COMMIT_HASH=$(git rev-parse HEAD)
    COMMIT_URL="https://github.com/sdv-dev/sdv-dev.github.io/commit/${COMMIT_HASH}"
    echo "COMMIT_URL=$COMMIT_URL" >> "$GITHUB_ENV"

# Announces the upload on Slack. FOLDER_NAME and COMMIT_URL are read from the
# job environment; COMMIT_URL is written to $GITHUB_ENV by the commit step.
- name: Send Slack notification
  if: env.SKIP_UPLOAD != 'true'
  env:
    SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
    # Handed over as an environment variable so the value is never spliced
    # into the script text before the shell parses it.
    MODALITY: ${{ inputs.modality }}
  run: |
    invoke notify-sdgym-benchmark-uploaded \
      --folder-name "$FOLDER_NAME" \
      --commit-url "$COMMIT_URL" \
      --modality "$MODALITY"
19 changes: 19 additions & 0 deletions .github/workflows/upload_benchmark_results_multi_table.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Wrapper that runs the reusable upload workflow for the multi-table modality.
name: Upload SDGym Multi-Table Benchmark results

on:
  # Fires whenever the multi-table benchmark workflow finishes, whatever its
  # conclusion.
  workflow_run:
    workflows:
      - Run SDGym Benchmark Multi-Table
    types:
      - completed
  workflow_dispatch:
  schedule:
    # Every day at 06:00.
    - cron: "0 6 * * *"
  # NOTE(review): feature-branch trigger, presumably only for testing this
  # change; the single-table upload wrapper has no such trigger - confirm it
  # is removed before merge.
  push:
    branches:
      - issue-516-add-workflows

jobs:
  call-upload-benchmark-results:
    uses: ./.github/workflows/upload_benchmark_results.yml
    with:
      modality: multi_table
    # Forward all of the caller's secrets to the reusable workflow.
    secrets: inherit
16 changes: 16 additions & 0 deletions .github/workflows/upload_benchmark_results_single_table.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Wrapper that runs the reusable upload workflow for the single-table modality.
name: Upload SDGym Single-Table Benchmark results

on:
  # Fires whenever the single-table benchmark workflow finishes, whatever its
  # conclusion.
  workflow_run:
    workflows:
      - Run SDGym Benchmark Single-Table
    types:
      - completed
  workflow_dispatch:
  schedule:
    # Every day at 06:00.
    - cron: "0 6 * * *"

jobs:
  call-upload-benchmark-results:
    uses: ./.github/workflows/upload_benchmark_results.yml
    with:
      modality: single_table
    # Forward all of the caller's secrets to the reusable workflow.
    secrets: inherit
4 changes: 2 additions & 2 deletions sdgym/_benchmark/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def _get_user_data_script(
log "======== Install Dependencies =========="
pip install --upgrade pip
{sdv_install}
pip install "sdgym[all] @ git+https://github.com/sdv-dev/SDGym.git@issue-515-_benchmark_multi_table_compute_gcp"
pip install "sdgym[all] @ git+https://github.com/sdv-dev/SDGym.git@issue-516-add-workflows"

{gpu_block}

Expand Down Expand Up @@ -428,7 +428,7 @@ def _benchmark_single_table_compute_gcp(
limit_dataset_size=False,
compute_quality_score=True,
compute_diagnostic_score=True,
compute_privacy_score=True,
compute_privacy_score=False,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this for testing purposes only, or will it stay this way from now on?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the benchmark we don't compute the privacy_score for now:

compute_privacy_score=False,

sdmetrics=None,
timeout=None,
):
Expand Down
2 changes: 1 addition & 1 deletion sdgym/_benchmark/config_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,6 @@ def validate_compute_config(config):


def _make_instance_name(prefix):
day = datetime.now(timezone.utc).strftime('%Y_%m_%d_%H:%M')
day = datetime.now(timezone.utc).strftime('%Y%m%d-%H%M')
suffix = uuid.uuid4().hex[:6]
return f'{prefix}-{day}-{suffix}'
Loading
Loading