Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions .github/workflows/_run_benchmark.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
name: _Run SDGym Benchmark

# Reusable workflow (workflow_call): installs the project into a virtualenv and
# runs sdgym/_benchmark_launcher/script.py, forwarding every non-empty input as
# a command-line argument.
on:
  workflow_call:
    inputs:
      config_filepath:
        required: false
        type: string
      modality:
        required: false
        type: string
      # Space-separated list; split into individual arguments by the run step.
      datasets:
        required: false
        type: string
      # Space-separated list; split into individual arguments by the run step.
      synthesizers:
        required: false
        type: string
      num_instances:
        required: false
        type: string
      output_destination:
        required: false
        type: string
      timeout:
        required: false
        type: string
    secrets:
      SDV_ENTERPRISE_USERNAME:
        required: true
      SDV_ENTERPRISE_LICENSE_KEY:
        required: true
      GCP_SERVICE_ACCOUNT_JSON:
        required: true
      GCP_PROJECT_ID:
        required: true
      GCP_ZONE:
        required: true
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true

jobs:
  run-sdgym-benchmark:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version-file: "pyproject.toml"

      - name: Install dependencies
        env:
          USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
          LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
        run: |
          python -m venv venv
          source venv/bin/activate

          python -m pip install --upgrade pip
          python -m pip install sdv-installer
          # Read the credentials from the environment inside Python instead of
          # letting the shell splice them into the source text: a quote or
          # backslash in either value would otherwise corrupt the snippet.
          python -c "
          import os
          from sdv_installer.installation.installer import install_packages
          install_packages(
              username=os.environ['USERNAME'],
              license_key=os.environ['LICENSE_KEY'],
          )
          "
          python -m pip install ".[all]"

          # Expose the virtualenv to the following steps.
          echo "VIRTUAL_ENV=$(pwd)/venv" >> "$GITHUB_ENV"
          echo "$(pwd)/venv/bin" >> "$GITHUB_PATH"

      - name: Run SDGym Benchmark
        env:
          GCP_SERVICE_ACCOUNT_JSON: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON }}
          GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
          GCP_ZONE: ${{ secrets.GCP_ZONE }}
          SDV_ENTERPRISE_USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
          SDV_ENTERPRISE_LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          # Workflow inputs are passed through the environment rather than
          # expanded into the script body, so their content is never parsed as
          # shell code (script-injection hardening).
          INPUT_CONFIG_FILEPATH: ${{ inputs.config_filepath }}
          INPUT_MODALITY: ${{ inputs.modality }}
          INPUT_DATASETS: ${{ inputs.datasets }}
          INPUT_SYNTHESIZERS: ${{ inputs.synthesizers }}
          INPUT_NUM_INSTANCES: ${{ inputs.num_instances }}
          INPUT_OUTPUT_DESTINATION: ${{ inputs.output_destination }}
          INPUT_TIMEOUT: ${{ inputs.timeout }}
        run: |
          CMD=(python sdgym/_benchmark_launcher/script.py)

          # Append "<flag> <value>" to CMD when the value is non-empty.
          add_arg() {
            local flag="$1"
            local value="$2"
            if [ -n "$value" ]; then
              CMD+=("$flag" "$value")
            fi
          }

          # Append "<flag> <item>..." to CMD, splitting the value on whitespace.
          # Nothing is appended when the value holds no items, so a
          # whitespace-only input does not produce a flag without arguments.
          add_list_arg() {
            local flag="$1"
            local value="$2"
            local -a items=()
            if [ -n "$value" ]; then
              read -r -a items <<< "$value"
            fi
            if [ "${#items[@]}" -gt 0 ]; then
              CMD+=("$flag" "${items[@]}")
            fi
          }

          add_arg --config-filepath "$INPUT_CONFIG_FILEPATH"
          add_arg --modality "$INPUT_MODALITY"
          add_list_arg --datasets "$INPUT_DATASETS"
          add_list_arg --synthesizers "$INPUT_SYNTHESIZERS"
          add_arg --num-instances "$INPUT_NUM_INSTANCES"
          add_arg --output-destination "$INPUT_OUTPUT_DESTINATION"
          add_arg --timeout "$INPUT_TIMEOUT"

          # Log the exact, shell-quoted command before running it.
          printf 'Running command:'
          printf ' %q' "${CMD[@]}"
          printf '\n'

          "${CMD[@]}"
2 changes: 1 addition & 1 deletion .github/workflows/run_benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ jobs:
license_key='${LICENSE_KEY}',
)
"
python -m pip install "sdgym[all] @ git+https://github.com/sdv-dev/SDGym.git@issue-532-define-yaml-files"
python -m pip install "sdgym[all]"

echo "VIRTUAL_ENV=$(pwd)/venv" >> $GITHUB_ENV
echo "$(pwd)/venv/bin" >> $GITHUB_PATH
Expand Down
16 changes: 16 additions & 0 deletions .github/workflows/run_benchmark_from_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
name: Run SDGym Benchmark from YAML Config file

# Manually dispatched workflow: forwards the chosen config file path to the
# reusable _run_benchmark.yml workflow and lets it inherit the caller's secrets.
on:
  workflow_dispatch:
    inputs:
      config_filepath:
        type: string
        required: true
        description: 'Path to the benchmark YAML file in the selected branch'

jobs:
  run:
    uses: ./.github/workflows/_run_benchmark.yml
    secrets: inherit
    with:
      config_filepath: ${{ inputs.config_filepath }}
52 changes: 52 additions & 0 deletions .github/workflows/run_benchmark_manual.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
name: Run SDGym Benchmark Manually

# Manually dispatched workflow: collects the benchmark parameters from the
# dispatch form and forwards them, together with the required secrets, to the
# reusable _run_benchmark.yml workflow.
on:
  workflow_dispatch:
    inputs:
      modality:
        type: choice
        required: true
        description: 'Benchmark modality'
        options:
          - single_table
          - multi_table
      datasets:
        type: string
        required: false
        description: >-
          Space-separated datasets, e.g. 'adult alarm'.
          If not provided, all datasets for the modality will be included.
      synthesizers:
        type: string
        required: false
        description: >-
          Space-separated synthesizers, e.g. 'CTGANSynthesizer TVAESynthesizer'.
          If not provided, all synthesizers for the modality will be included.
      num_instances:
        type: string
        required: false
        default: '1'
        description: 'Number of instances'
      output_destination:
        type: string
        required: true
        description: 'Required output destination'
      timeout:
        type: string
        required: false
        description: 'Optional timeout in seconds'

jobs:
  run:
    uses: ./.github/workflows/_run_benchmark.yml
    secrets:
      SDV_ENTERPRISE_USERNAME: ${{ secrets.SDV_ENTERPRISE_USERNAME }}
      SDV_ENTERPRISE_LICENSE_KEY: ${{ secrets.SDV_ENTERPRISE_LICENSE_KEY }}
      GCP_SERVICE_ACCOUNT_JSON: ${{ secrets.GCP_SERVICE_ACCOUNT_JSON }}
      GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
      GCP_ZONE: ${{ secrets.GCP_ZONE }}
      AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
    with:
      modality: ${{ inputs.modality }}
      datasets: ${{ inputs.datasets }}
      synthesizers: ${{ inputs.synthesizers }}
      num_instances: ${{ inputs.num_instances }}
      output_destination: ${{ inputs.output_destination }}
      timeout: ${{ inputs.timeout }}
87 changes: 0 additions & 87 deletions sdgym/_benchmark/credentials_utils.py
Original file line number Diff line number Diff line change
@@ -1,65 +1,4 @@
import json
import os
import textwrap
from tempfile import NamedTemporaryFile

# Exact set of keys each credentials section must contain.
CREDENTIAL_KEYS = {
    'aws': {'aws_access_key_id', 'aws_secret_access_key'},
    'gcp': {
        'type',
        'project_id',
        'private_key_id',
        'private_key',
        'client_email',
        'client_id',
        'auth_uri',
        'token_uri',
        'auth_provider_x509_cert_url',
        'client_x509_cert_url',
        'universe_domain',
        'gcp_project',
        'gcp_zone',
    },
    'sdv': {'username', 'license_key'},
}


def get_credentials(credential_filepath):
    """Load and validate the credentials stored in a JSON file.

    The file must contain the ``aws`` and ``gcp`` sections and may contain an
    ``sdv`` section. Every section that is present must hold exactly the keys
    listed for it in ``CREDENTIAL_KEYS``.

    Args:
        credential_filepath (str):
            Path to the JSON credentials file.

    Returns:
        dict:
            The loaded credentials. An empty ``sdv`` section is added when the
            file does not define one.

    Raises:
        ValueError:
            If the file content is not a JSON object, a required section is
            missing, an unknown section is present, a section is not a JSON
            object, or a section does not contain exactly the expected keys.
    """
    with open(credential_filepath, 'r', encoding='utf-8') as cred_file:
        credentials = json.load(cred_file)

    # Tuples (not sets) keep validation order and error messages deterministic.
    required_sections = ('aws', 'gcp')
    optional_sections = ('sdv',)
    valid_sections = required_sections + optional_sections

    if not isinstance(credentials, dict):
        raise ValueError(
            'Credentials file must contain a JSON object with the following sections: '
            f'{list(valid_sections)}.'
        )

    missing_required = sorted(set(required_sections) - set(credentials))
    unknown_sections = sorted(set(credentials) - set(valid_sections))
    if missing_required or unknown_sections:
        raise ValueError(
            'Credentials file can only contain the following sections: '
            f'{list(valid_sections)}. '
            f'Missing required sections: {missing_required}. '
            f'Unknown sections: {unknown_sections}.'
        )

    for section in valid_sections:
        if section not in credentials:
            continue

        section_values = credentials[section]
        expected_keys = CREDENTIAL_KEYS[section]
        # Reject e.g. ``"aws": null`` with the documented error type instead of
        # failing later with an AttributeError on ``.keys()``.
        if not isinstance(section_values, dict):
            raise ValueError(
                f'The "{section}" section must be a JSON object containing the '
                f'following keys: {sorted(expected_keys)}.'
            )

        actual_keys = set(section_values)
        if expected_keys != actual_keys:
            raise ValueError(
                f'The "{section}" section must contain the following keys: '
                f'{sorted(expected_keys)}. Found: {sorted(actual_keys)}.'
            )

    credentials.setdefault('sdv', {})

    return credentials


def sdv_install_cmd(credentials):
Expand All @@ -76,29 +15,3 @@ def sdv_install_cmd(credentials):
python -c "from sdv_installer.installation.installer import install_packages; \\
install_packages(username='{username}', license_key='{license_key}')"
""")


def create_credentials_file():
    """Write the credentials found in environment variables to a temporary JSON file.

    The AWS, GCP and SDV values are read from the ``AWS_ACCESS_KEY_ID``,
    ``AWS_SECRET_ACCESS_KEY``, ``GCP_SERVICE_ACCOUNT_JSON``, ``GCP_PROJECT_ID``,
    ``GCP_ZONE``, ``SDV_ENTERPRISE_USERNAME`` and ``SDV_ENTERPRISE_LICENSE_KEY``
    environment variables. The file is created with ``delete=False``, so the
    caller is responsible for removing it.

    Returns:
        str:
            Path to the created ``.json`` file.

    Raises:
        ValueError:
            If ``GCP_SERVICE_ACCOUNT_JSON`` is unset, empty, or not valid JSON.
    """
    gcp_json = os.getenv('GCP_SERVICE_ACCOUNT_JSON')
    if not gcp_json:
        # Fail with an actionable message instead of the opaque TypeError that
        # ``json.loads(None)`` would raise.
        raise ValueError(
            'The GCP_SERVICE_ACCOUNT_JSON environment variable must be set to the '
            'service account JSON.'
        )

    credentials = {
        'aws': {
            'aws_access_key_id': os.getenv('AWS_ACCESS_KEY_ID'),
            'aws_secret_access_key': os.getenv('AWS_SECRET_ACCESS_KEY'),
        },
        'gcp': {
            **json.loads(gcp_json),
            'gcp_project': os.getenv('GCP_PROJECT_ID'),
            'gcp_zone': os.getenv('GCP_ZONE'),
        },
        # NOTE(review): these key names differ from CREDENTIAL_KEYS['sdv']
        # ('username', 'license_key'), so a file produced here would not pass
        # get_credentials validation -- confirm which names consumers expect.
        'sdv': {
            'sdv_enterprise_username': os.getenv('SDV_ENTERPRISE_USERNAME'),
            'sdv_enterprise_license_key': os.getenv('SDV_ENTERPRISE_LICENSE_KEY'),
        },
    }

    # The context manager flushes and closes the handle; ``delete=False`` keeps
    # the file on disk so the returned path stays usable.
    with NamedTemporaryFile(mode='w+', delete=False, suffix='.json') as tmp_file:
        json.dump(credentials, tmp_file)

    return tmp_file.name
2 changes: 1 addition & 1 deletion sdgym/_benchmark_launcher/benchmark_base.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ method_params:
compute:
service: 'gcp'

credentials_filepath: null
credentials_filepath: null
17 changes: 6 additions & 11 deletions sdgym/_benchmark_launcher/benchmark_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,9 @@
_validate_method_params,
_validate_structure,
)
from sdgym._benchmark_launcher.utils import _METHODS
from sdgym._benchmark_launcher.utils import _METHODS, CONFIG_KEYS
from sdgym.errors import BenchmarkConfigError

CONFIG_KEYS = frozenset([
'modality',
'method_params',
'credentials_filepath',
'compute',
'instance_jobs',
])


class BenchmarkConfig:
"""BenchmarkConfig class.
Expand All @@ -37,11 +29,14 @@ class BenchmarkConfig:
string specifying the path to the credentials file, if None,
credentials will be resolved from environment variables.
'compute': dict specifying the compute configuration (e.g. service: 'gcp'),
'instance_jobs': list of dicts, each specifying a combination of synthesizers and datasets:
'instance_jobs': list of dicts, each specifying a combination of synthesizers
and datasets and output destination to run a benchmark job on. Each dict should
have the following structure:
[
{
'synthesizers': ['synthesizer1', 'synthesizer2'],
'datasets': ['dataset1', 'dataset2'] or {'include': [...], 'exclude': [...]}
'datasets': ['dataset1', 'dataset2'] or {'include': [...], 'exclude': [...]},
'output_destination': 's3://bucket/path'
},
...
]
Expand Down
2 changes: 1 addition & 1 deletion sdgym/_benchmark_launcher/benchmark_multi_table.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
modality: multi_table
output_destination: &output_destination 's3://sdgym-benchmark/Debug/Benchmark_Launcher/'
output_destination: &output_destination 's3://sdgym-benchmark/Benchmarks/'
datasets_multi_table: &datasets_multi_table
- WebKP
- DCG
Expand Down
2 changes: 1 addition & 1 deletion sdgym/_benchmark_launcher/benchmark_single_table.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
modality: single_table
output_destination: &output_destination 's3://sdgym-benchmark/Debug/Benchmark_Launcher/'
output_destination: &output_destination 's3://sdgym-benchmark/Benchmarks/'
datasets_single_table: &datasets_single_table
- adult
- alarm
Expand Down
5 changes: 0 additions & 5 deletions sdgym/run_benchmark/run_benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@
)
from sdgym.s3 import get_s3_client, parse_s3_path

MODALITY_TO_CONFIG_FILE = {
'single_table': 'benchmark_single_table.yaml',
'multi_table': 'benchmark_multi_table.yaml',
}


def append_benchmark_run(
aws_access_key_id, aws_secret_access_key, date_str, modality='single_table'
Expand Down
Loading
Loading