From dc8b37a92094f24437a209c3d86000d15bca88b6 Mon Sep 17 00:00:00 2001 From: ofiryanai Date: Sun, 5 Apr 2026 11:38:03 +0300 Subject: [PATCH 1/2] Use aws s3 cp instead of wget for benchmark index downloads Replace wget with aws s3 cp for downloading pre-generated indices in benchmarks. Since the runners are EC2 instances in AWS and the files are hosted on S3, using the AWS CLI for same-region transfers is significantly faster and more reliable, avoiding the timeout failures seen with wget over HTTPS. Changes: - bm_files.sh: Replace wget calls with aws s3 cp via a download_s3 helper. Also deduplicate URLs with sort -u in the benchmarks-all path. - benchmark-runner.yml: Add awscli to apt install, and add a Configure AWS credentials step before the download step. --- .github/workflows/benchmark-runner.yml | 8 +++++++- tests/benchmark/bm_files.sh | 19 +++++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/.github/workflows/benchmark-runner.yml b/.github/workflows/benchmark-runner.yml index 534d9f674..bc85bd90c 100644 --- a/.github/workflows/benchmark-runner.yml +++ b/.github/workflows/benchmark-runner.yml @@ -61,9 +61,15 @@ jobs: - name: Install benchmark dependencies run: | sudo .install/install_script.sh - sudo apt install python3-pip -y + sudo apt install python3-pip awscli -y pip3 install --upgrade pip PyYAML setuptools redisbench-admin pip3 install -r requirements.txt + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ secrets.AWS_REGION_BENCHMARK }} - name: Download pre-generated indices timeout-minutes: 20 run: ./tests/benchmark/bm_files.sh ${{ inputs.setup }} diff --git a/tests/benchmark/bm_files.sh b/tests/benchmark/bm_files.sh index 82a732145..902f887ff 100755 --- a/tests/benchmark/bm_files.sh +++ b/tests/benchmark/bm_files.sh @@ -1,8 +1,23 @@ BM_TYPE=$1 
alg="hnsw" +S3_BUCKET="dev.cto.redis" +S3_PREFIX="VectorSimilarity" +DEST_DIR="tests/benchmark/data" + +# Download one pre-generated index file from S3, given its full HTTPS URL. +# Only the URL's basename is used: s3://$S3_BUCKET/$S3_PREFIX/<basename> is copied to $DEST_DIR/<basename> with aws s3 cp. +download_s3() { + local url="$1" + local filename + filename=$(basename "$url") + aws s3 cp "s3://${S3_BUCKET}/${S3_PREFIX}/${filename}" "${DEST_DIR}/${filename}" +} +export -f download_s3 +export S3_BUCKET S3_PREFIX DEST_DIR + if [ -z "$BM_TYPE" ] || [ "$BM_TYPE" = "benchmarks-all" ]; then - cat tests/benchmark/data/hnsw_indices/*.txt tests/benchmark/data/svs_indices/*.txt | xargs -n 1 -P 0 wget --no-check-certificate -P tests/benchmark/data + cat tests/benchmark/data/hnsw_indices/*.txt tests/benchmark/data/svs_indices/*.txt | grep -v '^$' | sort -u | xargs -P 0 -I {} bash -c 'download_s3 "$@"' _ {} exit 0 elif [ "$BM_TYPE" = "benchmarks-default" ] \ || [ "$BM_TYPE" = "bm-basics-fp32-single" ] \ @@ -57,4 +72,4 @@ else exit 0 fi -cat tests/benchmark/data/${alg}_indices/${alg}_indices_$file_name.txt | xargs -n 1 -P 0 wget --no-check-certificate -P tests/benchmark/data +cat tests/benchmark/data/${alg}_indices/${alg}_indices_$file_name.txt | grep -v '^$' | xargs -P 0 -I {} bash -c 'download_s3 "$@"' _ {} From 0e57c58e43b45e9b6f0ecb26a6f4c5afebd02c0c Mon Sep 17 00:00:00 2001 From: ofiryanai Date: Sun, 5 Apr 2026 14:10:37 +0300 Subject: [PATCH 2/2] Fix: install AWS CLI v2 standalone to avoid Python conflicts The apt-installed awscli v1 conflicts with the newer botocore pulled in by pip (via redisbench-admin), causing KeyError: 'opsworkscm' on every aws s3 cp call. Fix by installing AWS CLI v2 as a standalone binary (no Python dependency), and add set -euo pipefail to bm_files.sh so download failures are caught immediately instead of silently producing missing files. 
--- .github/workflows/benchmark-runner.yml | 8 +++++++- tests/benchmark/bm_files.sh | 4 +++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/benchmark-runner.yml b/.github/workflows/benchmark-runner.yml index bc85bd90c..8ec79b39d 100644 --- a/.github/workflows/benchmark-runner.yml +++ b/.github/workflows/benchmark-runner.yml @@ -61,9 +61,15 @@ jobs: - name: Install benchmark dependencies run: | sudo .install/install_script.sh - sudo apt install python3-pip awscli -y + sudo apt install python3-pip unzip -y pip3 install --upgrade pip PyYAML setuptools redisbench-admin pip3 install -r requirements.txt + - name: Install AWS CLI v2 + run: | + curl -fsSL "https://awscli.amazonaws.com/awscli-exe-linux-$(uname -m).zip" -o /tmp/awscliv2.zip + unzip -qo /tmp/awscliv2.zip -d /tmp + sudo /tmp/aws/install --update + aws --version - name: Configure AWS credentials uses: aws-actions/configure-aws-credentials@v4 with: diff --git a/tests/benchmark/bm_files.sh b/tests/benchmark/bm_files.sh index 902f887ff..4212d0bc4 100755 --- a/tests/benchmark/bm_files.sh +++ b/tests/benchmark/bm_files.sh @@ -1,4 +1,6 @@ -BM_TYPE=$1 +set -euo pipefail + +BM_TYPE=${1:-} alg="hnsw" S3_BUCKET="dev.cto.redis"