From e878b01d13db707457ad78ede45e4ea9fd0273ab Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 23 Jan 2026 12:50:49 -0800 Subject: [PATCH 1/6] editing initialize.sh --- .../SuperBenchmark/initialize.sh | 23 ++++++-- .../profiles/PERF-GPU-SUPERBENCH.json | 35 ------------ .../profiles/SETUP-NVIDIA-A100.json | 54 +++++++++++++++++++ 3 files changed, 74 insertions(+), 38 deletions(-) create mode 100644 src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh index cfd7e68560..ca8a053ed8 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh +++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh @@ -1,10 +1,27 @@ # Ansible will use sudo which needs explicit password input. This command removes that step. echo '$1 ALL=(ALL) NOPASSWD:ALL' | (sudo EDITOR='tee -a' visudo) -# sb binary might be in this path. This command adds this path to the PATH variable. 
-export PATH=$PATH:/home/$1/.local/bin +# Remove any existing system-installed Ansible to avoid version conflicts +# The old Ansible 2.10 doesn't support modern collections required by SuperBench +sudo apt remove -y ansible || true +sudo pip3 uninstall -y ansible ansible-base ansible-core || true +# Install ansible-core compatible with Python 3.8 (Ubuntu 20.04) +# ansible-core 2.12-2.13 is the highest version compatible with Python 3.8 +python3 -m pip install --user "ansible-core>=2.12,<2.14" +# Ensure the pip user-installed ansible is in PATH and takes precedence +export PATH=/home/$1/.local/bin:$PATH +# Configure Docker to use the data disk at /mnt to avoid filling up root filesystem +sudo mkdir -p /mnt/docker +sudo systemctl stop docker || true +# Backup existing docker data if it exists +if [ -d "/var/lib/docker" ]; then + sudo rsync -aP /var/lib/docker/ /mnt/docker/ || true +fi +# Configure Docker daemon to use new data directory +echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json +sudo systemctl start docker # Command to install sb dependencies. python3 -m pip install . -# Command to build sb. +# Command to build sb - this will install Ansible collections make postinstall # This command initiates /dev/nvidiactl and /dev/nvidia-uvm directories, which sb checks before running. 
sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json index 42ce2f08dd..ead2e4e51d 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json @@ -25,40 +25,5 @@ "ContainerVersion": "superbench/superbench:v0.9.0-cuda12.1" } } - ], - "Dependencies": [ - { - "Type": "NvidiaCudaInstallation", - "Parameters": { - "Scenario": "InstallNvidiaCuda", - "LinuxCudaVersion": "$.Parameters.LinuxCudaVersion", - "LinuxDriverVersion": "$.Parameters.LinuxDriverVersion", - "Username": "$.Parameters.Username", - "LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile" - } - }, - { - "Type": "DockerInstallation", - "Parameters": { - "Scenario": "InstallDocker" - } - }, - { - "Type": "NvidiaContainerToolkitInstallation", - "Parameters": { - "Scenario": "InstallNvidiaContainerToolkit" - } - }, - { - "Type": "LinuxPackageInstallation", - "Parameters": { - "Scenario": "InstallLinuxPackages", - "Packages": "sshpass,python3-pip", - "Packages-Apt": "nvidia-common", - "Packages-Dnf": "nvidia-driver", - "Packages-Yum": "nvidia-driver", - "Packages-Zypper": "" - } - } ] } diff --git a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json new file mode 100644 index 0000000000..862c4f3c9c --- /dev/null +++ b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json @@ -0,0 +1,54 @@ +{ + "Description": "AMD GPU Driver Installation Dependency", + "Metadata": { + "RecommendedMinimumExecutionTime": "00:10:00", + "SupportedPlatforms": "win-x64, linux-x64", + "SupportedOperatingSystems": "Windows, Linux", + "SupportedGpuModelOnWindows": "mi25,v620", + "SupportedLinuxGpuModel": "all", + "SupportedLinuxDistros": 
"Ubuntu", + "SpecialRequirements": "This is AMD GPU Driver dependency. It can only be installed on the system having an AMD GPU card/chip." + }, + "Parameters": { + "ConfigurationFile": "default.yaml", + "Username": "", + "LinuxCudaVersion": "12.0", + "LinuxDriverVersion": "525", + "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" + }, + "Dependencies": [ + { + "Type": "DockerInstallation", + "Parameters": { + "Scenario": "InstallDocker" + } + }, + { + "Type": "NvidiaCudaInstallation", + "Parameters": { + "Scenario": "InstallNvidiaCuda", + "LinuxCudaVersion": "$.Parameters.LinuxCudaVersion", + "LinuxDriverVersion": "$.Parameters.LinuxDriverVersion", + "Username": "$.Parameters.Username", + "LinuxLocalRunFile": "$.Parameters.LinuxLocalRunFile" + } + }, + { + "Type": "NvidiaContainerToolkitInstallation", + "Parameters": { + "Scenario": "InstallNvidiaContainerToolkit" + } + }, + { + "Type": "LinuxPackageInstallation", + "Parameters": { + "Scenario": "InstallLinuxPackages", + "Packages": "sshpass,python3-pip", + "Packages-Apt": "nvidia-common", + "Packages-Dnf": "nvidia-driver", + "Packages-Yum": "nvidia-driver", + "Packages-Zypper": "" + } + } + ] +} \ No newline at end of file From 476200afda717b27d81db53398a416abe7a257b6 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 23 Jan 2026 12:56:32 -0800 Subject: [PATCH 2/6] NVIDIA in metadata --- .../SuperBenchmark/initialize.sh | 11 ++--------- .../profiles/SETUP-NVIDIA-A100.json | 13 ++++++------- 2 files changed, 8 insertions(+), 16 deletions(-) diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh index ca8a053ed8..0cc2d3cb50 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh +++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh @@ -1,27 +1,20 @@ # Ansible will use sudo which needs explicit 
password input. This command removes that step. echo '$1 ALL=(ALL) NOPASSWD:ALL' | (sudo EDITOR='tee -a' visudo) # Remove any existing system-installed Ansible to avoid version conflicts -# The old Ansible 2.10 doesn't support modern collections required by SuperBench sudo apt remove -y ansible || true sudo pip3 uninstall -y ansible ansible-base ansible-core || true # Install ansible-core compatible with Python 3.8 (Ubuntu 20.04) -# ansible-core 2.12-2.13 is the highest version compatible with Python 3.8 python3 -m pip install --user "ansible-core>=2.12,<2.14" # Ensure the pip user-installed ansible is in PATH and takes precedence export PATH=/home/$1/.local/bin:$PATH -# Configure Docker to use the data disk at /mnt to avoid filling up root filesystem +# Configure Docker to use the data disk at /mnt sudo mkdir -p /mnt/docker sudo systemctl stop docker || true -# Backup existing docker data if it exists -if [ -d "/var/lib/docker" ]; then - sudo rsync -aP /var/lib/docker/ /mnt/docker/ || true -fi -# Configure Docker daemon to use new data directory echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json sudo systemctl start docker # Command to install sb dependencies. python3 -m pip install . -# Command to build sb - this will install Ansible collections +# Command to build sb make postinstall # This command initiates /dev/nvidiactl and /dev/nvidia-uvm directories, which sb checks before running. 
sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json index 862c4f3c9c..2f42a941e1 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json @@ -1,13 +1,12 @@ { - "Description": "AMD GPU Driver Installation Dependency", + "Description": "NVIDIA A100 GPU Driver Installation Dependency", "Metadata": { "RecommendedMinimumExecutionTime": "00:10:00", - "SupportedPlatforms": "win-x64, linux-x64", - "SupportedOperatingSystems": "Windows, Linux", - "SupportedGpuModelOnWindows": "mi25,v620", - "SupportedLinuxGpuModel": "all", - "SupportedLinuxDistros": "Ubuntu", - "SpecialRequirements": "This is AMD GPU Driver dependency. It can only be installed on the system having an AMD GPU card/chip." + "SupportedPlatforms": "linux-x64", + "SupportedOperatingSystems": "Linux", + "SupportedLinuxGpuModel": "NVIDIA A100", + "SupportedLinuxDistros": "Ubuntu20", + "SpecialRequirements": "This is an NVIDIA GPU Driver dependency. It can only be installed on the system having an NVIDIA A100 GPU card/chip." 
}, "Parameters": { "ConfigurationFile": "default.yaml", From 8b94434481519a379a1b5a20dd2de3db444f1eb3 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 23 Jan 2026 13:52:03 -0800 Subject: [PATCH 3/6] minor --- .../VirtualClient.Actions/SuperBenchmark/initialize.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh index 0cc2d3cb50..2e814cad01 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh +++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh @@ -14,7 +14,7 @@ echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json sudo systemctl start docker # Command to install sb dependencies. python3 -m pip install . -# Command to build sb +# Command to build sb. make postinstall # This command initiates /dev/nvidiactl and /dev/nvidia-uvm directories, which sb checks before running. sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi \ No newline at end of file From 58d380d06b36ec997b0ad3d6a3c011b751c4e3cc Mon Sep 17 00:00:00 2001 From: saibulusu Date: Fri, 23 Jan 2026 15:45:21 -0800 Subject: [PATCH 4/6] Updating test case to use setup nvidia a100 profile. 
--- .../SuperBenchmarkProfileTests.cs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs index 92cabfc783..96ea2a4a99 100644 --- a/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs +++ b/src/VirtualClient/VirtualClient.Actions.FunctionalTests/SuperBenchmarkProfileTests.cs @@ -39,7 +39,7 @@ public void SuperBenchmarkWorkloadProfileParametersAreInlinedCorrectly(string pr } [Test] - [TestCase("PERF-GPU-SUPERBENCH.json")] + [TestCase("SETUP-NVIDIA-A100.json")] public async Task SuperBenchmarkWorkloadProfileExecutesTheExpectedDependenciesAndReboot(string profile) { List expectedCommands = new List @@ -74,7 +74,7 @@ public async Task SuperBenchmarkWorkloadProfileExecutesTheExpectedDependenciesAn } [Test] - [TestCase("PERF-GPU-SUPERBENCH.json")] + [TestCase("SETUP-NVIDIA-A100.json")] public async Task SuperBenchmarkWorkloadProfileExecutesTheExpectedDependenciesAndWorkloadsAfterReboot(string profile) { IEnumerable expectedCommands = this.GetProfileExpectedCommands(PlatformID.Unix); @@ -123,12 +123,7 @@ private IEnumerable GetProfileExpectedCommands(PlatformID platform) $"sudo bash -c \"{setupCommand}\"", $"sudo apt-get update", $"sudo apt-get install -y nvidia-container-toolkit", - $"sudo systemctl restart docker", - $"sudo chmod -R 2777 \"/home/user/tools/VirtualClient\"", - $"sudo git clone -b v0.9.0 https://github.com/microsoft/superbenchmark", - $"sudo bash initialize.sh", - $"sb deploy --host-list localhost -i superbench/superbench:v0.9.0-cuda12.1", - $"sb run --host-list localhost -c default.yaml" + $"sudo systemctl restart docker" }; } } From 1d9218e2121d9d78f34e219768dd71fe90df650d Mon Sep 17 00:00:00 2001 From: saibulusu Date: Mon, 26 Jan 2026 14:51:37 -0800 Subject: [PATCH 5/6] optional step for superbench --- 
.../SuperBenchmark/initialize.sh | 16 ++++++++++++---- .../profiles/PERF-GPU-SUPERBENCH.json | 15 ++++++++++++--- .../profiles/SETUP-NVIDIA-A100.json | 6 ------ 3 files changed, 24 insertions(+), 13 deletions(-) diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh index 2e814cad01..f3bdb36323 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh +++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh @@ -1,20 +1,28 @@ # Ansible will use sudo which needs explicit password input. This command removes that step. echo '$1 ALL=(ALL) NOPASSWD:ALL' | (sudo EDITOR='tee -a' visudo) + # Remove any existing system-installed Ansible to avoid version conflicts sudo apt remove -y ansible || true sudo pip3 uninstall -y ansible ansible-base ansible-core || true + # Install ansible-core compatible with Python 3.8 (Ubuntu 20.04) python3 -m pip install --user "ansible-core>=2.12,<2.14" + # Ensure the pip user-installed ansible is in PATH and takes precedence export PATH=/home/$1/.local/bin:$PATH + # Configure Docker to use the data disk at /mnt -sudo mkdir -p /mnt/docker -sudo systemctl stop docker || true -echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json -sudo systemctl start docker +# This step is optional and can be commented out if not needed. +# sudo mkdir -p /mnt/docker +# sudo systemctl stop docker || true +# echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json +# sudo systemctl start docker + # Command to install sb dependencies. python3 -m pip install . + # Command to build sb. make postinstall + # This command initiates /dev/nvidiactl and /dev/nvidia-uvm directories, which sb checks before running. 
sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi \ No newline at end of file diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json index ead2e4e51d..682348443e 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json @@ -5,14 +5,14 @@ "RecommendedMinimumExecutionTime": "08:00:00", "SupportedPlatforms": "linux-x64", "SupportedOperatingSystems": "Ubuntu", - "SpecialRequirements": "This is an Nvidia GPU-specialized workload. It depends upon the system having an Nvidia GPU card/chip." + "SpecialRequirements": "This is an Nvidia GPU-specialized workload. It depends upon the system having an Nvidia GPU card/chip. Use a SETUP profile for GPU setup." }, "Parameters": { "ConfigurationFile": "default.yaml", "Username": "", + "DockerContainerPath": null, "LinuxCudaVersion": "12.0", - "LinuxDriverVersion": "525", - "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" + "LinuxDriverVersion": "525" }, "Actions": [ { @@ -21,9 +21,18 @@ "Scenario": "Models", "Username": "$.Parameters.Username", "Version": "0.9.0", + "DockerContainerPath": "$.Parameters.DockerContainerPath", "ConfigurationFile": "$.Parameters.ConfigurationFile", "ContainerVersion": "superbench/superbench:v0.9.0-cuda12.1" } } + ], + "Dependencies": [ + { + "Type": "DockerInstallation", + "Parameters": { + "Scenario": "InstallDocker" + } + } ] } diff --git a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json index 2f42a941e1..f62b524577 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json @@ -16,12 +16,6 @@ "LinuxLocalRunFile": 
"https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" }, "Dependencies": [ - { - "Type": "DockerInstallation", - "Parameters": { - "Scenario": "InstallDocker" - } - }, { "Type": "NvidiaCudaInstallation", "Parameters": { From 997b4808438220578c583c9647e3ace2cd97c868 Mon Sep 17 00:00:00 2001 From: saibulusu Date: Mon, 26 Jan 2026 18:25:36 -0800 Subject: [PATCH 6/6] Using a conditional in initialize.sh to allow for the user to set the docker container path. --- .../SuperBenchmarkExecutorTests.cs | 54 +++++++++++++++++++ .../SuperBenchmark/SuperBenchmarkExecutor.cs | 23 ++++++-- .../SuperBenchmark/initialize.sh | 33 +++++++++--- .../profiles/PERF-GPU-SUPERBENCH.json | 12 +---- .../profiles/SETUP-NVIDIA-A100.json | 6 +++ 5 files changed, 108 insertions(+), 20 deletions(-) diff --git a/src/VirtualClient/VirtualClient.Actions.UnitTests/SuperBenchmark/SuperBenchmarkExecutorTests.cs b/src/VirtualClient/VirtualClient.Actions.UnitTests/SuperBenchmark/SuperBenchmarkExecutorTests.cs index ed727f84f8..92523d692c 100644 --- a/src/VirtualClient/VirtualClient.Actions.UnitTests/SuperBenchmark/SuperBenchmarkExecutorTests.cs +++ b/src/VirtualClient/VirtualClient.Actions.UnitTests/SuperBenchmark/SuperBenchmarkExecutorTests.cs @@ -260,6 +260,60 @@ public async Task SuperBenchmarkExecutorExecutesTheCorrectCommandsWithInstallati Assert.IsTrue(processCount == 5); } + [Test] + public async Task SuperBenchmarkExecutorExecutesTheCorrectCommandsWithInstallationAndDockerContainerPath() + { + this.mockFixture.Parameters = new Dictionary() + { + { nameof(SuperBenchmarkExecutor.Version), "0.0.1" }, + { nameof(SuperBenchmarkExecutor.ContainerVersion), "testContainer" }, + { nameof(SuperBenchmarkExecutor.ConfigurationFile), "Test.yaml" }, + { nameof(SuperBenchmarkExecutor.Username), "testuser" }, + { nameof(SuperBenchmarkExecutor.DockerContainerPath), "/docker/path" } + }; + + ProcessStartInfo expectedInfo = new ProcessStartInfo(); + List 
expectedCommands = new List + { + $"sudo chmod -R 2777 \"{this.mockFixture.PlatformSpecifics.CurrentDirectory}\"", + $"sudo git clone -b v0.0.1 https://github.com/microsoft/superbenchmark", + $"sudo bash initialize.sh testuser /docker/path", + $"sb deploy --host-list localhost -i testContainer", + $"sb run --host-list localhost -c Test.yaml" + }; + + int processCount = 0; + this.mockFixture.ProcessManager.OnCreateProcess = (exe, arguments, workingDir) => + { + Assert.AreEqual(expectedCommands.ElementAt(processCount), $"{exe} {arguments}"); + processCount++; + + return new InMemoryProcess + { + StartInfo = new ProcessStartInfo + { + FileName = exe, + Arguments = arguments + }, + ExitCode = 0, + OnStart = () => true, + OnHasExited = () => true + }; + }; + + this.mockFixture.StateManager.OnGetState().ReturnsAsync(JObject.FromObject(new SuperBenchmarkExecutor.SuperBenchmarkState() + { + SuperBenchmarkInitialized = false + })); + + using (TestSuperBenchmarkExecutor superBenchmarkExecutor = new TestSuperBenchmarkExecutor(this.mockFixture.Dependencies, this.mockFixture.Parameters)) + { + await superBenchmarkExecutor.ExecuteAsync(CancellationToken.None).ConfigureAwait(false); + } + + Assert.IsTrue(processCount == expectedCommands.Count); + } + [Test] public async Task SuperBenchmarkExecutorSkipsInitializationOfTheWorkloadForExecutionAfterTheFirstRun() { diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/SuperBenchmarkExecutor.cs b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/SuperBenchmarkExecutor.cs index afd8c2ea60..1010966216 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/SuperBenchmarkExecutor.cs +++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/SuperBenchmarkExecutor.cs @@ -7,14 +7,12 @@ namespace VirtualClient.Actions using System.Collections.Generic; using System.IO; using System.IO.Abstractions; - using System.Runtime.InteropServices; using System.Threading; using System.Threading.Tasks; using 
Microsoft.CodeAnalysis; using Microsoft.Extensions.DependencyInjection; using VirtualClient.Common; using VirtualClient.Common.Extensions; - using VirtualClient.Common.Platform; using VirtualClient.Common.Telemetry; using VirtualClient.Contracts; using VirtualClient.Contracts.Metadata; @@ -121,6 +119,18 @@ public string OutputDirectory } } + /// + /// Path to hold all docker container data. + /// + public string DockerContainerPath + { + get + { + this.Parameters.TryGetValue(nameof(SuperBenchmarkExecutor.DockerContainerPath), out IConvertible dockerContainerPath); + return dockerContainerPath?.ToString(); + } + } + /// /// Executes the SuperBenchmark workload. /// @@ -173,7 +183,14 @@ protected override async Task InitializeAsync(EventContext telemetryContext, Can true); } - await this.ExecuteSbCommandAsync("bash", $"initialize.sh {this.Username}", this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, true); + string initializeArgs = $"initialize.sh {this.Username}"; + + if (!string.IsNullOrEmpty(this.DockerContainerPath)) + { + initializeArgs = $"initialize.sh {this.Username} {this.DockerContainerPath}"; + } + + await this.ExecuteSbCommandAsync("bash", initializeArgs, this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, true); await this.ExecuteSbCommandAsync("sb", $"deploy --host-list localhost -i {this.ContainerVersion}", this.SuperBenchmarkDirectory, telemetryContext, cancellationToken, false); state.SuperBenchmarkInitialized = true; diff --git a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh index f3bdb36323..4e2d9262b1 100644 --- a/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh +++ b/src/VirtualClient/VirtualClient.Actions/SuperBenchmark/initialize.sh @@ -11,12 +11,33 @@ python3 -m pip install --user "ansible-core>=2.12,<2.14" # Ensure the pip user-installed ansible is in PATH and takes precedence export 
PATH=/home/$1/.local/bin:$PATH -# Configure Docker to use the data disk at /mnt -# This step is optional and can be commented out if not needed. -# sudo mkdir -p /mnt/docker -# sudo systemctl stop docker || true -# echo '{"data-root": "/mnt/docker"}' | sudo tee /etc/docker/daemon.json -# sudo systemctl start docker +# Configure Docker to use the data disk at path, unless not provided +if [[ -n "${2:-}" ]]; then + DOCKER_DATA_ROOT="$2" + echo "Configuring Docker data-root at ${DOCKER_DATA_ROOT} ..." + + # Create target path and stop Docker cleanly + sudo mkdir -p "${DOCKER_DATA_ROOT}" + sudo systemctl stop docker || true + + # Write/merge daemon.json to set data-root + # If jq is present and an existing file exists, merge to preserve other keys; otherwise overwrite minimal file. + if command -v jq >/dev/null 2>&1 && [[ -f /etc/docker/daemon.json ]]; then + TMP_JSON=$(mktemp) + sudo jq --arg dr "${DOCKER_DATA_ROOT}" '. + { "data-root": $dr }' /etc/docker/daemon.json | sudo tee "${TMP_JSON}" >/dev/null + sudo mv "${TMP_JSON}" /etc/docker/daemon.json + else + echo "{\"data-root\": \"${DOCKER_DATA_ROOT}\"}" | sudo tee /etc/docker/daemon.json >/dev/null + fi + + # Start Docker back up + sudo systemctl start docker + + # (Optional) Warm-up/check NVIDIA devices as you had in the commented section + # sudo docker run --rm --gpus all nvidia/cuda:11.0.3-base nvidia-smi +else + echo "No second argument provided; skipping Docker data-root configuration." +fi # Command to install sb dependencies. python3 -m pip install . 
diff --git a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json index 682348443e..349cef0664 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/PERF-GPU-SUPERBENCH.json @@ -10,9 +10,7 @@ "Parameters": { "ConfigurationFile": "default.yaml", "Username": "", - "DockerContainerPath": null, - "LinuxCudaVersion": "12.0", - "LinuxDriverVersion": "525" + "DockerContainerPath": null }, "Actions": [ { @@ -26,13 +24,5 @@ "ContainerVersion": "superbench/superbench:v0.9.0-cuda12.1" } } - ], - "Dependencies": [ - { - "Type": "DockerInstallation", - "Parameters": { - "Scenario": "InstallDocker" - } - } ] } diff --git a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json index f62b524577..2f42a941e1 100644 --- a/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json +++ b/src/VirtualClient/VirtualClient.Main/profiles/SETUP-NVIDIA-A100.json @@ -16,6 +16,12 @@ "LinuxLocalRunFile": "https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda_12.0.0_525.60.13_linux.run" }, "Dependencies": [ + { + "Type": "DockerInstallation", + "Parameters": { + "Scenario": "InstallDocker" + } + }, { "Type": "NvidiaCudaInstallation", "Parameters": {