From 94d1d75131783f704cf6cf10d21d78dfaaa64bf3 Mon Sep 17 00:00:00 2001 From: Eric Lund <77127214+erl-hpe@users.noreply.github.com> Date: Tue, 28 Apr 2026 06:41:19 -0500 Subject: [PATCH 1/2] Add install-openchami as part of openchami-release Signed-off-by: Eric Lund <77127214+erl-hpe@users.noreply.github.com> --- install-openchami/.gitignore | 5 + install-openchami/.pycodestyle | 4 + install-openchami/.pylintrc | 327 +++++++ install-openchami/README.md | 257 +++++ install-openchami/README.md.license | 2 + .../install_openchami/__init__.py | 21 + .../install_openchami/__main__.py | 11 + install-openchami/install_openchami/config.py | 907 ++++++++++++++++++ .../install_openchami/config/README.md | 27 + .../config/README.md.license | 2 + .../install_openchami/config/config.yaml | 421 ++++++++ .../config/config.yaml.license | 2 + install-openchami/install_openchami/error.py | 16 + .../install_openchami/install_openchami.py | 210 ++++ .../install_openchami/installer.py | 121 +++ .../install_openchami/templates/Corefile | 45 + .../templates/Corefile.license | 2 + .../templates/OpenCHAMI-Install.sh | 420 ++++++++ .../install_openchami/templates/README.md | 21 + .../install_openchami/templates/bmc_info.json | 11 + .../templates/bmc_info.json.license | 2 + .../templates/build-image.sh | 109 +++ .../templates/containers.conf | 2 + .../templates/containers.conf.license | 2 + .../install_openchami/templates/coredhcp.yaml | 22 + .../templates/coredhcp.yaml.license | 2 + .../templates/minio.container | 27 + .../templates/minio.container.license | 2 + .../install_openchami/templates/nodes.yaml | 33 + .../templates/nodes.yaml.license | 2 + .../templates/openchami-net.xml | 7 + .../templates/openchami-net.xml.license | 2 + .../install_openchami/templates/prep_setup.sh | 158 +++ .../templates/prepare_host.sh | 71 ++ .../templates/registry.container | 18 + .../templates/registry.container.license | 2 + .../templates/s3-public-read-boot-images.json | 11 + 
.../s3-public-read-boot-images.json.license | 2 + .../templates/s3-public-read-efi.json | 11 + .../templates/s3-public-read-efi.json.license | 2 + .../install_openchami/templates/s3cfg | 13 + .../install_openchami/templates/s3cfg.license | 2 + install-openchami/noxfile.py | 87 ++ install-openchami/pyproject.toml | 106 ++ openchami.spec | 35 +- 45 files changed, 3560 insertions(+), 2 deletions(-) create mode 100644 install-openchami/.gitignore create mode 100644 install-openchami/.pycodestyle create mode 100644 install-openchami/.pylintrc create mode 100644 install-openchami/README.md create mode 100644 install-openchami/README.md.license create mode 100644 install-openchami/install_openchami/__init__.py create mode 100644 install-openchami/install_openchami/__main__.py create mode 100644 install-openchami/install_openchami/config.py create mode 100644 install-openchami/install_openchami/config/README.md create mode 100644 install-openchami/install_openchami/config/README.md.license create mode 100644 install-openchami/install_openchami/config/config.yaml create mode 100644 install-openchami/install_openchami/config/config.yaml.license create mode 100644 install-openchami/install_openchami/error.py create mode 100755 install-openchami/install_openchami/install_openchami.py create mode 100644 install-openchami/install_openchami/installer.py create mode 100644 install-openchami/install_openchami/templates/Corefile create mode 100644 install-openchami/install_openchami/templates/Corefile.license create mode 100644 install-openchami/install_openchami/templates/OpenCHAMI-Install.sh create mode 100644 install-openchami/install_openchami/templates/README.md create mode 100644 install-openchami/install_openchami/templates/bmc_info.json create mode 100644 install-openchami/install_openchami/templates/bmc_info.json.license create mode 100644 install-openchami/install_openchami/templates/build-image.sh create mode 100644 
install-openchami/install_openchami/templates/containers.conf create mode 100644 install-openchami/install_openchami/templates/containers.conf.license create mode 100644 install-openchami/install_openchami/templates/coredhcp.yaml create mode 100644 install-openchami/install_openchami/templates/coredhcp.yaml.license create mode 100644 install-openchami/install_openchami/templates/minio.container create mode 100644 install-openchami/install_openchami/templates/minio.container.license create mode 100644 install-openchami/install_openchami/templates/nodes.yaml create mode 100644 install-openchami/install_openchami/templates/nodes.yaml.license create mode 100644 install-openchami/install_openchami/templates/openchami-net.xml create mode 100644 install-openchami/install_openchami/templates/openchami-net.xml.license create mode 100644 install-openchami/install_openchami/templates/prep_setup.sh create mode 100755 install-openchami/install_openchami/templates/prepare_host.sh create mode 100644 install-openchami/install_openchami/templates/registry.container create mode 100644 install-openchami/install_openchami/templates/registry.container.license create mode 100644 install-openchami/install_openchami/templates/s3-public-read-boot-images.json create mode 100644 install-openchami/install_openchami/templates/s3-public-read-boot-images.json.license create mode 100644 install-openchami/install_openchami/templates/s3-public-read-efi.json create mode 100644 install-openchami/install_openchami/templates/s3-public-read-efi.json.license create mode 100644 install-openchami/install_openchami/templates/s3cfg create mode 100644 install-openchami/install_openchami/templates/s3cfg.license create mode 100644 install-openchami/noxfile.py create mode 100644 install-openchami/pyproject.toml diff --git a/install-openchami/.gitignore b/install-openchami/.gitignore new file mode 100644 index 0000000..1b7f122 --- /dev/null +++ b/install-openchami/.gitignore @@ -0,0 +1,5 @@ +*/__pycache__ 
+__pycache__ +dist +*.egg-info +build diff --git a/install-openchami/.pycodestyle b/install-openchami/.pycodestyle new file mode 100644 index 0000000..158cd09 --- /dev/null +++ b/install-openchami/.pycodestyle @@ -0,0 +1,4 @@ +[pycodestyle] +count=True +max-line-length=120 +statistics=True diff --git a/install-openchami/.pylintrc b/install-openchami/.pylintrc new file mode 100644 index 0000000..bd6e71a --- /dev/null +++ b/install-openchami/.pylintrc @@ -0,0 +1,327 @@ +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=1 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist= + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time. See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). 
You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" + +disable= + too-few-public-methods, + consider-using-f-string, + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". +# +# This appears to be obsolete... +# +# files-output=no + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. 
See doc for all details +#msg-template= + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=10 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=yes + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=_$|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=120 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# Maximum number of lines in a module +max-module-lines=2000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 
+expected-line-ending-format= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_,id + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct constant names +const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__))$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=__.*__ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=-1 + +# List of decorators that define properties, such as abc.abstractproperty. +property-classes=abc.abstractproperty + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. 
A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis +ignored-modules= + +# List of classes names for which member attributes should not be checked +# (useful for classes with attributes dynamically set). +ignored-classes=SQLObject, optparse.Values, thread._local, _thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members=REQUEST,acl_users,aq_parent + +# List of decorators that create context managers from functions, such as +# contextlib.contextmanager. +contextmanager-decorators=contextlib.contextmanager + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=10 +max-positional-arguments=10 + +# Argument names that match this expression will be ignored. 
Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=25 + +# Maximum number of return / yield for function / method body +max-returns=11 + +# Maximum number of branch for function / method body +max-branches=26 + +# Maximum number of statements in function / method body +max-statements=100 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=11 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=25 + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. 
internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= diff --git a/install-openchami/README.md b/install-openchami/README.md new file mode 100644 index 0000000..5df39fc --- /dev/null +++ b/install-openchami/README.md @@ -0,0 +1,257 @@ +# OpenCHAMI Installer + +The OpenCHAMI Installer consists of a Python wrapper and a base +configuration designed to deploy OpenCHAMI onto a host node using the +quadlet implementation of OpenCHAMI described in the [OpenCHAMI +Tutorial](https://openchami.org/docs/tutorial/) using a "`host` mode" +configuration that creates and boots a single virtual managed +(compute) node co-resident on the OpenCHAMI headnode host. The +installer can be used either to deploy OpenCHAMI and the virtual +managed node on physical hardware or on a virtual machine, as long as +the virtual machine supports nested virtualization and has sufficient +resources to run both the OpenCHAMI headnode software and the virtual +managed node. + +The OpenCHAMI Installer can also run in a "`cluster` mode" in which +the OpenCHAMI head node and all managed nodes are connected to a +physical or virtual network cluster where the managed nodes are not +co-resident on the host node. In this case, the assumption is that the +managed nodes can be powered on, powered off, and reset using RedFish +calls to RedFish instances running on Base Board Management +Controllers (BMCs) accessible across a network from the headnode. + + __NOTE: The 'cluster' mode is still under development and not quite + ready for use.__ + +## System Requirements + +At present, the OpenCHAMI Installer uses the 'dnf' package manager, +which is an RPM based package manager primarily used on RedHat +systems. 
The installer expects to run on a Rocky Linux or similar +distribution of Linux. + +The installer can currently successfully install OpenCHAMI and +bring up virtual compute nodes on x86-64 (amd64) and ARM64 (aarch64) +hosts. The default configuration assumes x86-64. This README provides +an example configuration overlay to enable ARM64 operation, as well +as a broader description of configuration overlays. + +For `host` mode operation, the OpenCHAMI Installer recommends a +minimum of 4GB of memory, 4 CPU cores and 40GB of free disk space on +the headnode. If the headnode is a VM it must support nested +virtualization for the installer to work in `host` mode. On MacOS, +currently, nested virtualization is only supported by VMs running on +an M3 or better system under Apple Virtualization not those running +under KVM. + +The installer requires a minimum version 3.9 of Python installed on +the headnode. + +The user running the OpenCHAMI Installer must either be `root` or have +`sudo` access on the headnode. + +The host system must have `openchami` (this package) installed on it +either by means of downloading the release RPM and installing it or by +means of obtaining the source code, building the RPM and installing +it. To install from the release RPM, do the following: + +``` +# Identify the latest release RPM +RELEASE_VERSION="latest" +API_URL="https://api.github.com/repos/openchami/release/releases/${RELEASE_VERSION}" +release_json=$(curl -s "$API_URL") +rpm_url=$(echo "$release_json" | jq -r '.assets[] | select(.name | endswith(".rpm")) | .browser_download_url' | head -n 1) +rpm_name=$(echo "$release_json" | jq -r '.assets[] | select(.name | endswith(".rpm")) | .name' | head -n 1) + +# Download the RPM +curl -L -o "$rpm_name" "$rpm_url" + +# Install the RPM +sudo dnf install -y ./"$rpm_name" +``` + +Installation from source is described in the +[OpenCHAMI Release README](https://github.com/OpenCHAMI/release/blob/main/README.md#openchami-releases). 
+ + +## Installing and Running the Installer + +The installer is installed on the OpenCHAMI headnode when the +OpenCHAMI Release RPM is installed. This permits the installer to +track with versions of the OpenCHAMI Release repository. The steps +involved in installing and running the installer are as follows: + +1. Use the OpenCHAMI Installer to prepare the headnode for OpenCHAMI +installation: + +```shell +sudo install_openchami -p +``` + +2. Use the OpenCHAMI Installer to install OpenCHAMI and start a +virtual compute node: + +```shell +sudo install_openchami +``` + +__NOTE: If you are running on an arm64 (aarch64) host, you will need +to change the repository URLs in the default image builder +configuration. See the 'Configuration' section below to learn how to +do this with a configuration overlay.__ + +Assuming this completes successfully, you should be able to become the +deployment user (by default this is `rocky`) and SSH to your virtual +compute node: + +```shell +sudo su - rocky +ssh root@compute-001 +``` + +## Configuration + +The OpenCHAMI Installer contains a rich configuration that allows for +both future changes to OpenCHAMI and changes to the way OpenCHAMI is +installed on your headnode. This configuration, in turn, drives the +creation of configuration files and scripts used internally to install +OpenCHAMI. The base configuration drives an installation quite similar +to the OpenCHAMI tutorial with virtual managed (compute) nodes. This +base configuration can be modified at run time by providing the paths +to one or more YAML format configuration overlays on the command line. + +For example, to change the repository URLs in the image builders so you +can deploy this on an `arm64` (`aarch64`) host, all that is required is a +configuration overlay that overrides those URLs. 
Something like this: + +```yaml +images: + builders: + rocky-base-9: + data: + repos: + # Note: since 'repos' is a list, all of the elements need to be + # specified fully as the new list will completely replace + # the list in the default configuration. + - alias: 'Rocky_9_BaseOS' + # Change the 'url' field from x86_64 (in the default config) to + # 'aarch64' in your config to pull packages of the correct + # machine architecture. + url: 'https://dl.rockylinux.org/pub/rocky/9/BaseOS/aarch64/os/' + gpg: 'https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-9' + - alias: 'Rocky_9_AppStream' + # Change the 'url' field from x86_64 (in the default config) to + # 'aarch64' in your config to pull packages of the correct + # machine architecture. + url: 'https://dl.rockylinux.org/pub/rocky/9/AppStream/aarch64/os/' + gpg: 'https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-9' + compute-base-rocky9: + data: + # Note: since 'repos' is a list, all of the elements need to be + # specified fully as the new list will completely replace + # the list in the default configuration + repos: + - alias: 'Epel9' + # Change the 'url' field from x86_64 (in the default config) to + # 'aarch64' in your config to pull packages of the correct + # machine architecture. + url: 'https://dl.fedoraproject.org/pub/epel/9/Everything/aarch64/' + gpg: 'https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-9' +``` + +When creating a configuration overlay, it helps to know what the +configuration to be overlaid looks like, and what the configuration +looks like after applying the overlay. There are two options to the +installer that allow you to see the contents of the OpenCHAMI +Installer configuration. The first option dumps out the entire contents +of the base configuration file, complete with comments explaining the +pieces. 
This is a good place to start: + +```shell +python3 -m install_openchami -b +``` + +From there you can cut and paste the necessary pieces to create new +configuration overlays. + +The second option dumps out the final configuration after applying +your configuration overlay(s): + +```shell +python3 -m install_openchami -c arm64_config_overlay.yaml +``` + +This configuration may be in a different order from the base +configuration and is not commented, but it allows you to verify that +your configuration changes were applied the way you want them. + +To validate your OpenCHAMI Installer final configuration, use: + +```shell +python3 -m install_openchami -v arm64_config_overlay.yaml +``` + +This helps ensure that your configuration is correct and +consistent, and, where possible, required system elements are in place +to support the configuration you have created. + +Install your newly configured OpenCHAMI system by first preparing the +host node using: + +```shell +sudo install_openchami -p arm64_config_overlay.yaml +``` + +then installing the new configuration using: + +```shell +sudo install_openchami arm64_config_overlay.yaml +``` + +If all went well, you should have OpenCHAMI running on a 64-bit ARM host +with a co-located VM serving as compute node. + +```shell +sudo su - rocky +ssh root@compute-001 +``` + +The OpenCHAMI Installer has been designed to be able to be run +correctly multiple times on the same host with new configurations, so, +if you tried installing on an ARM system before reading this part, you +should be able to simply re-run with the new configuration overlay. + +This mechanism also allows you to change many other aspects of how your +OpenCHAMI system is installed. The ARM overlay was simply a convenient +way to demonstrate the mechanism and enable ARM installs. + +## Limitations + +The OpenCHAMI Installer currently has the following +limitations. 
Except where otherwise noted, solutions to these are +being investigated and implemented: + +- the 'cluster' mode is experimental +- there is no 'remove' operation in the OpenCHAMI Installer +- while the OpenCHAMI Installer tries to be re-usable, it is not + perfectly idempotent, so situations may arise where re-running the + installer fails leaving the host in an inconsistent state. There are + currently no known instances of this, but with arbitrary + configuration overlays, not every case can be tested. +- along similar lines, the OpenCHAMI installer is not protected against + being interrupted (for example CTRL-C) in the middle of a sensitive + operation that may leave the system in an inconsistent state -- it is + safest not to interrupt a running instance. +- the minimum configuration of the headnode can host up to two virtual + managed nodes. To host more than that, use a host with more CPU, + Memory and Disk resources. +- issues have been seen with the `openchami-external` podman network + disappearing after several runs of the installer. This appears to be + intractable without a reboot, but a reboot of the host node will clear + it. +- issues have been seen with the digests of boot images not matching (some + kind of corruption of the boot image either in or on the way to the + registry). We are currently stopping the registry, removing both the + disk resident registry storage and the podman volume and then restarting + what should be a fresh registry to alleviate this. It does not always + work. Rebooting the host node seems to clear this problem. 
diff --git a/install-openchami/README.md.license b/install-openchami/README.md.license new file mode 100644 index 0000000..3377279 --- /dev/null +++ b/install-openchami/README.md.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +SPDX-License-Identifier: MIT diff --git a/install-openchami/install_openchami/__init__.py b/install-openchami/install_openchami/__init__.py new file mode 100644 index 0000000..2b98830 --- /dev/null +++ b/install-openchami/install_openchami/__init__.py @@ -0,0 +1,21 @@ +# SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +# SPDX-License-Identifier: MIT +"""Module initialization + +""" + +from os import sep as separator +from os.path import ( + join as path_join, + dirname +) +BASE_CONFIG_PATH = path_join(dirname(__file__), "config", "config.yaml") +TEMPLATE_DIR_PATH = path_join(dirname(__file__), 'templates') + + +def template(filename): + """Translate a file name into a full path name to a file in the + templates directory. 
+ + """ + return path_join(TEMPLATE_DIR_PATH, filename) diff --git a/install-openchami/install_openchami/__main__.py b/install-openchami/install_openchami/__main__.py new file mode 100644 index 0000000..af237e0 --- /dev/null +++ b/install-openchami/install_openchami/__main__.py @@ -0,0 +1,11 @@ +# SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +# SPDX-License-Identifier: MIT +"""Module entrypoint for the OpenCHAMI Installer + +""" +import sys +from .install_openchami import entrypoint + +# Start here +if __name__ == "__main__": # pragma no unit test + sys.exit(entrypoint(sys.argv)) # pragma no unit test diff --git a/install-openchami/install_openchami/config.py b/install-openchami/install_openchami/config.py new file mode 100644 index 0000000..af0c21e --- /dev/null +++ b/install-openchami/install_openchami/config.py @@ -0,0 +1,907 @@ +# SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +# SPDX-License-Identifier: MIT + +# pylint: disable=consider-using-f-string +"""The home of the Config class that holds, validates and prepares the +configuration for OpenCHAMI installation. + +""" + + +import sys +from json import dumps as json_dumps +from tempfile import NamedTemporaryFile +from os.path import ( + sep as path_separator, + join as path_join, +) +from os import ( + makedirs, + chown, + chmod +) +import re +from grp import getgrnam +from pwd import getpwnam +from pathlib import Path +from random import randint + +from yaml import ( + SafeDumper, + YAMLError, + dump as yaml_dumps, + safe_load, +) +from passlib.pwd import genword as generate_password +from vtds_base import ( + merge_configs, + render_template_file, +) + +from . 
import ( + template, + BASE_CONFIG_PATH +) +from .error import ContextualError, ConfigError + + +# Create a custom representer for yaml SafeDumper to dump multiline strings +# using the '|' notation and multiline string output properly indented +def __representer_strings_multiline(dumper, data): + """String representer for yaml that dumps multiline strings using + pipe notation. + + """ + if '\n' in data: + return dumper.represent_scalar( + "tag:yaml.org,2002:str", data, style="|" + ) + return dumper.represent_scalar("tag:yaml.org,2002:str", data) + + +# Create a yaml SafeDumper class that uses '|' notation for multiline +# strings +class MultilineStringSafeDumper(SafeDumper): + """Safe Dumper Class for multiline strings so that when I register + my representer I don't corrupt the standard SafeDumper + + """ + + +# Register the multiline string representer with the custom safe dumper +MultilineStringSafeDumper.add_representer( + str, __representer_strings_multiline +) + + +class Config: + """The OpenCHAMI configuration class that holds, validates and + prepares the configuration for OpenCHAMI installation. + + """ + def __init__(self, options, config_overlays): + """Construct the installer instance using the config overlays + and options provided from the caller. 
+ + """ + self.config_overlays = [BASE_CONFIG_PATH] + config_overlays + self.options = options + self.config = None + + def load_config(self): + """Read in the configuration and attach it to this object + + """ + overlays = [] + for overlay_path in self.config_overlays: + try: + with open(overlay_path, 'r', encoding='UTF-8') as config_file: + overlays.append(safe_load(config_file)) + except OSError as err: + raise ContextualError( + "cannot open config_overlay file '%s' - %s" % ( + overlay_path, str(err) + ) + ) from err + except YAMLError as err: + raise ContextualError( + "error parsing config overlay file '%s' - %s" % ( + overlay_path, str(err) + ) + ) from err + self.config = overlays[0] + for overlay in overlays[1:]: + self.config = merge_configs(self.config, overlay) + + def config_by_path(self, config_path, **kwargs): + """Look up a configuration item using its dotted path + notation, for example, 'hosting_config.cluster_name', and + return what is found there. The caller can supply an explicit + keyword argument of 'default' which provides a value to return + if nothing is found at the final element of the path (all + preceding elements must still exist). If the 'default' value + is not provided, config_by_path raises a ConfigError + exception. + + """ + have_default = 'default' in kwargs + default = kwargs.get('default', None) + elements = config_path.split('.') + tmp_path = "" + found = self.config + for element in elements: + tmp_path += element + if not isinstance(found, dict) or element not in found: + if have_default and len(elements) == len(tmp_path.split('.')): + # We have reached the end of the path and the last + # item is missing. The caller said missing was okay, + # so just return the supplied default + return default + # Could not find this element and either no default was + # provided, or we have not reached the end of the path so + # it is a parent item that is missing. Raise a + # ConfigError. 
+ raise ConfigError( + "unable to resolve path '%s' in configuration" % tmp_path + ) + found = found[element] + tmp_path += '.' + return found + + def find_annotated_files(self, annotation): + """Find the list of manifest files with a specific annotation and + return the list of fully resolved target paths. + + """ + manifest_files = self.config_by_path('manifest.files') + # Get all the target paths + found = [ + manifest_file['target'] + for manifest_file in manifest_files.values() + if annotation in manifest_file.get('annotations', []) + ] + # Fix the ones that are not absolute + deploy_dir = self.config_by_path('manifest.deployment_directory') + found = [ + path_join(deploy_dir, path) if path[0] != path_separator else path + for path in found + ] + return found + + def dump_yaml(self, config_path=None): + """Dump the configuration found at 'config_path' to a YAML + string + + """ + data = self.config_by_path(config_path) if config_path else self.config + return yaml_dumps( + data, + Dumper=MultilineStringSafeDumper, + default_flow_style=False, sort_keys=False, indent=2 + ) + + def dump_json(self, config_path): + """Dump the configuration found at 'config_path' to a JSON + string + + """ + return json_dumps(self.config_by_path(config_path), indent=2) + + def __render_manifest_file(self, manifest_file): + """Render the file described in 'manifest_file'. + + """ + template_name = manifest_file.get('template_name', None) + target = manifest_file['target'] + if target[0] != path_separator: + # This is a relative pathname, prepend the configured deployment + # directory to it to make it absolute. + deploy_dir = self.config_by_path('manifest.deployment_directory') + target = path_join(deploy_dir, target) + # Create an empty 'target' file and set its ownership and access + # so that it is protected from the start. 
+ deploy_uid = self.config_by_path('manifest.deployment_user.uid') + deploy_gid = self.config_by_path('manifest.deployment_user.gid') + file_uid = manifest_file.get('uid', None) + file_gid = manifest_file.get('gid', None) + uid = file_uid if file_uid is not None else deploy_uid + gid = file_gid if file_gid is not None else deploy_gid + mode = int(manifest_file['mode'], base=8) + make_dir = manifest_file.get('mkdir', False) + if make_dir: + target_dir = str(Path(target).parent) + try: + makedirs(target_dir, mode=0o755, exist_ok=True) + chown(target_dir, uid, gid) + except OSError as err: + raise ContextualError( + "unable to make directory path '%s' - %s" % ( + target_dir, err + ) + ) from err + try: + with open(target, "w", encoding='UTF-8'): + # don't really need to do this with the file open, but we did + # need to create the file, so this makes a good thing to do in + # the 'with ...' block, why not? + chown(target, uid, gid) + chmod(target, mode) + except OSError as err: + raise ContextualError( + "unable to create manifest target file '%s' - %s" % ( + target, err + ) + ) from err + # Now we are ready to render the file safely + if template_name is None: + # What we are doing here is finding the configuration from + # which to write out the template file by the configuration + # path specified in the manifest item's generation parameters, + # hence the weird indirection. 
+ file_template_path = manifest_file['generation']['config_path'] + with NamedTemporaryFile(mode='w+', encoding='UTF-8') as tmp_file: + template_file = tmp_file.name + if manifest_file['generation']['type'] == 'yaml': + tmp_file.write(self.dump_yaml(file_template_path)) + else: + tmp_file.write(self.dump_json(file_template_path)) + tmp_file.flush() + tmp_file.seek(0) + render_template_file(template_file, self.config, target) + else: + render_template_file(template(template_name), self.config, target) + + def render_manifest(self, annotations=None): + """Use Jinja2 to render all of the files in a supplied manifest to + their specified destinations providing 'config' as the templating + data. If 'annotations' are provided only the files that have one + or more of the provided annotations are rendered. If 'annotations' + are not provided or None, all files are rendered. + + """ + manifest_files = self.config_by_path('manifest.files') + for manifest_file in manifest_files.values(): + if annotations: + # Annotations were specified, see if this file matches any + file_annotations = manifest_file.get('annotations', []) + matches = [ + file_annotation + for file_annotation in file_annotations + if file_annotation in annotations + ] + if not matches: + # No matching annotations, skip this file + continue + self.__render_manifest_file(manifest_file) + + def __prep_manifest_file(self, file_key): + """Prepare the pieces of the manifest file configuration that + need to be resolved at run time. 
+ + """ + # Stash an owning UID and GID in the file manifest if there is an owner + file_manifest = self.config_by_path('manifest.files.%s' % file_key) + owner = self.config_by_path( + 'manifest.files.%s.owner' % file_key, default=None + ) + if owner is not None: + file_manifest['uid'] = getpwnam(owner).pw_uid + group = self.config_by_path( + 'manifest.files.%s.group' % file_key, default=None + ) + if group is not None: + file_manifest['gid'] = getgrnam(group).gr_gid + + def __prep_manifest(self): + """Prepare the contents of the manifest portion of the + configuration + + """ + # Set up the user id and primary group id of the deployment user + deploy_user = self.config_by_path('manifest.deployment_user') + username = self.config_by_path('manifest.deployment_user.username') + prep_host = self.options.get('prep-host', False) + if not prep_host: + # Validation already checked, so the user exists + user_info = getpwnam(username) + deploy_user['uid'] = user_info.pw_uid + deploy_user['gid'] = user_info.pw_gid + deploy_dir = self.config_by_path('manifest.deployment_directory') + makedirs(deploy_dir, mode=0o755, exist_ok=True) + chown(deploy_dir, deploy_user['uid'], deploy_user['gid']) + annotations = ( + ['host-prep-entrypoint', 'host-prep-support'] if prep_host + else [] + ) + manifest_files = self.config_by_path('manifest.files') + for file_key, manifest_file in manifest_files.items(): + file_annotations = manifest_file.get('annotations', []) + matches = [ + file_annotation + for file_annotation in file_annotations + if file_annotation in annotations + ] + if annotations and not matches: + # No matching annotations, skip this file + continue + self.__prep_manifest_file(file_key) + + @staticmethod + def __random_mac(prefix=None): + """Generate a MAC address using a specified prefix specified + as a string containing colon separated hexadecimal octet + values for the length of the desired prefix. 
By default use + the KVM reserved, locally administered, unicast prefix + '52:54:00'. + + """ + prefix = prefix if prefix is not None else "52:54:00" + try: + prefix_octets = [ + int(octet, base=16) for octet in prefix.split(':') + ] + except Exception as err: + raise ContextualError( + "internal error: parsing MAC prefix '%s' failed - %s" % ( + prefix, str(err) + ) + ) from err + if len(prefix_octets) > 6: + raise ContextualError( + "internal error: MAC address prefix '%s' has too " + "many octets" % prefix + ) + mac_binary = prefix_octets + [ + randint(0x00, 0xff) for i in range(0, 6 - len(prefix_octets)) + ] + return ":".join(["%2.2x" % octet for octet in mac_binary]) + + def __prep_bmcs(self): + """Prepare the configuration of BMCs + + """ + # Run through the BMCs and generate redfish passwords for the + # ones that don't have one explicitly set. + bmcs = self.config_by_path('bmcs') + for bmc in bmcs.values(): + if bmc['network'].get('redfish_password', None) is None: + bmc['network']['redfish_password'] = generate_password( + length=20 + ) + if bmc['network'].get('mac', None) is None: + bmc['network']['mac'] = self.__random_mac() + + def __prep_hosting(self): + """Prepare the hosting configuration + + """ + # nothing to do, just return + + def __prep_nodes(self): + """Prepare the 'nodes' section of the config + + """ + # nothing to do, just return + + def __prep_images(self): + """Prepare the 'images' section of the config + + """ + # Nothing to do, just return + + def prepare(self): + """Prepare the Installer to install the system by reading in + the configuration, merging the overlays onto the + configuration, and generating any configuration data that need + to be generated. 
+ + """ + self.__prep_manifest() + self.__prep_bmcs() + self.__prep_hosting() + self.__prep_nodes() + self.__prep_images() + return 0 + + def __check_and_get_dict_key( + self, key, dictionary, value_type, none_ok=False + ): + """Validate and return the value stored under 'key' in + 'dictionary', checking that the key is present and its value + has the correct type. If 'none_ok' is set a missing key yields None. + + """ + if key not in dictionary: + if none_ok: + return None + raise ConfigError("key '%s' not found" % key) + if not isinstance(dictionary[key], value_type): + raise ConfigError( + "key '%s' is a %s and should be a %s" % ( + key, str(type(dictionary[key])), str(value_type) + ) + ) + return dictionary[key] + + def __check_and_get_config_path( + self, config_path, value_type, none_ok=False + ): + """Validate and return the contents of a path in the + configuration, checking that the path exists and has the + correct type. + + """ + value = self.config_by_path(config_path) + if value is None and none_ok: + return value + if not isinstance(value, value_type): + raise ConfigError( + "'%s' has a value of type '%s' and should have a value of " + "type '%s'" % ( + config_path, str(type(value)), str(value_type) + ) + ) + return value + + def __valid_manifest_deploy_dir(self): + """Make sure the 'deployment_directory' field of the manifest + is specified, is a string and looks like it might be an + absolute pathname + + """ + deployment_directory = self.__check_and_get_config_path( + 'manifest.deployment_directory', str + ) + if not deployment_directory.startswith(path_separator): + raise ConfigError( + "'manifest.deployment_directory' value '%s' is not an " + "absolute pathname" % deployment_directory + ) + + def __valid_manifest_deploy_user(self): + """Validate the 'deployment_user' section of the manifest + + """ + self.__check_and_get_config_path( + 'manifest.deployment_user', dict + ) + username = self.__check_and_get_config_path( + 'manifest.deployment_user.username', str + ) + primary_group = self.__check_and_get_config_path( + 
'manifest.deployment_user.primary_group', str + ) + supplementary_groups = self.__check_and_get_config_path( + 'manifest.deployment_user.supplementary_groups', list + ) + for group in supplementary_groups: + if not isinstance(group, str): + raise ConfigError( + "supplementary group '%s' in " + "'manifest.deployment_user.supplementary_groups' " + "should be a string but is of type '%s'" % ( + str(group), str(type(group)) + ) + ) + if not self.options.get('prep-host', False): + try: + user_info = getpwnam(username) + except KeyError as err: + raise ConfigError( + "'manifest.deployment_user.username' user '%s' is not " + "provisioned as a user on this host " + "try running installer in 'prep-host' mode " + "before installing OpenCHAMI" % username + ) from err + try: + primary_info = getgrnam(primary_group) + except KeyError as err: + raise ContextualError( + "error looking up deployment user primary " + "group '%s' (try running installer in 'prep-host' " + "mode before installing OpenCHAMI) - %s" % ( + primary_group, str(err) + ) + ) from err + try: + supplementary_info = [ + getgrnam(group) + for group in supplementary_groups + ] + except KeyError as err: + raise ContextualError( + "error looking up deployment user supplementary " + "groups (try running installer in 'prep-host' " + "mode before installing OpenCHAMI) - %s" % str(err) + ) from err + if user_info.pw_gid != primary_info.gr_gid: + raise ConfigError( + "deployment user '%s' does not have group '%s' as " + "its primary group try running installer in 'prep-host' " + "mode before installing OpenCHAMI" % ( + username, + primary_group + ) + ) + for group_info in supplementary_info: + if username not in group_info.gr_mem: + raise ConfigError( + "user '%s' is not a member of group '%s' as a " + "supplementary group try running installer in " + "'prep-host' mode before installing OpenCHAMI" % ( + username, + group_info.gr_name + ) + ) + + def __valid_manifest_file_gen(self, file_key): + """For generated manifest file items 
(items that have no + template specified) validate the manifest contents with + respect to generation parameters. + + """ + config_path = self.__check_and_get_config_path( + "manifest.files.%s.generation.config_path" % file_key, str + ) + # Make sure that the configuration path from which the + # template file will be composed is, in fact, present and a + # dictionary. + self.__check_and_get_config_path(config_path, dict) + + # Make sure the generation type is either YAML or JSON + gen_type = self.__check_and_get_config_path( + "manifest.files.%s.generation.type" % file_key, str + ) + if gen_type not in ('yaml', 'json'): + raise ConfigError( + "'manifest.files.%s.generation.type' is '%s' but must " + "be either 'yaml' or 'json'" % (file_key, gen_type) + ) + + def __valid_manifest_file_tpl(self, file_key): + """For template based manifest file items (items with a + template specified) validate the template information. + + """ + template_name = self.__check_and_get_config_path( + "manifest.files.%s.template_name" % file_key, str + ) + template_path = Path(template(template_name)) + if not template_path.exists(): + raise ConfigError( + "(internal) missing template file '%s' " + "referenced from 'manifest.files.%s.template_name'" % ( + template_name, file_key + ) + ) + + def __valid_manifest_file(self, file_key): + """Validate the contents of a manifest item + + """ + # Look at the template name for the specified file + # structure. If it is None, then the template is generated, if + # not the template is a file. It needs to be explicitly None + # to be generated, missing is not okay. 
+ template_name = self.__check_and_get_config_path( + "manifest.files.%s.template_name" % file_key, str, none_ok=True + ) + if template_name is None: + self.__valid_manifest_file_gen(file_key) + else: + self.__valid_manifest_file_tpl(file_key) + # Verify that 'target' is specified and is a string + self.__check_and_get_config_path( + "manifest.files.%s.target" % file_key, str + ) + # Verify that 'mode' is specified and is a legal value + mode = self.__check_and_get_config_path( + "manifest.files.%s.mode" % file_key, str + ) + mode_re = re.compile("^[0-7][0-7][0-7]$") + if not mode_re.match(mode): + raise ConfigError( + "'manifest.files.%s.mode' has a value of '%s' which " + "is invalid since it should be a three digit octal " + "value" % (file_key, mode) + ) + # The owner and group fields in a file manifest are optional, + # but they have to be strings and exist on the installation + # host if they are present. Also, if we are doing host + # preparation, an explicit and existing owner and group must + # be present. 
+ owner = self.config_by_path( + 'manifest.files.%s.owner' % file_key, default=None + ) + if self.options.get('prep-host', False) and owner is None: + raise ConfigError( + "manifest file 'manifest.files.%s' must have an explicit " + "owner" % file_key + ) + if owner is not None: + if not isinstance(owner, str): + raise ConfigError( + "'manifest.files.%s.owner' has a value of type '%s' and " + "should have a value of type '%s' or be null" % ( + file_key, str(type(owner)), str(str) + ) + ) + try: + getpwnam(owner) + except KeyError as err: + raise ContextualError( + "'manifest.files.%s.owner' specifies a username '%s' " + "that is not yet provisioned on the host" % ( + file_key, owner + ) + ) from err + group = self.config_by_path( + 'manifest.files.%s.group' % file_key, default=None + ) + if self.options.get('prep-host', False) and group is None: + raise ConfigError( + "manifest file 'manifest.files.%s' must have an explicit " + "group" % file_key + ) + if group is not None: + if not isinstance(group, str): + raise ConfigError( + "'manifest.files.%s.group' has a value of type '%s' and " + "should have a value of type '%s' or be null" % ( + file_key, str(type(group)), str(str) + ) + ) + try: + getgrnam(group) + except KeyError as err: + raise ContextualError( + "'manifest.files.%s.group' specifies a group '%s' that is " + "not yet provisioned on the host" % ( + file_key, group + ) + ) from err + + def __valid_manifest_files(self): + """Validate the 'files' section of the manifest + + """ + manifest_files = self.__check_and_get_config_path( + 'manifest.files', dict + ) + if not manifest_files: + raise ConfigError( + "'manifest.files' must contain at least one item" + ) + # If we are running in prep-host mode, we are only going to + # deploy a subset of the manifest, only check the files we + # plan to deploy. 
+ prep_host = self.options.get('prep-host', False) + annotations = ( + ['host-prep-entrypoint', 'host-prep-support'] if prep_host + else [] + ) + for file_key, manifest_file in manifest_files.items(): + file_annotations = manifest_file.get('annotations', []) + matches = [ + file_annotation + for file_annotation in file_annotations + if file_annotation in annotations + ] + if annotations and not matches: + # No matching annotations, skip this file + continue + self.__valid_manifest_file(file_key) + + def __valid_required_annotation(self, annotation, max_count=None): + """Make sure that the specified 'annotation' is present on at + least one file in the manifest and, if 'max_count' is + specified, no more than 'max_count' files. + + """ + manifest_files = self.__check_and_get_config_path( + 'manifest.files', dict + ) + found = [ + manifest_file_key + for manifest_file_key, manifest_file in manifest_files.items() + if annotation in manifest_file.get('annotations', []) + ] + if not found: + raise ConfigError( + "there is no file with the required annotation " + "'%s' in 'manifest.files'" % annotation + ) + if max_count is not None and len(found) > max_count: + raise ConfigError( + "there should be a maximum of %d file%s with the " + "annotation '%s' in 'manifest.files', these files " + "all have that annotation: %s" % ( + max_count, + 's' if max_count > 1 else '', + annotation, + str(found) + ) + ) + + def __valid_manifest(self): + """Validate the contents of the manifest portion of the + configuration + + """ + self.__check_and_get_config_path('manifest', dict) + self.__valid_manifest_deploy_dir() + self.__valid_manifest_deploy_user() + self.__valid_manifest_files() + self.__valid_required_annotation('image-builder') + self.__valid_required_annotation('install-entrypoint', 1) + self.__valid_required_annotation('host-prep-entrypoint', 1) + + def __valid_bmcs(self): + """Validate the configuration of BMCs + + """ + bmcs = self.__check_and_get_config_path('bmcs', dict) + 
for bmc_name, bmc in bmcs.items(): + if 'network' not in bmc: + raise ConfigError( + "bmc '%s' has no 'network' specification" % bmc_name + ) + if 'ipv4' not in bmc['network']: + raise ConfigError( + "bmc '%s' 'network' specification has no 'ipv4'" % bmc_name + ) + if 'redfish_username' not in bmc['network']: + raise ConfigError( + "bmc '%s' 'network' specification has " + "no 'redfish_username'" % bmc_name + ) + + def __valid_hosting(self): + """Validate the hosting configuration + + """ + self.__check_and_get_config_path('hosting_config', dict) + + def __valid_node(self, node): + """Verify that the contents of a node is complete and + consistent. + """ + name = "" + try: + name = self.__check_and_get_dict_key('name', node, str) + self.__check_and_get_dict_key('bmc_name', node, str) + cluster_net_interface = self.__check_and_get_dict_key( + 'cluster_net_interface', node, str + ) + self.__check_and_get_dict_key('hostname', node, str) + self.__check_and_get_dict_key('nid', node, int) + self.__check_and_get_dict_key('node_group', node, str) + interfaces = self.__check_and_get_dict_key( + 'interfaces', node, list + ) + except ConfigError as err: + raise ConfigError( + "node '%s' is not properly formed - %s" % (name, str(err)) + ) from err + cluster_interface = None + for interface in interfaces: + network_name = "" + try: + network_name = self.__check_and_get_dict_key( + 'network_name', interface, str + ) + self.__check_and_get_dict_key( + 'mac_addr', interface, str + ) + if network_name == cluster_net_interface: + cluster_interface = interface + except ConfigError as err: + raise ConfigError( + "network '%s' in node '%s' is not properly formed - %s" % ( + network_name, name, str(err) + ) + ) from err + if cluster_interface is None: + raise ConfigError( + "node '%s' has no interface connected to the cluster " + "network ('%s')" % (name, cluster_net_interface) + ) + + def __valid_nodes(self): + + """Validate the 'nodes' section of the config + + """ + nodes = 
self.__check_and_get_config_path('nodes', list) + if not nodes: + raise ConfigError( + "the 'nodes' section is empty" + ) + for node_key in nodes: + self.__valid_node(node_key) + + def __valid_images(self): + """Validate the 'images' section of the config + + """ + self.__check_and_get_config_path('images', dict) + self.__check_and_get_config_path('images.build_order', list) + builders = self.__check_and_get_config_path('images.builders', dict) + if not builders: + raise ConfigError( + "config must provide at least one image builder in " + "'images.builders' section" + ) + deployment_targets = self.__check_and_get_config_path( + 'images.deployment_targets', dict + ) + if not deployment_targets: + raise ConfigError( + "config must provide at least one deployment target in " + "'images.deployment_targets' section" + ) + # Check that all deployment targets are deploying an image + # built by a known image builder and are targeting a known + # node group. + # + # First, make a set of node groups to use in validating + # deployment target keys. + nodes = self.config.get('nodes', {}) + node_groups = { + node['node_group'] + for node in nodes + if 'node_group' in node + } + for node_group, image_key in deployment_targets.items(): + if image_key not in builders: + raise ConfigError( + "unknown image builder key '%s' used for node group " + "'%s' in 'images.deployment_targets'" % ( + image_key, node_group + ) + ) + if node_group not in node_groups: + raise ConfigError( + "unknown config target node group '%s' " + "found in 'images.deployment_targets' section " + "known node groups are: %s" % ( + node_group, + ", ".join(sorted(node_groups)) + ) + ) + + def validate(self): + """Validate the final configuration to be sure that everything + is reasonable before attempting an installation. 
+ + """ + self.load_config() + deployment_mode = self.__check_and_get_config_path( + 'deployment_mode', str + ) + if deployment_mode not in ('host', 'cluster'): + raise ConfigError( + "unknown deployment_mode: '%s' " + "expected 'host' or 'cluster'" % deployment_mode + ) + self.__valid_manifest() + self.__valid_bmcs() + self.__valid_hosting() + self.__valid_nodes() + self.__valid_images() + + def show_config(self): + """Display the configuration resulting from applying the base + configuration and all of the overlay files on standard output. + + """ + self.load_config() + sys.stdout.write(self.dump_yaml()) + + def show_base_config(self): + """Display the base configuration file (with comments) on + standard output. + + """ + with open(BASE_CONFIG_PATH, 'r', encoding='UTF-8') as base_config: + sys.stdout.write(base_config.read() + '\n') diff --git a/install-openchami/install_openchami/config/README.md b/install-openchami/install_openchami/config/README.md new file mode 100644 index 0000000..40a9a27 --- /dev/null +++ b/install-openchami/install_openchami/config/README.md @@ -0,0 +1,27 @@ +# OpenCHAMI Base Configuration + +The file `config.yaml` in this directory provides the fully annotated +base configuration of the OpenCHAMI installer. To obtain the contents +of this file complete with annotations for reference, use the + +``` +install_openchami -b +``` + +command. To inspect the effect of configuration overlays on the base configuration use + +``` +install_openchami -c [ ...] +``` + +To validate an overlaid configuration use + +``` +install_openchami -v [ ...] +``` + +__NOTE: While the base configuration is copyrighted and made available + under the MIT license, displaying that copyright as part of the + `install_openchami -b` output is cumbersome and unnecessary. 
Because + of this a companion `.license` file has been provided that covers + this base configuration.__ diff --git a/install-openchami/install_openchami/config/README.md.license b/install-openchami/install_openchami/config/README.md.license new file mode 100644 index 0000000..3377279 --- /dev/null +++ b/install-openchami/install_openchami/config/README.md.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +SPDX-License-Identifier: MIT diff --git a/install-openchami/install_openchami/config/config.yaml b/install-openchami/install_openchami/config/config.yaml new file mode 100644 index 0000000..3fde773 --- /dev/null +++ b/install-openchami/install_openchami/config/config.yaml @@ -0,0 +1,421 @@ +# The deployment mode can be 'host' or 'cluster'. If the mode is +# 'cluster' the presumption is that there is hardware (or virtual +# hardware) out on the cluster network and BMCs or virtual BMCs managing +# that hardware, and OpenCHAMI is there to boot and manage the +# cluster. If the mode is 'host' then we are deploying OpenCHAMI on a +# node or VM where we want to spin up one or more Virtual Machines +# across a node local Virtual Network on the same node where OpenCHAMI +# is running and let them boot from OpenCHAMI. The default is 'host' to +# allow easy testing of OpenCHAMI on a single VM. +deployment_mode: host + +# The manifest section contains information about the files to be +# generated in preparation for installing OpenCHAMI. Most files will be +# predefined and found in the 'templates' sub-directory of the +# 'install_openchami' module. Some files are generated directly from +# config. Two files are installation scripts. One installs OpenCHAMI and +# one prepares the management host for installing OpenCHAMI. These are +# identified by the 'install-entrypoint' and 'host-prep-entrypoint' +# annotations respectively. There must be exactly one file with each +# annotation in the config. 
Other annotations are used by the file +# templates to identify target files for a given action. For example, +# all image builders have the 'image-builder' annotation. +manifest: + # 'deployment_user' identifies the username of the user on the host + # who will be running the deployment. By default this is 'rocky'. This + # user must already exist on the host where installation is run and + # must be set up for passwordless 'sudo' access. All rendered files + # generated from this manifest will be owned by this user and its + # primary group unless otherwise specified in the manifest entry for + # the file. + # + # The primary group ID and user ID will be filled in in this section + # by the installer as 'deployment_user.uid' and 'deployment_user.gid' + # based on information queried from the host and can be used in file + # templates as needed + deployment_user: + username: rocky + primary_group: rocky # used for host preparation + supplementary_groups: [] # used for host preparation + uid: null # This will always be filled in by the installer + gid: null # This will always be filled in by the installer + # The deployment directory is where deployment operations are run + # from. All files that have a 'target' specification that is a + # relative pathname are located below the deployment directory. + deployment_directory: "/opt/workdir" + # 'files' contains the information about each file to be + # deployed. If a file has a 'template_name' the template is picked up + # from that named file among the templates delivered with the + # 'install_openchami' module and rendered to the 'target' location on + # the host. If not the file is generated somehow, and the details are + # in the description. If 'mkdir' is present and 'true' then the parent + # directory path will be created as needed and each created + # element will be given 0755 permissions and ownership matching the + # file. If 'mkdir' is not present or 'false', the directory must + # already exist. 
+ files: + rocky-base-image: + annotations: + - image-builder + template_name: null # generated from config + target: images/rocky-base-9.yaml # relative to 'deployment_directory' + # This template is generated from config and then rendered after + # generation. The 'generation' section describes how that is done, + # identifying the path in this config from which the template data + # are obtained to be written ('config_path') in dotted notation, and + # a file type (YAML or JSON) to be generated. + generation: + config_path: images.builders.rocky-base-9.data + type: yaml # 'yaml' or 'json' + mode: "644" + mkdir: true + compute-base-image: + annotations: + - image-builder + template_name: null # generated from config + target: images/compute-base-rocky9.yaml + generation: + config_path: images.builders.compute-base-rocky9.data + type: yaml # 'yaml' or 'json' + mode: "644" + mkdir: true + compute-debug-image: + annotations: + - image-builder + template_name: null # generated from config + target: images/compute-debug-9.yaml + generation: + config_path: images.builders.compute-debug-rocky9.data + type: yaml # 'yaml' or 'json' + mode: "644" + mkdir: true + bmc_info: + # This template is delivered with the install_openchami module and + # gets installed into a system directory with mode "rw-------" + # and root ownership because it contains some potentially + # sensitive data. + template_name: bmc_info.json + target: /etc/vtds/bmc_info.json + mode: "600" + owner: root + group: root + mkdir: true + build-image-script: + template_name: build-image.sh + target: /etc/profile.d/build-image.sh + mode: "644" + containers-conf: + # XXX - is this really used anywhere? 
+ template_name: containers.conf + target: containers.conf + mode: "644" + mkdir: true + coredhcp: + annotations: + - host-prep-support + template_name: coredhcp.yaml + target: /etc/openchami/configs/coredhcp.yaml + mode: "644" + owner: root + group: root + mkdir: false + Corefile: + template_name: Corefile + target: /etc/openchami/configs/Corefile + owner: root + group: root + mode: "644" + mkdir: false + minio-container: + template_name: minio.container + target: /etc/containers/systemd/minio.container + mode: "644" + owner: root + group: root + mkdir: false + registry-container: + template_name: registry.container + target: /etc/containers/systemd/registry.container + mode: "644" + owner: root + group: root + mkdir: false + nodes: + template_name: nodes.yaml + target: nodes/nodes.yaml + mode: "644" + mkdir: true + openchami-net: + template_name: openchami-net.xml + target: openchami-net.xml + mode: "644" + mkdir: true + openchami-install: + annotations: + - install-entrypoint + template_name: OpenCHAMI-Install.sh + target: OpenCHAMI-Install.sh + mode: "755" + mkdir: true + prep-setup: + annotations: + - host-prep-support + template_name: prep_setup.sh + target: prep_setup.sh + owner: root + group: root + mode: "644" # not a script: a sourced file, not stand-alone executable + mkdir: true + prepare-host: + annotations: + - host-prep-entrypoint + template_name: prepare_host.sh + target: prepare_host.sh + owner: root + group: root + mode: "755" + mkdir: true + s3-public-read-boot-images: + annotation: + - s3-access-control + template_name: s3-public-read-boot-images.json + target: s3-public-read-boot-images.json + mode: "644" + mkdir: true + s3-public-read-efi: + annotation: + - s3-access-control + template_name: s3-public-read-efi.json + target: s3-public-read-efi.json + mode: "644" + mkdir: true + s3cfg: + # This will be copied into the deployment user's home directory as + # '.s3cfg'. For now, put it in the deployment directory so we have + # it. 
+ template_name: s3cfg + target: s3cfg + mode: "644" + mkdir: true + +# The following provides BMC information. For simple stand-alone +# installs, there are no "real" BMCs (unless your system has them and +# you want them configured). Real systems and vTDS deployed systems do +# have "real" BMCs and they should be configured here. +# +# Even for simple standalone installs, though, we do need to have some +# basic BMC information in order to be able to compose the static +# discovery information, so we have a BMC defined here, which also +# serves as an example. +bmcs: + x2000c0s0b0: + blade_class: host-blade + blade_instance: 0 + network: + redfish_username: root + redfish_password: null # Generated + mac: null # Generated if null, otherwise put the real MAC here + # this is a stand-in, put the real IP here if you have one + ipv4: 127.0.0.1 + +# The hosting configuration is here. This identifies the cluster +# network from the management node (where OpenCHAMI runs) and sets up +# the DNS and DHCP information needed to set up the network related +# microservices. +hosting_config: + # this is the network name not the interface name. It is used to + # create virtual networks in 'virsh' for 'host' mode operation. + cluster_name: demo + cluster_net_name: openchami-net + cluster_net_bridge_name: virbr-openchami + cluster_net_dhcp_start: 172.16.0.1 + cluster_net_dhcp_end: 172.16.0.128 + cluster_net_mask: 255.255.255.0 + net_head_dns_server: 172.16.0.254 + net_head_domain: openchami.cluster + net_head_hostname: management + net_head_ip: 172.16.0.254 + # Extra Packages lists extra packages in addition to the ones + # strictly needed for OpenCHAMI deployment that the 'prep-host' + # operation should install on the host node. The 'pre' list is + # packages that are required before installing the main list. This is + # not about dependencies per-se but about installing packages that + # bring repos with them like 'epel-release'. 
# The 'images' section contains information that drives the building
# and assignment of boot images used on managed (for example
# 'compute') nodes. The major pieces are:
#
# - a 'build_order' which is a list of the names of the image builders
#   to be run in their dependency order
#
# - a collection of image builders ('builders') indexed by image
#   builder name (same as is found in the 'build_order' list) which
#   contain the full yaml code in free form used to generate the
#   templated image builder YAML file for the OpenCHAMI deployment.
#
# - a list of 'deployment_targets' which identify which image will be
#   used on which 'node_group' (see 'nodes' above).
#
# - a list of groups to be pre-provisioned on specified images
#
# - a list of users to be pre-provisioned on specified images
#
# Each piece is described more fully in-line below.
images:
  # The 'build_order' list specifies the order in which images need to
  # be built to handle dependencies in layered builds. All images that
  # are not listed in the build order will be assumed to only have
  # dependencies on those listed and will be built in an arbitrary
  # order following completion of the specified ordered builds.
  build_order:
    - rocky-base-9
    - compute-base-rocky9
  # The 'deployment_targets' section associates images with managed node
  # groups (see 'nodes' above). It is indexed by group and each element
  # names the image builder (see 'builders') whose boot image is
  # assigned to the specified group. The boot parameter file for the
  # group is taken from that builder's 'metadata.boot_param_filename'.
  # By default there is one node group ('compute') and it boots the
  # 'compute-base-rocky9' image described in 'builders'.
  deployment_targets:
    compute: compute-base-rocky9
  # The 'builders' section is a bit different from other configuration
  # because each element is used directly to generate a Jinja2
  # template for an image builder and then that template is run
  # through the standard templated file generation before it is used
  # for deployment. That means that Jinja2 templating can be included
  # in the configuration (see, for example, the 'publish_registry'
  # fields of each of the builders) and it will be honored.
  #
  # NOTE: 'metadata.boot_param_filename' must match the base name of
  #       the 'target' of the manifest file generated from the
  #       builder, because the install script writes each boot
  #       configuration under that base name.
  builders:
    rocky-base-9:
      metadata:
        boot_param_filename: rocky-base-9.yaml
      data:
        options:
          layer_type: 'base'
          name: 'rocky-base'
          publish_tags: '9'
          pkg_manager: 'dnf'
          parent: 'scratch'
          # NOTE: the publish_registry setting here may be used by other
          #       image builders as their 'parent' URL. Make sure if you
          #       change this you change those items as well.
          publish_registry: '{{ hosting_config.net_head_hostname }}.{{ hosting_config.net_head_domain }}:5000/{{ hosting_config.cluster_name }}'
          registry_opts_push:
            - '--tls-verify=false'
        repos:
          - alias: 'Rocky_9_BaseOS'
            url: 'https://dl.rockylinux.org/pub/rocky/9/BaseOS/x86_64/os/'
            gpg: 'https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-9'
          - alias: 'Rocky_9_AppStream'
            url: 'https://dl.rockylinux.org/pub/rocky/9/AppStream/x86_64/os/'
            gpg: 'https://dl.rockylinux.org/pub/rocky/RPM-GPG-KEY-Rocky-9'
        package_groups:
          - 'Minimal Install'
          - 'Development Tools'
        packages:
          - chrony
          - cloud-init
          - dracut-live
          - kernel
          - rsyslog
          - sudo
          - wget
        cmds:
          - cmd: 'dracut --add "dmsquash-live livenet network-manager" --kver $(basename /lib/modules/*) -N -f --logfile /tmp/dracut.log 2>/dev/null'
          - cmd: 'echo DRACUT LOG:; cat /tmp/dracut.log'
    compute-base-rocky9:
      metadata:
        boot_param_filename: compute-base-rocky9.yaml
      data:
        options:
          layer_type: base
          name: compute-base
          publish_tags:
            - 'rocky9'
          pkg_manager: dnf
          # NOTE: this uses the registry publication path in the
          #       'rocky-base-9' image builder. Make sure if you change
          #       that you change this as well.
          parent: '{{ hosting_config.net_head_hostname }}.{{ hosting_config.net_head_domain }}:5000/{{ hosting_config.cluster_name }}/rocky-base:9'
          registry_opts_pull:
            - '--tls-verify=false'

          # Publish SquashFS image to local S3
          publish_s3: 'http://{{ hosting_config.net_head_hostname }}.{{ hosting_config.net_head_domain }}:9000'
          s3_prefix: 'compute/base/'
          s3_bucket: 'boot-images'
          # Publish OCI image to container registry
          #
          # This is the only way to be able to re-use this image as
          # a parent for another image layer.
          #
          # NOTE: the publish_registry setting here may be used by other
          #       image builders as their 'parent' URL. Make sure if you
          #       change this you change those items as well. It uses
          #       the configured cluster name (not a literal) so that it
          #       stays in step with the 'parent' of
          #       'compute-debug-rocky9' when 'cluster_name' is
          #       overridden.
          publish_registry: '{{ hosting_config.net_head_hostname }}.{{ hosting_config.net_head_domain }}:5000/{{ hosting_config.cluster_name }}'
          registry_opts_push:
            - '--tls-verify=false'

        repos:
          - alias: 'Epel9'
            url: 'https://dl.fedoraproject.org/pub/epel/9/Everything/x86_64/'
            gpg: 'https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-9'
        packages:
          - boxes
          - cowsay
          - figlet
          - fortune-mod
          - git
          - nfs-utils
          - tcpdump
          - traceroute
          - vim
    compute-debug-rocky9:
      metadata:
        # Matches the base name of the 'compute-debug-image' manifest
        # target ('images/compute-debug-9.yaml').
        boot_param_filename: compute-debug-9.yaml
      data:
        options:
          layer_type: base
          name: compute-debug
          publish_tags:
            - 'rocky9'
          pkg_manager: dnf
          # NOTE: this uses the registry publication path in the
          #       'compute-base-rocky9' image builder. Make sure if you
          #       change that you change this as well.
          parent: '{{ hosting_config.net_head_hostname }}.{{ hosting_config.net_head_domain }}:5000/{{ hosting_config.cluster_name }}/compute-base:rocky9'
          registry_opts_pull:
            - '--tls-verify=false'

          # Publish to local S3
          publish_s3: 'http://{{ hosting_config.net_head_hostname }}.{{ hosting_config.net_head_domain }}:9000'
          s3_prefix: 'compute/debug/'
          s3_bucket: 'boot-images'
        packages:
          - shadow-utils
classes to support the OpenCHAMI installer + +""" +# pylint: disable=unused-import +from vtds_base import ContextualError # Pass this along to importers + + +class ConfigError(Exception): # pylint: disable=too-few-public-methods + """Exception to specifically report errors in validation of + configuration. + + """ diff --git a/install-openchami/install_openchami/install_openchami.py b/install-openchami/install_openchami/install_openchami.py new file mode 100755 index 0000000..fa3c1c2 --- /dev/null +++ b/install-openchami/install_openchami/install_openchami.py @@ -0,0 +1,210 @@ +# SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +# SPDX-License-Identifier: MIT + +# pylint: disable=consider-using-f-string +"""Orchestrate the installation of OpenCHAMI on a single node or cluster. + +""" + +import sys +from getopt import ( + getopt, + GetoptError +) + +from .installer import Installer +from .error import ContextualError, ConfigError + + +def error(msg): + """Produce an error message on stderr + + """ + sys.stderr.write("ERROR: %s\n" % msg) + + +class UsageError(Exception): # pylint: disable=too-few-public-methods + """Exception to report usage errors + + """ + + +def usage(msg=None): + """Produce a usage message on stderr + """ + if msg: + sys.stderr.write("%s\n" % msg) + sys.stderr.write(USAGE_MSG) + + +USAGE_MSG = """ +Usage: install_openchami [-p|--prep-host][-f|--files-only] [ ...] + install_openchami [-p|--prep-host] -v|--validate [ ...] + install_openchami -c|--config [ ...] 
def process_args(argv):
    """Parse the command line arguments found in 'argv' (which must
    not include the program name) into an options dictionary and a
    list of configuration overlay paths.

    Returns a tuple '(ret_opts, args)' where 'ret_opts' contains:

      'operation'  - one of 'install' (the default), 'help',
                     'show-base-config', 'show-config' or 'validate'
      'files-only' - True if -f / --files-only was given
      'prep-host'  - True if -p / --prep-host was given

    and 'args' is the list of remaining (non-option) arguments.

    Raises UsageError if an option is not recognized, if more than
    one operation is requested, or if a modifier option is combined
    with an operation that does not support it.

    """
    action_error = (
        "requested action may only be one of "
        "'show base config' (-b, --base-config), "
        "'show config' (-c, --config), "
        "'help' (-h, --help) or "
        "'validate' (-v, --validate)"
    )
    # Options that select the operation to perform. getopt reports
    # long options with their leading '--' (and expands unambiguous
    # abbreviations to the full name), so both spellings of every
    # option must appear here with their dashes.
    operations = {
        '-h': 'help',
        '--help': 'help',
        '-b': 'show-base-config',
        '--base-config': 'show-base-config',
        '-c': 'show-config',
        '--config': 'show-config',
        '-v': 'validate',
        '--validate': 'validate',
    }
    # Options that modify an operation rather than selecting one.
    modifiers = {
        '-f': 'files-only',
        '--files-only': 'files-only',
        '-p': 'prep-host',
        '--prep-host': 'prep-host',
    }
    ret_opts = {
        'operation': 'install',
        'files-only': False,
        'prep-host': False,
    }
    try:
        opts, args = getopt(
            argv, "bcfhpv", [
                'base-config',
                'config',
                'files-only',
                'help',
                'prep-host',
                'validate',
            ]
        )
    except GetoptError as err:
        raise UsageError(err) from err
    for opt, _value in opts:
        if opt in operations:
            # Only one operation may be requested, so the operation
            # must still be the default when we get here.
            if ret_opts['operation'] != 'install':
                raise UsageError(action_error)
            ret_opts['operation'] = operations[opt]
        elif opt in modifiers:
            ret_opts[modifiers[opt]] = True
        else:
            # Getopt will handle any unrecognized option, so if we
            # get here, there is a recognized option that was never
            # handled. Need to add option handling for that.
            raise UsageError(
                "INTERNAL ERROR: unprocessed option '%s'" % opt
            )
    if ret_opts['files-only'] and ret_opts['operation'] != 'install':
        raise UsageError(
            "'files-only' option is only valid with the install operation"
        )
    if (
        ret_opts['prep-host'] and
        ret_opts['operation'] not in ('install', 'validate')
    ):
        raise UsageError(
            "'prep-host' option is only valid with the install or validate "
            "operation"
        )
    return ret_opts, args
class Installer:
    """The OpenCHAMI installer class that orchestrates installation of
    OpenCHAMI on a system.

    The installer wraps a 'Config' object built from the command line
    options and the configuration overlays. It uses that object to
    validate and prepare the configuration, render the manifest and
    locate the entrypoint scripts that it then runs.

    """
    def __init__(self, options, config_overlays):
        """Construct the installer instance using the config overlays
        and options provided from the caller.

        'options' is the options dictionary produced by command line
        parsing ('files-only' and 'prep-host' are consulted here) and
        'config_overlays' is the list of overlay file paths. Both are
        handed to 'Config'.

        """
        self.config = Config(options, config_overlays)
        self.options = options

    def prepare(self):
        """Prepare the Installer to install the system by reading in
        the configuration, merging the overlays onto the
        configuration, and generating any configuration data that need
        to be generated.

        """
        self.config.prepare()

    def validate(self):
        """Validate the final configuration to be sure that everything
        is reasonable before attempting an installation.

        """
        self.config.validate()

    def __entrypoint_script(self, annotation):
        """Return the first manifest file carrying 'annotation'.

        Raises ContextualError (instead of a bare IndexError that the
        command entrypoint would not report cleanly) when no file in
        the manifest carries the annotation.

        """
        scripts = self.config.find_annotated_files(annotation)
        try:
            return scripts[0]
        except IndexError as err:
            raise ContextualError(
                "no file in the manifest has the annotation '%s'" % annotation
            ) from err

    def __run_install_script(self):
        """The manifest in the configuration will have one file that
        has the annotation 'install-entrypoint'. Find that script
        and execute it as the user specified in
        'manifest.deployment_user.username'.

        Raises ContextualError if the script cannot be found, cannot
        be started or exits with a non-zero status.

        """
        install_script = self.__entrypoint_script('install-entrypoint')
        deploy_user = self.config.config_by_path(
            'manifest.deployment_user.username'
        )
        try:
            # 'su -' gives the script the deployment user's login
            # environment.
            run(['su', '-', deploy_user, install_script], check=True)
        except (CalledProcessError, OSError) as err:
            # OSError covers 'su' being missing or not executable.
            raise ContextualError(
                "install script '%s' failed - %s" % (
                    install_script, str(err)
                )
            ) from err

    def __run_host_prep_script(self):
        """The manifest in the configuration will have one file that has
        the annotation 'host-prep-entrypoint'. Find that script and
        execute it directly, as the user running the installer (unlike
        the install script, it is not run through 'su').

        Raises ContextualError if the script cannot be found, cannot
        be started or exits with a non-zero status.

        """
        host_prep_script = self.__entrypoint_script('host-prep-entrypoint')
        try:
            run([host_prep_script], check=True)
        except (CalledProcessError, OSError) as err:
            # OSError covers the script being missing or not executable.
            raise ContextualError(
                "host-prep script '%s' failed - %s" % (
                    host_prep_script, str(err)
                )
            ) from err

    def install(self):
        """Render all the templates and place the resulting files
        according to the manifest, then run the requested installation
        action (either prepare the host or install OpenCHAMI). If the
        'files-only' option is specified, install the files but do not
        run the installation.

        In 'prep-host' mode only the files annotated as host
        preparation entrypoint or support files are rendered,
        otherwise the whole manifest is rendered.

        """
        self.validate()
        self.prepare()
        prep_host = self.options.get('prep-host', False)
        annotations = (
            ['host-prep-entrypoint', 'host-prep-support'] if prep_host
            else None
        )
        self.config.render_manifest(annotations)
        if self.options.get('files-only', False):
            # Files are in place and that is all that was asked for.
            return
        if prep_host:
            self.__run_host_prep_script()
        else:
            self.__run_install_script()

    def show_config(self):
        """Display the configuration resulting from applying the base
        configuration and all of the overlay files on standard output.

        """
        self.config.show_config()

    def show_base_config(self):
        """Display the base configuration file (with comments) on
        standard output.

        """
        self.config.show_base_config()
+ ca_cert /root_ca/root_ca.crt + + # Frequency to update the SMD data cache. + cache_duration 30s + + # DNS zone configurations based on records generated from SMD data. + zone {{ hosting_config.net_head_domain }} { + # Besides generating DNS records for nodes based on xname, a custom + # record format can be specified based on the node ID. For instance: + # + # nodes de{03d} + # + # will produce: + # + # de001.{{ hosting_config.net_head_domain }} + # + # for node ID 1 and domain {{ hosting_config.net_head_domain }}. + nodes nid-{03d} + } + } + # Specify DNS forwarders. + forward . {{ hosting_config.net_head_dns_server }} +} diff --git a/install-openchami/install_openchami/templates/Corefile.license b/install-openchami/install_openchami/templates/Corefile.license new file mode 100644 index 0000000..3377279 --- /dev/null +++ b/install-openchami/install_openchami/templates/Corefile.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +SPDX-License-Identifier: MIT diff --git a/install-openchami/install_openchami/templates/OpenCHAMI-Install.sh b/install-openchami/install_openchami/templates/OpenCHAMI-Install.sh new file mode 100644 index 0000000..eed9dd6 --- /dev/null +++ b/install-openchami/install_openchami/templates/OpenCHAMI-Install.sh @@ -0,0 +1,420 @@ +#! /usr/bin/bash +# SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +# SPDX-License-Identifier: MIT + +# Set up the system level pieces needed to start deploying +# OpenCHAMI. This script is intended to be run by a user with +# passwordless 'sudo' permissions. The base node preparation script +# sets up the user 'rocky' with that before chaining here. + +# Set up error handling, the environment and some functions for +# running the "prepare" scripts... 
+SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" > /dev/null && pwd )" +source "${SCRIPT_DIR}/prep_setup.sh" + +# Get some image building functions into our environment +source "/etc/profile.d/build-image.sh" + +# List the image builder configuration files to use for building the +# base OS image, the compute node base image and the compute node +# debug image. +IMAGE_BUILDERS=( + {%- for file in manifest.files.values() %} + {%- if "image-builder" in file.annotations %} + "{{ manifest.deployment_directory }}/{{ file.target }}" + {%- endif %} + {%- endfor %} +) + +ROCKY_DIRS=( + "/data/oci" + "/data/s3" +) + +WORK_DIRS=( + "${DEPLOY_DIR}/boot" + "${DEPLOY_DIR}/cloud-init" +) + +S3_PUBLIC_BUCKETS=( + "efi" + "boot-images" +) + +function cleanup_service() { + local service="${1}"; shift || fail "no service specified" + local dir="${1}"; shift || dir="" + info "cleaning up service '${service}'" + if sudo systemctl status --no-pager --full "${service}"; then + sudo systemctl stop "${service}" + fi + if [ -n "${dir}" -a -d "${dir}" ]; then + info "removing directory '${dir}'" + sudo rm -rf "${dir}" + sudo podman system prune -a -f --volumes + fi +} + +function ssh_to_compute_node() { + local hostname="${1}"; shift || fail "no hostname specified" + local user="${1}"; shift || fail "no deployment username provided" + local check="-o StrictHostKeyChecking=no" + local file="-o UserKnownHostsFile=/dev/null" + local time="-o ConnectTimeout=10" + local where="root@${hostname}" + local retval=0 + info "Attempting to SSH to ${hostname} as ${user}" + for retry in {0..30}; do + if sudo su - "${user}" -c \ + "ssh ${check} ${file} ${time} ${where} true"; then + info "Successful SSH to ${hostname} as ${user}" + return 0 + fi + sleep 10 + done + info "failed to SSH to ${hostname} as ${user}" + return 1 +} + + +# Create the backing directories for S3 and registry that must be made +# by 'root'. 
If they are already there and directories, remove them +# because they have stale data in them that leaks disk space. First, +# stop the services. +sudo systemctl daemon-reload +cleanup_service minio.service /data/s3 +cleanup_service registry.service /data/oci +for dir in "${ROCKY_DIRS[@]}"; do + info "Making directory: ${dir}" + sudo mkdir -p "${dir}" + sudo chown -R rocky: "${dir}" +done + +# Make the directories that are needed for deployment and can be made +# by rocky. If they are already there and directories, remove +# them because they have stale data in them that leaks disk space. +for dir in "${WORK_DIRS[@]}"; do + info "Making directory: ${dir}" + if [ -d "${dir}" ]; then + rm -rf "${dir}" + fi + mkdir -p "${dir}" +done + +info "turning on IPv4 forwarding" +# Turn on IPv4 forwarding on the management node to allow other nodes +# to reach OpenCHAMI services +sudo sysctl -w net.ipv4.ip_forward=1 + +{%- if deployment_mode == 'host' %} +# In 'host' mode, all of the compute nodes are VMs on the headnode VM. +# Clean them up so we don't have to worry about conflicts later. +{%- for node in nodes %} +if sudo virsh list --all | grep "{{ node.name }}"; then + info "cleaning up previously existing '{{ node.name }}' VM" + sudo virsh destroy "{{ node.name }}" + sudo virsh undefine "{{ node.name }}" --nvram +fi +{%- endfor %} + +info "removing the virtual network for the compute node VM(s) to use" +sudo virsh net-destroy {{ hosting_config.cluster_net_name }} || true +sudo virsh net-undefine {{ hosting_config.cluster_net_name }} || true + +info "configuring a virtual network for the compute node VM(s) to use" +sudo virsh net-define /opt/workdir/openchami-net.xml +sudo virsh net-start {{ hosting_config.cluster_net_name }} +sudo virsh net-autostart {{ hosting_config.cluster_net_name }} +{%- endif %} + +# Set up an /etc/hosts entry for the OpenCHAMI management head node so +# we can use it for certs and for reaching the services before any other +# DNS is set up. 
+info "Adding head node (${MANAGEMENT_HEADNODE_IP}) to /etc/hosts" +sudo sed -i /etc/hosts -e "/${MANAGEMENT_HEADNODE_FQDN}/d" +echo "${MANAGEMENT_HEADNODE_IP} ${MANAGEMENT_HEADNODE_FQDN}" | \ + sudo tee -a /etc/hosts > /dev/null + +{%- if deployment_mode == 'host' %} +# While we are at it, also add the managed nodes' hostnames and IP +# addresses to /etc/hosts because, since we are in 'host' mode, we are +# not going to be using any other DNS for cluster host naming. +# +# XXX - At the moment we are using the first IP address in the first +# interface. A better scheme should really be found using the +# network name, the cluster network name and the interface name, +# but I think that needs to be done in the python code not in +# the shell code. +{%- for node in nodes %} +info "Adding managed node {{ node.hostname }} to /etc/hosts" +NODE_FQDN="{{ node.hostname }}.{{ hosting_config.net_head_domain }}" +NODE_IP="{{ node.interfaces[0].ip_addrs[0].ip_addr }}" +sudo sed -i /etc/hosts -e "/${NODE_FQDN}/d" +echo "${NODE_IP} ${NODE_FQDN} {{ node.hostname }}" | \ + sudo tee -a /etc/hosts > /dev/null +{%- endfor %} +{%- endif %} + +# Reload systemd to pick up the minio and registry containers and then +# start those services +info "Restarting systemd and starting minio and registry services" +sudo systemctl daemon-reload +sudo systemctl start minio.service +sudo systemctl start registry.service + +# Set up Cluster SSL Certs for the +info "Setting up cluster SSL certs for OpenCHAMI" +sudo openchami-certificate-update update "${MANAGEMENT_HEADNODE_FQDN}" + +info "set management net IF in 'coredhcp.yaml'" +# Set the interface name in coredhcp.yaml +sudo sed -i \ + -e "s/::MGMT_NET_HEAD_IFNAME::/${MGMT_NET_HEAD_IFNAME}/g" \ + /etc/openchami/configs/coredhcp.yaml + +# Shut down and clean up after any pre-existing OpenCHAMI that might +# be running +cleanup_service openchami.target + +# Also remove any SMD or BSS data after +# giving the pods a chance to stop +sleep 5 +info 
"clearing SMD and BSS data if present" +for retry in {1..10}; do + if ! sudo podman volume ls | grep postgres-data >/dev/null 2>&1; then + # The volume doesn't exist, no reason to try to remove it. + break + fi + sleep 5 + if ! sudo podman volume ls --filter dangling=true |\ + grep postgres-data >/dev/null 2>&1; then + # The volume is still in use, don't try to remove it yet, the + # containers are probably still shutting down. Keep retrying. + continue + fi + if ! sudo podman volume rm postgres-data; then + # Removal failed for some other reason, keep retrying anyway. + continue + fi + break +done +if [ "${retry}" -eq 10 ]; then + fail "timed out waiting to clear the SMD and BSS data" +fi + +# Start OpenCHAMI +info "Starting OpenCHAMI" +sudo systemctl start openchami.target + +info "retrieving OpenCHAMI CLI (ochami) RPM" +OCHAMI_CLI_VERSION="latest" +latest_release_url=$(curl -s https://api.github.com/repos/OpenCHAMI/ochami/releases/${OCHAMI_CLI_VERSION} | jq -r ".assets[] | select(.name | endswith(\"$(derive_architecture).rpm\")) | .browser_download_url") +curl -L "${latest_release_url}" -o ochami.rpm +info "Installing OpenCHAMI CLI (ochami) RPM" +sudo dnf install -y ./ochami.rpm + +# Configure the OpenCHAMI CLI client +info "Configuring OpenCHAMI CLI (ochami) Client" +sudo rm -f /etc/ochami/config.yaml +echo y | sudo ochami config cluster set --system --default "${CLUSTER_NAME}" \ + cluster.uri "https://${MANAGEMENT_HEADNODE_FQDN}:8443" \ + || fail "failed to configure OpenCHAMI CLI" + +# Copy the application data files into their respective places so we are +# ready to build and boot compute nodes. +# +# Set up the 'rocky' user's S3 configuration +cp "${DEPLOY_DIR}/s3cfg" ~/.s3cfg + +# All the rendered files have been installed in their respective +# locations, time to set things up and build some images. +# +# The first thing we need is credentials to interact with +# OpenCHAMI. 
Since OpenCHAMI just came up, this might not work the +# first time, so retry a few times. +for i in {1..10}; do + get-ochami-token || DEMO_ACCESS_TOKEN="" + if [[ "${DEMO_ACCESS_TOKEN}" != "" ]]; then + break + fi + sleep 10 +done +[[ "${DEMO_ACCESS_TOKEN}" != "" ]] || fail "cannot get openchami access token" + +# Wait for SMD to be up and running. This can sometimes take a little +# while. If it takes more than 100 seconds, something is probably +# wrong. +smd_running=false +for i in {0..9}; do + info "waiting for smd for up to $(( 100 - (${i} * 10) )) more seconds" + if ochami smd component get > /dev/null 2>&1; then + smd_running=true + break + fi + sleep 10 +done +if ! ${smd_running}; then + fail "timeout waiting for SMD to start, openChami is not fully available" +fi + +# Run the static node discovery +info "performing static discovery" +ochami discover static $(discovery_version) -f yaml -d @"${DEPLOY_DIR}/nodes/nodes.yaml" + +# Install and configure 'regctl' +info "setting up 'regctl' to manage the registry" +curl -L https://github.com/regclient/regclient/releases/latest/download/regctl-linux-$(derive_architecture) > regctl \ + && sudo mv regctl /usr/local/bin/regctl \ + && sudo chmod 755 /usr/local/bin/regctl +/usr/local/bin/regctl registry set --tls disabled "${MANAGEMENT_HEADNODE_FQDN}:5000" + +# Install and configure S3 client +info "setting up buckets in S3" +for bucket in "${S3_PUBLIC_BUCKETS[@]}"; do + s3cmd ls | grep s3://"${bucket}" && s3cmd rb -r s3://"${bucket}" + s3cmd mb s3://"${bucket}" + s3cmd setacl s3://"${bucket}" --acl-public + s3cmd setpolicy "${DEPLOY_DIR}/s3-public-read-${bucket}.json" \ + s3://"${bucket}" \ + --host="${MANAGEMENT_HEADNODE_IP}:9000" \ + --host-bucket="${MANAGEMENT_HEADNODE_IP}:9000" +done + +# Build the node images... 
+for builder in "${IMAGE_BUILDERS[@]}"; do + info "Building image from image builder '${builder}'" + build-image "${builder}" +done +{%- if deployment_mode == 'cluster' %} +# On a 'cluster' configuration, cluster hostnames are served by +# coresmd-coredns, which should be running properly at this +# point. Make sure it is and switch over to using it. +systemctl is-active --quiet coresmd-coredns.service || \ + fail "coresmd-coredns is not active, ivestigate why not and try again" + +# Switch to coresmd-coredns as the nameserver +info "Switching to the cluster internal DNS nameserver" +switch_dns "${MANAGEMENT_HEADNODE_IP}" "${CLUSTER_DOMAIN}" +{%- endif %} + +# Refresh ochami token after the image builds in case it expired +export DEMO_ACCESS_TOKEN="$(sudo bash -lc 'gen_access_token')" + +# Create the boot configuration for the Compute node Debug image +cd "${DEPLOY_DIR}/boot" +for builder in "${IMAGE_BUILDERS[@]}"; do + BOOT_CONFIG_FILE="${DEPLOY_DIR}/boot/$(basename "${builder}")" + info "Building boot configuration '${BOOT_CONFIG_FILE}'" + S3_PREFIX="$( \ + yaml_to_json < "${builder}" | jq -r '.options.s3_prefix' | + sed -e 's:/[[:blank:]]*$::' \ + )" + generate-boot-config \ + "${S3_PREFIX}" \ + "${MANAGEMENT_HEADNODE_IP}" \ + $(managed_macs) | \ + tee "${BOOT_CONFIG_FILE}" +done + +info "Install boot configuration" +# At the moment, there is only one "active" boot image that can be set +# up. It is the image used by the 'compute' group in the 'images' +# section of the config. Set up a variable for easy access to the +# build script and boot script for that image. +ACTIVE_BOOT_IMAGE="{{ images.builders[images.deployment_targets['compute']].metadata.boot_param_filename }}" + +ochami bss boot params set -f yaml \ + -d @"${DEPLOY_DIR}/boot/${ACTIVE_BOOT_IMAGE}" + +# Set up cloud-init for some basics... +# +# First the global cloud-init metadata +# XXX - Need some templating here...
+rm -f ~/.ssh/id_rsa* +ssh-keygen -t rsa -q -f ~/.ssh/id_rsa -N "" +mkdir -p "${DEPLOY_DIR}"/cloud-init +cat <&2 + return 1 +} + +function build-image() { + set -e + local config="${1}"; shift || _bi_fail "image config file not specified" + # Build with the specified builder. Default to using the RH9 builder + local builder="${1:-"ghcr.io/openchami/image-build-el9:latest"}" + [[ -f "${config}" ]] || _bi_fail "${config} not found" + podman run \ + --network=host \ + --rm \ + --device /dev/fuse \ + -e S3_ACCESS=admin \ + -e S3_SECRET=admin123 \ + -v "$(realpath "${config}")":/home/builder/config.yaml:Z \ + ${EXTRA_PODMAN_ARGS} \ + "${builder}" \ + image-build \ + --config config.yaml \ + --log-level DEBUG || _bi_fail "cannot build image defined in ${config}" +} + +function build-image-rh9() { + local config="${1}"; shift || _bi_fail "image config file not specified" + build-image "${config}" +} + +function build-image-rh8() { + local config="${1}"; shift || _bi_fail "image config file not specified" + local builder="ghcr.io/openchami/image-build:v0.1.0" + build-image "${config}" "${builder}" +} + +function generate-boot-config() { + local image_subpath="${1}"; shift || _bi_fail "image subpath (example 'compute/debug') not provided as first argument" + local headnode_ip="${1}"; shift || _bi_fail "management head-node IP address not provided as second argument" + local macs="$(for mac in "$@"; do echo "${mac}"; done)" + [[ "${macs}" != "" ]] || _bi_fail "no target node MAC addresses provided" + cd /opt/workdir/boot + local uris="$(s3cmd ls -Hr s3://boot-images | grep "${image_subpath}" | \ + awk '{print $4}' | \ + sed "s-s3://-http://${headnode_ip}:9000/-" | \ + xargs)" + local uri_img="$(echo "${uris}" | cut -d' ' -f1)" + [[ "${uri_img}" != "" ]] || _bi_fail "no disk image found that matches '${image_subpath}'" + local uri_initramfs="$(echo "${uris}" | cut -d' ' -f2)" + [[ "${uri_initramfs}" != "" ]] || _bi_fail "no initrd image found that matches '${image_subpath}'" 
+ local uri_kernel="$(echo "${uris}" | cut -d' ' -f3)" + [[ "${uri_kernel}" != "" ]] || _bi_fail "no kernel image found that matches '${image_subpath}'" + cat < + {{ hosting_config.cluster_net_name }} + + + + + diff --git a/install-openchami/install_openchami/templates/openchami-net.xml.license b/install-openchami/install_openchami/templates/openchami-net.xml.license new file mode 100644 index 0000000..3377279 --- /dev/null +++ b/install-openchami/install_openchami/templates/openchami-net.xml.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +SPDX-License-Identifier: MIT diff --git a/install-openchami/install_openchami/templates/prep_setup.sh b/install-openchami/install_openchami/templates/prep_setup.sh new file mode 100644 index 0000000..af4f297 --- /dev/null +++ b/install-openchami/install_openchami/templates/prep_setup.sh @@ -0,0 +1,158 @@ +#! /usr/bin/bash +# SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +# SPDX-License-Identifier: MIT + +# Set up the system level pieces needed to start deploying +# OpenCHAMI. This script is intended to be run by a user with +# passwordless 'sudo' permissions. The base node preparation script +# sets up the user 'rocky' with that before chaining here. 
+ +# Common setup for the prepare node scripts +set -o errexit -o errtrace +function error_handler() { + local filename="${1}"; shift + local lineno="${1}"; shift + local exitval="${1}"; shift + echo "exiting on error [${exitval}] from ${filename}:${lineno}" >&2 + exit ${exitval} +} +trap 'error_handler "${BASH_SOURCE[0]}" "${LINENO}" "${?}"' ERR + +function fail() { + local message="${*:-"failing for no specified reason"}" + echo "${BASH_SOURCE[1]}:${BASH_LINENO[0]}:[${FUNCNAME[1]}]: ${message}" >&2 + return 1 +} + +function info() { + local message="${*:-"failing for no specified reason"}" + echo "INFO: ${message}" >&2 +} + +function discovery_version() { + # The version of SMD changed how ochami needs to feed it manually + # discovered node data at 2.19. We need an extra option to address + # that if the version is 2.18 or lower. + local major="" + local minor="" + local patch="" + IFS='.' read major minor patch < \ + <( \ + sudo podman ps | \ + grep '/smd:v' | \ + awk '{sub(/^.*:v/, "", $2); print $2 }'\ + ) + if [ "${major}" -le "2" -a "${minor}" -lt "19" ]; then + echo "--discovery-version=1" + fi +} + +function node_groups() { + # Templated mechanism for getting a list of unique node 'group' + # names from the list of managed nodes.
+ sort -u < /dev/null && pwd )" +source "${SCRIPT_DIR}/prep_setup.sh" + +info "preparing platform - install required packages" +PRE_INSTALL_PACKAGES="\ + epel-release \ +{%- for package in hosting_config.extra_packages.pre %} + {{ package }} \ +{%- endfor %} +" +PACKAGES="\ +{%- if deployment_mode == 'host' %} + libvirt \ + qemu-kvm \ + virt-install \ + virt-manager \ +{%- endif %} + dnsmasq \ + podman \ + buildah \ + git \ + ansible-core \ + openssl \ + nfs-utils \ + s3cmd \ +{%- for package in hosting_config.extra_packages.main %} + {{ package }} \ +{%- endfor %} +" +dnf -y check-update || true +# packages needed before main package list install +dnf install -y ${PRE_INSTALL_PACKAGES} +# packages needed to install and use OpenCHAMI +dnf -y install ${PACKAGES} # list of packages, should not be quoted + +# Don't enable libvirt if we are not running in host mode +{%- if deployment_mode == 'host' %} +systemctl enable --now libvirtd +{%- endif %} + +info "preparing platform - create the deployment user '${DEPLOY_USER}'" +if ! getent group "${DEPLOY_GROUP}"; then + info "creating primary group '{{ group }}' for '${DEPLOY_USER}'" + groupadd "${DEPLOY_GROUP}" +fi +{%- for group in manifest.deployment_user.supplementary_groups %} +if ! getent group "{{ group }}"; then + info "creating supplementary group '{{ group }}' for '${DEPLOY_USER}'" + groupadd "{{ group }}" +fi +{%- endfor %} +if ! getent passwd "${DEPLOY_USER}"; then + info "creating user '${DEPLOY_USER}'" + useradd -g "${DEPLOY_GROUP}" "${DEPLOY_USER}" +fi +{%- for group in manifest.deployment_user.supplementary_groups %} +if ! 
getent group "{{ group }}"; then + info "adding supplementary group '{{ group }}' to '${DEPLOY_USER}'" + usermod -aG "{{ group }}" "${DEPLOY_USER}" +fi +{%- endfor %} +# Remove the deployment user from /etc/sudoers and then put it back +# with NOPASSWD access +info "giving user '${DEPLOY_USER}' passwordless sudo access" +sed -i -e "/[[:space:]]*${DEPLOY_USER}/d" /etc/sudoers +echo "${DEPLOY_USER} ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers diff --git a/install-openchami/install_openchami/templates/registry.container b/install-openchami/install_openchami/templates/registry.container new file mode 100644 index 0000000..68f7ca3 --- /dev/null +++ b/install-openchami/install_openchami/templates/registry.container @@ -0,0 +1,18 @@ +[Unit] +Description=Image OCI Registry +After=network-online.target +Requires=network-online.target + +[Container] +ContainerName=registry +HostName=registry +Image=docker.io/library/registry:latest +Volume=/data/oci:/var/lib/registry:Z +PublishPort=5000:5000 + +[Service] +TimeoutStartSec=0 +Restart=always + +[Install] +WantedBy=multi-user.target diff --git a/install-openchami/install_openchami/templates/registry.container.license b/install-openchami/install_openchami/templates/registry.container.license new file mode 100644 index 0000000..3377279 --- /dev/null +++ b/install-openchami/install_openchami/templates/registry.container.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +SPDX-License-Identifier: MIT diff --git a/install-openchami/install_openchami/templates/s3-public-read-boot-images.json b/install-openchami/install_openchami/templates/s3-public-read-boot-images.json new file mode 100644 index 0000000..638ab44 --- /dev/null +++ b/install-openchami/install_openchami/templates/s3-public-read-boot-images.json @@ -0,0 +1,11 @@ +{ + "Version":"2012-10-17", + "Statement":[ + { + "Effect":"Allow", + "Principal":"*", + "Action":["s3:GetObject"], + "Resource":["arn:aws:s3:::boot-images/*"] 
+ } + ] +} diff --git a/install-openchami/install_openchami/templates/s3-public-read-boot-images.json.license b/install-openchami/install_openchami/templates/s3-public-read-boot-images.json.license new file mode 100644 index 0000000..3377279 --- /dev/null +++ b/install-openchami/install_openchami/templates/s3-public-read-boot-images.json.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +SPDX-License-Identifier: MIT diff --git a/install-openchami/install_openchami/templates/s3-public-read-efi.json b/install-openchami/install_openchami/templates/s3-public-read-efi.json new file mode 100644 index 0000000..1d4e147 --- /dev/null +++ b/install-openchami/install_openchami/templates/s3-public-read-efi.json @@ -0,0 +1,11 @@ +{ + "Version":"2012-10-17", + "Statement":[ + { + "Effect":"Allow", + "Principal":"*", + "Action":["s3:GetObject"], + "Resource":["arn:aws:s3:::efi/*"] + } + ] +} diff --git a/install-openchami/install_openchami/templates/s3-public-read-efi.json.license b/install-openchami/install_openchami/templates/s3-public-read-efi.json.license new file mode 100644 index 0000000..3377279 --- /dev/null +++ b/install-openchami/install_openchami/templates/s3-public-read-efi.json.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +SPDX-License-Identifier: MIT diff --git a/install-openchami/install_openchami/templates/s3cfg b/install-openchami/install_openchami/templates/s3cfg new file mode 100644 index 0000000..0540425 --- /dev/null +++ b/install-openchami/install_openchami/templates/s3cfg @@ -0,0 +1,13 @@ +# Setup endpoint +# +host_base = {{ hosting_config.net_head_hostname }}.{{ hosting_config.net_head_domain }}:9000 +host_bucket = {{ hosting_config.net_head_hostname }}.{{ hosting_config.net_head_domain }}:9000 +bucket_location = us-east-1 +use_https = False + +# Setup access keys +access_key = admin +secret_key = admin123 + +# Enable S3 v4 signature APIs 
+signature_v2 = False diff --git a/install-openchami/install_openchami/templates/s3cfg.license b/install-openchami/install_openchami/templates/s3cfg.license new file mode 100644 index 0000000..3377279 --- /dev/null +++ b/install-openchami/install_openchami/templates/s3cfg.license @@ -0,0 +1,2 @@ +SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +SPDX-License-Identifier: MIT diff --git a/install-openchami/noxfile.py b/install-openchami/noxfile.py new file mode 100644 index 0000000..09a1e61 --- /dev/null +++ b/install-openchami/noxfile.py @@ -0,0 +1,87 @@ +# SPDX-FileCopyrightText: (C) Copyright 2024-2026 OpenCHAMI a Series of LF Projects, LLC +# SPDX-License-Identifier: MIT +""" Nox definitions for tests, docs, and linting +""" +from __future__ import absolute_import +import os + +import nox # pylint: disable=import-error + + +COVERAGE_FAIL = 95 + +PYTHON = ['3'] + +@nox.session(python=PYTHON) +def lint(session): + """Run linters. + Returns a failure if the linters find linting errors or sufficiently + serious code quality issues. + """ + run_cmd = [ + 'pylint', + 'install_openchami', + ] + if session.python: + session.install('.[lint]') + session.run(*run_cmd) + + +@nox.session(python=PYTHON) +def style(session): + """Run code style checker. + Returns a failure if the style checker fails. + """ + run_cmd = [ + 'pycodestyle', + '--config=.pycodestyle', + 'install_openchami', + ] + + if session.python: + session.install('.[style]') + session.run(*run_cmd) + + +@nox.session(python=PYTHON) +def tests(session): + """Default unit test session. + """ + # Install all test dependencies, then install this package in-place. + path = 'tests' + if session.python: + session.install('.[test]') + + # XXX - disable tests until we have some... + session.run('/usr/bin/true', external=True) +# # Run py.test against the tests.
+# session.run( +# 'py.test', +# '--quiet', +# '-W', +# 'ignore::DeprecationWarning', +# '--cov=install_openchami', +# '--cov=tests', +# '--cov-append', +# '--cov-config=.coveragerc', +# '--cov-report=', +# '--cov-fail-under={}'.format(COVERAGE_FAIL), +# os.path.join(path), +# env={} +# ) + + + +@nox.session(python=PYTHON) +def cover(session): + """Run the final coverage report. + This outputs the coverage report aggregating coverage from the unit + test runs, and then erases coverage data. + """ + if session.python: + session.install('.[cover]') + # Disable coverage tests until we have some... + session.run('/usr/bin/true', external=True) +# session.run('coverage', 'report', '--show-missing', +# '--fail-under={}'.format(COVERAGE_FAIL)) +# session.run('coverage', 'erase') diff --git a/install-openchami/pyproject.toml b/install-openchami/pyproject.toml new file mode 100644 index 0000000..b789ee7 --- /dev/null +++ b/install-openchami/pyproject.toml @@ -0,0 +1,106 @@ +# SPDX-FileCopyrightText: (C) Copyright 2026 OpenCHAMI a Series of LF Projects, LLC +# SPDX-License-Identifier: MIT +[project] +name = 'install_openchami' +authors = [ + { name = 'Eric Lund', email = 'eric.lund@hpe.com' }, +] +classifiers = [ + 'License :: OSI Approved :: MIT License', + 'Natural Language :: English', + 'Operating System :: POSIX', + 'Programming Language :: Python :: 3 :: Only', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', +] +description = '2025 Tutorial Based Installer for OpenCHAMI Release content' +dependencies = [ + # yaml - this comment reduces conflicts in dependabot PRs + 'pyyaml~=6.0', + # passlib for composing and hashing user passwords + 'passlib~=1.7', + # setuptools < 80 is required to provide pkg_resources + # for passlib and to avoid the pkg_resources deprecation + # warning + 'setuptools<80', + # vtds-base for a few template and config 
operations + 'vtds-base~=0.0' +] + +dynamic = ['version'] +maintainers = [ + { name = 'Eric Lund', email = 'eric.lund@hpe.com' }, +] +readme = { file = 'README.md', content-type = 'text/markdown' } +requires-python = '>=3.9' + +[metadata] +url = 'https://github.com/erl-hpe/install-openchami' + +license = "MIT" +# Note: the LICENSE file here is populated by the RPM installer that +# typically runs the installation by copying the LICENSE file +# from the root of this project's tree into the installation +# tree for this python package. If you are trying to install +# this python package manually, copy '../LICENSE' into this +# directory before running the install. +license-files = ["LICENSE"] + +[project.optional-dependencies] +build = [ + 'build>=1.2.2,<1.5.0' +] +ci = [ + 'nox>=2024.10.9,<2026.3.0', +] +docs = [ +] +style = [ + 'pycodestyle~=2.11', +] +lint = [ + 'pylint~=3.2', +] +test = [ + # pytest - this comment reduces conflicts in dependabot PRs + 'pytest>=8.3.3,<8.5.0', + # pytest coverage - this comment reduces conflicts in dependabot PRs + 'pytest-cov>=4.1,<8.0', + # coverage - this comment reduces conflicts in dependabot PRs + 'coverage~=7.4', +] + +[project.scripts] +install_openchami = "install_openchami.install_openchami:entry" + +# Defines which folders belong to our package. +[tool.setuptools.packages.find] +include = ['install_openchami*'] +exclude = [] + +[tool.setuptools.package-data] +install_openchami = [ + "templates/*", + "config/*", +] + +[tool.setuptools.dynamic] + +# Allows for dynamic versioning via git-tag. 
+[tool.setuptools_scm] + +[build-system] +build-backend = 'setuptools.build_meta' +requires = [ + # setuptools - this comment reduces conflicts in dependabot PRs + 'setuptools >= 66,< 83', + # setuptools scm - this comment reduces conflicts in dependabot PRs + 'setuptools_scm[toml] >= 7.1,< 9.3', + # wheel - this comment reduces conflicts in dependabot PRs + 'wheel >= 0.45.1,< 0.47.0', +] + +[tool.distutils.bdist_wheel] +universal = false # This app. is not compatible with Python2. diff --git a/openchami.spec b/openchami.spec index 0f2fb87..00c9d01 100644 --- a/openchami.spec +++ b/openchami.spec @@ -12,6 +12,7 @@ BuildArch: noarch Requires: podman Requires: jq Requires: curl +Requires: python >= 3.9 Requires(post): coreutils Requires(post): openssl Requires(post): hostname @@ -24,8 +25,6 @@ The quadlets, systemd units, and config files for the Open Composable, Heterogen %setup -q %build -# nothing to build - %install # 1) Install config, unit, and script files mkdir -p %{buildroot}/etc/openchami/configs \ @@ -57,6 +56,21 @@ chmod +x %{buildroot}/usr/libexec/openchami/ohpc-nodes.sh chmod 600 %{buildroot}/etc/openchami/configs/openchami.env chmod 644 %{buildroot}/etc/openchami/configs/* +# 2) Put the 'install-openchami' source on the system so we can +# install it during 'post'. Also create a wrapper script to run +# 'install_openchami' from its shared virtual environment once +# this RPM finishes installing. +mkdir -p \ + "%{buildroot}/opt/install-openchami-%{version}/src" \ + "%{buildroot}/usr/bin" +cp -a install-openchami/* "%{buildroot}/opt/install-openchami-%{version}/src" +cp LICENSE "%{buildroot}/opt/install-openchami-%{version}/src/LICENSE" +cat < %{buildroot}/usr/bin/install_openchami +#! 
/bin/bash +exec /opt/install-openchami-%{version}/venv/bin/python3 -m install_openchami "\$@" +EOF +chmod +x %{buildroot}/usr/bin/install_openchami + %files %license LICENSE %config(noreplace) /etc/openchami/configs/* @@ -70,6 +84,8 @@ chmod 644 %{buildroot}/etc/openchami/configs/* /etc/profile.d/openchami.sh /etc/openchami/pg-init/multi-psql-db.sh /usr/bin/openchami-certificate-update +/usr/bin/install_openchami +/opt/install-openchami-%{version}/ %pre if [ -f /etc/containers/systemd/coresmd.container ]; then @@ -78,6 +94,18 @@ if [ -f /etc/containers/systemd/coresmd.container ]; then fi %post +# Create a shared python virtual environment in which to install +# 'install-openchami' and then use the pip from that virtual +# environment to install it. By doing it here instead of in the +# 'build' or 'install' stage we keep this RPM from becoming +# architecture dependent (due to inclusion of the python binary) and +# keep the size of the RPM down. +export SETUPTOOLS_SCM_PRETEND_VERSION_FOR_INSTALL_OPENCHAMI="%{version}" +export OPT_DIR="/opt/install-openchami-%{version}" +python3 -m venv "$OPT_DIR/venv" +"$OPT_DIR/venv/bin/pip" install --upgrade pip +"$OPT_DIR/venv/bin/pip" install "$OPT_DIR/src" + # reload systemd so new units are seen systemctl daemon-reload # bootstrap @@ -87,3 +115,6 @@ systemctl stop firewalld %postun # reload systemd on uninstall systemctl daemon-reload + +# Remove all the install-openchami stuff installed during 'post' +rm -rf /opt/install-openchami-%{version} From 24016f2fb889d1e564741a9851909af2e90232ed Mon Sep 17 00:00:00 2001 From: Eric Lund <77127214+erl-hpe@users.noreply.github.com> Date: Fri, 8 May 2026 08:02:27 -0500 Subject: [PATCH 2/2] Changes required to handle cluster mode deployments Signed-off-by: Eric Lund <77127214+erl-hpe@users.noreply.github.com> --- install-openchami/README.md | 102 +++++++++++++++++- .../templates/OpenCHAMI-Install.sh | 45 ++++++-- .../install_openchami/templates/coredhcp.yaml | 6 +-
.../install_openchami/templates/prep_setup.sh | 21 +--- 4 files changed, 144 insertions(+), 30 deletions(-) diff --git a/install-openchami/README.md b/install-openchami/README.md index 5df39fc..bf1475e 100644 --- a/install-openchami/README.md +++ b/install-openchami/README.md @@ -20,9 +20,6 @@ managed nodes can be powered on, powered off, and reset using RedFish calls to RedFish instances running on Base Board Management Controllers (BMCs) accesible across a network from the headnode. - __NOTE: The 'cluster' mode is still under development and not quite - ready for use.__ - ## System Requirements At present, the OpenCHAMI Installer uses the 'dnf' package manager, @@ -120,6 +117,8 @@ to the OpenCHAMI tutorial with virtual managed (compute) nodes. This base configuration can be modified at run time by providing the paths to one or more YAML format configuration overlays on the command line. +### Example Configuration Overlay for ARM-64 Hosts + For example, to change the repository URLs in the image builders so you can deploy this on an `arm64` (`aarch64`) host, all that is required is a configuration overlay that overrides those URLs. Something like this: @@ -159,6 +158,102 @@ images: gpg: 'https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-9' ``` +### Example Overlay for Cluster Mode OpenCHAMI Deployment + +The following is an example configuration overlay for deploying +OpenCHAMI on a cluster with a head node and four compute nodes all +residing on a cluster network 10.2.1.0/24 with an external DNS server +at 10.234.0.1 and a single BMC managing the four nodes and residing on +a hardware management network 10.3.1.0/24 at 10.3.1.1. The MAC +addresses have been collected from the BMCs and compute nodes. + +__NOTE: set the RedFish password in here to match the setting on the BMC__ + +``` +# Set the deployment mode to 'cluster' instead of 'host' so that no +# VM will be created on the head node and the cluster will be set up +# to use coresmd-coredns for DNS. 
+deployment_mode: cluster +bmcs: + # An X-Name is used here because it shows the relationship between the BMC and the nodes it manages + x2000c0s0b0: + blade_class: host-blade + blade_instance: 0 + network: + redfish_username: root + # NOTE: the redfish password is not shown here, but it should be + # configured to match whatever is configured for RedFish + # on the BMC. + redfish_password: null + mac: 52:54:00:b2:2e:f4 # Get this from the BMC + # The BMC is on a Hardware Management Network (10.3.1.0/24) + ipv4: 10.3.1.1 +hosting_config: + cluster_name: demo + cluster_net_dhcp_start: 10.2.1.32 + cluster_net_dhcp_end: 10.2.1.254 + cluster_net_mask: 255.255.255.0 + net_head_dns_server: 10.234.0.1 + net_head_domain: openchami.cluster + net_head_hostname: demo + net_head_ip: 10.2.1.2 +nodes: +- name: x2000c0s0b0n0 + bmc_name: x2000c0s0b0 + cluster_net_interface: compnet + hostname: compute-001 + nid: 1 + node_group: compute + interfaces: + - network_name: compnet + # Get this from the node's NIC on the cluster network + mac_addr: 52:54:00:3d:71:7b + ip_addrs: + - name: compnet + ip_addr: 10.2.1.32 +- name: x2000c0s0b0n1 + bmc_name: x2000c0s0b0 + cluster_net_interface: compnet + hostname: compute-002 + nid: 2 + node_group: compute + interfaces: + - network_name: compnet + # Get this from the node's NIC on the cluster network + mac_addr: 52:54:00:6a:d2:33 + ip_addrs: + - name: compnet + ip_addr: 10.2.1.33 +- name: x2000c0s0b0n2 + bmc_name: x2000c0s0b0 + cluster_net_interface: compnet + hostname: compute-003 + nid: 3 + node_group: compute + interfaces: + - network_name: compnet + # Get this from the node's NIC on the cluster network + mac_addr: 52:54:00:3b:aa:78 + ip_addrs: + - name: compnet + ip_addr: 10.2.1.34 +- name: x2000c0s0b0n3 + bmc_name: x2000c0s0b0 + cluster_net_interface: compnet + hostname: compute-004 + nid: 4 + node_group: compute + interfaces: + - network_name: compnet + # Get this from the node's NIC on the cluster network + mac_addr: 52:54:00:06:48:e8 + ip_addrs: + - name: compnet +
ip_addr: 10.2.1.35 +``` + +### Working with the Installer Configuration + When creating a configuration overlay, it helps to know what the configuration to be overlaid looks like, and what the configuration looks like after applying the overlay. There are two options to the @@ -231,7 +326,6 @@ The OpenCHAMI Installer currently has the following limitations. Except where otherwise noted, solutions to these are being investigated and implemented: -- the 'cluster' mode is experimental - there is no 'remove' operation in the OpenCHAMI Installer - while the OpenCHAMI Installer tries to be re-usable, it is not perfectly idempotent, so situations may arise where re-running the diff --git a/install-openchami/install_openchami/templates/OpenCHAMI-Install.sh b/install-openchami/install_openchami/templates/OpenCHAMI-Install.sh index eed9dd6..a154281 100644 --- a/install-openchami/install_openchami/templates/OpenCHAMI-Install.sh +++ b/install-openchami/install_openchami/templates/OpenCHAMI-Install.sh @@ -58,24 +58,53 @@ function cleanup_service() { function ssh_to_compute_node() { local hostname="${1}"; shift || fail "no hostname specified" local user="${1}"; shift || fail "no deployment username provided" + local cmd="${1}"; shift || cmd="true" + local retries="${1}"; shift || retries=30 local check="-o StrictHostKeyChecking=no" local file="-o UserKnownHostsFile=/dev/null" local time="-o ConnectTimeout=10" local where="root@${hostname}" local retval=0 info "Attempting to SSH to ${hostname} as ${user}" - for retry in {0..30}; do + for ((retry=0; retry /dev/null {%- endfor %} {%- endif %} @@ -379,7 +410,9 @@ ochami cloud-init node set \ {%- if deployment_mode == 'cluster' %} # In 'cluster' mode the nodes are all "physical" hosts already plugged # into the cluster network. We just need to power them on and they -# should boot from OpenCHAMI +# should boot from OpenCHAMI. In case they were running before this, +# power them off first. 
+power-off-node "{{ node.name }}" "{{ node.bmc_name }}" || true power-on-node "{{ node.name }}" "{{ node.bmc_name }}" {%- else %} # In 'host' mode, all of the compute nodes are VMs on the headnode VM, @@ -416,5 +449,5 @@ sudo virt-install \ # Wait for compute node(s) to come up and try to SSH to compute # node(s) as a sanity check of the installation. {%- for node in nodes %} -ssh_to_compute_node {{ node.hostname }} "${DEPLOY_USER}" +ssh_to_compute_node "$(printf "nid-%3.3d" {{ node.nid }})" "${DEPLOY_USER}" {%- endfor %} diff --git a/install-openchami/install_openchami/templates/coredhcp.yaml b/install-openchami/install_openchami/templates/coredhcp.yaml index d7cef5f..9af773f 100644 --- a/install-openchami/install_openchami/templates/coredhcp.yaml +++ b/install-openchami/install_openchami/templates/coredhcp.yaml @@ -3,15 +3,15 @@ server4: # listen on by uncommenting the lines below and setting the interface # # The 'listen' setting is only known on the management node at -# deployment time, so it is filled in by 'prepare_node.sh' not +# deployment time, so it is filled in by 'OpenCHAMI-Install.sh' not # templated like the rest of the content here. 
listen: - "%::MGMT_NET_HEAD_IFNAME::" plugins: # You are able to set the IP address of the system in server_id as the # place to look for a DHCP server DNS is able to be set to whatever you -# want but it is much easier if you keep it set to the server IP Router -# is also able to be set to whatever you network router address is +# want but it is much easier if you keep it set so the server IP Router +# is also able to be set to whatever your network router address is - searchdomains: {{ hosting_config.net_head_domain }} - server_id: {{ hosting_config.net_head_ip }} - dns: {{ hosting_config.net_head_ip }} diff --git a/install-openchami/install_openchami/templates/prep_setup.sh b/install-openchami/install_openchami/templates/prep_setup.sh index af4f297..0686251 100644 --- a/install-openchami/install_openchami/templates/prep_setup.sh +++ b/install-openchami/install_openchami/templates/prep_setup.sh @@ -91,11 +91,6 @@ function switch_dns() { # assigned to them... local nameserver="${1}"; shift || fail "no nameserver specified to switch to" local domain="${1}"; shift || fail "no search domain specified" - # Optional network argument tells the logic to place the - # nameserver on the connection containing a local network address - # but not the nameserver address. If it is not provided the - # nameserver address is used. 
- local network="${1:-"${nameserver}"}" local connection="" local connections="$( for connection in $(nmcli --terse --fields NAME connection show); do @@ -111,18 +106,10 @@ function switch_dns() { sudo nmcli connection up "${connection}" done - # Okay, now, find the connection that has an IP address that - # matches the internal IP address of the head-node on the - # management network (in other words, the connection that is the - # management network) - connection="$( - for connection in $(nmcli --terse --fields NAME connection show); do - echo -n "${connection} " - nmcli connection show "${connection}" | grep 'ipv4.addresses:' - done | grep -F "${network}" | cut -d ' ' -f 1 - )" - [[ "${connection}" =~ ^[^\ ]*$ ]] || fail "more than one interface [${connection}] has the requested local network address '${network}'" - [[ "${connection}" != "" ]] || fail "no iinterface found with a suitable network to configure the DNS server '${nameserver}'" + # Now find the first interface (nmcli connection) that routes to + # the desired name server IP address. + connection="$(ip --json route get "${nameserver}" | jq -r '.[0] | .dev')" + [[ "${connection}" != "" ]] || fail "no interface found that can reach the DNS server '${nameserver}'" # Set the nameserver on the connection and put the cluster domain # in the search on the same connection