diff --git a/docs/user/agent/add-node/add-nodes.md b/docs/user/agent/add-node/add-nodes.md new file mode 100644 index 00000000000..bfa990db756 --- /dev/null +++ b/docs/user/agent/add-node/add-nodes.md @@ -0,0 +1,103 @@ +# Adding a node via the node-joiner tool + +## Pre-requisites +1. The `oc` tool must be available in the execution environment (the "user host"). +2. The user host has a valid network connection to the target OpenShift cluster to be expanded. + +## Setup +1. Download the [node-joiner.sh](./node-joiner.sh) script in a working directory in + the user host (the "assets folder"). +2. Create a `nodes-config.yaml` in the assets folder. This configuration file must contain the + list of all the nodes that the user wants to add to the target cluster. At minimum, the name and primary interface MAC address must be specified. For example: +``` +hosts: + - hostname: extra-worker-0 + interfaces: + - name: eth0 + macAddress: 00:02:46:e3:9e:7c + - hostname: extra-worker-1 + interfaces: + - name: eth0 + macAddress: 00:02:46:e3:9e:8c + - hostname: extra-worker-2 + interfaces: + - name: eth0 + macAddress: 00:02:46:e3:9e:9c +``` +3. 
Optionally, it's possible to specify - for each node - an `NMState` configuration block denoted below as `networkConfig` + (it will be applied during the first boot), for example: +``` +hosts: + - hostname: extra-worker-0 + interfaces: + - name: eth0 + macAddress: 00:02:46:e3:9e:7c + networkConfig: + interfaces: + - name: eth0 + type: ethernet + state: up + mac-address: 00:02:46:e3:9e:7c + ipv4: + enabled: true + address: + - ip: 192.168.111.90 + prefix-length: 24 + dhcp: false + dns-resolver: + config: + server: + - 192.168.111.1 + routes: + config: + - destination: 0.0.0.0/0 + next-hop-address: 192.168.111.1 + next-hop-interface: eth0 + table-id: 254 + - hostname: extra-worker-1 + interfaces: + - name: eth0 + macAddress: 00:02:46:e3:9e:8c + - hostname: extra-worker-2 + interfaces: + - name: eth0 + macAddress: 00:02:46:e3:9e:9c +``` +## ISO generation +Run the [node-joiner.sh](./node-joiner.sh) script: +```bash +$ ./node-joiner.sh +``` +The script will create a temporary namespace prefixed with `openshift-node-joiner` in the target cluster, +where a pod will be launched to run the actual node-joiner workload. +In case of success, the `node.x86_64.iso` ISO image will be downloaded to the assets folder. + +### Configuration file name +By default the script looks for a configuration file named `nodes-config.yaml`. It's possible to specify a +different config file name, as the first parameter of the script: + +```bash +$ ./node-joiner.sh config.yaml +``` + +## Nodes joining +Use the ISO image to boot all the nodes listed in the configuration file, and wait for the related +certificate signing requests (CSRs) to appear. When adding a new node to the cluster, two pending CSRs will +be generated, and they must be manually approved by the user. 
+Use the following command to monitor the pending certificates: +``` +$ oc get csr +``` +Use the `oc adm certificate approve` command to approve them (replace `CSR_NAME` with the name of each pending request): +``` +$ oc adm certificate approve CSR_NAME +``` +Once all the pending certificates have been approved, the new node will become available: +``` +$ oc get nodes +NAME STATUS ROLES AGE VERSION +extra-worker-0 Ready worker 1h v1.29.3+8628c3c +master-0 Ready control-plane,master 31h v1.29.3+8628c3c +master-1 Ready control-plane,master 32h v1.29.3+8628c3c +master-2 Ready control-plane,master 32h v1.29.3+8628c3c +``` \ No newline at end of file diff --git a/docs/user/agent/add-node/node-joiner.sh b/docs/user/agent/add-node/node-joiner.sh new file mode 100755 index 00000000000..d0c3b39a3eb --- /dev/null +++ b/docs/user/agent/add-node/node-joiner.sh @@ -0,0 +1,144 @@ +#!/bin/bash + +set -eu + +# Config file +nodesConfigFile=${1:-"nodes-config.yaml"} +if [ ! -f "$nodesConfigFile" ]; then + echo "Cannot find the config file $nodesConfigFile" + exit 1 +fi + +# Setup a cleanup function to ensure to remove the temporary +# file when the script will be completed. +cleanup() { + if [ -f "$pullSecretFile" ]; then + echo "Removing temporary file $pullSecretFile" + rm "$pullSecretFile" + fi +} +trap cleanup EXIT TERM + +# Retrieve the pullsecret and store it in a temporary file. +pullSecretFile=$(mktemp -p "/tmp" -t "nodejoiner-XXXXXXXXXX") +oc get secret -n openshift-config pull-secret -o jsonpath='{.data.\.dockerconfigjson}' | base64 -d > "$pullSecretFile" + +# Extract the baremetal-installer image pullspec from the current cluster. +nodeJoinerPullspec=$(oc adm release info --image-for=baremetal-installer --registry-config="$pullSecretFile") + +# Use the same random temp file suffix for the namespace. +namespace=$(echo "openshift-node-joiner-${pullSecretFile#/tmp/nodejoiner-}" | tr '[:upper:]' '[:lower:]') + +# Create the namespace to run the node-joiner, along with the required roles and bindings. 
+staticResources=$(cat </dev/null 2>&1; then + break + else + echo "Waiting for node-joiner pod to complete..." + sleep 10s + fi +done + +res=$(oc exec node-joiner -n "${namespace}" -- cat /assets/exit_code) +if [ "$res" = 0 ]; then + echo "node-joiner successfully completed, extracting ISO image..." + oc cp -n "${namespace}" node-joiner:/assets/node.x86_64.iso node.x86_64.iso +else + oc logs node-joiner -n "${namespace}" + echo "node-joiner failed" +fi + +echo "Cleaning up" +oc delete namespace "${namespace}" --grace-period=0 >/dev/null 2>&1 & \ No newline at end of file diff --git a/images/baremetal/Dockerfile.ci b/images/baremetal/Dockerfile.ci index e24228fdbf8..6341c7070b5 100644 --- a/images/baremetal/Dockerfile.ci +++ b/images/baremetal/Dockerfile.ci @@ -6,7 +6,9 @@ ARG TAGS="baremetal fipscapable" WORKDIR /go/src/github.com/openshift/installer COPY . . RUN DEFAULT_ARCH="$(go env GOHOSTARCH)" hack/build.sh +RUN DEFAULT_ARCH="$(go env GOHOSTARCH)" hack/build-node-joiner.sh +FROM registry.ci.openshift.org/ocp/4.16:cli-artifacts AS tools FROM registry.ci.openshift.org/ocp/4.16:base COPY --from=builder /go/src/github.com/openshift/installer/bin/openshift-install /bin/openshift-install @@ -16,6 +18,11 @@ RUN dnf upgrade -y && \ openssl unzip jq openssh-clients && \ dnf clean all && rm -rf /var/cache/yum/* +# node-joiner requirements +COPY --from=builder /go/src/github.com/openshift/installer/bin/node-joiner /bin/node-joiner +COPY --from=tools /usr/bin/oc /bin/oc +RUN dnf install -y nmstate + RUN mkdir /output && chown 1000:1000 /output USER 1000:1000 ENV PATH /bin diff --git a/images/installer/Dockerfile.ci b/images/installer/Dockerfile.ci index bc061739f11..f9ddc41e307 100644 --- a/images/installer/Dockerfile.ci +++ b/images/installer/Dockerfile.ci @@ -13,10 +13,15 @@ COPY --from=providers /go/src/github.com/openshift/installer/terraform/bin/ terr RUN DEFAULT_ARCH="$(go env GOHOSTARCH)" hack/build.sh RUN go run -mod=vendor hack/build-coreos-manifest.go +FROM 
registry.ci.openshift.org/ocp/4.16:cli-artifacts AS tools FROM registry.ci.openshift.org/ocp/4.16:base COPY --from=builder /go/src/github.com/openshift/installer/bin/openshift-install /bin/openshift-install COPY --from=builder /go/src/github.com/openshift/installer/bin/manifests/ /manifests/ +# Required to run agent-based installer from the container +COPY --from=tools /usr/bin/oc /bin/oc +RUN dnf install -y nmstate + RUN mkdir /output && chown 1000:1000 /output USER 1000:1000 ENV PATH /bin diff --git a/pkg/asset/agent/image/agentimage.go b/pkg/asset/agent/image/agentimage.go index 7fa17bff03e..8db8bf8368c 100644 --- a/pkg/asset/agent/image/agentimage.go +++ b/pkg/asset/agent/image/agentimage.go @@ -21,7 +21,7 @@ import ( const ( agentISOFilename = "agent.%s.iso" - agentAddNodesISOFilename = "agent-addnodes.%s.iso" + agentAddNodesISOFilename = "node.%s.iso" iso9660Level1ExtLen = 3 ) diff --git a/pkg/nodejoiner/addnodes.go b/pkg/nodejoiner/addnodes.go index 6610b7a96a1..62c0d10563c 100644 --- a/pkg/nodejoiner/addnodes.go +++ b/pkg/nodejoiner/addnodes.go @@ -2,6 +2,8 @@ package nodejoiner import ( "context" + "os" + "path/filepath" "github.com/openshift/installer/pkg/asset" "github.com/openshift/installer/pkg/asset/agent/image" @@ -10,6 +12,10 @@ import ( "github.com/openshift/installer/pkg/asset/store" ) +const ( + addNodesResultFile = "exit_code" +) + // NewAddNodesCommand creates a new command for add nodes. 
func NewAddNodesCommand(directory string, kubeConfig string) error { // Store the current parameters into the assets folder, so @@ -22,12 +28,20 @@ func NewAddNodesCommand(directory string, kubeConfig string) error { return err } - ctx := context.Background() - fetcher := store.NewAssetsFetcher(directory) - return fetcher.FetchAndPersist(ctx, []asset.WritableAsset{ + err = fetcher.FetchAndPersist(context.Background(), []asset.WritableAsset{ &workflow.AgentWorkflowAddNodes{}, &image.AgentImage{}, - // To be completed }) + + // Save the exit code result + exitCode := "0" + if err != nil { + exitCode = "1" + } + if err2 := os.WriteFile(filepath.Join(directory, addNodesResultFile), []byte(exitCode), 0600); err2 != nil { + return err2 + } + + return err }