Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 40 additions & 1 deletion cmd/node-joiner/main.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
package main

import (
"fmt"
"io"
"os"

"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
terminal "golang.org/x/term"

"github.com/openshift/installer/cmd/openshift-install/command"
"github.com/openshift/installer/pkg/nodejoiner"
)

Expand Down Expand Up @@ -33,14 +39,47 @@ func main() {
}

rootCmd := &cobra.Command{
Use: "node-joiner",
Use: "node-joiner",
PersistentPreRun: runRootCmd,
}
rootCmd.PersistentFlags().String("kubeconfig", "", "Path to the kubeconfig file.")
rootCmd.PersistentFlags().String("dir", ".", "assets directory")
rootCmd.PersistentFlags().String("log-level", "info", "log level (e.g. \"debug | info | warn | error\")")

rootCmd.AddCommand(nodesAddCmd)
rootCmd.AddCommand(nodesMonitorCmd)
if err := rootCmd.Execute(); err != nil {
logrus.Fatal(err)
}
}

// runRootCmd is the PersistentPreRun hook of the root command. It mutes the
// logger's own output and installs a hook that writes entries to stderr,
// using the level requested through the "log-level" flag.
func runRootCmd(cmd *cobra.Command, args []string) {
	// The logger itself writes nowhere and accepts every level; the hook
	// installed below receives the effective level.
	logrus.SetLevel(logrus.TraceLevel)
	logrus.SetOutput(io.Discard)

	requested, flagErr := cmd.Flags().GetString("log-level")
	if flagErr != nil {
		logrus.Fatal(flagErr)
	}

	hookLevel, parseErr := logrus.ParseLevel(requested)
	if parseErr != nil {
		// Use info for now so the hook can still be installed; the parse
		// failure is reported once the hook is in place.
		hookLevel = logrus.InfoLevel
	}

	formatter := &logrus.TextFormatter{
		// logrus.TextFormatter decides whether to colorize by inspecting the
		// logger's output, which here is io.Discard and therefore never a
		// terminal. Forcing the value lets the same terminal check be made
		// against the hook's destination (stderr) instead.
		ForceColors:            terminal.IsTerminal(int(os.Stderr.Fd())),
		DisableTimestamp:       true,
		DisableLevelTruncation: true,
		DisableQuote:           true,
	}
	logrus.AddHook(command.NewFileHookWithNewlineTruncate(os.Stderr, hookLevel, formatter))

	if parseErr != nil {
		logrus.Fatal(fmt.Errorf("invalid log-level: %w", parseErr))
	}
}
47 changes: 47 additions & 0 deletions data/data/agent/files/usr/local/bin/add-node.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#!/bin/bash
set -e

# shellcheck disable=SC1091
source issue_status.sh

# Root of the assisted-install v2 REST API. SERVICE_BASE_URL is taken from the
# environment and is concatenated as-is (presumably it ends with a slash).
BASE_URL="${SERVICE_BASE_URL}api/assisted-install/v2"

# Poll the clusters endpoint every 2 seconds until it reports a cluster id.
cluster_id=""
while [[ "${cluster_id}" = "" ]]
do
    # Get cluster id (the jq filter is quoted so the shell passes it through
    # verbatim, like the other jq invocations in this script)
    cluster_id=$(curl -s -S "${BASE_URL}/clusters" | jq -r '.[].id')
    if [[ "${cluster_id}" = "" ]]; then
        sleep 2
    fi
done
Copy link
Contributor

@sadasu sadasu Apr 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need a timeout followed by error message here? Alternatively, do we need a status_issue for this step?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good question. I think the cluster_id should be available fairly quickly here, as the agent-add-node service has an After dependency on apply-host-config.service, which itself has a dependency on agent-register-infra.service, which in turn depends on agent-import-cluster.service. So by the time this script/service runs, the cluster_id should be available.

@sadasu What is a status_issue? Could you explain? Thanks!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, as per the agent services architecture, I'd expect that in case of an issue the agent-register-cluster service would prevent the other services from running; the loop here is just to cope with slower execution (a similar approach is used in other scripts).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sadasu Please disregard my question. I see status_issue updated below.


printf '\nInfra env id is %s\n' "${INFRA_ENV_ID}" 1>&2

# Name under which the waiting message is published with set_issue and
# removed again with clear_issue.
status_issue="90_add-node"

# Wait for the current host to be ready: poll the infra-env hosts endpoint
# every 10 seconds until the reported status is "known".
host_ready=false
while [[ $host_ready == false ]]
do
    host_status=$(curl -s -S "${BASE_URL}/infra-envs/${INFRA_ENV_ID}/hosts" | jq -r ".[].status")
    if [[ "${host_status}" != "known" ]]; then
        printf '\\e{yellow}Waiting for the host to be ready' | set_issue "${status_issue}"
        sleep 10
    else
        host_ready=true
    fi
done

# -S keeps curl's error message visible if this lookup fails, matching every
# other request in this script; without it a failure would silently leave
# HOST_ID empty.
HOST_ID=$(curl -s -S "${BASE_URL}/infra-envs/${INFRA_ENV_ID}/hosts" | jq -r '.[].id')
printf '\nHost %s is ready for installation\n' "${HOST_ID}" 1>&2
clear_issue "${status_issue}"

# Add the current host to the cluster. Only the HTTP status code is captured
# (the response body is discarded); 202 is treated as success.
res=$(curl -X POST -s -S -w "%{http_code}\\n" -o /dev/null "${BASE_URL}/infra-envs/${INFRA_ENV_ID}/hosts/${HOST_ID}/actions/install")
if [[ $res = "202" ]]; then
    printf '\nHost installation started\n' 1>&2
else
    printf '\nHost installation failed\n' 1>&2
    exit 1
fi
21 changes: 21 additions & 0 deletions data/data/agent/systemd/units/agent-add-node.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Runs add-node.sh once, after the host configuration has been applied, to add
# this host to an already existing cluster.
[Unit]
Description=Adds the current node to an already existing cluster
Wants=network-online.target
# Requires= pulls in apply-host-config.service; the After= line below makes
# this unit start only once that service (and the network) is up.
Requires=apply-host-config.service
PartOf=assisted-service-pod.service
After=network-online.target apply-host-config.service
# The unit is skipped when this path does not exist.
ConditionPathExists=/etc/assisted/node0

[Service]
# Environment variables for the commands below are loaded from these files.
EnvironmentFile=/usr/local/share/assisted-service/assisted-service.env
EnvironmentFile=/usr/local/share/start-cluster/start-cluster.env
EnvironmentFile=/etc/assisted/rendezvous-host.env
# Run the wait script before the main script is started.
ExecStartPre=/usr/local/bin/wait-for-assisted-service.sh
ExecStart=/usr/local/bin/add-node.sh

KillMode=none
# oneshot + RemainAfterExit: the unit is still considered active after
# add-node.sh has exited successfully.
Type=oneshot
RemainAfterExit=true

[Install]
WantedBy=multi-user.target
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Runs the agent-installer-client "importCluster" command in a podman
# container attached to the assisted-service pod.
# NOTE: this file contains template placeholders (double curly braces) that
# are presumably rendered before the unit is installed.
[Unit]
Description=Imports an already existing cluster
Wants=network-online.target assisted-service.service
PartOf=assisted-service-pod.service
After=network-online.target assisted-service.service
# The unit is skipped when this path does not exist.
ConditionPathExists=/etc/assisted/node0

[Service]
Environment=PODMAN_SYSTEMD_UNIT=%n
Environment=OPENSHIFT_INSTALL_RELEASE_IMAGE_MIRROR={{.ReleaseImageMirror}}
# Environment variables for the commands below are loaded from these files.
EnvironmentFile=/etc/assisted/rendezvous-host.env
EnvironmentFile=/usr/local/share/assisted-service/agent-images.env
EnvironmentFile=/usr/local/share/assisted-service/assisted-service.env
EnvironmentFile=/etc/assisted/add-nodes.env
# Remove any stale container-id file, then run the wait script before the
# container is started.
ExecStartPre=/bin/rm -f %t/%n.ctr-id
ExecStartPre=/usr/local/bin/wait-for-assisted-service.sh
# The container mounts the manifests, extra-manifests and CA trust
# directories (plus /etc/containers when a mirror configuration is present)
# and receives the listed environment variables from this unit's environment.
ExecStart=podman run --net host --cidfile=%t/%n.ctr-id --cgroups=no-conmon --log-driver=journald --rm --pod-id-file=%t/assisted-service-pod.pod-id --replace --name=agent-import-cluster -v /etc/assisted/manifests:/manifests -v /etc/assisted/extra-manifests:/extra-manifests -v /etc/pki/ca-trust:/etc/pki/ca-trust:z {{ if .HaveMirrorConfig }}-v /etc/containers:/etc/containers{{ end }} --env SERVICE_BASE_URL --env OPENSHIFT_INSTALL_RELEASE_IMAGE_MIRROR --env CLUSTER_ID --env CLUSTER_NAME --env CLUSTER_API_VIP_DNS_NAME $SERVICE_IMAGE /usr/local/bin/agent-installer-client importCluster
# Stop and remove the container recorded in the container-id file.
ExecStop=/usr/bin/podman stop --ignore --cidfile=%t/%n.ctr-id
ExecStopPost=/usr/bin/podman rm -f --ignore --cidfile=%t/%n.ctr-id

KillMode=none
Type=oneshot
# A failed run is retried after 30 seconds.
Restart=on-failure
RestartSec=30
# The unit is still considered active after the command has exited
# successfully.
RemainAfterExit=true

[Install]
WantedBy=multi-user.target
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
Description=Service that registers the infraenv
Wants=network-online.target assisted-service.service
PartOf=assisted-service-pod.service
After=network-online.target assisted-service.service agent-register-cluster.service
After=network-online.target assisted-service.service {{ if eq .WorkflowType "install" }}agent-register-cluster.service{{ end }}{{ if eq .WorkflowType "addnodes" }}agent-import-cluster.service{{ end }}
ConditionPathExists=/etc/assisted/node0

[Service]
Expand Down
2 changes: 1 addition & 1 deletion data/data/agent/systemd/units/apply-host-config.service
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ EnvironmentFile=/usr/local/share/assisted-service/assisted-service.env
ExecStartPre=/bin/rm -f %t/%n.ctr-id
ExecStartPre=/bin/mkdir -p %t/agent-installer /etc/assisted/hostconfig
ExecStartPre=/usr/local/bin/wait-for-assisted-service.sh
ExecStart=podman run --net host --cidfile=%t/%n.ctr-id --cgroups=no-conmon --log-driver=journald --restart=on-failure:10 --pod-id-file=%t/assisted-service-pod.pod-id --replace --name=apply-host-config -v /etc/assisted/hostconfig:/etc/assisted/hostconfig -v %t/agent-installer:/var/run/agent-installer:z --env SERVICE_BASE_URL --env INFRA_ENV_ID $SERVICE_IMAGE /usr/local/bin/agent-installer-client configure
ExecStart=podman run --net host --cidfile=%t/%n.ctr-id --cgroups=no-conmon --log-driver=journald --restart=on-failure:10 --pod-id-file=%t/assisted-service-pod.pod-id --replace --name=apply-host-config -v /etc/assisted/hostconfig:/etc/assisted/hostconfig -v %t/agent-installer:/var/run/agent-installer:z --env SERVICE_BASE_URL --env INFRA_ENV_ID --env WORKFLOW_TYPE $SERVICE_IMAGE /usr/local/bin/agent-installer-client configure
ExecStop=/usr/bin/podman stop --ignore --cidfile=%t/%n.ctr-id
ExecStopPost=/usr/bin/podman rm -f --ignore --cidfile=%t/%n.ctr-id

Expand Down
50 changes: 50 additions & 0 deletions hack/build-node-joiner.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/bin/sh

# Builds the node-joiner binary from ./cmd/node-joiner.
#
# Environment overrides read below: MODE (release|dev, default release),
# SOURCE_GIT_COMMIT, BUILD_VERSION, DEFAULT_ARCH, GOFLAGS, LDFLAGS, TAGS,
# OUTPUT (default bin/node-joiner) and SKIP_GENERATION (set to "y" to skip
# "go generate").

# Exit on the first failing command and trace every command as it runs.
set -ex

# version prints a dotted version (e.g. 1.21.3) as a zero-padded digit string
# (three digits per component) so that two versions can be compared
# numerically with -lt. $@ is intentionally unquoted so it is split on ".".
# shellcheck disable=SC2068
version() { IFS="."; printf "%03d%03d%03d\\n" $@; unset IFS;}

minimum_go_version=1.21
# Third field of "go version" output, e.g. "go1.21.3".
current_go_version=$(go version | cut -d " " -f 3)

# Refuse to build with a toolchain older than the minimum; the "go" prefix is
# stripped before comparing.
if [ "$(version "${current_go_version#go}")" -lt "$(version "$minimum_go_version")" ]; then
echo "Go version should be greater or equal to $minimum_go_version"
exit 1
fi

export CGO_ENABLED=0
MODE="${MODE:-release}"

# Version metadata: taken from the environment when set, otherwise from git.
GIT_COMMIT="${SOURCE_GIT_COMMIT:-$(git rev-parse --verify 'HEAD^{commit}')}"
GIT_TAG="${BUILD_VERSION:-$(git describe --always --abbrev=40 --dirty)}"
DEFAULT_ARCH="${DEFAULT_ARCH:-amd64}"
GOFLAGS="${GOFLAGS:--mod=vendor}"
GCFLAGS=""
# Inject the version metadata into pkg/version through linker -X flags.
LDFLAGS="${LDFLAGS} -X github.com/openshift/installer/pkg/version.Raw=${GIT_TAG} -X github.com/openshift/installer/pkg/version.Commit=${GIT_COMMIT} -X github.com/openshift/installer/pkg/version.defaultArch=${DEFAULT_ARCH}"
TAGS="${TAGS:-}"
OUTPUT="${OUTPUT:-bin/node-joiner}"

# release: add "-s -w" to the linker flags and the "release" build tag.
# dev: add "all=-N -l" to the compiler flags.
# Any other mode is rejected.
case "${MODE}" in
release)
LDFLAGS="${LDFLAGS} -s -w"
TAGS="${TAGS} release"
;;
dev)
GCFLAGS="${GCFLAGS} all=-N -l"
;;
*)
echo "unrecognized mode: ${MODE}" >&2
exit 1
esac

if test "${SKIP_GENERATION}" != y
then
# this step has to be run natively, even when cross-compiling
GOOS='' GOARCH='' go generate ./data
fi

echo "building node-joiner"

# GOFLAGS is intentionally unquoted so that multiple flags are passed as
# separate arguments.
# shellcheck disable=SC2086
go build ${GOFLAGS} -gcflags "${GCFLAGS}" -ldflags "${LDFLAGS}" -tags "${TAGS}" -o "${OUTPUT}" ./cmd/node-joiner
2 changes: 1 addition & 1 deletion pkg/asset/agent/image/agentartifacts.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ func (a *AgentArtifacts) Generate(dependencies asset.Parents) error {
func (a *AgentArtifacts) fetchAgentTuiFiles(releaseImage string, pullSecret string, mirrorConfig []mirror.RegistriesConfig) ([]string, error) {
release := NewRelease(
Config{MaxTries: OcDefaultTries, RetryDelay: OcDefaultRetryDelay},
releaseImage, pullSecret, mirrorConfig)
releaseImage, pullSecret, mirrorConfig, nil)

agentTuiFilenames := []string{"/usr/bin/agent-tui", "/usr/lib64/libnmstate.so.*"}
files := []string{}
Expand Down
10 changes: 7 additions & 3 deletions pkg/asset/agent/image/agentimage.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ import (
)

const (
agentISOFilename = "agent.%s.iso"
iso9660Level1ExtLen = 3
agentISOFilename = "agent.%s.iso"
agentAddNodesISOFilename = "agent-addnodes.%s.iso"
iso9660Level1ExtLen = 3
)

// AgentImage is an asset that generates the bootable image used to install clusters.
Expand All @@ -34,6 +35,7 @@ type AgentImage struct {
rootFSURL string
bootArtifactsBaseURL string
platform hiveext.PlatformType
isoFilename string
}

var _ asset.WritableAsset = (*AgentImage)(nil)
Expand Down Expand Up @@ -61,9 +63,11 @@ func (a *AgentImage) Generate(dependencies asset.Parents) error {
switch agentWorkflow.Workflow {
case workflow.AgentWorkflowTypeInstall:
a.platform = agentManifests.AgentClusterInstall.Spec.PlatformType
a.isoFilename = agentISOFilename

case workflow.AgentWorkflowTypeAddNodes:
a.platform = clusterInfo.PlatformType
a.isoFilename = agentAddNodesISOFilename

default:
return fmt.Errorf("AgentWorkflowType value not supported: %s", agentWorkflow.Workflow)
Expand Down Expand Up @@ -239,7 +243,7 @@ func (a *AgentImage) PersistToFile(directory string) error {
return errors.New("cannot generate ISO image due to configuration errors")
}

agentIsoFile := filepath.Join(directory, fmt.Sprintf(agentISOFilename, a.cpuArch))
agentIsoFile := filepath.Join(directory, fmt.Sprintf(a.isoFilename, a.cpuArch))

// Remove symlink if it exists
os.Remove(agentIsoFile)
Expand Down
Loading