Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions data/data/bootstrap/files/usr/local/bin/bootkube.sh.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ set -euoE pipefail ## -E option will cause functions to inherit trap

. /usr/local/bin/release-image.sh
. /usr/local/bin/bootstrap-cluster-gather.sh
. /usr/local/bin/bootstrap-verify-api-server-urls.sh

mkdir --parents /etc/kubernetes/{manifests,bootstrap-configs,bootstrap-manifests}

Expand Down Expand Up @@ -356,6 +357,19 @@ then
record_service_stage_success
fi

# Verify that the API and API-Int server URLs can be resolved and reached.
# Both values are filled in when this template is rendered; a check is only
# attempted for a URL whose rendered value is non-empty.
echo "Check if API and API-Int URLs are resolvable during bootstrap"
API_SERVER_URL="{{.APIServerURL}}"
API_INT_SERVER_URL="{{.APIIntServerURL}}"

if [[ -n "${API_SERVER_URL}" ]]; then
    check_url "API_URL" "${API_SERVER_URL}"
fi

if [[ -n "${API_INT_SERVER_URL}" ]]; then
    check_url "API_INT_URL" "${API_INT_SERVER_URL}"
fi

if [ ! -f cco-bootstrap.done ]
then
record_service_stage_start "cco-bootstrap"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env bash

# shellcheck disable=SC1091
. /usr/local/bin/bootstrap-service-record.sh

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@patrickdillon @jstuever I am unclear about whether kubeconfig needs to be specified here.
From my understanding, if this script is only run from analyze we don't have to but if output from this service is to be included in install-gather, then we would have to?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am unclear about whether kubeconfig needs to be specified here.

Every instance of oc I see in the bootstrap node specifies a kubeconfig explicitly so I think so.

if this script is only run from analyze we don't have to but if output from this service is to be included in install-gather, then we would have to?

analyze does not execute anything on the bootstrap node. gather will execute commands to grab logs on the bootstrap node, but this service should just be running as part of the standard execution of bootkube. gather will not cause it to be run but will just collect its logs

# resolve_url looks up a host name with dig and stores the first address it
# finds in the global variable IP.
#
# This function expects 2 arguments:
# 1. name of the URL (only used in the log messages)
# 2. The value of the URL (the name handed to dig)
#
# Returns 0 and sets IP when at least one address was found. Returns 1 and
# leaves IP unset otherwise.
function resolve_url() {
    unset IPS
    unset IP
    IPS=$(dig "${2}" +short)

    # "dig +short" output is not guaranteed to contain only addresses: CNAME
    # targets (written with a trailing dot) are listed before the address
    # records, and diagnostics such as ";; connection timed out" are printed
    # on stdout as well. Keep only the lines that look like an IPv4 or IPv6
    # address so none of those is mistaken for a resolved IP.
    local ipv4_re='^[0-9]+(\.[0-9]+){3}$'
    local ipv6_re='^[0-9A-Fa-f:.]*:[0-9A-Fa-f:.]*$'
    local entry
    ip_arr=()
    while IFS= read -r entry; do
        if [[ "${entry}" =~ ${ipv4_re} ]] || [[ "${entry}" =~ ${ipv6_re} ]]; then
            ip_arr+=("${entry}")
        fi
    done <<<"${IPS}"

    if [[ ${#ip_arr[@]} -gt 0 ]]; then
        echo "Successfully resolved ${1} ${2}"
        # dig may return multiple IPs. Only the first one is later checked
        # for reachability.
        IP="${ip_arr[0]}"
        return 0
    else
        echo "Unable to resolve ${1} ${2}"
        return 1
    fi
}

# validate_url issues a HEAD request against a URL and reports whether it
# answered with HTTP 200.
#
# This function expects 2 arguments:
# 1. name of the URL (only used in the log messages)
# 2. URL to validate
#
# Returns 0 when the endpoint answered with 200, 1 otherwise.
function validate_url() {
    # Certificate verification is skipped (-k). The timeouts bound how long an
    # unreachable or unresponsive endpoint can hold up the caller; without
    # them curl would wait for the operating system's own connection timeout.
    if [[ $(curl --head -k --silent --fail --connect-timeout 10 --max-time 30 --write-out "%{http_code}\\n" "${2}" -o /dev/null) == 200 ]]; then
        echo "Success while trying to reach ${1}'s https endpoint at ${2}"
        return 0
    else
        echo "Unable to reach ${1}'s https endpoint at ${2}"
        return 1
    fi
}

# check_url verifies that an API server URL can be resolved and that the
# resolved address answers on https port 6443.
#
# This function expects 2 arguments:
# 1. type of the URL, either API_URL or API_INT_URL
# 2. the URL that needs to be verified
#
# The resolution step and the reachability step are both recorded under the
# same stage name ("check-api-url" or "check-api-int-url") through the
# record_service_stage_* helpers.
# NOTE(review): those helpers are presumably provided by
# bootstrap-service-record.sh, which this script sources - confirm.
#
# A failed check is only recorded; this function never returns a failure
# status of its own, so that it does not stop the calling service.
function check_url() {
    # Use default expansions so that a missing argument yields the usage
    # message instead of an "unbound variable" abort when the caller runs
    # with "set -u".
    if [[ -z "${1:-}" ]] || [[ -z "${2:-}" ]]; then
        echo "Usage: check_url <API_URL or API_INT_URL> <URL that needs to be verified>"
        return
    fi

    local URL_TYPE=${1}
    local SERVER_URL=${2}

    if [[ ${URL_TYPE} != API_URL ]] && [[ ${URL_TYPE} != API_INT_URL ]]; then
        echo "Usage: check_url <API_URL or API_INT_URL> <URL that needs to be verified>"
        return
    fi

    echo "Checking validity of ${SERVER_URL} of type ${URL_TYPE}"

    local URL_STAGE_NAME
    if [[ "${URL_TYPE}" = "API_URL" ]]; then
        URL_STAGE_NAME="check-api-url"
    else
        URL_STAGE_NAME="check-api-int-url"
    fi

    echo "Starting stage ${URL_STAGE_NAME}"
    record_service_stage_start "${URL_STAGE_NAME}"
    if resolve_url "$URL_TYPE" "$SERVER_URL"; then
        record_service_stage_success
    else
        record_service_stage_failure
        # We do not want to stop bootkube service due to this failure.
        # So not returning failure at this point.
        return
    fi

    # IP is set by resolve_url when the lookup succeeds.
    CURL_URL="https://${IP}:6443/version"

    record_service_stage_start "${URL_STAGE_NAME}"
    if validate_url "$URL_TYPE" "$CURL_URL"; then
        record_service_stage_success
    else
        echo "It might be too early for the ${CURL_URL} to be available."
        record_service_stage_failure
    fi
}
7 changes: 7 additions & 0 deletions pkg/asset/ignition/bootstrap/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,8 @@ type bootstrapTemplateData struct {
UseIPv6ForNodeIP bool
IsOKD bool
BootstrapNodeIP string
APIServerURL string
APIIntServerURL string
}

// platformTemplateData is the data to use to replace values in bootstrap
Expand Down Expand Up @@ -285,6 +287,9 @@ func (a *Common) getTemplateData(dependencies asset.Parents, bootstrapInPlace bo
if bootstrapInPlace {
bootstrapInPlaceConfig = installConfig.Config.BootstrapInPlace
}

apiURL := fmt.Sprintf("api.%s", installConfig.Config.ClusterDomain())
apiIntURL := fmt.Sprintf("api-int.%s", installConfig.Config.ClusterDomain())
return &bootstrapTemplateData{
AdditionalTrustBundle: installConfig.Config.AdditionalTrustBundle,
FIPS: installConfig.Config.FIPS,
Expand All @@ -301,6 +306,8 @@ func (a *Common) getTemplateData(dependencies asset.Parents, bootstrapInPlace bo
UseIPv6ForNodeIP: APIIntVIPonIPv6,
IsOKD: installConfig.Config.IsOKD(),
BootstrapNodeIP: bootstrapNodeIP,
APIServerURL: apiURL,
APIIntServerURL: apiIntURL,
}
}

Expand Down
29 changes: 26 additions & 3 deletions pkg/gather/service/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,14 +72,20 @@ func analyzeGatherBundle(bundleFile io.Reader) error {
}

analysisChecks := []struct {
name string
check func(analysis) bool
name string
check func(analysis) bool
optional bool
}{
{name: "release-image", check: checkReleaseImageDownload},
{name: "release-image", check: checkReleaseImageDownload, optional: false},
{name: "bootkube", check: checkAPIURLs, optional: false},
}
for _, check := range analysisChecks {
a := serviceAnalyses[check.name]
if a.starts == 0 {
if check.optional {
logrus.Infof("The bootstrap machine did not execute the %s.service systemd unit", check.name)
break
}
logrus.Errorf("The bootstrap machine did not execute the %s.service systemd unit", check.name)
break
}
Expand All @@ -100,6 +106,23 @@ func checkReleaseImageDownload(a analysis) bool {
return false
}

// checkAPIURLs reports whether the bootkube service was able to verify the API
// and API-Int server URLs.
//
// bootstrap-verify-api-server-urls.sh currently runs as part of the bootkube
// service, and the verification of the API and API-Int URLs is the only stage
// of that service that reports a failure. A failure seen here can therefore be
// attributed to the API URL, the API-Int URL, or both. If any other bootkube
// stage starts reporting failures, this needs to be revisited; at that point
// the URL verification could be moved to its own service.
//
// The function always returns true: a stage failure is intentionally surfaced
// as a warning only, because it should not be reported as an error in the
// "analyze" output.
func checkAPIURLs(a analysis) bool {
	if !a.successful {
		logrus.Warn("The bootstrap machine is unable to resolve API and/or API-Int Server URLs")
		a.logLastError()
	}
	return true
}

type analysis struct {
// starts is the number of times that the service started
starts int
Expand Down
125 changes: 100 additions & 25 deletions pkg/gather/service/analyze_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,41 @@ import (
"github.com/stretchr/testify/assert"
)

// generateSuccessOutput returns the JSON service-entries document of a service
// that started, ran the given stage successfully, and ended successfully.
func generateSuccessOutput(stage string) string {
	quotedStage := `"` + stage + `"`
	return "[\n" +
		`{"phase":"service start"},` + "\n" +
		`{"phase":"stage start", "stage":` + quotedStage + `},` + "\n" +
		`{"phase":"stage end", "stage":` + quotedStage + `, "result":"success"},` + "\n" +
		`{"phase":"service end", "result":"success"}` + "\n" +
		"]"
}

// generateFailureOutput returns the JSON service-entries document of a service
// that started and then failed in the given stage with a three-line error
// message. No "service end" entry is included.
func generateFailureOutput(stage string) string {
	quotedStage := `"` + stage + `"`
	return "[\n" +
		`{"phase":"service start"},` + "\n" +
		`{"phase":"stage start", "stage":` + quotedStage + `},` + "\n" +
		`{"phase":"stage end", "stage":` + quotedStage + `, "result":"failure", "errorMessage":"Line 1\nLine 2\nLine 3"}` + "\n" +
		"]"
}

// failedReleaseImage returns the log entries expected when the release-image
// download failed: one error entry followed by the three info-level lines of
// the error message.
func failedReleaseImage() []logrus.Entry {
	entries := make([]logrus.Entry, 0, 4)
	entries = append(entries, logrus.Entry{
		Level:   logrus.ErrorLevel,
		Message: "The bootstrap machine failed to download the release image",
	})
	for _, line := range []string{"Line 1", "Line 2", "Line 3"} {
		entries = append(entries, logrus.Entry{Level: logrus.InfoLevel, Message: line})
	}
	return entries
}

// failedURLChecks returns the log entries expected when the API or API-Int URL
// check failed: one warning entry followed by the three info-level lines of
// the error message.
func failedURLChecks() []logrus.Entry {
	entries := make([]logrus.Entry, 0, 4)
	entries = append(entries, logrus.Entry{
		Level:   logrus.WarnLevel,
		Message: "The bootstrap machine is unable to resolve API and/or API-Int Server URLs",
	})
	for _, line := range []string{"Line 1", "Line 2", "Line 3"} {
		entries = append(entries, logrus.Entry{Level: logrus.InfoLevel, Message: line})
	}
	return entries
}

func TestAnalyzeGatherBundle(t *testing.T) {
cases := []struct {
name string
Expand All @@ -33,42 +68,60 @@ func TestAnalyzeGatherBundle(t *testing.T) {
},
},
{
name: "release-image successful",
name: "bootkube not started",
files: map[string]string{
"log-bundle/bootstrap/services/release-image.json": `[
{"phase":"service start"},
{"phase":"stage start", "stage":"pull-release-image"},
{"phase":"stage end", "stage":"pull-release-image", "result":"success"},
{"phase":"service end", "result":"success"}
]`,
"log-bundle/bootstrap/services/release-image.json": generateSuccessOutput("pull-release-image"),
"log-bundle/bootstrap/services/bootkube.json": "[]",
},
expectedOutput: []logrus.Entry{
{Level: logrus.ErrorLevel, Message: "The bootstrap machine did not execute the bootkube.service systemd unit"},
},
},
{
name: "release-image successful bootstrap-in-place",
name: "release-image and API Server URL successful",
files: map[string]string{
"log-bundle/log-bundle-bootstrap/bootstrap/services/release-image.json": `[
{"phase":"service start"},
{"phase":"stage start", "stage":"pull-release-image"},
{"phase":"stage end", "stage":"pull-release-image", "result":"success"},
{"phase":"service end", "result":"success"}
]`,
"log-bundle/bootstrap/services/release-image.json": generateSuccessOutput("pull-release-image"),
"log-bundle/bootstrap/services/bootkube.json": generateSuccessOutput("check-api-url"),
},
},
{
name: "release-image failed",
name: "release-image and API Server URL successful bootstrap-in-place",
files: map[string]string{
"log-bundle/bootstrap/services/release-image.json": `[
{"phase":"service start"},
{"phase":"stage start", "stage":"pull-release-image"},
{"phase":"stage end", "stage":"pull-release-image", "result":"failure", "errorMessage":"Line 1\nLine 2\nLine 3"}
]`,
"log-bundle/log-bundle-bootstrap/bootstrap/services/release-image.json": generateSuccessOutput("pull-release-image"),
"log-bundle/bootstrap/services/bootkube.json": generateSuccessOutput("check-api-url"),
},
expectedOutput: []logrus.Entry{
{Level: logrus.ErrorLevel, Message: "The bootstrap machine failed to download the release image"},
{Level: logrus.InfoLevel, Message: "Line 1"},
{Level: logrus.InfoLevel, Message: "Line 2"},
{Level: logrus.InfoLevel, Message: "Line 3"},
},
{
name: "only release-image failed",
files: map[string]string{
"log-bundle/bootstrap/services/release-image.json": generateFailureOutput("pull-release-image"),
"log-bundle/bootstrap/services/bootkube.json": generateSuccessOutput("check-api-url"),
},
expectedOutput: failedReleaseImage(),
},
{
name: "API Server URL failed",
files: map[string]string{
"log-bundle/log-bundle-bootstrap/bootstrap/services/release-image.json": generateSuccessOutput("pull-release-image"),
"log-bundle/bootstrap/services/bootkube.json": generateFailureOutput("check-api-url"),
},
expectedOutput: failedURLChecks(),
},
{
name: "API-INT Server URL failed",
files: map[string]string{
"log-bundle/log-bundle-bootstrap/bootstrap/services/release-image.json": generateSuccessOutput("pull-release-image"),
"log-bundle/bootstrap/services/bootkube.json": generateFailureOutput("check-api-int-url"),
},
expectedOutput: failedURLChecks(),
},
{
name: "both release-image and API Server URLs failed",
files: map[string]string{
"log-bundle/log-bundle-bootstrap/bootstrap/services/release-image.json": generateFailureOutput("pull-release-image"),
"log-bundle/bootstrap/services/bootkube.json": generateFailureOutput("check-api-url"),
},
expectedOutput: failedReleaseImage(),
},
{
name: "empty release-image.json",
Expand All @@ -80,6 +133,17 @@ func TestAnalyzeGatherBundle(t *testing.T) {
{Level: logrus.ErrorLevel, Message: "The bootstrap machine did not execute the release-image.service systemd unit"},
},
},
{
name: "empty bootkube.json",
files: map[string]string{
"log-bundle/bootstrap/services/release-image.json": generateSuccessOutput("pull-release-image"),
"log-bundle/bootstrap/services/bootkube.json": "",
},
expectedOutput: []logrus.Entry{
{Level: logrus.InfoLevel, Message: "Could not analyze the bootkube.service: service entries file does not begin with a token: EOF"},
{Level: logrus.ErrorLevel, Message: "The bootstrap machine did not execute the bootkube.service systemd unit"},
},
},
{
name: "malformed release-image.json",
files: map[string]string{
Expand All @@ -90,6 +154,17 @@ func TestAnalyzeGatherBundle(t *testing.T) {
{Level: logrus.ErrorLevel, Message: "The bootstrap machine did not execute the release-image.service systemd unit"},
},
},
{
name: "malformed bootkube.json",
files: map[string]string{
"log-bundle/bootstrap/services/release-image.json": generateSuccessOutput("pull-release-image"),
"log-bundle/bootstrap/services/bootkube.json": "{}",
},
expectedOutput: []logrus.Entry{
{Level: logrus.InfoLevel, Message: "Could not analyze the bootkube.service: service entries file does not begin with an array"},
{Level: logrus.ErrorLevel, Message: "The bootstrap machine did not execute the bootkube.service systemd unit"},
},
},
}
for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
Expand Down