2 changes: 1 addition & 1 deletion data/data/baremetal/bootstrap/main.tf
@@ -11,7 +11,7 @@ resource "libvirt_ignition" "bootstrap" {
resource "libvirt_domain" "bootstrap" {
name = "${var.cluster_id}-bootstrap"

memory = "4096"
memory = "6144"

vcpu = "4"

3 changes: 2 additions & 1 deletion data/data/baremetal/main.tf
@@ -3,8 +3,9 @@ provider "libvirt" {
}

provider "ironic" {
-url          = var.ironic_uri
+url          = "http://${var.bootstrap_provisioning_ip}:6385/v1"
microversion = "1.52"
+timeout      = 1500
}

module "bootstrap" {
4 changes: 2 additions & 2 deletions data/data/baremetal/variables-baremetal.tf
@@ -1,6 +1,6 @@
variable "ironic_uri" {
variable "bootstrap_provisioning_ip" {
type = string
description = "ironic connection URI"
description = "IP for the bootstrap VM provisioning nic"
}

variable "libvirt_uri" {
120 changes: 120 additions & 0 deletions data/data/bootstrap/baremetal/files/usr/local/bin/startironic.sh
@@ -0,0 +1,120 @@
#!/bin/bash

set -ex

# We should switch to openshift builds of these images when ready, ref
# https://github.com/openshift/installer/issues/2090
IRONIC_IMAGE=${IRONIC_IMAGE:-"quay.io/metal3-io/ironic:master"}
IRONIC_INSPECTOR_IMAGE=${IRONIC_INSPECTOR_IMAGE:-"quay.io/metal3-io/ironic-inspector:master"}
IPA_DOWNLOADER_IMAGE=${IPA_DOWNLOADER_IMAGE:-"quay.io/metal3-io/ironic-ipa-downloader:master"}
COREOS_DOWNLOADER_IMAGE=${COREOS_DOWNLOADER_IMAGE:-"quay.io/openshift-metal3/rhcos-downloader:master"}

# This image is templated in via the installer pkg/asset/ignition/bootstrap/bootstrap.go
RHCOS_BOOT_IMAGE_URL="{{.BootImage}}"

# First we stop any previously started containers: ExecStop only runs while the ExecStart process
# (i.e. this script) is still running, but we exit if *any* of the containers exits unexpectedly
for name in ironic-api ironic-conductor ironic-inspector dnsmasq httpd mariadb ipa-downloader coreos-downloader; do
podman ps | grep -w "$name$" && podman kill $name
podman ps --all | grep -w "$name$" && podman rm $name -f
done

# Start the provisioning nic if not already started
# Note: removal of the hard-coded subnet is tracked via https://github.com/openshift/installer/issues/2091
PROVISIONING_NIC=ens4
if ! nmcli -t device | grep "$PROVISIONING_NIC:ethernet:connected:provisioning"; then
nmcli c add type ethernet ifname $PROVISIONING_NIC con-name provisioning ip4 172.22.0.2/24 gw4 172.22.0.1
nmcli c up provisioning
fi

# Wait for the interface to come up
# This is how the ironic container currently detects IRONIC_IP; it could probably be improved by
# using "nmcli show provisioning" instead, but we need to confirm that works with the static-ip-manager
while [ -z "$(ip -4 address show dev "$PROVISIONING_NIC" | grep -oP '(?<=inet\s)\d+(\.\d+){3}' | head -n 1)" ]; do
sleep 1
done

# set password for mariadb
mariadb_password=$(uuidgen -r | sed "s/-//g")

IRONIC_SHARED_VOLUME="ironic"
# Ignore errors here so we reuse any existing volume on pod restart;
# this is helpful if an API service causes a restart after the images
# have been downloaded
podman volume create $IRONIC_SHARED_VOLUME || true

# Apparently network-online doesn't necessarily mean iptables is ready, so wait until it is...
while ! iptables -L; do
sleep 1
done

# Add firewall rules to ensure the IPA ramdisk can reach httpd, Ironic and the Inspector API on the host
for port in 80 5050 6385 ; do
if ! sudo iptables -C INPUT -i $PROVISIONING_NIC -p tcp -m tcp --dport $port -j ACCEPT > /dev/null 2>&1; then
sudo iptables -I INPUT -i $PROVISIONING_NIC -p tcp -m tcp --dport $port -j ACCEPT
fi
done

# Start the dnsmasq, httpd, mariadb, and ironic containers using the same image
# Currently we do this outside of a pod because we need to ensure the images
# are downloaded before starting the API pods
podman run -d --net host --privileged --name mariadb \
-v $IRONIC_SHARED_VOLUME:/shared:z --entrypoint /bin/runmariadb \
--env MARIADB_PASSWORD=$mariadb_password ${IRONIC_IMAGE}

podman run -d --net host --privileged --name dnsmasq \
--env PROVISIONING_INTERFACE=$PROVISIONING_NIC \
-v $IRONIC_SHARED_VOLUME:/shared:z --entrypoint /bin/rundnsmasq ${IRONIC_IMAGE}

podman run -d --net host --privileged --name httpd \
--env PROVISIONING_INTERFACE=$PROVISIONING_NIC \
-v $IRONIC_SHARED_VOLUME:/shared:z --entrypoint /bin/runhttpd ${IRONIC_IMAGE}

# Set CACHEURL to the default gateway, so we try to consume any images cached on the host
# running the VM (dev-scripts configures a cache there); if none is found, the downloader
# containers just skip the cache and download from the internet location
CACHEURL="http://$(ip r | grep default | head -n1 | awk '{print $3}')/images"
podman run -d --net host --name ipa-downloader \
--env CACHEURL=${CACHEURL} \
-v $IRONIC_SHARED_VOLUME:/shared:z ${IPA_DOWNLOADER_IMAGE} /usr/local/bin/get-resource.sh

podman run -d --net host --name coreos-downloader \
--env CACHEURL=${CACHEURL} \
-v $IRONIC_SHARED_VOLUME:/shared:z ${COREOS_DOWNLOADER_IMAGE} /usr/local/bin/get-resource.sh $RHCOS_BOOT_IMAGE_URL

# Wait for images to be downloaded/ready
podman wait -i 1000 ipa-downloader
podman wait -i 1000 coreos-downloader
while ! curl --fail http://localhost/images/rhcos-ootpa-latest.qcow2.md5sum ; do sleep 1; done
while ! curl --fail --head http://localhost/images/ironic-python-agent.initramfs ; do sleep 1; done
while ! curl --fail --head http://localhost/images/ironic-python-agent.tar.headers ; do sleep 1; done
while ! curl --fail --head http://localhost/images/ironic-python-agent.kernel ; do sleep 1; done

sudo podman run -d --net host --privileged --name ironic-conductor \
--env MARIADB_PASSWORD=$mariadb_password \
--env PROVISIONING_INTERFACE=$PROVISIONING_NIC \
--env OS_CONDUCTOR__HEARTBEAT_TIMEOUT=120 \
--entrypoint /bin/runironic-conductor \
-v $IRONIC_SHARED_VOLUME:/shared:z ${IRONIC_IMAGE}

# We need a better way to wait for the DB sync to happen...
sleep 10

podman run -d --net host --privileged --name ironic-inspector \
--env PROVISIONING_INTERFACE=$PROVISIONING_NIC \
-v $IRONIC_SHARED_VOLUME:/shared:z "${IRONIC_INSPECTOR_IMAGE}"

sudo podman run -d --net host --privileged --name ironic-api \
--env MARIADB_PASSWORD=$mariadb_password \
--env PROVISIONING_INTERFACE=$PROVISIONING_NIC \
--entrypoint /bin/runironic-api \
-v $IRONIC_SHARED_VOLUME:/shared:z ${IRONIC_IMAGE}

# Now loop so the service remains active, and exit (so systemd restarts everything) should one
# of the containers die unexpectedly.
# The alternative would be RemainAfterExit=yes, but then we lose the ability to restart if something crashes.
while true; do
for name in ironic-api ironic-conductor ironic-inspector dnsmasq httpd mariadb; do
podman ps | grep -w "$name$" || exit 1
done
sleep 10
done
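
As an aside on the `sleep 10` above, which the script itself flags as a stopgap: one possible alternative (a sketch only, not part of this change) is to poll the Ironic endpoint that the Terraform provider connects to, on the assumption that a responding API implies the conductor's database sync has completed:

```bash
#!/bin/bash
# Hypothetical readiness check, not part of this change: after starting
# ironic-api, poll the API root until it answers instead of sleeping for a
# fixed interval. Port 6385 matches the Terraform ironic provider URL.
until curl --silent --fail http://localhost:6385/v1 > /dev/null; do
    sleep 1
done
```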
8 changes: 8 additions & 0 deletions data/data/bootstrap/baremetal/files/usr/local/bin/stopironic.sh
@@ -0,0 +1,8 @@
#!/bin/bash

set -x

for name in ironic-api ironic-conductor ironic-inspector dnsmasq httpd mariadb ipa-downloader coreos-downloader; do
podman ps | grep -w "$name$" && podman kill $name
podman ps --all | grep -w "$name$" && podman rm $name -f
done
16 changes: 16 additions & 0 deletions data/data/bootstrap/baremetal/systemd/units/ironic.service
@@ -0,0 +1,16 @@
[Unit]
Description=Baremetal Deployment Ironic Services
Wants=network-online.target crio.service
After=network-online.target crio.service

[Service]
Type=exec
ExecStart=/usr/local/bin/startironic.sh
ExecStop=/usr/local/bin/stopironic.sh

Restart=on-failure
RestartSec=10
TimeoutStartSec=600

[Install]
WantedBy=multi-user.target
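
The watch loop in `startironic.sh` pairs with `Restart=on-failure` here: if any container dies, the script exits non-zero and systemd restarts the whole stack. A couple of illustrative commands (assumed workflow, not part of this change) for inspecting the service on the bootstrap VM:

```bash
# Illustrative debugging commands for the bootstrap VM:
systemctl status ironic.service     # unit state and recent log lines
journalctl -u ironic.service -f     # follow startironic.sh output
sudo podman ps                      # containers the service manages
```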
34 changes: 15 additions & 19 deletions docs/user/metal/install_ipi.md
@@ -15,18 +15,6 @@ deployments, see [install_upi.md](install_upi.md).

## Prerequisites

-### Ironic
-
-Currently, the `baremetal` platform requires an existing Ironic environment.
-This will eventually be handled by `openshift-install`, with Ironic being
-deployed onto the bootstrap node. Until then, users of the `baremetal` platform
-should use the
-[openshift-metal3/dev-scripts](https://github.com/openshift-metal3/dev-scripts)
-repository to handle configuration of Ironic.
-
-The following PR contains the WIP changes for automating Ironic from
-`openshift-install`: https://github.com/openshift-metal3/kni-installer/pull/100

### Network Requirements

It is assumed that all hosts have at least 2 NICs, used for the following
@@ -111,8 +99,6 @@ platform should be considered experimental and still subject to change without
backwards compatibility. In particular, some items likely to change soon
include:

-* The `image` section will get completely removed.

* The `hardwareProfile` is currently exposed as a way to allow specifying
different hardware parameters for deployment. By default, we will deploy
RHCOS to the first disk, but that may not be appropriate for all hardware.
@@ -174,11 +160,6 @@ platform:
password: password
bootMACAddress: 00:11:07:4e:f6:71
hardwareProfile: default
-image:
-  source: "http://172.22.0.1/images/rhcos-ootpa-latest.qcow2"
-  checksum: 2b3b1e19e18627d89da400b63430d5bb
-  deployKernel: http://172.22.0.1/images/ironic-python-agent.kernel
-  deployRamdisk: http://172.22.0.1/images/ironic-python-agent.initramfs
pullSecret: ...
sshKey: ...
```
@@ -227,3 +208,18 @@ When an installation fails, `openshift-install` will attempt to gather debug
information from hosts. This is not yet supported by the `baremetal` platform.

https://github.com/openshift-metal3/kni-installer/issues/79

+### Provisioning subnet not fully configurable
+
+There are some install-config parameters to control templating of the provisioning
+network configuration, but full support for alternative provisioning subnets is
+incomplete.
+
+https://github.com/openshift/installer/issues/2091
+
+### Ironic services are using upstream images
+
+We need to move to downstream OpenShift images for the Ironic containers that are
+started on the bootstrap VM.
+
+https://github.com/openshift/installer/issues/2090
1 change: 0 additions & 1 deletion pkg/asset/cluster/baremetal/baremetal.go
@@ -11,6 +11,5 @@ import (
func Metadata(config *types.InstallConfig) *baremetal.Metadata {
return &baremetal.Metadata{
LibvirtURI: config.Platform.BareMetal.LibvirtURI,
-IronicURI: config.Platform.BareMetal.IronicURI,
}
}
4 changes: 2 additions & 2 deletions pkg/asset/cluster/tfvars.go
@@ -280,12 +280,12 @@ func (t *TerraformVariables) Generate(parents asset.Parents) error {
case baremetal.Name:
data, err = baremetaltfvars.TFVars(
installConfig.Config.Platform.BareMetal.LibvirtURI,
-installConfig.Config.Platform.BareMetal.IronicURI,
+installConfig.Config.Platform.BareMetal.BootstrapProvisioningIP,
string(*rhcosBootstrapImage),
"baremetal",
"provisioning",
installConfig.Config.Platform.BareMetal.Hosts,
-installConfig.Config.Platform.BareMetal.Image,
+string(*rhcosImage),
)
if err != nil {
return errors.Wrapf(err, "failed to get %s Terraform variables", platform)
12 changes: 9 additions & 3 deletions pkg/asset/ignition/bootstrap/bootstrap.go
@@ -27,6 +27,7 @@ import (
"github.com/openshift/installer/pkg/asset/machines"
"github.com/openshift/installer/pkg/asset/manifests"
"github.com/openshift/installer/pkg/asset/releaseimage"
"github.com/openshift/installer/pkg/asset/rhcos"
"github.com/openshift/installer/pkg/asset/tls"
"github.com/openshift/installer/pkg/types"
)
@@ -46,6 +47,7 @@ type bootstrapTemplateData struct {
ReleaseImage string
Proxy *configv1.ProxyStatus
Registries []sysregistriesv2.Registry
+BootImage string
}

// Bootstrap is an asset that generates the ignition config for bootstrap nodes.
@@ -109,6 +111,7 @@ func (a *Bootstrap) Dependencies() []asset.Asset {
&tls.RootCA{},
&tls.ServiceAccountKeyPair{},
&releaseimage.Image{},
+new(rhcos.Image),
}
}

@@ -117,9 +120,10 @@ func (a *Bootstrap) Generate(dependencies asset.Parents) error {
installConfig := &installconfig.InstallConfig{}
proxy := &manifests.Proxy{}
releaseImage := &releaseimage.Image{}
-dependencies.Get(installConfig, proxy, releaseImage)
+rhcosImage := new(rhcos.Image)
+dependencies.Get(installConfig, proxy, releaseImage, rhcosImage)

-templateData, err := a.getTemplateData(installConfig.Config, releaseImage.PullSpec, installConfig.Config.ImageContentSources, proxy.Config)
+templateData, err := a.getTemplateData(installConfig.Config, releaseImage.PullSpec, installConfig.Config.ImageContentSources, proxy.Config, rhcosImage)

if err != nil {
return errors.Wrap(err, "failed to get bootstrap templates")
@@ -195,7 +199,7 @@ func (a *Bootstrap) Files() []*asset.File {
}

// getTemplateData returns the data to use to execute bootstrap templates.
-func (a *Bootstrap) getTemplateData(installConfig *types.InstallConfig, releaseImage string, imageSources []types.ImageContentSource, proxy *configv1.Proxy) (*bootstrapTemplateData, error) {
+func (a *Bootstrap) getTemplateData(installConfig *types.InstallConfig, releaseImage string, imageSources []types.ImageContentSource, proxy *configv1.Proxy, rhcosImage *rhcos.Image) (*bootstrapTemplateData, error) {
etcdEndpoints := make([]string, *installConfig.ControlPlane.Replicas)

for i := range etcdEndpoints {
@@ -224,6 +228,7 @@ func (a *Bootstrap) getTemplateData(installConfig *types.InstallConfig, releaseI
EtcdCluster: strings.Join(etcdEndpoints, ","),
Proxy: &proxy.Status,
Registries: registries,
+BootImage: string(*rhcosImage),
}, nil
}

@@ -293,6 +298,7 @@ func (a *Bootstrap) addSystemdUnits(uri string, templateData *bootstrapTemplateD
// baremetal & openstack platform services
"keepalived.service": {},
"coredns.service": {},
"ironic.service": {},
}

directory, err := data.Assets.Open(uri)
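
The `BootImage` field added above feeds the `{{.BootImage}}` placeholder in `startironic.sh`. After template execution, that line would render to something like the following (the URL is reused from the machines.go comment below, for illustration only):

```bash
# Rendered form of the templated line in startironic.sh (illustrative):
RHCOS_BOOT_IMAGE_URL="https://releases-art-rhcos.svc.ci.openshift.org/art/storage/releases/rhcos-4.2/42.80.20190725.1/rhcos-42.80.20190725.1-openstack.qcow2"
```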
23 changes: 18 additions & 5 deletions pkg/asset/machines/baremetal/machines.go
@@ -3,6 +3,8 @@ package baremetal

import (
"fmt"
"path"
"strings"

baremetalprovider "github.com/metal3-io/cluster-api-provider-baremetal/pkg/apis/baremetal/v1alpha1"

@@ -16,7 +18,7 @@
)

// Machines returns a list of machines for a machinepool.
-func Machines(clusterID string, config *types.InstallConfig, pool *types.MachinePool, role, userDataSecret string) ([]machineapi.Machine, error) {
+func Machines(clusterID string, config *types.InstallConfig, pool *types.MachinePool, osImage, role, userDataSecret string) ([]machineapi.Machine, error) {
if configPlatform := config.Platform.Name(); configPlatform != baremetal.Name {
return nil, fmt.Errorf("non bare metal configuration: %q", configPlatform)
}
@@ -30,7 +32,7 @@ func Machines(clusterID string, config *types.InstallConfig, pool *types.Machine
if pool.Replicas != nil {
total = *pool.Replicas
}
-provider := provider(clustername, config.Networking.MachineCIDR.String(), platform, userDataSecret)
+provider := provider(clustername, config.Networking.MachineCIDR.String(), platform, osImage, userDataSecret)
var machines []machineapi.Machine
for idx := int64(0); idx < total; idx++ {
machine := machineapi.Machine{
@@ -60,11 +62,22 @@ func Machines(clusterID string, config *types.InstallConfig, pool *types.Machine
return machines, nil
}

-func provider(clusterName string, networkInterfaceAddress string, platform *baremetal.Platform, userDataSecret string) *baremetalprovider.BareMetalMachineProviderSpec {
+func provider(clusterName string, networkInterfaceAddress string, platform *baremetal.Platform, osImage string, userDataSecret string) *baremetalprovider.BareMetalMachineProviderSpec {
+// The rhcos-downloader container launched by the baremetal-operator downloads the image,
+// compresses it to speed up deployments and makes it available on platform.ClusterProvisioningIP, via http
+// osImage looks like:
+// https://releases-art-rhcos.svc.ci.openshift.org/art/storage/releases/rhcos-4.2/42.80.20190725.1/rhcos-42.80.20190725.1-openstack.qcow2
+// But the cached URL looks like:
+// http://172.22.0.3:6180/images/rhcos-42.80.20190725.1-openstack.qcow2/rhcos-42.80.20190725.1-compressed.qcow2
+// See https://github.com/openshift/ironic-rhcos-downloader for more details
+imageFilename := path.Base(osImage)
+compressedImageFilename := strings.Replace(imageFilename, "openstack", "compressed", 1)
[review thread]
Contributor: Can you add a comment explaining what API/convention this is trying to follow?
Author: Ack, sure. It's basically to align with the rhcos-downloader filename convention:
https://github.com/openshift/ironic-rhcos-downloader/blob/master/get-resource.sh#L25
That downloads the openstack qcow2, then compresses it to make baremetal deployment faster (maybe we can figure out some way to do that by default in future). I'll add an example of the expected URL and a link to the rhcos-downloader container as a comment.
+cacheImageURL := fmt.Sprintf("http://%s:6180/images/%s/%s", platform.ClusterProvisioningIP, imageFilename, compressedImageFilename)
+cacheChecksumURL := fmt.Sprintf("%s.md5sum", cacheImageURL)
return &baremetalprovider.BareMetalMachineProviderSpec{
Image: baremetalprovider.Image{
[review thread]
Contributor: How does it know what version of the image to load? The URL seems like a URL to a directory?
Contributor: I might be wrong, therefore would love to see a comment here...
Author: Same as above: it aligns with the file paths/URLs generated via rhcos-downloader, so I'll add a comment that clarifies both the filename and the URL.
-URL:      platform.Image.Source,
-Checksum: platform.Image.Checksum,
+URL:      cacheImageURL,
+Checksum: cacheChecksumURL,
},
UserData: &corev1.SecretReference{Name: userDataSecret},
}
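
To make the filename convention concrete, here is the transformation from `machines.go` sketched in shell; the URLs come from the code comment above, and the sketch assumes nothing beyond that naming convention:

```bash
#!/bin/bash
# Illustrative only: mirrors what machines.go does with path.Base and
# strings.Replace on the osImage URL.
osImage="https://releases-art-rhcos.svc.ci.openshift.org/art/storage/releases/rhcos-4.2/42.80.20190725.1/rhcos-42.80.20190725.1-openstack.qcow2"

imageFilename=$(basename "$osImage")                          # rhcos-42.80.20190725.1-openstack.qcow2
compressedImageFilename=${imageFilename/openstack/compressed} # rhcos-42.80.20190725.1-compressed.qcow2

# Cached image and checksum URLs, as served from the cluster provisioning IP:
echo "http://172.22.0.3:6180/images/${imageFilename}/${compressedImageFilename}"
echo "http://172.22.0.3:6180/images/${imageFilename}/${compressedImageFilename}.md5sum"
```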
4 changes: 2 additions & 2 deletions pkg/asset/machines/baremetal/machinesets.go
@@ -14,7 +14,7 @@ import (
)

// MachineSets returns a list of machinesets for a machinepool.
-func MachineSets(clusterID string, config *types.InstallConfig, pool *types.MachinePool, role, userDataSecret string) ([]*machineapi.MachineSet, error) {
+func MachineSets(clusterID string, config *types.InstallConfig, pool *types.MachinePool, osImage, role, userDataSecret string) ([]*machineapi.MachineSet, error) {
if configPlatform := config.Platform.Name(); configPlatform != baremetal.Name {
return nil, fmt.Errorf("non bare metal configuration: %q", configPlatform)
}
@@ -32,7 +32,7 @@ func MachineSets(clusterID string, config *types.InstallConfig, pool *types.Mach
total = *pool.Replicas
}

-provider := provider(clustername, config.Networking.MachineCIDR.String(), platform, userDataSecret)
+provider := provider(clustername, config.Networking.MachineCIDR.String(), platform, osImage, userDataSecret)
name := fmt.Sprintf("%s-%s-%d", clustername, pool.Name, 0)
mset := &machineapi.MachineSet{
TypeMeta: metav1.TypeMeta{
2 changes: 1 addition & 1 deletion pkg/asset/machines/master.go
@@ -217,7 +217,7 @@ func (m *Master) Generate(dependencies asset.Parents) error {
mpool.Set(pool.Platform.BareMetal)
pool.Platform.BareMetal = &mpool

-machines, err = baremetal.Machines(clusterID.InfraID, ic, pool, "master", "master-user-data")
+machines, err = baremetal.Machines(clusterID.InfraID, ic, pool, string(*rhcosImage), "master", "master-user-data")
if err != nil {
return errors.Wrap(err, "failed to create master machine objects")
}