Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
240 changes: 240 additions & 0 deletions templates/common/baremetal/files/baremetal-non-virtual-ip.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
filesystem: "root"
mode: 0755
path: "/usr/local/bin/non_virtual_ip"
contents:
inline: |
#!/usr/libexec/platform-python
# /* vim: set filetype=python : */
import collections
import itertools
import socket
import struct
import subprocess
import sys
from typing import Callable, Iterable, Iterator, List, Optional, Tuple, Type, TypeVar


class SubnetNotFoundException(Exception):
    """Raised when no interface address belongs to a subnet containing the VIP."""


class AddressNotFoundException(Exception):
    """Raised when no interface address on the VIP subnet is available."""


TA = TypeVar('TA', bound='Address')


class Address:
    """One address record, as printed on a single line by ``ip -o addr show``.

    Attributes:
        index: interface index (``-1`` when not parsed from ``ip`` output).
        name: interface name.
        family: address family keyword, ``'inet'`` or ``'inet6'``.
        cidr: address in ``ip/prefix`` notation.
        scope: scope keyword (``'host'``, ``'link'``, ``'global'``, ...).
        flags: flag keywords following the scope (``'secondary'``,
            ``'deprecated'``, ``'dynamic'``, ...).
        label: address label, or ``None`` when the record carries none.
    """

    def __init__(self, cidr: str, name: str, family: str, index: int = -1, scope: str = '', flags: Iterable[str] = tuple(), label: Optional[str] = None) -> None:
        self.index = index
        self.name = name
        self.family = family
        self.cidr = cidr
        self.scope = scope
        self.flags = flags
        self.label = label

    @classmethod
    def from_line(cls: Type[TA], line: str) -> TA:
        """Parse one line of ``ip -o addr show`` output.

        The expected layout is::

            <index>: <name> <family> <cidr> [brd <addr> ...] scope <scope> [flags...] [label]\\ ...

        Optional attributes between the CIDR and the ``scope`` keyword
        (for instance ``brd <broadcast>`` on IPv4 addresses) are skipped so
        that they are not mistaken for the scope or for flags.  Everything
        after the token that ends with a backslash (lifetimes) is ignored.

        Raises:
            IndexError: if the line has fewer than the four leading fields.
            ValueError: if the interface index is not an integer.
        """
        tokens = collections.deque(line.split())
        index = int(tokens.popleft()[:-1])  # strip the trailing ':'
        name = tokens.popleft()
        family = tokens.popleft()
        cidr = tokens.popleft()

        # Skip optional attributes (e.g. "brd 192.168.1.255") up to "scope",
        # but never run past the backslash that terminates the record.
        while tokens and tokens[0] != 'scope' and not tokens[0].endswith('\\'):
            tokens.popleft()

        scope = ''
        if tokens and tokens[0] == 'scope':
            tokens.popleft()  # drop the "scope" keyword itself
            if tokens:
                scope = tokens.popleft()

        flags = []
        label = None
        # Tolerate a missing terminator instead of popping from an empty deque.
        while tokens:
            token = tokens.popleft()
            if token.endswith('\\'):
                # "eth0\" carries a label, a lone "\" does not.
                if len(token) > 1:
                    label = token[:-1]
                break
            flags.append(token)
        return cls(cidr, name, family, index, scope, flags, label)

    def __str__(self) -> str:
        return f'{self.__class__.__name__}({self.cidr}, dev={self.name})'


TR = TypeVar('TR', bound='V6Route')


class V6Route:
    """One IPv6 route, as printed on a single line by ``ip -o -6 route show``.

    Attributes:
        destination: destination prefix; ``default`` is stored as ``'::/0'``.
        via: next-hop address, if any.
        dev: output interface name, if any.
        proto: routing protocol keyword (``'ra'``, ``'kernel'``, ...), if any.
        metric: route metric, if any.
        pref: router preference keyword, if any.
    """

    # Route attributes that map onto constructor arguments; any other
    # attribute printed by ``ip`` (expires, hoplimit, mtu, src, ...) is ignored.
    _ATTRIBUTES = frozenset(('dev', 'proto', 'metric', 'pref', 'via'))
    # Route type keywords that ``ip`` may print in front of the destination.
    _ROUTE_TYPES = frozenset(('unicast', 'local', 'broadcast', 'multicast',
                              'anycast', 'throw', 'unreachable', 'prohibit',
                              'blackhole', 'nat'))

    def __init__(self, destination: str, dev: Optional[str] = None, proto: Optional[str] = None, metric: Optional[int] = None, pref: Optional[str] = None, via: Optional[str] = None) -> None:
        self.destination: str = destination
        self.via: Optional[str] = via
        self.dev: Optional[str] = dev
        self.proto: Optional[str] = proto
        self.metric: Optional[int] = metric
        self.pref: Optional[str] = pref

    @classmethod
    def from_line(cls: Type[TR], line: str) -> TR:
        """Parse one line of ``ip -6 route show`` output.

        Only the attributes listed in ``_ATTRIBUTES`` are kept.  Unknown
        ``key value`` attributes and valueless flags (``linkdown``,
        ``onlink``, ...) are skipped instead of being forwarded to the
        constructor, and a leading route type keyword is dropped.

        Raises:
            IndexError: if the line is empty.
        """
        items = line.split()
        if len(items) > 1 and items[0] in cls._ROUTE_TYPES:
            items = items[1:]
        dest = items[0]
        if dest == 'default':
            dest = '::/0'

        attrs = {}
        tokens = iter(items[1:])
        for token in tokens:
            if token in cls._ATTRIBUTES:
                # The value is the token right after the keyword (None when
                # the keyword is the last token on the line).
                attrs[token] = next(tokens, None)

        metric = attrs.get('metric')
        if metric is not None:
            # Honour the Optional[int] contract of the attribute.
            attrs['metric'] = int(metric) if metric.isdigit() else None
        return cls(destination=dest, **attrs)

    def __str__(self) -> str:
        return f'{self.__class__.__name__}({self.destination}, dev={self.dev})'


# Address width in bits, keyed by the address family keyword.
SUBNET_MASK_LEN = dict(inet=32, inet6=128)


def ntoa(family: str, num: int) -> str:
    """Convert an integer address to its textual form.

    ``family == 'inet'`` yields dotted-quad IPv4; any other family value is
    rendered as an IPv6 address.
    """
    if family == 'inet':
        return socket.inet_ntoa(struct.pack("!I", num))
    # A 128-bit value is packed as two big-endian 64-bit halves.
    packed = struct.pack(">QQ", num >> 64, num & 0xFFFFFFFFFFFFFFFF)
    return socket.inet_ntop(socket.AF_INET6, packed)


def aton(family: str, rep: str) -> int:
    """Convert a textual address to its integer value.

    ``family == 'inet'`` parses *rep* as IPv4; any other family value parses
    it as IPv6.
    """
    if family == 'inet':
        (value,) = struct.unpack("!I", socket.inet_aton(rep))
        return value
    # The 16 address bytes are read as two big-endian 64-bit halves.
    upper, lower = struct.unpack(">QQ", socket.inet_pton(socket.AF_INET6, rep))
    return (upper << 64) | lower


def addr_subnet_int_min_max(addr: Address) -> Tuple[int, int]:
    """Return the lowest and highest integer address of *addr*'s subnet.

    The subnet is derived from ``addr.cidr`` (``ip/prefix``) and the address
    width of ``addr.family``.
    """
    host_text, prefix_text = addr.cidr.split('/')
    address_int = aton(addr.family, host_text)

    prefix_len = int(prefix_text)
    host_bits = SUBNET_MASK_LEN[addr.family] - prefix_len

    # Network mask: prefix_len one-bits followed by host_bits zero-bits.
    network_mask = int('1' * prefix_len + '0' * host_bits, 2)
    # Host mask: host_bits one-bits (zero when the prefix covers every bit).
    host_ones = '1' * host_bits
    host_mask = int(host_ones, 2) if host_ones != '' else 0

    lowest = address_int & network_mask
    return lowest, lowest | host_mask


def vip_subnet_and_addrs_in_it(vip: str, addrs: List[Address]) -> Tuple[Address, List[Address]]:
    """Find the subnet containing *vip* and the addresses that live in it.

    Args:
        vip: virtual IP, IPv4 or IPv6, without prefix length.
        addrs: interface addresses to inspect.

    Returns:
        A ``(subnet, candidates)`` tuple.  ``subnet`` is an ``Address`` named
        ``"subnet"`` whose CIDR is the network of the last matching address;
        ``candidates`` holds every address whose subnet contains the VIP,
        in input order.

    Raises:
        SubnetNotFoundException: if no address's subnet contains the VIP.
        OSError: if *vip* is neither a valid IPv4 nor IPv6 address.
    """
    # Try IPv4 first and fall back to IPv6; remember which one parsed so the
    # integer value is only compared against subnets of the same family.
    try:
        vip_int = aton('inet', vip)
        vip_family = 'inet'
    except OSError:
        vip_int = aton('inet6', vip)
        vip_family = 'inet6'

    subnet = None
    candidates = []
    for addr in addrs:
        if addr.family != vip_family:
            # Integers of different families are not comparable.
            continue
        subnet_ip_int_min, subnet_ip_int_max = addr_subnet_int_min_max(addr)
        subnet_ip = ntoa(addr.family, subnet_ip_int_min)
        subnet_ip_max = ntoa(addr.family, subnet_ip_int_max)

        sys.stderr.write('Is %s between %s and %s\n' %
                         (vip, subnet_ip, subnet_ip_max))
        # Strict bounds: the first and last address of the subnet never match.
        if subnet_ip_int_min < vip_int < subnet_ip_int_max:
            subnet = Address(name="subnet",
                             cidr='%s/%s' % (subnet_ip, addr.cidr.split('/')[1]),
                             family=addr.family,
                             scope='')
            candidates.append(addr)
    if subnet is None:
        raise SubnetNotFoundException()
    return subnet, candidates


def interface_addrs(filters: Optional[Iterable[Callable[[Address], bool]]] = None) -> Iterator[Address]:
    """Yield the addresses reported by ``ip -o addr show``.

    Args:
        filters: optional predicates; an address is yielded only when every
            predicate returns true for it.

    Yields:
        Each accepted ``Address``.  For an IPv6 address whose prefix length
        is the full address width, an extra ``Address`` is yielded first for
        every router-advertised (``proto ra``) non-default route on the same
        device whose prefix contains it; that extra address carries the
        route's prefix length instead.

    Raises:
        subprocess.CalledProcessError: if an ``ip`` invocation fails.
    """
    out = subprocess.check_output(["ip", "-o", "addr", "show"], encoding=sys.stdout.encoding)
    # RA routes usable for prefix discovery; fetched lazily and at most once,
    # since the routing table query does not depend on the address at hand.
    ra_routes: Optional[List[V6Route]] = None

    for addr in (Address.from_line(line) for line in out.splitlines() if line.strip()):
        if filters and not all(f(addr) for f in filters):
            continue
        if (addr.family == 'inet6' and
                int(addr.cidr.split('/')[1]) == SUBNET_MASK_LEN[addr.family]):
            if ra_routes is None:
                route_out = subprocess.check_output(["ip", "-o", "-6", "route", "show"],
                                                    encoding=sys.stdout.encoding)
                parsed = (V6Route.from_line(rline)
                          for rline in route_out.splitlines() if rline.strip())
                # Host routes are printed without "/prefix"; they cannot
                # contain another address, so they are dropped here.
                ra_routes = [route for route in parsed
                             if route.proto == 'ra' and
                             route.destination != '::/0' and
                             '/' in route.destination]
            for route in ra_routes:
                if route.dev != addr.name:
                    continue
                sys.stderr.write('Checking %s for %s\n' % (route, addr))
                route_net = Address(name=route.dev, cidr=route.destination, family='inet6')
                route_filter = in_subnet(route_net)
                if route_filter(addr):
                    ip_addr = addr.cidr.split('/')[0]
                    route_prefix = route_net.cidr.split('/')[1]
                    cidr = '%s/%s' % (ip_addr, route_prefix)
                    yield Address(cidr=cidr,
                                  family=addr.family,
                                  name=addr.name)
        yield addr


def non_host_scope(addr: Address) -> bool:
    """Return False (and log to stderr) for addresses with ``host`` scope."""
    is_host_scoped = addr.scope == 'host'
    if is_host_scoped:
        sys.stderr.write(f'Filtering out {addr} due to it having host scope\n')
    return not is_host_scoped


def non_deprecated(addr: Address) -> bool:
    """Return False (and log to stderr) for addresses flagged ``deprecated``."""
    is_deprecated = 'deprecated' in addr.flags
    if is_deprecated:
        sys.stderr.write(f'Filtering out {addr} due to it being deprecated\n')
    return not is_deprecated


def non_secondary(addr: Address) -> bool:
    """Return False (and log to stderr) for addresses flagged ``secondary``."""
    is_secondary = 'secondary' in addr.flags
    if is_secondary:
        sys.stderr.write(f'Filtering out {addr} due to it being secondary\n')
    return not is_secondary


def in_subnet(subnet: Address) -> Callable[[Address], bool]:
    """Build a predicate telling whether an address lies strictly inside *subnet*.

    The bounds of *subnet* are computed once; the returned predicate compares
    an address against them with strict inequalities, so the first and last
    address of the subnet are rejected.
    """
    lower_bound, upper_bound = addr_subnet_int_min_max(subnet)

    def contains(addr: Address) -> bool:
        host_text, _prefix = addr.cidr.split('/')
        return lower_bound < aton(addr.family, host_text) < upper_bound

    return contains


def main() -> None:
    """Print the non-virtual addresses that share a subnet with the API VIP.

    Command line: ``non_virtual_ip API_VIP [VIP [VIP]]``.  The first argument
    selects the subnet; all given arguments (up to three) are treated as
    virtual IPs and are excluded from the output.

    Exits with status 0 after printing the candidates, or with status 1 when
    no argument is given or no interface subnet contains the API VIP.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message rather than an IndexError traceback.
        sys.stderr.write('Usage: non_virtual_ip API_VIP [VIP [VIP]]\n')
        sys.exit(1)

    api_vip = sys.argv[1]
    vips = set(sys.argv[1:4])
    filters = (non_host_scope, non_deprecated, non_secondary)
    iface_addrs = list(interface_addrs(filters))
    try:
        subnet, candidates = vip_subnet_and_addrs_in_it(api_vip, iface_addrs)
    except SubnetNotFoundException:
        sys.exit(1)
    sys.stderr.write('VIP Subnet %s\n' % subnet.cidr)

    for addr in candidates:
        ip_addr, _ = addr.cidr.split('/')
        # The VIPs themselves may be configured on this host; skip them.
        if ip_addr not in vips:
            print(ip_addr)
    sys.exit(0)


if __name__ == '__main__':
    main()
78 changes: 78 additions & 0 deletions templates/common/baremetal/files/nodeip-finder.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
filesystem: "root"
mode: 0755
path: "/usr/local/bin/nodeip-finder"
contents:
inline: |
#!/usr/libexec/platform-python
# /* vim: set filetype=python : */
"""Writes Kubelet and CRI-O configuration to choose the right IP address

For kubelet, a systemd environment file with a KUBELET_NODE_IP setting
For CRI-O it drops a config file in /etc/crio/crio.conf.d"""
from importlib import util as iutil
from importlib import machinery as imachinery
from types import ModuleType
import os
import pathlib
import socket
import sys

# Load the `non_virtual_ip` script that sits in the same directory as this
# file (symlinks resolved) as a module. Its file name has no `.py` suffix; it
# is loaded explicitly by path through SourceFileLoader and executed once here.
loader = imachinery.SourceFileLoader(
'non_virtual_ip',
os.path.join(os.path.dirname(os.path.realpath(__file__)), 'non_virtual_ip'))
spec = iutil.spec_from_loader('non_virtual_ip', loader)
non_virtual_ip = iutil.module_from_spec(spec)
loader.exec_module(non_virtual_ip)


# Files written by main(): one for kubelet.service, one for crio.service.
KUBELET_WORKAROUND_PATH = '/etc/systemd/system/kubelet.service.d/20-nodenet.conf'
CRIO_WORKAROUND_PATH = '/etc/systemd/system/crio.service.d/20-nodenet.conf'


def first_candidate_addr(api_vip: str) -> non_virtual_ip.Address:
    """Return the first interface address that shares a subnet with *api_vip*.

    Host-scoped, deprecated and secondary addresses are not considered.

    Raises:
        non_virtual_ip.SubnetNotFoundException: propagated from the subnet
            lookup when no interface subnet contains the VIP.
        non_virtual_ip.AddressNotFoundException: if the lookup returns no
            candidate address.
    """
    nvip = non_virtual_ip
    addr_filters = (nvip.non_host_scope, nvip.non_deprecated, nvip.non_secondary)
    host_addrs = list(nvip.interface_addrs(addr_filters))
    vip_subnet, matches = nvip.vip_subnet_and_addrs_in_it(api_vip, host_addrs)
    sys.stderr.write('VIP Subnet %s\n' % vip_subnet.cidr)

    if not matches:
        raise nvip.AddressNotFoundException()
    return matches[0]


def _write_service_env_dropin(path: str, variable: str, value: str) -> None:
    """Write a systemd drop-in at *path* setting ``variable=value`` for the service."""
    # The drop-in directory may not exist yet; create it for every service.
    pathlib.Path(path).parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'w') as dropin:
        print(f'[Service]\nEnvironment="{variable}={value}"', file=dropin)


def main() -> None:
    """Pick the node IP and write the kubelet and CRI-O drop-ins.

    The API VIP is taken from the first command-line argument; when absent,
    the host name in the ``API_INT`` environment variable is resolved
    instead.  The first suitable address on the VIP's subnet is written,
    without its prefix length, as ``KUBELET_NODE_IP`` and
    ``CONTAINER_STREAM_ADDRESS``.

    Exits with status 1 when the VIP cannot be resolved or no suitable
    address is found.
    """
    if len(sys.argv) > 1:
        api_vip = sys.argv[1]
    else:
        api_int_name = os.getenv('API_INT')
        try:
            sstream_tuple = socket.getaddrinfo(api_int_name, None)[0]
            _, _, _, _, sockaddr = sstream_tuple
            api_vip = sockaddr[0]
            sys.stderr.write(f'Found {api_int_name} to resolve to {api_vip}\n')
        except socket.gaierror:
            sys.stderr.write(f'api-int VIP not provided and failed to resolve {api_int_name}\n')
            sys.exit(1)

    try:
        first: non_virtual_ip.Address = first_candidate_addr(api_vip)
    except (non_virtual_ip.AddressNotFoundException, non_virtual_ip.SubnetNotFoundException):
        sys.stderr.write('Failed to find suitable node ip\n')
        sys.exit(1)

    prefixless = first.cidr.split('/')[0]
    # Kubelet
    _write_service_env_dropin(KUBELET_WORKAROUND_PATH, 'KUBELET_NODE_IP', prefixless)
    # CRI-O
    _write_service_env_dropin(CRIO_WORKAROUND_PATH, 'CONTAINER_STREAM_ADDRESS', prefixless)


if __name__ == '__main__':
    main()
20 changes: 20 additions & 0 deletions templates/common/baremetal/units/nodeip-configuration.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
name: "nodeip-configuration.service"
enabled: true
contents: |
[Unit]
Description=Writes IP address configuration so that kubelet and crio services select a valid node IP
# This only applies to VIP managing environments where the kubelet and crio IP
# address picking logic is flawed and may end up selecting an address from a
# different subnet or a deprecated address
Wants=network-online.target
After=network-online.target ignition-firstboot-complete.service
Before=kubelet.service crio.service

[Service]
# Need oneshot to delay kubelet
Type=oneshot
ExecStart=/usr/local/bin/nodeip-finder {{.Infra.Status.PlatformStatus.BareMetal.APIServerInternalIP }}

[Install]
WantedBy=multi-user.target

Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ contents: |
--container-runtime=remote \
--container-runtime-endpoint=/var/run/crio/crio.sock \
--node-labels=node-role.kubernetes.io/master,node.openshift.io/os_id=${ID} \
--node-ip="${KUBELET_NODE_IP}" \
--address="${KUBELET_NODE_IP}" \
--minimum-container-ttl-duration=6m0s \
--cloud-provider={{cloudProvider .}} \
--volume-plugin-dir=/etc/kubernetes/kubelet-plugins/volume/exec \
Expand Down
Loading