Skip to content

Commit

Permalink
netstack: implement RTM_SETLINK/IFLA_NET_NS_FD
Browse files Browse the repository at this point in the history
IFLA_NET_NS_FD specifies a file descriptor that refers to a network namespace.

PiperOrigin-RevId: 648882826
  • Loading branch information
avagin authored and gvisor-bot committed Jul 2, 2024
1 parent 5e89d40 commit b488752
Show file tree
Hide file tree
Showing 26 changed files with 210 additions and 73 deletions.
37 changes: 30 additions & 7 deletions pkg/sentry/socket/netstack/stack.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ func (s *Stack) SetInterface(ctx context.Context, msg *nlmsg.Message) *syserr.Er
case linux.IFLA_LINKINFO:
case linux.IFLA_ADDRESS:
case linux.IFLA_MTU:
case linux.IFLA_NET_NS_FD:
default:
ctx.Warningf("unexpected attribute: %x", attr)
return syserr.ErrNotSupported
Expand Down Expand Up @@ -164,10 +165,34 @@ func (s *Stack) SetInterface(ctx context.Context, msg *nlmsg.Message) *syserr.Er
// Netstack interfaces are always up.
}

return s.setLink(tcpip.NICID(ifinfomsg.Index), attrs)
return s.setLink(ctx, tcpip.NICID(ifinfomsg.Index), attrs)
}

func (s *Stack) setLink(id tcpip.NICID, linkAttrs map[uint16]nlmsg.BytesView) *syserr.Error {
func (s *Stack) setLink(ctx context.Context, id tcpip.NICID, linkAttrs map[uint16]nlmsg.BytesView) *syserr.Error {
// IFLA_NET_NS_FD has to be handled first, because other parameters may be reset.
if v, ok := linkAttrs[linux.IFLA_NET_NS_FD]; ok {
fd, ok := v.Uint32()
if !ok {
return syserr.ErrInvalidArgument
}
f := inet.NamespaceByFDFromContext(ctx)
if f == nil {
return syserr.ErrInvalidArgument
}
ns, err := f(int32(fd))
if err != nil {
return syserr.FromError(err)
}
defer ns.DecRef(ctx)
peer := ns.Stack().(*Stack)
if peer.Stack != s.Stack {
var err tcpip.Error
id, err = s.Stack.SetNICStack(id, peer.Stack)
if err != nil {
return syserr.TranslateNetstackError(err)
}
}
}
for t, v := range linkAttrs {
switch t {
case linux.IFLA_MASTER:
Expand Down Expand Up @@ -268,8 +293,7 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
if err != nil {
return syserr.TranslateNetstackError(err)
}
ep.SetStack(s.Stack, id)
if err := s.setLink(id, linkAttrs); err != nil {
if err := s.setLink(ctx, id, linkAttrs); err != nil {
peerEP.Close()
return err
}
Expand All @@ -284,9 +308,8 @@ func (s *Stack) newVeth(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesVie
peerEP.Close()
return syserr.TranslateNetstackError(err)
}
peerEP.SetStack(peerStack.Stack, peerID)
if peerLinkAttrs != nil {
if err := peerStack.setLink(peerID, peerLinkAttrs); err != nil {
if err := peerStack.setLink(ctx, peerID, peerLinkAttrs); err != nil {
peerStack.Stack.RemoveNIC(peerID)
peerEP.Close()
return err
Expand All @@ -310,7 +333,7 @@ func (s *Stack) newBridge(ctx context.Context, linkAttrs map[uint16]nlmsg.BytesV
if err != nil {
return syserr.TranslateNetstackError(err)
}
if err := s.setLink(id, linkAttrs); err != nil {
if err := s.setLink(ctx, id, linkAttrs); err != nil {
return err
}

Expand Down
3 changes: 3 additions & 0 deletions pkg/tcpip/link/channel/channel.go
Original file line number Diff line number Diff line change
Expand Up @@ -314,3 +314,6 @@ func (*Endpoint) AddHeader(*stack.PacketBuffer) {}

// ParseHeader implements stack.LinkEndpoint.ParseHeader.
func (*Endpoint) ParseHeader(*stack.PacketBuffer) bool { return true }

// SetOnCloseAction implements stack.LinkEndpoint.
func (*Endpoint) SetOnCloseAction(func()) {}
3 changes: 3 additions & 0 deletions pkg/tcpip/link/fdbased/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -831,6 +831,9 @@ func (e *endpoint) ARPHardwareType() header.ARPHardwareType {
// Close implements stack.LinkEndpoint.
func (e *endpoint) Close() {}

// SetOnCloseAction implements stack.LinkEndpoint.
func (*endpoint) SetOnCloseAction(func()) {}

// InjectableEndpoint is an injectable fd-based endpoint. The endpoint writes
// to the FD, but does not read from it. All reads come from injected packets.
//
Expand Down
3 changes: 3 additions & 0 deletions pkg/tcpip/link/loopback/loopback.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,6 @@ func (*endpoint) ParseHeader(*stack.PacketBuffer) bool { return true }

// Close implements stack.LinkEndpoint.
func (*endpoint) Close() {}

// SetOnCloseAction implements stack.LinkEndpoint.
func (*endpoint) SetOnCloseAction(func()) {}
3 changes: 3 additions & 0 deletions pkg/tcpip/link/muxed/injectable.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,9 @@ func (*InjectableEndpoint) ParseHeader(*stack.PacketBuffer) bool { return true }
// Close implements stack.LinkEndpoint.
func (*InjectableEndpoint) Close() {}

// SetOnCloseAction implements stack.LinkEndpoint.SetOnCloseAction.
func (*InjectableEndpoint) SetOnCloseAction(func()) {}

// NewInjectableEndpoint creates a new multi-endpoint injectable endpoint.
func NewInjectableEndpoint(routes map[tcpip.Address]stack.InjectableLinkEndpoint) *InjectableEndpoint {
return &InjectableEndpoint{
Expand Down
5 changes: 5 additions & 0 deletions pkg/tcpip/link/nested/nested.go
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,8 @@ func (e *Endpoint) ParseHeader(pkt *stack.PacketBuffer) bool {
func (e *Endpoint) Close() {
e.child.Close()
}

// SetOnCloseAction implements stack.LinkEndpoint.SetOnCloseAction by
// forwarding action to the child endpoint.
func (e *Endpoint) SetOnCloseAction(action func()) {
e.child.SetOnCloseAction(action)
}
1 change: 1 addition & 0 deletions pkg/tcpip/link/packetsocket/packetsocket_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ func (*nullEndpoint) ARPHardwareType() header.ARPHardwareType { return header.AR
func (*nullEndpoint) AddHeader(*stack.PacketBuffer) {}
func (*nullEndpoint) ParseHeader(*stack.PacketBuffer) bool { return true }
func (*nullEndpoint) Close() {}
func (*nullEndpoint) SetOnCloseAction(func()) {}

var _ stack.NetworkDispatcher = (*testNetworkDispatcher)(nil)

Expand Down
3 changes: 3 additions & 0 deletions pkg/tcpip/link/pipe/pipe.go
Original file line number Diff line number Diff line change
Expand Up @@ -151,3 +151,6 @@ func (*Endpoint) ParseHeader(*stack.PacketBuffer) bool { return true }

// Close implements stack.LinkEndpoint.
func (e *Endpoint) Close() {}

// SetOnCloseAction implements stack.LinkEndpoint.SetOnCloseAction.
func (*Endpoint) SetOnCloseAction(func()) {}
3 changes: 3 additions & 0 deletions pkg/tcpip/link/sharedmem/sharedmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ func New(opts Options) (stack.LinkEndpoint, error) {
return e, nil
}

// SetOnCloseAction implements stack.LinkEndpoint.SetOnCloseAction.
func (e *endpoint) SetOnCloseAction(func()) {}

// Close frees most resources associated with the endpoint. Wait() must be
// called after Close() in order to free the rest.
func (e *endpoint) Close() {
Expand Down
3 changes: 3 additions & 0 deletions pkg/tcpip/link/sharedmem/sharedmem_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ func NewServerEndpoint(opts Options) (stack.LinkEndpoint, error) {
return e, nil
}

// SetOnCloseAction implements stack.LinkEndpoint.SetOnCloseAction.
func (*serverEndpoint) SetOnCloseAction(func()) {}

// Close frees all resources associated with the endpoint.
func (e *serverEndpoint) Close() {
// Tell dispatch goroutine to stop, then write to the eventfd so that it wakes
Expand Down
126 changes: 67 additions & 59 deletions pkg/tcpip/link/veth/veth.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,36 @@ import (
var _ stack.LinkEndpoint = (*Endpoint)(nil)
var _ stack.GSOEndpoint = (*Endpoint)(nil)

// veth holds the state shared by the two endpoints of a veth pair.
type veth struct {
// mu protects closed and mtu.
mu sync.RWMutex
// closed is set by close so that the teardown runs only once.
closed bool
// backlogQueue carries packets written by one endpoint until the goroutine
// started in NewPair injects them into the destination endpoint.
backlogQueue chan vethPacket
// mtu is the MTU reported by both endpoints of the pair.
mtu uint32
// endpoints are the two ends of the pair; each one points back to this veth.
endpoints [2]Endpoint
}

// close tears down the pair. Only the first call has any effect: it runs and
// clears the on-close action registered on each endpoint and then closes
// backlogQueue, which ends the forwarding loop started by NewPair.
//
// NOTE(review): WritePackets sends on backlogQueue, and a send on a closed
// channel panics — confirm that a write cannot race with this close.
func (v *veth) close() {
// Test-and-set closed under the lock so that concurrent callers agree on
// which one performs the teardown.
v.mu.Lock()
closed := v.closed
v.closed = true
v.mu.Unlock()
if closed {
return
}

for i := range v.endpoints {
e := &v.endpoints[i]
// Take the action out of the endpoint while holding its lock, but invoke
// it only after the lock has been released.
e.mu.Lock()
action := e.onCloseAction
e.onCloseAction = nil
e.mu.Unlock()
if action != nil {
action()
}
}
close(v.backlogQueue)
}

// +stateify savable
type vethPacket struct {
e *Endpoint
Expand All @@ -38,84 +68,55 @@ const backlogQueueSize = 64
//
// +stateify savable
type Endpoint struct {
pair *Endpoint
peer *Endpoint

backlogQueue *chan vethPacket
veth *veth

mu sync.RWMutex `state:"nosave"`
// +checklocks:mu
dispatcher stack.NetworkDispatcher

// +checklocks:mu
stack *stack.Stack
// +checklocks:mu
idx tcpip.NICID
// linkAddr is the local address of this endpoint.
//
// +checklocks:mu
linkAddr tcpip.LinkAddress
// +checklocks:mu
mtu uint32
onCloseAction func()
}

// NewPair creates a new veth pair.
func NewPair(mtu uint32) (*Endpoint, *Endpoint) {
backlogQueue := make(chan vethPacket, backlogQueueSize)
a := &Endpoint{
mtu: mtu,
linkAddr: tcpip.GetRandMacAddr(),
backlogQueue: &backlogQueue,
}
b := &Endpoint{
veth := veth{
backlogQueue: make(chan vethPacket, backlogQueueSize),
mtu: mtu,
pair: a,
linkAddr: tcpip.GetRandMacAddr(),
backlogQueue: &backlogQueue,
endpoints: [2]Endpoint{
Endpoint{
linkAddr: tcpip.GetRandMacAddr(),
},
Endpoint{
linkAddr: tcpip.GetRandMacAddr(),
},
},
}
a.pair = b
a := &veth.endpoints[0]
b := &veth.endpoints[1]
a.peer = b
b.peer = a
a.veth = &veth
b.veth = &veth
go func() {
for t := range backlogQueue {
for t := range veth.backlogQueue {
t.e.InjectInbound(t.protocol, t.pkt)
t.pkt.DecRef()
}

}()
return a, b
}

// SetStack stores the stack and the device index.
func (e *Endpoint) SetStack(s *stack.Stack, idx tcpip.NICID) {
e.mu.Lock()
defer e.mu.Unlock()
e.stack = s
e.idx = idx
}

// Close closes e. Further packet injections will return an error, and all pending
// packets are discarded. Close may be called concurrently with WritePackets.
func (e *Endpoint) Close() {
e.mu.Lock()
stack := e.stack
e.stack = nil
e.mu.Unlock()
if stack == nil {
return
}

e = e.pair
e.mu.Lock()
stack = e.stack
idx := e.idx
e.stack = nil
e.mu.Unlock()
if stack != nil {
// The pair endpoint can live in the current stack or another one.
// RemoveNIC will take the stack lock, so let's run it in another
// goroutine to avoid lock conflicts.
go func() {
stack.RemoveNIC(idx)
}()
}
close(*e.backlogQueue)
e.veth.close()
}

// InjectInbound injects an inbound packet. If the endpoint is not attached, the
Expand Down Expand Up @@ -146,16 +147,16 @@ func (e *Endpoint) IsAttached() bool {

// MTU implements stack.LinkEndpoint.MTU.
func (e *Endpoint) MTU() uint32 {
e.mu.RLock()
defer e.mu.RUnlock()
return e.mtu
e.veth.mu.RLock()
defer e.veth.mu.RUnlock()
return e.veth.mtu
}

// SetMTU implements stack.LinkEndpoint.SetMTU.
func (e *Endpoint) SetMTU(mtu uint32) {
e.mu.Lock()
defer e.mu.Unlock()
e.mtu = mtu
e.veth.mu.Lock()
defer e.veth.mu.Unlock()
e.veth.mtu = mtu
}

// Capabilities implements stack.LinkEndpoint.Capabilities.
Expand Down Expand Up @@ -204,8 +205,8 @@ func (e *Endpoint) WritePackets(pkts stack.PacketBufferList) (int, tcpip.Error)
newPkt := stack.NewPacketBuffer(stack.PacketBufferOptions{
Payload: pkt.ToBuffer(),
})
(*e.backlogQueue) <- vethPacket{
e: e.pair,
(e.veth.backlogQueue) <- vethPacket{
e: e.peer,
protocol: pkt.NetworkProtocolNumber,
pkt: newPkt,
}
Expand All @@ -228,3 +229,10 @@ func (e *Endpoint) AddHeader(pkt *stack.PacketBuffer) {}

// ParseHeader implements stack.LinkEndpoint.ParseHeader.
func (e *Endpoint) ParseHeader(pkt *stack.PacketBuffer) bool { return true }

// SetOnCloseAction implements stack.LinkEndpoint.
//
// The action is stored under e.mu and replaces any previously stored one;
// veth.close takes it from the endpoint and invokes it when the pair is
// closed.
func (e *Endpoint) SetOnCloseAction(action func()) {
	e.mu.Lock()
	e.onCloseAction = action
	e.mu.Unlock()
}
2 changes: 0 additions & 2 deletions pkg/tcpip/link/veth/veth_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,11 @@ func TestDestroyDevices(t *testing.T) {
if err := s1.CreateNIC(vethFirstID, ethernet.New(veth1)); err != nil {
t.Fatalf("s.CreateNIC(%d, _): %s", vethFirstID, err)
}
veth1.SetStack(s1, vethFirstID)

s2 := stack.New(stack.Options{})
if err := s2.CreateNIC(vethSecondID, ethernet.New(veth2)); err != nil {
t.Fatalf("s.CreateNIC(%d, _): %s", vethSecondID, err)
}
veth2.SetStack(s2, vethSecondID)

s1.RemoveNIC(vethFirstID)
timeout := time.Millisecond
Expand Down
5 changes: 5 additions & 0 deletions pkg/tcpip/link/waitable/waitable.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,11 @@ func (e *Endpoint) ParseHeader(pkt *stack.PacketBuffer) bool {
return e.lower.ParseHeader(pkt)
}

// SetOnCloseAction implements stack.LinkEndpoint.SetOnCloseAction.
func (e *Endpoint) SetOnCloseAction(action func()) {
e.lower.SetOnCloseAction(action)
}

// Close implements stack.LinkEndpoint.
func (e *Endpoint) Close() {
e.lower.Close()
Expand Down
3 changes: 3 additions & 0 deletions pkg/tcpip/link/waitable/waitable_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,9 @@ func (*countedEndpoint) ParseHeader(*stack.PacketBuffer) bool {
// Close implements stack.LinkEndpoint.
func (*countedEndpoint) Close() {}

// SetOnCloseAction implements stack.LinkEndpoint.SetOnCloseAction.
func (*countedEndpoint) SetOnCloseAction(func()) {}

func TestWaitWrite(t *testing.T) {
ep := &countedEndpoint{}
wep := New(ep)
Expand Down
3 changes: 3 additions & 0 deletions pkg/tcpip/link/xdp/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -416,3 +416,6 @@ func (ep *endpoint) dispatch() (bool, tcpip.Error) {

// Close implements stack.LinkEndpoint.
func (*endpoint) Close() {}

// SetOnCloseAction implements stack.LinkEndpoint.
func (*endpoint) SetOnCloseAction(func()) {}
3 changes: 3 additions & 0 deletions pkg/tcpip/network/internal/testutil/testutil.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ func (ep *MockLinkEndpoint) Close() {
ep.WrittenPackets = nil
}

// SetOnCloseAction implements stack.LinkEndpoint.SetOnCloseAction.
func (*MockLinkEndpoint) SetOnCloseAction(func()) {}

// MakeRandPkt generates a randomized packet. transportHeaderLength indicates
// how many random bytes will be copied in the Transport Header.
// extraHeaderReserveLength indicates how much extra space will be reserved for
Expand Down
Loading

0 comments on commit b488752

Please sign in to comment.