From 806e1098206fded3a3e18ecddd3d8794a897fa11 Mon Sep 17 00:00:00 2001 From: Owersun <4807375+Owersun@users.noreply.github.com> Date: Fri, 23 Jan 2026 20:37:01 +0100 Subject: [PATCH 1/2] Proxy: TUN: Enhance Darwin interface support. - reduce number of actions done to create/configure the interface in the system - assign synthetic static link-local ipv4/ipv6 addresses to the interface, that are required by the OS for the routing to work - make tun_darwin_endpoint be implemented significantly more similar to tun_windows_enpoint, preparing them for potential unification --- proxy/tun/README.md | 22 +++ proxy/tun/tun_darwin.go | 222 ++++++++++++++++++++---------- proxy/tun/tun_darwin_endpoint.go | 138 +++++++++++-------- proxy/tun/tun_windows_endpoint.go | 11 +- 4 files changed, 257 insertions(+), 136 deletions(-) diff --git a/proxy/tun/README.md b/proxy/tun/README.md index 684747b8c656..ea5d799d1eaf 100644 --- a/proxy/tun/README.md +++ b/proxy/tun/README.md @@ -172,3 +172,25 @@ route add 1.1.1.1 mask 255.0.0.0 0.0.0.0 if 47 Note on ipv6 support. \ Despite Windows also giving the adapter autoconfigured ipv6 address, the ipv6 is not possible until the interface has any _routable_ ipv6 address (given link-local address will not accept traffic from external addresses). \ So everything applicable for ipv4 above also works for ipv6, you only need to give the interface some address manually, e.g. anything private like fc00::a:b:c:d/64 will do just fine + +## MAC OS X SUPPORT + +Darwin (Mac OS X) support of the same functionality is implemented through utun (userspace tunnel). + +Interface name in the configuration must comply to the scheme "utunN", where N is some number. \ +Most running OS'es create some amount of utun interfaces in advance for own needs. Please either check the interfaces you already have occupied by issuing following command: +``` +ifconfig +``` +Produced list will have all system interfaces listed, from which you will see how many "utun" ones already exists. +It's not required to select next available number, e.g. if you have utun1-utun7 interfaces, it's not required to have "utun8" in the config. You can choose any available name, even utun20, to get surely available interface number. + +To attach routing to the interface, route command like following can be executed: +``` +sudo route add -net 1.1.1.0/24 -iface utun10 +``` +``` +sudo route add -inet6 -host 2606:4700:4700::1111 -iface utun10 +sudo route add -inet6 -host 2606:4700:4700::1001 -iface utun10 +``` +Important to remember that everything written above about Linux routing concept, also apply to Mac OS X. If you simply route default route through utun interface, that will result network loop and immediate network failure. diff --git a/proxy/tun/tun_darwin.go b/proxy/tun/tun_darwin.go index b2d6f89f655b..ceba8d3276f0 100644 --- a/proxy/tun/tun_darwin.go +++ b/proxy/tun/tun_darwin.go @@ -5,7 +5,10 @@ package tun import ( "errors" "fmt" - "strings" + "net" + "net/netip" + "os" + "syscall" "unsafe" "golang.org/x/sys/unix" @@ -14,13 +17,19 @@ import ( const ( utunControlName = "com.apple.net.utun_control" - utunOptIfName = 2 sysprotoControl = 2 + gateway = "169.254.10.1/30" +) + +const ( + SIOCAIFADDR6 = 2155899162 // netinet6/in6_var.h + IN6_IFF_NODAD = 0x0020 // netinet6/in6_var.h + IN6_IFF_SECURED = 0x0400 // netinet6/in6_var.h + ND6_INFINITE_LIFETIME = 0xFFFFFFFF // netinet6/nd6.h ) type DarwinTun struct { - tunFd int - name string + tunFile *os.File options TunOptions } @@ -28,140 +37,205 @@ var _ Tun = (*DarwinTun)(nil) var _ GVisorTun = (*DarwinTun)(nil) func NewTun(options TunOptions) (Tun, error) { - tunFd, name, err := openUTun(options.Name) + tunFile, err := open(options.Name) if err != nil { return nil, err } + err = setup(options.Name, options.MTU) + if err != nil { + _ = tunFile.Close() + return nil, err + } + return &DarwinTun{ - tunFd: tunFd, - name: name, + tunFile: tunFile, options: options, }, nil } func (t *DarwinTun) Start() error { - if t.options.MTU > 0 { - if err := setMTU(t.name, int(t.options.MTU)); err != nil { - return err - } - } - return setState(t.name, true) + return nil } func (t *DarwinTun) Close() error { - _ = setState(t.name, false) - return unix.Close(t.tunFd) + return t.tunFile.Close() } func (t *DarwinTun) newEndpoint() (stack.LinkEndpoint, error) { - return newDarwinEndpoint(t.tunFd, t.options.MTU), nil + return &DarwinEndpoint{tun: t}, nil } -func openUTun(name string) (int, string, error) { +// open the interface, by creating new utunN if in the system and returning its file descriptor +func open(name string) (*os.File, error) { + ifIndex := -1 + _, err := fmt.Sscanf(name, "utun%d", &ifIndex) + if err != nil || ifIndex < 0 { + return nil, errors.New("interface name must be utunN, where N is a number, e.g. utun9, utun11 and so on") + } + fd, err := unix.Socket(unix.AF_SYSTEM, unix.SOCK_DGRAM, sysprotoControl) if err != nil { - return -1, "", err + return nil, err } ctlInfo := &unix.CtlInfo{} copy(ctlInfo.Name[:], utunControlName) if err := unix.IoctlCtlInfo(fd, ctlInfo); err != nil { _ = unix.Close(fd) - return -1, "", err + return nil, err } sockaddr := &unix.SockaddrCtl{ ID: ctlInfo.Id, - Unit: parseUTunUnit(name), + Unit: uint32(ifIndex) + 1, } - if err := unix.Connect(fd, sockaddr); err != nil { _ = unix.Close(fd) - return -1, "", err + return nil, err } if err := unix.SetNonblock(fd, true); err != nil { _ = unix.Close(fd) - return -1, "", err + return nil, err } - tunName, err := unix.GetsockoptString(fd, sysprotoControl, utunOptIfName) - if err != nil { - _ = unix.Close(fd) - return -1, "", err + return os.NewFile(uintptr(fd), name), nil +} + +// setup the interface by name +func setup(name string, MTU uint32) error { + if err := setMTU(name, MTU); err != nil { + return err } - tunName = strings.TrimRight(tunName, "\x00") - if tunName == "" { - _ = unix.Close(fd) - return -1, "", errors.New("empty utun name") + /* + * Darwin routing require tunnel type interface to have local and remote address, to be routable. + * To simplify inevitable task, assign the interface static ip address, which in current implementation + * is just some random ip from link-local pool, allowing to not bother about existing routing intersection. + */ + syntheticIP, _ := netip.ParsePrefix(gateway) + if err := setIPAddress(name, syntheticIP); err != nil { + return err } - return fd, tunName, nil + return nil } -func parseUTunUnit(name string) uint32 { - var unit uint32 - if _, err := fmt.Sscanf(name, "utun%d", &unit); err != nil { - return 0 +// setMTU sets MTU on the interface by given name +func setMTU(name string, mtu uint32) error { + socket, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0) + if err != nil { + return err } - return unit + 1 -} + defer unix.Close(socket) -type ifreqMTU struct { - Name [unix.IFNAMSIZ]byte - MTU int32 - _ [12]byte + ifr := unix.IfreqMTU{MTU: int32(mtu)} + copy(ifr.Name[:], name) + return unix.IoctlSetIfreqMTU(socket, &ifr) } -type ifreqFlags struct { - Name [unix.IFNAMSIZ]byte - Flags int16 - _ [14]byte +type ifAliasReq4 struct { + Name [unix.IFNAMSIZ]byte + Addr unix.RawSockaddrInet4 + Dstaddr unix.RawSockaddrInet4 + Mask unix.RawSockaddrInet4 } -func setMTU(name string, mtu int) error { - if mtu <= 0 { - return nil - } - - fd, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0) - if err != nil { - return err - } - defer func() { _ = unix.Close(fd) }() +type ifAliasReq6 struct { + Name [unix.IFNAMSIZ]byte + Addr unix.RawSockaddrInet6 + Dstaddr unix.RawSockaddrInet6 + Mask unix.RawSockaddrInet6 + Flags uint32 + Lifetime addrLifetime6 +} - ifr := ifreqMTU{MTU: int32(mtu)} - copy(ifr.Name[:], name) - return ioctlPtr(fd, unix.SIOCSIFMTU, unsafe.Pointer(&ifr)) +type addrLifetime6 struct { + Expire float64 + Preferred float64 + Vltime uint32 + Pltime uint32 } -func setState(name string, up bool) error { - fd, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0) +// setIPAddress sets ipv4 and ipv6 addresses to the interface, required for the routing to work +func setIPAddress(name string, gateway netip.Prefix) error { + socket4, err := unix.Socket(unix.AF_INET, unix.SOCK_DGRAM, 0) if err != nil { return err } - defer func() { _ = unix.Close(fd) }() - - ifr := ifreqFlags{} - copy(ifr.Name[:], name) - - if err := ioctlPtr(fd, unix.SIOCGIFFLAGS, unsafe.Pointer(&ifr)); err != nil { + defer unix.Close(socket4) + + // assume local ip address is next one from the remote address + local4 := gateway.Addr().As4() + local4[3]++ + + // fill the configuration for ipv4 + ifReq4 := ifAliasReq4{ + Addr: unix.RawSockaddrInet4{ + Len: unix.SizeofSockaddrInet4, + Family: unix.AF_INET, + Addr: local4, + }, + Dstaddr: unix.RawSockaddrInet4{ + Len: unix.SizeofSockaddrInet4, + Family: unix.AF_INET, + Addr: gateway.Addr().As4(), + }, + Mask: unix.RawSockaddrInet4{ + Len: unix.SizeofSockaddrInet4, + Family: unix.AF_INET, + Addr: netip.MustParseAddr(net.IP(net.CIDRMask(gateway.Bits(), 32)).String()).As4(), + }, + } + copy(ifReq4.Name[:], name) + if err = ioctlPtr(socket4, unix.SIOCAIFADDR, unsafe.Pointer(&ifReq4)); err != nil { + return os.NewSyscallError("SIOCAIFADDR", err) + } + + socket6, err := unix.Socket(unix.AF_INET6, unix.SOCK_DGRAM, 0) + if err != nil { return err } - - if up { - ifr.Flags |= unix.IFF_UP - } else { - ifr.Flags &^= unix.IFF_UP + defer unix.Close(socket6) + + // link-local ipv6 address with suffix from ipv6 + local6 := netip.AddrFrom16([16]byte{0: 0xfe, 1: 0x80, 12: local4[0], 13: local4[1], 14: local4[2], 15: local4[3]}) + + // fill the configuration for ipv6 + // only link-local address without the destination is enough for it + ifReq6 := ifAliasReq6{ + Addr: unix.RawSockaddrInet6{ + Len: unix.SizeofSockaddrInet6, + Family: unix.AF_INET6, + Addr: local6.As16(), + }, + Mask: unix.RawSockaddrInet6{ + Len: unix.SizeofSockaddrInet6, + Family: unix.AF_INET6, + Addr: netip.MustParseAddr(net.IP(net.CIDRMask(64, 128)).String()).As16(), + }, + Flags: IN6_IFF_NODAD, + Lifetime: addrLifetime6{ + Vltime: ND6_INFINITE_LIFETIME, + Pltime: ND6_INFINITE_LIFETIME, + }, + } + // assign link-local ipv6 address to the interface. + // this will additionally trigger OS level autoconfiguration, which will result two different link-local + // addresses - the requested one, and autoconfigured one. + // this really has no known side effects, just look excessive. and actually considered pretty normal way to + // enable the ipv6 on the interface by macOS concepts. + copy(ifReq6.Name[:], name) + if err = ioctlPtr(socket6, SIOCAIFADDR6, unsafe.Pointer(&ifReq6)); err != nil { + return os.NewSyscallError("SIOCAIFADDR6", err) } - return ioctlPtr(fd, unix.SIOCSIFFLAGS, unsafe.Pointer(&ifr)) + return nil } func ioctlPtr(fd int, req uint, arg unsafe.Pointer) error { - _, _, errno := unix.Syscall(unix.SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) + _, _, errno := unix.Syscall(syscall.SYS_IOCTL, uintptr(fd), uintptr(req), uintptr(arg)) if errno != 0 { return errno } diff --git a/proxy/tun/tun_darwin_endpoint.go b/proxy/tun/tun_darwin_endpoint.go index a0af543b4036..55db1e76c8f1 100644 --- a/proxy/tun/tun_darwin_endpoint.go +++ b/proxy/tun/tun_darwin_endpoint.go @@ -4,9 +4,10 @@ package tun import ( "context" - "encoding/binary" "errors" + _ "unsafe" + "github.com/xtls/xray-core/common/buf" "golang.org/x/sys/unix" "gvisor.dev/gvisor/pkg/buffer" "gvisor.dev/gvisor/pkg/tcpip" @@ -18,24 +19,21 @@ const utunHeaderSize = 4 var ErrUnsupportedNetworkProtocol = errors.New("unsupported ip version") +var ErrQueueEmpty = errors.New("queue is empty") + // DarwinEndpoint implements GVisor stack.LinkEndpoint var _ stack.LinkEndpoint = (*DarwinEndpoint)(nil) type DarwinEndpoint struct { - tunFd int - mtu uint32 + tun *DarwinTun dispatcherCancel context.CancelFunc } -func newDarwinEndpoint(tunFd int, mtu uint32) *DarwinEndpoint { - return &DarwinEndpoint{ - tunFd: tunFd, - mtu: mtu, - } -} +//go:linkname procyield runtime.procyield +func procyield(cycles uint32) func (e *DarwinEndpoint) MTU() uint32 { - return e.mtu + return e.tun.options.MTU } func (e *DarwinEndpoint) SetMTU(_ uint32) { @@ -76,6 +74,7 @@ func (e *DarwinEndpoint) IsAttached() bool { } func (e *DarwinEndpoint) Wait() { + } func (e *DarwinEndpoint) ARPHardwareType() header.ARPHardwareType { @@ -98,6 +97,7 @@ func (e *DarwinEndpoint) Close() { } func (e *DarwinEndpoint) SetOnCloseAction(_ func()) { + } func (e *DarwinEndpoint) WritePackets(packetBufferList stack.PacketBufferList) (int, tcpip.Error) { @@ -108,91 +108,111 @@ func (e *DarwinEndpoint) WritePackets(packetBufferList stack.PacketBufferList) ( return n, &tcpip.ErrAborted{} } - var headerBytes [utunHeaderSize]byte - binary.BigEndian.PutUint32(headerBytes[:], family) + // request memory to write from reusable buffer pool + b := buf.NewWithSize(int32(e.tun.options.MTU) + utunHeaderSize) - writeSlices := append([][]byte{headerBytes[:]}, packetBuffer.AsSlices()...) - if _, err := unix.Writev(e.tunFd, writeSlices); err != nil { + // build Darwin specific packet header + _, _ = b.Write([]byte{0x0, 0x0, 0x0, byte(family)}) + // copy the bytes of slices that compose the packet into the allocated buffer + for _, packetElement := range packetBuffer.AsSlices() { + _, _ = b.Write(packetElement) + } + + if _, err := e.tun.tunFile.Write(b.Bytes()); err != nil { if errors.Is(err, unix.EAGAIN) { return n, &tcpip.ErrWouldBlock{} } return n, &tcpip.ErrAborted{} } + b.Release() n++ } return n, nil } -func (e *DarwinEndpoint) dispatchLoop(ctx context.Context, dispatcher stack.NetworkDispatcher) { - readSize := int(e.mtu) - if readSize <= 0 { - readSize = 65535 - } - readSize += utunHeaderSize - - buf := make([]byte, readSize) - for ctx.Err() == nil { - - n, err := unix.Read(e.tunFd, buf) - if err != nil { - if errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EINTR) { - continue - } - e.Attach(nil) - return - } - if n <= utunHeaderSize { - continue - } - - networkProtocol, packet, err := parseUTunPacket(buf[:n]) - if errors.Is(err, ErrUnsupportedNetworkProtocol) { - continue - } - if err != nil { - e.Attach(nil) - return - } +func (e *DarwinEndpoint) readPacket() (tcpip.NetworkProtocolNumber, *stack.PacketBuffer, error) { + // request memory to write from reusable buffer pool + b := buf.NewWithSize(int32(e.tun.options.MTU) + utunHeaderSize) - dispatcher.DeliverNetworkPacket(networkProtocol, packet) - packet.DecRef() + // read the bytes to the buffer + n, err := b.ReadFrom(e.tun.tunFile) + if errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EINTR) { + b.Release() + return 0, nil, ErrQueueEmpty + } + if err != nil { + b.Release() + return 0, nil, err } -} -func parseUTunPacket(packet []byte) (tcpip.NetworkProtocolNumber, *stack.PacketBuffer, error) { - if len(packet) <= utunHeaderSize { - return 0, nil, errors.New("packet too short") + // discard empty or sub-empty packets + if n <= utunHeaderSize { + b.Release() + return 0, nil, ErrQueueEmpty } - family := binary.BigEndian.Uint32(packet[:utunHeaderSize]) var networkProtocol tcpip.NetworkProtocolNumber - switch family { - case uint32(unix.AF_INET): + switch b.Byte(3) { + case unix.AF_INET: networkProtocol = header.IPv4ProtocolNumber - case uint32(unix.AF_INET6): + case unix.AF_INET6: networkProtocol = header.IPv6ProtocolNumber default: + b.Release() return 0, nil, ErrUnsupportedNetworkProtocol } - payload := packet[utunHeaderSize:] - packetBuffer := buffer.MakeWithData(payload) + packetBuffer := buffer.MakeWithData(b.BytesFrom(utunHeaderSize)) return networkProtocol, stack.NewPacketBuffer(stack.PacketBufferOptions{ Payload: packetBuffer, IsForwardedPacket: true, + OnRelease: func() { + b.Release() + }, }), nil } -func ipFamilyFromPacket(packetBuffer *stack.PacketBuffer) (uint32, error) { +func (e *DarwinEndpoint) dispatchLoop(ctx context.Context, dispatcher stack.NetworkDispatcher) { + + for { + select { + case <-ctx.Done(): + return + default: + networkProtocolNumber, packet, err := e.readPacket() + // read queue empty, yield slightly, wait for the spinlock, retry + if errors.Is(err, ErrQueueEmpty) { + procyield(1) + continue + } + // discard unknown network protocol packet + if errors.Is(err, ErrUnsupportedNetworkProtocol) { + continue + } + // stop dispatcher loop on any other interface failure + if err != nil { + e.Attach(nil) + return + } + + // dispatch the buffer to the stack + dispatcher.DeliverNetworkPacket(networkProtocolNumber, packet) + // signal the buffer that it can be released + packet.DecRef() + } + } +} + +func ipFamilyFromPacket(packetBuffer *stack.PacketBuffer) (int, error) { for _, slice := range packetBuffer.AsSlices() { if len(slice) == 0 { continue } switch header.IPVersion(slice) { case header.IPv4Version: - return uint32(unix.AF_INET), nil + return unix.AF_INET, nil case header.IPv6Version: - return uint32(unix.AF_INET6), nil + return unix.AF_INET6, nil default: return 0, ErrUnsupportedNetworkProtocol } diff --git a/proxy/tun/tun_windows_endpoint.go b/proxy/tun/tun_windows_endpoint.go index f8769788fee2..d71f3e38b3ec 100644 --- a/proxy/tun/tun_windows_endpoint.go +++ b/proxy/tun/tun_windows_endpoint.go @@ -14,6 +14,10 @@ import ( "gvisor.dev/gvisor/pkg/tcpip/stack" ) +var ErrUnsupportedNetworkProtocol = errors.New("unsupported ip version") + +var ErrQueueEmpty = errors.New("queue is empty") + // WintunEndpoint implements GVisor stack.LinkEndpoint var _ stack.LinkEndpoint = (*WintunEndpoint)(nil) @@ -22,8 +26,6 @@ type WintunEndpoint struct { dispatcherCancel context.CancelFunc } -var ErrUnsupportedNetworkProtocol = errors.New("unsupported ip version") - //go:linkname procyield runtime.procyield func procyield(cycles uint32) @@ -120,6 +122,9 @@ func (e *WintunEndpoint) WritePackets(packetBufferList stack.PacketBufferList) ( func (e *WintunEndpoint) readPacket() (tcpip.NetworkProtocolNumber, *stack.PacketBuffer, error) { packet, err := e.tun.session.ReceivePacket() + if errors.Is(err, windows.ERROR_NO_MORE_ITEMS) { + return 0, nil, ErrQueueEmpty + } if err != nil { return 0, nil, err } @@ -156,7 +161,7 @@ func (e *WintunEndpoint) dispatchLoop(ctx context.Context, dispatcher stack.Netw default: networkProtocolNumber, packet, err := e.readPacket() // read queue empty, yield slightly, wait for the spinlock, retry - if errors.Is(err, windows.ERROR_NO_MORE_ITEMS) { + if errors.Is(err, ErrQueueEmpty) { procyield(1) _, _ = windows.WaitForSingleObject(readWait, windows.INFINITE) continue From fae956f72bfaaa12e4cad43bed75953f238f02ad Mon Sep 17 00:00:00 2001 From: Owersun <4807375+Owersun@users.noreply.github.com> Date: Sun, 25 Jan 2026 11:00:11 +0100 Subject: [PATCH 2/2] Proxy: TUN: Unify Darwin/Windows endpoint, which are now extremely similar, into one GVisorEndpoint. Making darwin/windows tun implement GVisorDevice with simple readpacket/writepacket methods that GVisorEndpoint untilise --- proxy/tun/stack_gvisor_endpoint.go | 155 ++++++++++++++++++++ proxy/tun/tun_darwin.go | 84 ++++++++++- proxy/tun/tun_darwin_endpoint.go | 221 ----------------------------- proxy/tun/tun_windows.go | 78 ++++++++-- proxy/tun/tun_windows_endpoint.go | 185 ------------------------ 5 files changed, 307 insertions(+), 416 deletions(-) create mode 100644 proxy/tun/stack_gvisor_endpoint.go delete mode 100644 proxy/tun/tun_darwin_endpoint.go delete mode 100644 proxy/tun/tun_windows_endpoint.go diff --git a/proxy/tun/stack_gvisor_endpoint.go b/proxy/tun/stack_gvisor_endpoint.go new file mode 100644 index 000000000000..31def35ea5ee --- /dev/null +++ b/proxy/tun/stack_gvisor_endpoint.go @@ -0,0 +1,155 @@ +package tun + +import ( + "context" + "errors" + + "gvisor.dev/gvisor/pkg/tcpip" + "gvisor.dev/gvisor/pkg/tcpip/header" + "gvisor.dev/gvisor/pkg/tcpip/stack" +) + +var ErrQueueEmpty = errors.New("queue is empty") + +type GVisorDevice interface { + WritePacket(packet *stack.PacketBuffer) tcpip.Error + ReadPacket() (byte, *stack.PacketBuffer, error) + Wait() +} + +// LinkEndpoint implements GVisor stack.LinkEndpoint +var _ stack.LinkEndpoint = (*LinkEndpoint)(nil) + +type LinkEndpoint struct { + deviceMTU uint32 + device GVisorDevice + dispatcherCancel context.CancelFunc +} + +func (e *LinkEndpoint) MTU() uint32 { + return e.deviceMTU +} + +func (e *LinkEndpoint) SetMTU(_ uint32) { + // not Implemented, as it is not expected GVisor will be asking tun device to be modified +} + +func (e *LinkEndpoint) MaxHeaderLength() uint16 { + return 0 +} + +func (e *LinkEndpoint) LinkAddress() tcpip.LinkAddress { + return "" +} + +func (e *LinkEndpoint) SetLinkAddress(_ tcpip.LinkAddress) { + // not Implemented, as it is not expected GVisor will be asking tun device to be modified +} + +func (e *LinkEndpoint) Capabilities() stack.LinkEndpointCapabilities { + return stack.CapabilityRXChecksumOffload +} + +func (e *LinkEndpoint) Attach(dispatcher stack.NetworkDispatcher) { + if e.dispatcherCancel != nil { + e.dispatcherCancel() + e.dispatcherCancel = nil + } + + if dispatcher != nil { + ctx, cancel := context.WithCancel(context.Background()) + go e.dispatchLoop(ctx, dispatcher) + e.dispatcherCancel = cancel + } +} + +func (e *LinkEndpoint) IsAttached() bool { + return e.dispatcherCancel != nil +} + +func (e *LinkEndpoint) Wait() { + +} + +func (e *LinkEndpoint) ARPHardwareType() header.ARPHardwareType { + return header.ARPHardwareNone +} + +func (e *LinkEndpoint) AddHeader(buffer *stack.PacketBuffer) { + // tun interface doesn't have link layer header, it will be added by the OS +} + +func (e *LinkEndpoint) ParseHeader(ptr *stack.PacketBuffer) bool { + return true +} + +func (e *LinkEndpoint) Close() { + if e.dispatcherCancel != nil { + e.dispatcherCancel() + e.dispatcherCancel = nil + } +} + +func (e *LinkEndpoint) SetOnCloseAction(_ func()) { + +} + +func (e *LinkEndpoint) WritePackets(packetBufferList stack.PacketBufferList) (int, tcpip.Error) { + var n int + var err tcpip.Error + + for _, packetBuffer := range packetBufferList.AsSlice() { + err = e.device.WritePacket(packetBuffer) + if err != nil { + return n, &tcpip.ErrAborted{} + } + n++ + } + + return n, nil +} + +func (e *LinkEndpoint) dispatchLoop(ctx context.Context, dispatcher stack.NetworkDispatcher) { + var networkProtocolNumber tcpip.NetworkProtocolNumber + var version byte + var packet *stack.PacketBuffer + var err error + + for { + select { + case <-ctx.Done(): + return + default: + version, packet, err = e.device.ReadPacket() + // on "queue empty", ask device to yield slightly and continue + if errors.Is(err, ErrQueueEmpty) { + e.device.Wait() + continue + } + // stop dispatcher loop on any other interface failure + if err != nil { + e.Attach(nil) + return + } + + // extract network protocol number from the packet first byte + // (which is returned separately, since it is so incredibly hard to extract one byte from + // stack.PacketBuffer without additional memory allocation and full copying it back and forth) + switch version { + case 4: + networkProtocolNumber = header.IPv4ProtocolNumber + case 6: + networkProtocolNumber = header.IPv6ProtocolNumber + default: + // discard unknown network protocol packet + packet.DecRef() + continue + } + + // dispatch the buffer to the stack + dispatcher.DeliverNetworkPacket(networkProtocolNumber, packet) + // signal the buffer that it can be released + packet.DecRef() + } + } +} diff --git a/proxy/tun/tun_darwin.go b/proxy/tun/tun_darwin.go index ceba8d3276f0..a1b24deeb257 100644 --- a/proxy/tun/tun_darwin.go +++ b/proxy/tun/tun_darwin.go @@ -11,7 +11,10 @@ import ( "syscall" "unsafe" + "github.com/xtls/xray-core/common/buf" "golang.org/x/sys/unix" + "gvisor.dev/gvisor/pkg/buffer" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" ) @@ -19,6 +22,7 @@ const ( utunControlName = "com.apple.net.utun_control" sysprotoControl = 2 gateway = "169.254.10.1/30" + utunHeaderSize = 4 ) const ( @@ -28,6 +32,9 @@ const ( ND6_INFINITE_LIFETIME = 0xFFFFFFFF // netinet6/nd6.h ) +//go:linkname procyield runtime.procyield +func procyield(cycles uint32) + type DarwinTun struct { tunFile *os.File options TunOptions @@ -35,6 +42,7 @@ type DarwinTun struct { var _ Tun = (*DarwinTun)(nil) var _ GVisorTun = (*DarwinTun)(nil) +var _ GVisorDevice = (*DarwinTun)(nil) func NewTun(options TunOptions) (Tun, error) { tunFile, err := open(options.Name) @@ -62,8 +70,82 @@ func (t *DarwinTun) Close() error { return t.tunFile.Close() } +// WritePacket implements GVisorDevice method to write one packet to the tun device +func (t *DarwinTun) WritePacket(packet *stack.PacketBuffer) tcpip.Error { + // request memory to write from reusable buffer pool + b := buf.NewWithSize(int32(t.options.MTU) + utunHeaderSize) + defer b.Release() + + // prepare Darwin specific packet header + _, _ = b.Write([]byte{0x0, 0x0, 0x0, 0x0}) + // copy the bytes of slices that compose the packet into the allocated buffer + for _, packetElement := range packet.AsSlices() { + _, _ = b.Write(packetElement) + } + // fill Darwin specific header from the first raw packet byte, that we can access now + var family byte + switch b.Byte(4) >> 4 { + case 4: + family = unix.AF_INET + case 6: + family = unix.AF_INET6 + default: + return &tcpip.ErrAborted{} + } + b.SetByte(3, family) + + if _, err := t.tunFile.Write(b.Bytes()); err != nil { + if errors.Is(err, unix.EAGAIN) { + return &tcpip.ErrWouldBlock{} + } + return &tcpip.ErrAborted{} + } + return nil +} + +// ReadPacket implements GVisorDevice method to read one packet from the tun device +// It is expected that the method will not block, rather return ErrQueueEmpty when there is nothing on the line, +// which will make the stack call Wait which should implement desired push-back +func (t *DarwinTun) ReadPacket() (byte, *stack.PacketBuffer, error) { + // request memory to write from reusable buffer pool + b := buf.NewWithSize(int32(t.options.MTU) + utunHeaderSize) + + // read the bytes to the interface file + n, err := b.ReadFrom(t.tunFile) + if errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EINTR) { + b.Release() + return 0, nil, ErrQueueEmpty + } + if err != nil { + b.Release() + return 0, nil, err + } + + // discard empty or sub-empty packets + if n <= utunHeaderSize { + b.Release() + return 0, nil, ErrQueueEmpty + } + + // network protocol version from first byte of the raw packet, the one that follows Darwin specific header + version := b.Byte(utunHeaderSize) >> 4 + packetBuffer := buffer.MakeWithData(b.BytesFrom(utunHeaderSize)) + return version, stack.NewPacketBuffer(stack.PacketBufferOptions{ + Payload: packetBuffer, + IsForwardedPacket: true, + OnRelease: func() { + b.Release() + }, + }), nil +} + +// Wait some cpu cycles +func (t *DarwinTun) Wait() { + procyield(1) +} + func (t *DarwinTun) newEndpoint() (stack.LinkEndpoint, error) { - return &DarwinEndpoint{tun: t}, nil + return &LinkEndpoint{deviceMTU: t.options.MTU, device: t}, nil } // open the interface, by creating new utunN if in the system and returning its file descriptor diff --git a/proxy/tun/tun_darwin_endpoint.go b/proxy/tun/tun_darwin_endpoint.go deleted file mode 100644 index 55db1e76c8f1..000000000000 --- a/proxy/tun/tun_darwin_endpoint.go +++ /dev/null @@ -1,221 +0,0 @@ -//go:build darwin - -package tun - -import ( - "context" - "errors" - _ "unsafe" - - "github.com/xtls/xray-core/common/buf" - "golang.org/x/sys/unix" - "gvisor.dev/gvisor/pkg/buffer" - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/pkg/tcpip/stack" -) - -const utunHeaderSize = 4 - -var ErrUnsupportedNetworkProtocol = errors.New("unsupported ip version") - -var ErrQueueEmpty = errors.New("queue is empty") - -// DarwinEndpoint implements GVisor stack.LinkEndpoint -var _ stack.LinkEndpoint = (*DarwinEndpoint)(nil) - -type DarwinEndpoint struct { - tun *DarwinTun - dispatcherCancel context.CancelFunc -} - -//go:linkname procyield runtime.procyield -func procyield(cycles uint32) - -func (e *DarwinEndpoint) MTU() uint32 { - return e.tun.options.MTU -} - -func (e *DarwinEndpoint) SetMTU(_ uint32) { - // not Implemented, as it is not expected GVisor will be asking tun device to be modified -} - -func (e *DarwinEndpoint) MaxHeaderLength() uint16 { - return 0 -} - -func (e *DarwinEndpoint) LinkAddress() tcpip.LinkAddress { - return "" -} - -func (e *DarwinEndpoint) SetLinkAddress(_ tcpip.LinkAddress) { - // not Implemented, as it is not expected GVisor will be asking tun device to be modified -} - -func (e *DarwinEndpoint) Capabilities() stack.LinkEndpointCapabilities { - return stack.CapabilityRXChecksumOffload -} - -func (e *DarwinEndpoint) Attach(dispatcher stack.NetworkDispatcher) { - if e.dispatcherCancel != nil { - e.dispatcherCancel() - e.dispatcherCancel = nil - } - - if dispatcher != nil { - ctx, cancel := context.WithCancel(context.Background()) - go e.dispatchLoop(ctx, dispatcher) - e.dispatcherCancel = cancel - } -} - -func (e *DarwinEndpoint) IsAttached() bool { - return e.dispatcherCancel != nil -} - -func (e *DarwinEndpoint) Wait() { - -} - -func (e *DarwinEndpoint) ARPHardwareType() header.ARPHardwareType { - return header.ARPHardwareNone -} - -func (e *DarwinEndpoint) AddHeader(buffer *stack.PacketBuffer) { - // tun interface doesn't have link layer header, it will be added by the OS -} - -func (e *DarwinEndpoint) ParseHeader(ptr *stack.PacketBuffer) bool { - return true -} - -func (e *DarwinEndpoint) Close() { - if e.dispatcherCancel != nil { - e.dispatcherCancel() - e.dispatcherCancel = nil - } -} - -func (e *DarwinEndpoint) SetOnCloseAction(_ func()) { - -} - -func (e *DarwinEndpoint) WritePackets(packetBufferList stack.PacketBufferList) (int, tcpip.Error) { - var n int - for _, packetBuffer := range packetBufferList.AsSlice() { - family, err := ipFamilyFromPacket(packetBuffer) - if err != nil { - return n, &tcpip.ErrAborted{} - } - - // request memory to write from reusable buffer pool - b := buf.NewWithSize(int32(e.tun.options.MTU) + utunHeaderSize) - - // build Darwin specific packet header - _, _ = b.Write([]byte{0x0, 0x0, 0x0, byte(family)}) - // copy the bytes of slices that compose the packet into the allocated buffer - for _, packetElement := range packetBuffer.AsSlices() { - _, _ = b.Write(packetElement) - } - - if _, err := e.tun.tunFile.Write(b.Bytes()); err != nil { - if errors.Is(err, unix.EAGAIN) { - return n, &tcpip.ErrWouldBlock{} - } - return n, &tcpip.ErrAborted{} - } - b.Release() - n++ - } - return n, nil -} - -func (e *DarwinEndpoint) readPacket() (tcpip.NetworkProtocolNumber, *stack.PacketBuffer, error) { - // request memory to write from reusable buffer pool - b := buf.NewWithSize(int32(e.tun.options.MTU) + utunHeaderSize) - - // read the bytes to the buffer - n, err := b.ReadFrom(e.tun.tunFile) - if errors.Is(err, unix.EAGAIN) || errors.Is(err, unix.EINTR) { - b.Release() - return 0, nil, ErrQueueEmpty - } - if err != nil { - b.Release() - return 0, nil, err - } - - // discard empty or sub-empty packets - if n <= utunHeaderSize { - b.Release() - return 0, nil, ErrQueueEmpty - } - - var networkProtocol tcpip.NetworkProtocolNumber - switch b.Byte(3) { - case unix.AF_INET: - networkProtocol = header.IPv4ProtocolNumber - case unix.AF_INET6: - networkProtocol = header.IPv6ProtocolNumber - default: - b.Release() - return 0, nil, ErrUnsupportedNetworkProtocol - } - - packetBuffer := buffer.MakeWithData(b.BytesFrom(utunHeaderSize)) - return networkProtocol, stack.NewPacketBuffer(stack.PacketBufferOptions{ - Payload: packetBuffer, - IsForwardedPacket: true, - OnRelease: func() { - b.Release() - }, - }), nil -} - -func (e *DarwinEndpoint) dispatchLoop(ctx context.Context, dispatcher stack.NetworkDispatcher) { - - for { - select { - case <-ctx.Done(): - return - default: - networkProtocolNumber, packet, err := e.readPacket() - // read queue empty, yield slightly, wait for the spinlock, retry - if errors.Is(err, ErrQueueEmpty) { - procyield(1) - continue - } - // discard unknown network protocol packet - if errors.Is(err, ErrUnsupportedNetworkProtocol) { - continue - } - // stop dispatcher loop on any other interface failure - if err != nil { - e.Attach(nil) - return - } - - // dispatch the buffer to the stack - dispatcher.DeliverNetworkPacket(networkProtocolNumber, packet) - // signal the buffer that it can be released - packet.DecRef() - } - } -} - -func ipFamilyFromPacket(packetBuffer *stack.PacketBuffer) (int, error) { - for _, slice := range packetBuffer.AsSlices() { - if len(slice) == 0 { - continue - } - switch header.IPVersion(slice) { - case header.IPv4Version: - return unix.AF_INET, nil - case header.IPv6Version: - return unix.AF_INET6, nil - default: - return 0, ErrUnsupportedNetworkProtocol - } - } - return 0, errors.New("empty packet") -} diff --git a/proxy/tun/tun_windows.go b/proxy/tun/tun_windows.go index 92d200e9543b..1452441c620a 100644 --- a/proxy/tun/tun_windows.go +++ b/proxy/tun/tun_windows.go @@ -3,19 +3,28 @@ package tun import ( + "errors" + _ "unsafe" + "golang.org/x/sys/windows" "golang.zx2c4.com/wintun" + "gvisor.dev/gvisor/pkg/buffer" + "gvisor.dev/gvisor/pkg/tcpip" "gvisor.dev/gvisor/pkg/tcpip/stack" ) +//go:linkname procyield runtime.procyield +func procyield(cycles uint32) + // WindowsTun is an object that handles tun network interface on Windows // current version is heavily stripped to do nothing more, // then create a network interface, to be provided as endpoint to gVisor ip stack type WindowsTun struct { - options TunOptions - adapter *wintun.Adapter - session wintun.Session - MTU uint32 + options TunOptions + adapter *wintun.Adapter + session wintun.Session + readWait windows.Handle + MTU uint32 } // WindowsTun implements Tun @@ -24,6 +33,9 @@ var _ Tun = (*WindowsTun)(nil) // WindowsTun implements GVisorTun var _ GVisorTun = (*WindowsTun)(nil) +// WindowsTun implements GVisorDevice +var _ GVisorDevice = (*WindowsTun)(nil) + // NewTun creates a Wintun interface with the given name. Should a Wintun // interface with the same name exist, it tried to be reused. func NewTun(options TunOptions) (Tun, error) { @@ -41,9 +53,10 @@ func NewTun(options TunOptions) (Tun, error) { } tun := &WindowsTun{ - options: options, - adapter: adapter, - session: session, + options: options, + adapter: adapter, + session: session, + readWait: session.ReadWaitEvent(), // there is currently no iphndl.dll support, which is the netlink library for windows // so there is nowhere to change MTU for the Wintun interface, and we take its default value MTU: wintun.PacketSizeMax, @@ -78,7 +91,54 @@ func (t *WindowsTun) Close() error { return nil } -// newEndpoint builds new gVisor stack.LinkEndpoint (WintunEndpoint) on top of WindowsTun +// WritePacket implements GVisorDevice method to write one packet to the tun device +func (t *WindowsTun) WritePacket(packetBuffer *stack.PacketBuffer) tcpip.Error { + // request buffer from Wintun + packet, err := t.session.AllocateSendPacket(packetBuffer.Size()) + if err != nil { + return &tcpip.ErrAborted{} + } + + // copy the bytes of slices that compose the packet into the allocated buffer + var index int + for _, packetElement := range packetBuffer.AsSlices() { + index += copy(packet[index:], packetElement) + } + + // signal Wintun to send that buffer as the packet + t.session.SendPacket(packet) + + return nil +} + +// ReadPacket implements GVisorDevice method to read one packet from the tun device +// It is expected that the method will not block, rather return ErrQueueEmpty when there is nothing on the line, +// which will make the stack call Wait which should implement desired push-back +func (t *WindowsTun) ReadPacket() (byte, *stack.PacketBuffer, error) { + packet, err := t.session.ReceivePacket() + if errors.Is(err, windows.ERROR_NO_MORE_ITEMS) { + return 0, nil, ErrQueueEmpty + } + if err != nil { + return 0, nil, err + } + + version := packet[0] >> 4 + packetBuffer := buffer.MakeWithView(buffer.NewViewWithData(packet)) + return version, stack.NewPacketBuffer(stack.PacketBufferOptions{ + Payload: packetBuffer, + IsForwardedPacket: true, + OnRelease: func() { + t.session.ReleaseReceivePacket(packet) + }, + }), nil +} + +func (t *WindowsTun) Wait() { + procyield(1) + _, _ = windows.WaitForSingleObject(t.readWait, windows.INFINITE) +} + func (t *WindowsTun) newEndpoint() (stack.LinkEndpoint, error) { - return &WintunEndpoint{tun: t}, nil + return &LinkEndpoint{deviceMTU: t.options.MTU, device: t}, nil } diff --git a/proxy/tun/tun_windows_endpoint.go b/proxy/tun/tun_windows_endpoint.go deleted file mode 100644 index d71f3e38b3ec..000000000000 --- a/proxy/tun/tun_windows_endpoint.go +++ /dev/null @@ -1,185 +0,0 @@ -//go:build windows - -package tun - -import ( - "context" - "errors" - _ "unsafe" - - "golang.org/x/sys/windows" - "gvisor.dev/gvisor/pkg/buffer" - "gvisor.dev/gvisor/pkg/tcpip" - "gvisor.dev/gvisor/pkg/tcpip/header" - "gvisor.dev/gvisor/pkg/tcpip/stack" -) - -var ErrUnsupportedNetworkProtocol = errors.New("unsupported ip version") - -var ErrQueueEmpty = errors.New("queue is empty") - -// WintunEndpoint implements GVisor stack.LinkEndpoint -var _ stack.LinkEndpoint = (*WintunEndpoint)(nil) - -type WintunEndpoint struct { - tun *WindowsTun - dispatcherCancel context.CancelFunc -} - -//go:linkname procyield runtime.procyield -func procyield(cycles uint32) - -func (e *WintunEndpoint) MTU() uint32 { - return e.tun.MTU -} - -func (e *WintunEndpoint) SetMTU(mtu uint32) { - // not Implemented, as it is not expected GVisor will be asking tun device to be modified -} - -func (e *WintunEndpoint) MaxHeaderLength() uint16 { - return 0 -} - -func (e *WintunEndpoint) LinkAddress() tcpip.LinkAddress { - return "" -} - -func (e *WintunEndpoint) SetLinkAddress(addr tcpip.LinkAddress) { - // not Implemented, as it is not expected GVisor will be asking tun device to be modified -} - -func (e *WintunEndpoint) Capabilities() stack.LinkEndpointCapabilities { - return stack.CapabilityRXChecksumOffload -} - -func (e *WintunEndpoint) Attach(dispatcher stack.NetworkDispatcher) { - if e.dispatcherCancel != nil { - e.dispatcherCancel() - e.dispatcherCancel = nil - } - - if dispatcher != nil { - ctx, cancel := context.WithCancel(context.Background()) - go e.dispatchLoop(ctx, dispatcher) - e.dispatcherCancel = cancel - } -} - -func (e *WintunEndpoint) IsAttached() bool { - return e.dispatcherCancel != nil -} - -func (e *WintunEndpoint) Wait() { - -} - -func (e *WintunEndpoint) ARPHardwareType() header.ARPHardwareType { - return header.ARPHardwareNone -} - -func (e *WintunEndpoint) AddHeader(buffer *stack.PacketBuffer) { - // tun interface doesn't have link layer header, it will be added by the OS -} - -func (e *WintunEndpoint) ParseHeader(ptr *stack.PacketBuffer) bool { - return true -} - -func (e *WintunEndpoint) Close() { - if e.dispatcherCancel != nil { - e.dispatcherCancel() - e.dispatcherCancel = nil - } -} - -func (e *WintunEndpoint) SetOnCloseAction(f func()) { - -} - -func (e *WintunEndpoint) WritePackets(packetBufferList stack.PacketBufferList) (int, tcpip.Error) { - var n int - // for all packets in the list to send - for _, packetBuffer := range packetBufferList.AsSlice() { - // request buffer from Wintun - packet, err := e.tun.session.AllocateSendPacket(packetBuffer.Size()) - if err != nil { - return n, &tcpip.ErrAborted{} - } - - // copy the bytes of slices that compose the packet into the allocated buffer - var index int - for _, packetElement := range packetBuffer.AsSlices() { - index += copy(packet[index:], packetElement) - } - - // signal Wintun to send that buffer as the packet - e.tun.session.SendPacket(packet) - n++ - } - return n, nil -} - -func (e *WintunEndpoint) readPacket() (tcpip.NetworkProtocolNumber, *stack.PacketBuffer, error) { - packet, err := e.tun.session.ReceivePacket() - if errors.Is(err, windows.ERROR_NO_MORE_ITEMS) { - return 0, nil, ErrQueueEmpty - } - if err != nil { - return 0, nil, err - } - - var networkProtocol tcpip.NetworkProtocolNumber - switch header.IPVersion(packet) { - case header.IPv4Version: - networkProtocol = header.IPv4ProtocolNumber - case header.IPv6Version: - networkProtocol = header.IPv6ProtocolNumber - default: - e.tun.session.ReleaseReceivePacket(packet) - return 0, nil, ErrUnsupportedNetworkProtocol - } - - packetBuffer := buffer.MakeWithView(buffer.NewViewWithData(packet)) - pkt := stack.NewPacketBuffer(stack.PacketBufferOptions{ - Payload: packetBuffer, - IsForwardedPacket: true, - OnRelease: func() { - e.tun.session.ReleaseReceivePacket(packet) - }, - }) - return networkProtocol, pkt, nil -} - -func (e *WintunEndpoint) dispatchLoop(ctx context.Context, dispatcher stack.NetworkDispatcher) { - readWait := e.tun.session.ReadWaitEvent() - - for { - select { - case <-ctx.Done(): - return - default: - networkProtocolNumber, packet, err := e.readPacket() - // read queue empty, yield slightly, wait for the spinlock, retry - if errors.Is(err, ErrQueueEmpty) { - procyield(1) - _, _ = windows.WaitForSingleObject(readWait, windows.INFINITE) - continue - } - // discard unknown network protocol packet - if errors.Is(err, ErrUnsupportedNetworkProtocol) { - continue - } - // stop dispatcher loop on any other interface failure - if err != nil { - e.Attach(nil) - continue - } - - // dispatch the buffer to the stack - dispatcher.DeliverNetworkPacket(networkProtocolNumber, packet) - // signal the buffer that it can be released - packet.DecRef() - } - } -}