diff --git a/docs/release-notes.md b/docs/release-notes.md index 165d5bfc9..e55e0d534 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -11,6 +11,7 @@ nav_order: 9 ### Features +- Support the native Apple Hypervisor - Support Hetzner Cloud - A GRUB configuration suitable for use with https://github.com/coreos/bootupd can now be installed; use `make install-grub-for-bootupd` to install it diff --git a/docs/supported-platforms.md b/docs/supported-platforms.md index 3e6bffb1c..1c9d63bba 100644 --- a/docs/supported-platforms.md +++ b/docs/supported-platforms.md @@ -7,6 +7,7 @@ nav_order: 8 Ignition is currently only supported for the following platforms: * [Alibaba Cloud] (`aliyun`) - Ignition will read its configuration from the instance userdata. Cloud SSH keys are handled separately. +* [Apple Hypervisor] (`applehv`) - Ignition will read its configuration using an HTTP GET over a vsock connection with its host on port 1024. * [Amazon Web Services] (`aws`) - Ignition will read its configuration from the instance userdata. Cloud SSH keys are handled separately. * [Microsoft Azure] (`azure`)- Ignition will read its configuration from the custom data provided to the instance. Cloud SSH keys are handled separately. * [Microsoft Azure Stack] (`azurestack`) - Ignition will read its configuration from the custom data provided to the instance. Cloud SSH keys are handled separately. @@ -35,6 +36,7 @@ Ignition is under active development, so this list may grow over time. For most cloud providers, cloud SSH keys and custom network configuration are handled by [Afterburn]. [Alibaba Cloud]: https://www.alibabacloud.com/product/ecs +[Apple Hypervisor]: https://developer.apple.com/documentation/hypervisor [Amazon Web Services]: https://aws.amazon.com/ec2/ [Microsoft Azure]: https://azure.microsoft.com/en-us/services/virtual-machines/ [Microsoft Azure Stack]: https://azure.microsoft.com/en-us/overview/azure-stack/ diff --git a/dracut/30ignition/module-setup.sh b/dracut/30ignition/module-setup.sh index ad7e80fdf..5271f480c 100755 --- a/dracut/30ignition/module-setup.sh +++ b/dracut/30ignition/module-setup.sh @@ -100,4 +100,8 @@ install() { installkernel() { # required by hyperv platform to read kvp from the kernel instmods hv_utils + + # required by applehv platform to read ignition file through vsock + instmods -c vsock } + diff --git a/go.mod b/go.mod index 4a2899dc3..2f41f38af 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/coreos/vcontext v0.0.0-20230201181013-d72178a18687 github.com/google/renameio/v2 v2.0.0 github.com/google/uuid v1.4.0 + github.com/mdlayher/vsock v1.2.1 github.com/mitchellh/copystructure v1.2.0 github.com/pin/tftp v2.1.0+incompatible github.com/spf13/pflag v1.0.6-0.20210604193023-d5e0c0615ace @@ -40,6 +41,7 @@ require ( github.com/googleapis/enterprise-certificate-proxy v0.3.1 // indirect github.com/googleapis/gax-go/v2 v2.12.0 // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect + github.com/mdlayher/socket v0.4.1 // indirect github.com/mitchellh/reflectwalk v1.0.2 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect go.opencensus.io v0.24.0 // indirect diff --git a/go.sum b/go.sum index 4b9d747e4..8dda0bee0 100644 --- a/go.sum +++ b/go.sum @@ -79,6 +79,10 @@ github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9Y github.com/jmespath/go-jmespath v0.4.0/go.mod h1:T8mJZnbsbmF+m6zOOFylbeCJqk5+pHWvzYPziyZiYoo= github.com/jmespath/go-jmespath/internal/testify v1.5.1 h1:shLQSRRSCCPj3f2gpwzGwWFoC7ycTf1rcQZHOlsJ6N8= github.com/jmespath/go-jmespath/internal/testify v1.5.1/go.mod h1:L3OGu8Wl2/fWfCI6z80xFu9LTZmf1ZRjMHUOPmWr69U= +github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U= +github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA= +github.com/mdlayher/vsock v1.2.1 h1:pC1mTJTvjo1r9n9fbm7S1j04rCgCzhCOS5DY0zqHlnQ= +github.com/mdlayher/vsock v1.2.1/go.mod h1:NRfCibel++DgeMD8z/hP+PPTjlNJsdPOmxcnENvE+SE= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= diff --git a/internal/providers/applehv/applehv.go b/internal/providers/applehv/applehv.go new file mode 100644 index 000000000..cbb68c668 --- /dev/null +++ b/internal/providers/applehv/applehv.go @@ -0,0 +1,108 @@ +// Copyright 2023 Red Hat +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package applehv + +import ( + "context" + "fmt" + "io" + "net" + "net/http" + "os/exec" + + "github.com/coreos/ignition/v2/config/v3_5_experimental/types" + "github.com/coreos/ignition/v2/internal/distro" + "github.com/coreos/ignition/v2/internal/platform" + "github.com/coreos/ignition/v2/internal/providers/util" + "github.com/coreos/ignition/v2/internal/resource" + "github.com/coreos/vcontext/report" + "github.com/mdlayher/vsock" +) + +/* + This provider is specific to virtual machines running under Apple Hypervisor on macOS on Apple hardware. + + It should however be possible to emulate the platform setup with QEMU, using [1] to assign a vsock to the + guest and then forward the request from the Ignition process running in the virtual machine to an HTTP + server running on the host, using the vsock support in socat for example. + + [1] https://wiki.qemu.org/Features/VirtioVsock +*/ + +func init() { + platform.Register(platform.Provider{ + Name: "applehv", + Fetch: fetchConfig, + }) +} + +func fetchConfig(f *resource.Fetcher) (types.Config, report.Report, error) { + // the vsock module must be built into the kernel or loaded so we can communicate + // with the host + if _, err := f.Logger.LogCmd(exec.Command(distro.ModprobeCmd(), "vsock"), "Loading vsock kernel module"); err != nil { + f.Logger.Err("failed to install vsock kernel module: %v", err) + return types.Config{}, report.Report{}, fmt.Errorf("failed to install vsock kernel module: %v", err) + } + + // we use a http GET over vsock to fetch the ignition file. the + // vsock connection itself is begun here. the "host" will need an HTTPD + // server listen on the the other end of the vsock connection on port 1024. The + // port is trivial and was just chosen by author + // ID =2 is shorthand for "the host" + // + conn, err := vsock.Dial(2, 1024, &vsock.Config{}) + if err != nil { + return types.Config{}, report.Report{}, err + } + defer func() { + if err := conn.Close(); err != nil { + f.Logger.Err("unable to close vsock connection: %v", err) + } + }() + + // The host portion of the URL is arbitrary here. The schema is important however. Because + // this is more or less HTTP over a UDS, then the host name is discarded. + req, err := http.NewRequest(http.MethodGet, "http://d/", nil) + if err != nil { + return types.Config{}, report.Report{}, err + } + req.Header.Set("Accept", "application/json") + + client := http.Client{ + Transport: &http.Transport{ + DialContext: func(ctx context.Context, _, _ string) (net.Conn, error) { + return conn, nil + }, + }, + } + + resp, err := client.Do(req) + if err != nil { + return types.Config{}, report.Report{}, err + } + + defer func() { + if err := resp.Body.Close(); err != nil { + f.Logger.Err("unable to close response body: %v", err) + } + }() + + b, err := io.ReadAll(resp.Body) + if err != nil { + return types.Config{}, report.Report{}, err + } + + return util.ParseConfig(f.Logger, b) +} diff --git a/internal/register/providers.go b/internal/register/providers.go index 27717f8b8..ddebb8bb9 100644 --- a/internal/register/providers.go +++ b/internal/register/providers.go @@ -16,6 +16,7 @@ package register import ( _ "github.com/coreos/ignition/v2/internal/providers/aliyun" + _ "github.com/coreos/ignition/v2/internal/providers/applehv" _ "github.com/coreos/ignition/v2/internal/providers/aws" _ "github.com/coreos/ignition/v2/internal/providers/azure" _ "github.com/coreos/ignition/v2/internal/providers/azurestack" diff --git a/vendor/github.com/mdlayher/socket/CHANGELOG.md b/vendor/github.com/mdlayher/socket/CHANGELOG.md new file mode 100644 index 000000000..f0d01641a --- /dev/null +++ b/vendor/github.com/mdlayher/socket/CHANGELOG.md @@ -0,0 +1,80 @@ +# CHANGELOG + +## v0.4.1 + +- [Bug Fix] [commit](https://github.com/mdlayher/socket/commit/2a14ceef4da279de1f957c5761fffcc6c87bbd3b): + ensure `socket.Conn` can be used with non-socket file descriptors by handling + `ENOTSOCK` in the constructor. + +## v0.4.0 + +**This is the first release of package socket that only supports Go 1.18+. +Users on older versions of Go must use v0.3.0.** + +- [Improvement]: drop support for older versions of Go so we can begin using + modern versions of `x/sys` and other dependencies. + +## v0.3.0 + +**This is the last release of package socket that supports Go 1.17 and below.** + +- [New API/API change] [PR](https://github.com/mdlayher/socket/pull/8): + numerous `socket.Conn` methods now support context cancelation. Future + releases will continue adding support as needed. + - New `ReadContext` and `WriteContext` methods. + - `Connect`, `Recvfrom`, `Recvmsg`, `Sendmsg`, and `Sendto` methods now accept + a context. + - `Sendto` parameter order was also fixed to match the underlying syscall. + +## v0.2.3 + +- [New API] [commit](https://github.com/mdlayher/socket/commit/a425d96e0f772c053164f8ce4c9c825380a98086): + `socket.Conn` has new `Pidfd*` methods for wrapping the `pidfd_*(2)` family of + system calls. + +## v0.2.2 + +- [New API] [commit](https://github.com/mdlayher/socket/commit/a2429f1dfe8ec2586df5a09f50ead865276cd027): + `socket.Conn` has new `IoctlKCM*` methods for wrapping `ioctl(2)` for `AF_KCM` + operations. + +## v0.2.1 + +- [New API] [commit](https://github.com/mdlayher/socket/commit/b18ddbe9caa0e34552b4409a3aa311cb460d2f99): + `socket.Conn` has a new `SetsockoptPacketMreq` method for wrapping + `setsockopt(2)` for `AF_PACKET` socket options. + +## v0.2.0 + +- [New API] [commit](https://github.com/mdlayher/socket/commit/6e912a68523c45e5fd899239f4b46c402dd856da): + `socket.FileConn` can be used to create a `socket.Conn` from an existing + `os.File`, which may be provided by systemd socket activation or another + external mechanism. +- [API change] [commit](https://github.com/mdlayher/socket/commit/66d61f565188c23fe02b24099ddc856d538bf1a7): + `socket.Conn.Connect` now returns the `unix.Sockaddr` value provided by + `getpeername(2)`, since we have to invoke that system call anyway to verify + that a connection to a remote peer was successfully established. +- [Bug Fix] [commit](https://github.com/mdlayher/socket/commit/b60b2dbe0ac3caff2338446a150083bde8c5c19c): + check the correct error from `unix.GetsockoptInt` in the `socket.Conn.Connect` + method. Thanks @vcabbage! + +## v0.1.2 + +- [Bug Fix]: `socket.Conn.Connect` now properly checks the `SO_ERROR` socket + option value after calling `connect(2)` to verify whether or not a connection + could successfully be established. This means that `Connect` should now report + an error for an `AF_INET` TCP connection refused or `AF_VSOCK` connection + reset by peer. +- [New API]: add `socket.Conn.Getpeername` for use in `Connect`, but also for + use by external callers. + +## v0.1.1 + +- [New API]: `socket.Conn` now has `CloseRead`, `CloseWrite`, and `Shutdown` + methods. +- [Improvement]: internal rework to more robustly handle various errors. + +## v0.1.0 + +- Initial unstable release. Most functionality has been developed and ported +from package [`netlink`](https://github.com/mdlayher/netlink). diff --git a/vendor/github.com/mdlayher/socket/LICENSE.md b/vendor/github.com/mdlayher/socket/LICENSE.md new file mode 100644 index 000000000..3ccdb75b2 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/LICENSE.md @@ -0,0 +1,9 @@ +# MIT License + +Copyright (C) 2021 Matt Layher + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/mdlayher/socket/README.md b/vendor/github.com/mdlayher/socket/README.md new file mode 100644 index 000000000..2aa065cbb --- /dev/null +++ b/vendor/github.com/mdlayher/socket/README.md @@ -0,0 +1,23 @@ +# socket [![Test Status](https://github.com/mdlayher/socket/workflows/Test/badge.svg)](https://github.com/mdlayher/socket/actions) [![Go Reference](https://pkg.go.dev/badge/github.com/mdlayher/socket.svg)](https://pkg.go.dev/github.com/mdlayher/socket) [![Go Report Card](https://goreportcard.com/badge/github.com/mdlayher/socket)](https://goreportcard.com/report/github.com/mdlayher/socket) + +Package `socket` provides a low-level network connection type which integrates +with Go's runtime network poller to provide asynchronous I/O and deadline +support. MIT Licensed. + +This package focuses on UNIX-like operating systems which make use of BSD +sockets system call APIs. It is meant to be used as a foundation for the +creation of operating system-specific socket packages, for socket families such +as Linux's `AF_NETLINK`, `AF_PACKET`, or `AF_VSOCK`. This package should not be +used directly in end user applications. + +Any use of package socket should be guarded by build tags, as one would also +use when importing the `syscall` or `golang.org/x/sys` packages. + +## Stability + +See the [CHANGELOG](./CHANGELOG.md) file for a description of changes between +releases. + +This package only supports the two most recent major versions of Go, mirroring +Go's own release policy. Older versions of Go may lack critical features and bug +fixes which are necessary for this package to function correctly. diff --git a/vendor/github.com/mdlayher/socket/accept.go b/vendor/github.com/mdlayher/socket/accept.go new file mode 100644 index 000000000..47e9d897e --- /dev/null +++ b/vendor/github.com/mdlayher/socket/accept.go @@ -0,0 +1,23 @@ +//go:build !dragonfly && !freebsd && !illumos && !linux +// +build !dragonfly,!freebsd,!illumos,!linux + +package socket + +import ( + "fmt" + "runtime" + + "golang.org/x/sys/unix" +) + +const sysAccept = "accept" + +// accept wraps accept(2). +func accept(fd, flags int) (int, unix.Sockaddr, error) { + if flags != 0 { + // These operating systems have no support for flags to accept(2). + return 0, nil, fmt.Errorf("socket: Conn.Accept flags are ineffective on %s", runtime.GOOS) + } + + return unix.Accept(fd) +} diff --git a/vendor/github.com/mdlayher/socket/accept4.go b/vendor/github.com/mdlayher/socket/accept4.go new file mode 100644 index 000000000..e1016b206 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/accept4.go @@ -0,0 +1,15 @@ +//go:build dragonfly || freebsd || illumos || linux +// +build dragonfly freebsd illumos linux + +package socket + +import ( + "golang.org/x/sys/unix" +) + +const sysAccept = "accept4" + +// accept wraps accept4(2). +func accept(fd, flags int) (int, unix.Sockaddr, error) { + return unix.Accept4(fd, flags) +} diff --git a/vendor/github.com/mdlayher/socket/conn.go b/vendor/github.com/mdlayher/socket/conn.go new file mode 100644 index 000000000..7b3cc7a6e --- /dev/null +++ b/vendor/github.com/mdlayher/socket/conn.go @@ -0,0 +1,880 @@ +package socket + +import ( + "context" + "errors" + "io" + "os" + "sync" + "sync/atomic" + "syscall" + "time" + + "golang.org/x/sys/unix" +) + +// Lock in an expected public interface for convenience. +var _ interface { + io.ReadWriteCloser + syscall.Conn + SetDeadline(t time.Time) error + SetReadDeadline(t time.Time) error + SetWriteDeadline(t time.Time) error +} = &Conn{} + +// A Conn is a low-level network connection which integrates with Go's runtime +// network poller to provide asynchronous I/O and deadline support. +// +// Many of a Conn's blocking methods support net.Conn deadlines as well as +// cancelation via context. Note that passing a context with a deadline set will +// override any of the previous deadlines set by calls to the SetDeadline family +// of methods. +type Conn struct { + // Indicates whether or not Conn.Close has been called. Must be accessed + // atomically. Atomics definitions must come first in the Conn struct. + closed uint32 + + // A unique name for the Conn which is also associated with derived file + // descriptors such as those created by accept(2). + name string + + // facts contains information we have determined about Conn to trigger + // alternate behavior in certain functions. + facts facts + + // Provides access to the underlying file registered with the runtime + // network poller, and arbitrary raw I/O calls. + fd *os.File + rc syscall.RawConn +} + +// facts contains facts about a Conn. +type facts struct { + // isStream reports whether this is a streaming descriptor, as opposed to a + // packet-based descriptor like a UDP socket. + isStream bool + + // zeroReadIsEOF reports Whether a zero byte read indicates EOF. This is + // false for a message based socket connection. + zeroReadIsEOF bool +} + +// A Config contains options for a Conn. +type Config struct { + // NetNS specifies the Linux network namespace the Conn will operate in. + // This option is unsupported on other operating systems. + // + // If set (non-zero), Conn will enter the specified network namespace and an + // error will occur in Socket if the operation fails. + // + // If not set (zero), a best-effort attempt will be made to enter the + // network namespace of the calling thread: this means that any changes made + // to the calling thread's network namespace will also be reflected in Conn. + // If this operation fails (due to lack of permissions or because network + // namespaces are disabled by kernel configuration), Socket will not return + // an error, and the Conn will operate in the default network namespace of + // the process. This enables non-privileged use of Conn in applications + // which do not require elevated privileges. + // + // Entering a network namespace is a privileged operation (root or + // CAP_SYS_ADMIN are required), and most applications should leave this set + // to 0. + NetNS int +} + +// High-level methods which provide convenience over raw system calls. + +// Close closes the underlying file descriptor for the Conn, which also causes +// all in-flight I/O operations to immediately unblock and return errors. Any +// subsequent uses of Conn will result in EBADF. +func (c *Conn) Close() error { + // The caller has expressed an intent to close the socket, so immediately + // increment s.closed to force further calls to result in EBADF before also + // closing the file descriptor to unblock any outstanding operations. + // + // Because other operations simply check for s.closed != 0, we will permit + // double Close, which would increment s.closed beyond 1. + if atomic.AddUint32(&c.closed, 1) != 1 { + // Multiple Close calls. + return nil + } + + return os.NewSyscallError("close", c.fd.Close()) +} + +// CloseRead shuts down the reading side of the Conn. Most callers should just +// use Close. +func (c *Conn) CloseRead() error { return c.Shutdown(unix.SHUT_RD) } + +// CloseWrite shuts down the writing side of the Conn. Most callers should just +// use Close. +func (c *Conn) CloseWrite() error { return c.Shutdown(unix.SHUT_WR) } + +// Read reads directly from the underlying file descriptor. +func (c *Conn) Read(b []byte) (int, error) { return c.fd.Read(b) } + +// ReadContext reads from the underlying file descriptor with added support for +// context cancelation. +func (c *Conn) ReadContext(ctx context.Context, b []byte) (int, error) { + if c.facts.isStream && len(b) > maxRW { + b = b[:maxRW] + } + + n, err := readT(c, ctx, "read", func(fd int) (int, error) { + return unix.Read(fd, b) + }) + if n == 0 && err == nil && c.facts.zeroReadIsEOF { + return 0, io.EOF + } + + return n, os.NewSyscallError("read", err) +} + +// Write writes directly to the underlying file descriptor. +func (c *Conn) Write(b []byte) (int, error) { return c.fd.Write(b) } + +// WriteContext writes to the underlying file descriptor with added support for +// context cancelation. +func (c *Conn) WriteContext(ctx context.Context, b []byte) (int, error) { + var ( + n, nn int + err error + ) + + doErr := c.write(ctx, "write", func(fd int) error { + max := len(b) + if c.facts.isStream && max-nn > maxRW { + max = nn + maxRW + } + + n, err = unix.Write(fd, b[nn:max]) + if n > 0 { + nn += n + } + if nn == len(b) { + return err + } + if n == 0 && err == nil { + err = io.ErrUnexpectedEOF + return nil + } + + return err + }) + if doErr != nil { + return 0, doErr + } + + return nn, os.NewSyscallError("write", err) +} + +// SetDeadline sets both the read and write deadlines associated with the Conn. +func (c *Conn) SetDeadline(t time.Time) error { return c.fd.SetDeadline(t) } + +// SetReadDeadline sets the read deadline associated with the Conn. +func (c *Conn) SetReadDeadline(t time.Time) error { return c.fd.SetReadDeadline(t) } + +// SetWriteDeadline sets the write deadline associated with the Conn. +func (c *Conn) SetWriteDeadline(t time.Time) error { return c.fd.SetWriteDeadline(t) } + +// ReadBuffer gets the size of the operating system's receive buffer associated +// with the Conn. +func (c *Conn) ReadBuffer() (int, error) { + return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF) +} + +// WriteBuffer gets the size of the operating system's transmit buffer +// associated with the Conn. +func (c *Conn) WriteBuffer() (int, error) { + return c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF) +} + +// SetReadBuffer sets the size of the operating system's receive buffer +// associated with the Conn. +// +// When called with elevated privileges on Linux, the SO_RCVBUFFORCE option will +// be used to override operating system limits. Otherwise SO_RCVBUF is used +// (which obeys operating system limits). +func (c *Conn) SetReadBuffer(bytes int) error { return c.setReadBuffer(bytes) } + +// SetWriteBuffer sets the size of the operating system's transmit buffer +// associated with the Conn. +// +// When called with elevated privileges on Linux, the SO_SNDBUFFORCE option will +// be used to override operating system limits. Otherwise SO_SNDBUF is used +// (which obeys operating system limits). +func (c *Conn) SetWriteBuffer(bytes int) error { return c.setWriteBuffer(bytes) } + +// SyscallConn returns a raw network connection. This implements the +// syscall.Conn interface. +// +// SyscallConn is intended for advanced use cases, such as getting and setting +// arbitrary socket options using the socket's file descriptor. If possible, +// those operations should be performed using methods on Conn instead. +// +// Once invoked, it is the caller's responsibility to ensure that operations +// performed using Conn and the syscall.RawConn do not conflict with each other. +func (c *Conn) SyscallConn() (syscall.RawConn, error) { + if atomic.LoadUint32(&c.closed) != 0 { + return nil, os.NewSyscallError("syscallconn", unix.EBADF) + } + + // TODO(mdlayher): mutex or similar to enforce syscall.RawConn contract of + // FD remaining valid for duration of calls? + return c.rc, nil +} + +// Socket wraps the socket(2) system call to produce a Conn. domain, typ, and +// proto are passed directly to socket(2), and name should be a unique name for +// the socket type such as "netlink" or "vsock". +// +// The cfg parameter specifies optional configuration for the Conn. If nil, no +// additional configuration will be applied. +// +// If the operating system supports SOCK_CLOEXEC and SOCK_NONBLOCK, they are +// automatically applied to typ to mirror the standard library's socket flag +// behaviors. +func Socket(domain, typ, proto int, name string, cfg *Config) (*Conn, error) { + if cfg == nil { + cfg = &Config{} + } + + if cfg.NetNS == 0 { + // Non-Linux or no network namespace. + return socket(domain, typ, proto, name) + } + + // Linux only: create Conn in the specified network namespace. + return withNetNS(cfg.NetNS, func() (*Conn, error) { + return socket(domain, typ, proto, name) + }) +} + +// socket is the internal, cross-platform entry point for socket(2). +func socket(domain, typ, proto int, name string) (*Conn, error) { + var ( + fd int + err error + ) + + for { + fd, err = unix.Socket(domain, typ|socketFlags, proto) + switch { + case err == nil: + // Some OSes already set CLOEXEC with typ. + if !flagCLOEXEC { + unix.CloseOnExec(fd) + } + + // No error, prepare the Conn. + return New(fd, name) + case !ready(err): + // System call interrupted or not ready, try again. + continue + case err == unix.EINVAL, err == unix.EPROTONOSUPPORT: + // On Linux, SOCK_NONBLOCK and SOCK_CLOEXEC were introduced in + // 2.6.27. On FreeBSD, both flags were introduced in FreeBSD 10. + // EINVAL and EPROTONOSUPPORT check for earlier versions of these + // OSes respectively. + // + // Mirror what the standard library does when creating file + // descriptors: avoid racing a fork/exec with the creation of new + // file descriptors, so that child processes do not inherit socket + // file descriptors unexpectedly. + // + // For a more thorough explanation, see similar work in the Go tree: + // func sysSocket in net/sock_cloexec.go, as well as the detailed + // comment in syscall/exec_unix.go. + syscall.ForkLock.RLock() + fd, err = unix.Socket(domain, typ, proto) + if err != nil { + syscall.ForkLock.RUnlock() + return nil, os.NewSyscallError("socket", err) + } + unix.CloseOnExec(fd) + syscall.ForkLock.RUnlock() + + return New(fd, name) + default: + // Unhandled error. + return nil, os.NewSyscallError("socket", err) + } + } +} + +// FileConn returns a copy of the network connection corresponding to the open +// file. It is the caller's responsibility to close the file when finished. +// Closing the Conn does not affect the File, and closing the File does not +// affect the Conn. +func FileConn(f *os.File, name string) (*Conn, error) { + // First we'll try to do fctnl(2) with F_DUPFD_CLOEXEC because we can dup + // the file descriptor and set the flag in one syscall. + fd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0) + switch err { + case nil: + // OK, ready to set up non-blocking I/O. + return New(fd, name) + case unix.EINVAL: + // The kernel rejected our fcntl(2), fall back to separate dup(2) and + // setting close on exec. + // + // Mirror what the standard library does when creating file descriptors: + // avoid racing a fork/exec with the creation of new file descriptors, + // so that child processes do not inherit socket file descriptors + // unexpectedly. + syscall.ForkLock.RLock() + fd, err := unix.Dup(fd) + if err != nil { + syscall.ForkLock.RUnlock() + return nil, os.NewSyscallError("dup", err) + } + unix.CloseOnExec(fd) + syscall.ForkLock.RUnlock() + + return New(fd, name) + default: + // Any other errors. + return nil, os.NewSyscallError("fcntl", err) + } +} + +// New wraps an existing file descriptor to create a Conn. name should be a +// unique name for the socket type such as "netlink" or "vsock". +// +// Most callers should use Socket or FileConn to construct a Conn. New is +// intended for integrating with specific system calls which provide a file +// descriptor that supports asynchronous I/O. The file descriptor is immediately +// set to nonblocking mode and registered with Go's runtime network poller for +// future I/O operations. +// +// Unlike FileConn, New does not duplicate the existing file descriptor in any +// way. The returned Conn takes ownership of the underlying file descriptor. +func New(fd int, name string) (*Conn, error) { + // All Conn I/O is nonblocking for integration with Go's runtime network + // poller. Depending on the OS this might already be set but it can't hurt + // to set it again. + if err := unix.SetNonblock(fd, true); err != nil { + return nil, os.NewSyscallError("setnonblock", err) + } + + // os.NewFile registers the non-blocking file descriptor with the runtime + // poller, which is then used for most subsequent operations except those + // that require raw I/O via SyscallConn. + // + // See also: https://golang.org/pkg/os/#NewFile + f := os.NewFile(uintptr(fd), name) + rc, err := f.SyscallConn() + if err != nil { + return nil, err + } + + c := &Conn{ + name: name, + fd: f, + rc: rc, + } + + // Probe the file descriptor for socket settings. + sotype, err := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_TYPE) + switch { + case err == nil: + // File is a socket, check its properties. + c.facts = facts{ + isStream: sotype == unix.SOCK_STREAM, + zeroReadIsEOF: sotype != unix.SOCK_DGRAM && sotype != unix.SOCK_RAW, + } + case errors.Is(err, unix.ENOTSOCK): + // File is not a socket, treat it as a regular file. + c.facts = facts{ + isStream: true, + zeroReadIsEOF: true, + } + default: + return nil, err + } + + return c, nil +} + +// Low-level methods which provide raw system call access. + +// Accept wraps accept(2) or accept4(2) depending on the operating system, but +// returns a Conn for the accepted connection rather than a raw file descriptor. +// +// If the operating system supports accept4(2) (which allows flags), +// SOCK_CLOEXEC and SOCK_NONBLOCK are automatically applied to flags to mirror +// the standard library's socket flag behaviors. +// +// If the operating system only supports accept(2) (which does not allow flags) +// and flags is not zero, an error will be returned. +// +// Accept obeys context cancelation and uses the deadline set on the context to +// cancel accepting the next connection. If a deadline is set on ctx, this +// deadline will override any previous deadlines set using SetDeadline or +// SetReadDeadline. Upon return, the read deadline is cleared. +func (c *Conn) Accept(ctx context.Context, flags int) (*Conn, unix.Sockaddr, error) { + type ret struct { + nfd int + sa unix.Sockaddr + } + + r, err := readT(c, ctx, sysAccept, func(fd int) (ret, error) { + // Either accept(2) or accept4(2) depending on the OS. + nfd, sa, err := accept(fd, flags|socketFlags) + return ret{nfd, sa}, err + }) + if err != nil { + // internal/poll, context error, or user function error. + return nil, nil, err + } + + // Successfully accepted a connection, wrap it in a Conn for use by the + // caller. + ac, err := New(r.nfd, c.name) + if err != nil { + return nil, nil, err + } + + return ac, r.sa, nil +} + +// Bind wraps bind(2). +func (c *Conn) Bind(sa unix.Sockaddr) error { + return c.control(context.Background(), "bind", func(fd int) error { + return unix.Bind(fd, sa) + }) +} + +// Connect wraps connect(2). In order to verify that the underlying socket is +// connected to a remote peer, Connect calls getpeername(2) and returns the +// unix.Sockaddr from that call. +// +// Connect obeys context cancelation and uses the deadline set on the context to +// cancel connecting to a remote peer. If a deadline is set on ctx, this +// deadline will override any previous deadlines set using SetDeadline or +// SetWriteDeadline. Upon return, the write deadline is cleared. +func (c *Conn) Connect(ctx context.Context, sa unix.Sockaddr) (unix.Sockaddr, error) { + const op = "connect" + + // TODO(mdlayher): it would seem that trying to connect to unbound vsock + // listeners by calling Connect multiple times results in ECONNRESET for the + // first and nil error for subsequent calls. Do we need to memoize the + // error? Check what the stdlib behavior is. + + var ( + // Track progress between invocations of the write closure. We don't + // have an explicit WaitWrite call like internal/poll does, so we have + // to wait until the runtime calls the closure again to indicate we can + // write. + progress uint32 + + // Capture closure sockaddr and error. + rsa unix.Sockaddr + err error + ) + + doErr := c.write(ctx, op, func(fd int) error { + if atomic.AddUint32(&progress, 1) == 1 { + // First call: initiate connect. + return unix.Connect(fd, sa) + } + + // Subsequent calls: the runtime network poller indicates fd is + // writable. Check for errno. + errno, gerr := c.GetsockoptInt(unix.SOL_SOCKET, unix.SO_ERROR) + if gerr != nil { + return gerr + } + if errno != 0 { + // Connection is still not ready or failed. If errno indicates + // the socket is not ready, we will wait for the next write + // event. Otherwise we propagate this errno back to the as a + // permanent error. + uerr := unix.Errno(errno) + err = uerr + return uerr + } + + // According to internal/poll, it's possible for the runtime network + // poller to spuriously wake us and return errno 0 for SO_ERROR. + // Make sure we are actually connected to a peer. + peer, err := c.Getpeername() + if err != nil { + // internal/poll unconditionally goes back to WaitWrite. + // Synthesize an error that will do the same for us. + return unix.EAGAIN + } + + // Connection complete. + rsa = peer + return nil + }) + if doErr != nil { + // internal/poll or context error. + return nil, doErr + } + + if err == unix.EISCONN { + // TODO(mdlayher): is this block obsolete with the addition of the + // getsockopt SO_ERROR check above? + // + // EISCONN is reported if the socket is already established and should + // not be treated as an error. + // - Darwin reports this for at least TCP sockets + // - Linux reports this for at least AF_VSOCK sockets + return rsa, nil + } + + return rsa, os.NewSyscallError(op, err) +} + +// Getsockname wraps getsockname(2). +func (c *Conn) Getsockname() (unix.Sockaddr, error) { + return controlT(c, context.Background(), "getsockname", unix.Getsockname) +} + +// Getpeername wraps getpeername(2). +func (c *Conn) Getpeername() (unix.Sockaddr, error) { + return controlT(c, context.Background(), "getpeername", unix.Getpeername) +} + +// GetsockoptInt wraps getsockopt(2) for integer values. +func (c *Conn) GetsockoptInt(level, opt int) (int, error) { + return controlT(c, context.Background(), "getsockopt", func(fd int) (int, error) { + return unix.GetsockoptInt(fd, level, opt) + }) +} + +// Listen wraps listen(2). +func (c *Conn) Listen(n int) error { + return c.control(context.Background(), "listen", func(fd int) error { + return unix.Listen(fd, n) + }) +} + +// Recvmsg wraps recvmsg(2). +func (c *Conn) Recvmsg(ctx context.Context, p, oob []byte, flags int) (int, int, int, unix.Sockaddr, error) { + type ret struct { + n, oobn, recvflags int + from unix.Sockaddr + } + + r, err := readT(c, ctx, "recvmsg", func(fd int) (ret, error) { + n, oobn, recvflags, from, err := unix.Recvmsg(fd, p, oob, flags) + return ret{n, oobn, recvflags, from}, err + }) + if r.n == 0 && err == nil && c.facts.zeroReadIsEOF { + return 0, 0, 0, nil, io.EOF + } + + return r.n, r.oobn, r.recvflags, r.from, err +} + +// Recvfrom wraps recvfrom(2). +func (c *Conn) Recvfrom(ctx context.Context, p []byte, flags int) (int, unix.Sockaddr, error) { + type ret struct { + n int + addr unix.Sockaddr + } + + out, err := readT(c, ctx, "recvfrom", func(fd int) (ret, error) { + n, addr, err := unix.Recvfrom(fd, p, flags) + return ret{n, addr}, err + }) + if out.n == 0 && err == nil && c.facts.zeroReadIsEOF { + return 0, nil, io.EOF + } + + return out.n, out.addr, err +} + +// Sendmsg wraps sendmsg(2). +func (c *Conn) Sendmsg(ctx context.Context, p, oob []byte, to unix.Sockaddr, flags int) (int, error) { + return writeT(c, ctx, "sendmsg", func(fd int) (int, error) { + return unix.SendmsgN(fd, p, oob, to, flags) + }) +} + +// Sendto wraps sendto(2). +func (c *Conn) Sendto(ctx context.Context, p []byte, flags int, to unix.Sockaddr) error { + return c.write(ctx, "sendto", func(fd int) error { + return unix.Sendto(fd, p, flags, to) + }) +} + +// SetsockoptInt wraps setsockopt(2) for integer values. +func (c *Conn) SetsockoptInt(level, opt, value int) error { + return c.control(context.Background(), "setsockopt", func(fd int) error { + return unix.SetsockoptInt(fd, level, opt, value) + }) +} + +// Shutdown wraps shutdown(2). +func (c *Conn) Shutdown(how int) error { + return c.control(context.Background(), "shutdown", func(fd int) error { + return unix.Shutdown(fd, how) + }) +} + +// Conn low-level read/write/control functions. These functions mirror the +// syscall.RawConn APIs but the input closures return errors rather than +// booleans. + +// read wraps readT to execute a function and capture its error result. This is +// a convenience wrapper for functions which don't return any extra values. +func (c *Conn) read(ctx context.Context, op string, f func(fd int) error) error { + _, err := readT(c, ctx, op, func(fd int) (struct{}, error) { + return struct{}{}, f(fd) + }) + return err +} + +// write executes f, a write function, against the associated file descriptor. +// op is used to create an *os.SyscallError if the file descriptor is closed. +func (c *Conn) write(ctx context.Context, op string, f func(fd int) error) error { + _, err := writeT(c, ctx, op, func(fd int) (struct{}, error) { + return struct{}{}, f(fd) + }) + return err +} + +// readT executes c.rc.Read for op using the input function, returning a newly +// allocated result T. +func readT[T any](c *Conn, ctx context.Context, op string, f func(fd int) (T, error)) (T, error) { + return rwT(c, rwContext[T]{ + Context: ctx, + Type: read, + Op: op, + Do: f, + }) +} + +// writeT executes c.rc.Write for op using the input function, returning a newly +// allocated result T. +func writeT[T any](c *Conn, ctx context.Context, op string, f func(fd int) (T, error)) (T, error) { + return rwT(c, rwContext[T]{ + Context: ctx, + Type: write, + Op: op, + Do: f, + }) +} + +// readWrite indicates if an operation intends to read or write. +type readWrite bool + +// Possible readWrite values. +const ( + read readWrite = false + write readWrite = true +) + +// An rwContext provides arguments to rwT. +type rwContext[T any] struct { + // The caller's context passed for cancelation. + Context context.Context + + // The type of an operation: read or write. + Type readWrite + + // The name of the operation used in errors. + Op string + + // The actual function to perform. + Do func(fd int) (T, error) +} + +// rwT executes c.rc.Read or c.rc.Write (depending on the value of rw.Type) for +// rw.Op using the input function, returning a newly allocated result T. +// +// It obeys context cancelation and the rw.Context must not be nil. +func rwT[T any](c *Conn, rw rwContext[T]) (T, error) { + if atomic.LoadUint32(&c.closed) != 0 { + // If the file descriptor is already closed, do nothing. + return *new(T), os.NewSyscallError(rw.Op, unix.EBADF) + } + + if err := rw.Context.Err(); err != nil { + // Early exit due to context cancel. + return *new(T), os.NewSyscallError(rw.Op, err) + } + + var ( + // The read or write function used to access the runtime network poller. + poll func(func(uintptr) bool) error + + // The read or write function used to set the matching deadline. + deadline func(time.Time) error + ) + + if rw.Type == write { + poll = c.rc.Write + deadline = c.SetWriteDeadline + } else { + poll = c.rc.Read + deadline = c.SetReadDeadline + } + + var ( + // Whether or not the context carried a deadline we are actively using + // for cancelation. + setDeadline bool + + // Signals for the cancelation watcher goroutine. + wg sync.WaitGroup + doneC = make(chan struct{}) + + // Atomic: reports whether we have to disarm the deadline. + // + // TODO(mdlayher): switch back to atomic.Bool when we drop support for + // Go 1.18. + needDisarm int64 + ) + + // On cancel, clean up the watcher. + defer func() { + close(doneC) + wg.Wait() + }() + + if d, ok := rw.Context.Deadline(); ok { + // The context has an explicit deadline. We will use it for cancelation + // but disarm it after poll for the next call. + if err := deadline(d); err != nil { + return *new(T), err + } + setDeadline = true + atomic.AddInt64(&needDisarm, 1) + } else { + // The context does not have an explicit deadline. We have to watch for + // cancelation so we can propagate that signal to immediately unblock + // the runtime network poller. + // + // TODO(mdlayher): is it possible to detect a background context vs a + // context with possible future cancel? + wg.Add(1) + go func() { + defer wg.Done() + + select { + case <-rw.Context.Done(): + // Cancel the operation. Make the caller disarm after poll + // returns. + atomic.AddInt64(&needDisarm, 1) + _ = deadline(time.Unix(0, 1)) + case <-doneC: + // Nothing to do. + } + }() + } + + var ( + t T + err error + ) + + pollErr := poll(func(fd uintptr) bool { + t, err = rw.Do(int(fd)) + return ready(err) + }) + + if atomic.LoadInt64(&needDisarm) > 0 { + _ = deadline(time.Time{}) + } + + if pollErr != nil { + if rw.Context.Err() != nil || (setDeadline && errors.Is(pollErr, os.ErrDeadlineExceeded)) { + // The caller canceled the operation or we set a deadline internally + // and it was reached. + // + // Unpack a plain context error. We wait for the context to be done + // to synchronize state externally. Otherwise we have noticed I/O + // timeout wakeups when we set a deadline but the context was not + // yet marked done. + <-rw.Context.Done() + return *new(T), os.NewSyscallError(rw.Op, rw.Context.Err()) + } + + // Error from syscall.RawConn methods. Conventionally the standard + // library does not wrap internal/poll errors in os.NewSyscallError. + return *new(T), pollErr + } + + // Result from user function. + return t, os.NewSyscallError(rw.Op, err) +} + +// control executes Conn.control for op using the input function. +func (c *Conn) control(ctx context.Context, op string, f func(fd int) error) error { + _, err := controlT(c, ctx, op, func(fd int) (struct{}, error) { + return struct{}{}, f(fd) + }) + return err +} + +// controlT executes c.rc.Control for op using the input function, returning a +// newly allocated result T. +func controlT[T any](c *Conn, ctx context.Context, op string, f func(fd int) (T, error)) (T, error) { + if atomic.LoadUint32(&c.closed) != 0 { + // If the file descriptor is already closed, do nothing. + return *new(T), os.NewSyscallError(op, unix.EBADF) + } + + var ( + t T + err error + ) + + doErr := c.rc.Control(func(fd uintptr) { + // Repeatedly attempt the syscall(s) invoked by f until completion is + // indicated by the return value of ready or the context is canceled. + // + // The last values for t and err are captured outside of the closure for + // use when the loop breaks. + for { + if err = ctx.Err(); err != nil { + // Early exit due to context cancel. + return + } + + t, err = f(int(fd)) + if ready(err) { + return + } + } + }) + if doErr != nil { + // Error from syscall.RawConn methods. Conventionally the standard + // library does not wrap internal/poll errors in os.NewSyscallError. + return *new(T), doErr + } + + // Result from user function. + return t, os.NewSyscallError(op, err) +} + +// ready indicates readiness based on the value of err. +func ready(err error) bool { + switch err { + case unix.EAGAIN, unix.EINPROGRESS, unix.EINTR: + // When a socket is in non-blocking mode, we might see a variety of errors: + // - EAGAIN: most common case for a socket read not being ready + // - EINPROGRESS: reported by some sockets when first calling connect + // - EINTR: system call interrupted, more frequently occurs in Go 1.14+ + // because goroutines can be asynchronously preempted + // + // Return false to let the poller wait for readiness. See the source code + // for internal/poll.FD.RawRead for more details. + return false + default: + // Ready regardless of whether there was an error or no error. + return true + } +} + +// Darwin and FreeBSD can't read or write 2GB+ files at a time, +// even on 64-bit systems. +// The same is true of socket implementations on many systems. +// See golang.org/issue/7812 and golang.org/issue/16266. +// Use 1GB instead of, say, 2GB-1, to keep subsequent reads aligned. +const maxRW = 1 << 30 diff --git a/vendor/github.com/mdlayher/socket/conn_linux.go b/vendor/github.com/mdlayher/socket/conn_linux.go new file mode 100644 index 000000000..37579d4a0 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/conn_linux.go @@ -0,0 +1,118 @@ +//go:build linux +// +build linux + +package socket + +import ( + "context" + "os" + "unsafe" + + "golang.org/x/net/bpf" + "golang.org/x/sys/unix" +) + +// IoctlKCMClone wraps ioctl(2) for unix.KCMClone values, but returns a Conn +// rather than a raw file descriptor. +func (c *Conn) IoctlKCMClone() (*Conn, error) { + info, err := controlT(c, context.Background(), "ioctl", unix.IoctlKCMClone) + if err != nil { + return nil, err + } + + // Successful clone, wrap in a Conn for use by the caller. + return New(int(info.Fd), c.name) +} + +// IoctlKCMAttach wraps ioctl(2) for unix.KCMAttach values. +func (c *Conn) IoctlKCMAttach(info unix.KCMAttach) error { + return c.control(context.Background(), "ioctl", func(fd int) error { + return unix.IoctlKCMAttach(fd, info) + }) +} + +// IoctlKCMUnattach wraps ioctl(2) for unix.KCMUnattach values. +func (c *Conn) IoctlKCMUnattach(info unix.KCMUnattach) error { + return c.control(context.Background(), "ioctl", func(fd int) error { + return unix.IoctlKCMUnattach(fd, info) + }) +} + +// PidfdGetfd wraps pidfd_getfd(2) for a Conn which wraps a pidfd, but returns a +// Conn rather than a raw file descriptor. +func (c *Conn) PidfdGetfd(targetFD, flags int) (*Conn, error) { + outFD, err := controlT(c, context.Background(), "pidfd_getfd", func(fd int) (int, error) { + return unix.PidfdGetfd(fd, targetFD, flags) + }) + if err != nil { + return nil, err + } + + // Successful getfd, wrap in a Conn for use by the caller. + return New(outFD, c.name) +} + +// PidfdSendSignal wraps pidfd_send_signal(2) for a Conn which wraps a Linux +// pidfd. +func (c *Conn) PidfdSendSignal(sig unix.Signal, info *unix.Siginfo, flags int) error { + return c.control(context.Background(), "pidfd_send_signal", func(fd int) error { + return unix.PidfdSendSignal(fd, sig, info, flags) + }) +} + +// SetBPF attaches an assembled BPF program to a Conn. +func (c *Conn) SetBPF(filter []bpf.RawInstruction) error { + // We can't point to the first instruction in the array if no instructions + // are present. + if len(filter) == 0 { + return os.NewSyscallError("setsockopt", unix.EINVAL) + } + + prog := unix.SockFprog{ + Len: uint16(len(filter)), + Filter: (*unix.SockFilter)(unsafe.Pointer(&filter[0])), + } + + return c.SetsockoptSockFprog(unix.SOL_SOCKET, unix.SO_ATTACH_FILTER, &prog) +} + +// RemoveBPF removes a BPF filter from a Conn. +func (c *Conn) RemoveBPF() error { + // 0 argument is ignored. + return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_DETACH_FILTER, 0) +} + +// SetsockoptPacketMreq wraps setsockopt(2) for unix.PacketMreq values. +func (c *Conn) SetsockoptPacketMreq(level, opt int, mreq *unix.PacketMreq) error { + return c.control(context.Background(), "setsockopt", func(fd int) error { + return unix.SetsockoptPacketMreq(fd, level, opt, mreq) + }) +} + +// SetsockoptSockFprog wraps setsockopt(2) for unix.SockFprog values. +func (c *Conn) SetsockoptSockFprog(level, opt int, fprog *unix.SockFprog) error { + return c.control(context.Background(), "setsockopt", func(fd int) error { + return unix.SetsockoptSockFprog(fd, level, opt, fprog) + }) +} + +// GetsockoptTpacketStats wraps getsockopt(2) for unix.TpacketStats values. +func (c *Conn) GetsockoptTpacketStats(level, name int) (*unix.TpacketStats, error) { + return controlT(c, context.Background(), "getsockopt", func(fd int) (*unix.TpacketStats, error) { + return unix.GetsockoptTpacketStats(fd, level, name) + }) +} + +// GetsockoptTpacketStatsV3 wraps getsockopt(2) for unix.TpacketStatsV3 values. +func (c *Conn) GetsockoptTpacketStatsV3(level, name int) (*unix.TpacketStatsV3, error) { + return controlT(c, context.Background(), "getsockopt", func(fd int) (*unix.TpacketStatsV3, error) { + return unix.GetsockoptTpacketStatsV3(fd, level, name) + }) +} + +// Waitid wraps waitid(2). +func (c *Conn) Waitid(idType int, info *unix.Siginfo, options int, rusage *unix.Rusage) error { + return c.read(context.Background(), "waitid", func(fd int) error { + return unix.Waitid(idType, fd, info, options, rusage) + }) +} diff --git a/vendor/github.com/mdlayher/socket/doc.go b/vendor/github.com/mdlayher/socket/doc.go new file mode 100644 index 000000000..7d4566c90 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/doc.go @@ -0,0 +1,13 @@ +// Package socket provides a low-level network connection type which integrates +// with Go's runtime network poller to provide asynchronous I/O and deadline +// support. +// +// This package focuses on UNIX-like operating systems which make use of BSD +// sockets system call APIs. It is meant to be used as a foundation for the +// creation of operating system-specific socket packages, for socket families +// such as Linux's AF_NETLINK, AF_PACKET, or AF_VSOCK. This package should not +// be used directly in end user applications. +// +// Any use of package socket should be guarded by build tags, as one would also +// use when importing the syscall or golang.org/x/sys packages. +package socket diff --git a/vendor/github.com/mdlayher/socket/netns_linux.go b/vendor/github.com/mdlayher/socket/netns_linux.go new file mode 100644 index 000000000..b29115ad1 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/netns_linux.go @@ -0,0 +1,150 @@ +//go:build linux +// +build linux + +package socket + +import ( + "errors" + "fmt" + "os" + "runtime" + + "golang.org/x/sync/errgroup" + "golang.org/x/sys/unix" +) + +// errNetNSDisabled is returned when network namespaces are unavailable on +// a given system. +var errNetNSDisabled = errors.New("socket: Linux network namespaces are not enabled on this system") + +// withNetNS invokes fn within the context of the network namespace specified by +// fd, while also managing the logic required to safely do so by manipulating +// thread-local state. +func withNetNS(fd int, fn func() (*Conn, error)) (*Conn, error) { + var ( + eg errgroup.Group + conn *Conn + ) + + eg.Go(func() error { + // Retrieve and store the calling OS thread's network namespace so the + // thread can be reassigned to it after creating a socket in another network + // namespace. + runtime.LockOSThread() + + ns, err := threadNetNS() + if err != nil { + // No thread-local manipulation, unlock. + runtime.UnlockOSThread() + return err + } + defer ns.Close() + + // Beyond this point, the thread's network namespace is poisoned. Do not + // unlock the OS thread until all network namespace manipulation completes + // to avoid returning to the caller with altered thread-local state. + + // Assign the current OS thread the goroutine is locked to to the given + // network namespace. + if err := ns.Set(fd); err != nil { + return err + } + + // Attempt Conn creation and unconditionally restore the original namespace. + c, err := fn() + if nerr := ns.Restore(); nerr != nil { + // Failed to restore original namespace. Return an error and allow the + // runtime to terminate the thread. + if err == nil { + _ = c.Close() + } + + return nerr + } + + // No more thread-local state manipulation; return the new Conn. + runtime.UnlockOSThread() + conn = c + return nil + }) + + if err := eg.Wait(); err != nil { + return nil, err + } + + return conn, nil +} + +// A netNS is a handle that can manipulate network namespaces. +// +// Operations performed on a netNS must use runtime.LockOSThread before +// manipulating any network namespaces. +type netNS struct { + // The handle to a network namespace. + f *os.File + + // Indicates if network namespaces are disabled on this system, and thus + // operations should become a no-op or return errors. + disabled bool +} + +// threadNetNS constructs a netNS using the network namespace of the calling +// thread. If the namespace is not the default namespace, runtime.LockOSThread +// should be invoked first. +func threadNetNS() (*netNS, error) { + return fileNetNS(fmt.Sprintf("/proc/self/task/%d/ns/net", unix.Gettid())) +} + +// fileNetNS opens file and creates a netNS. fileNetNS should only be called +// directly in tests. +func fileNetNS(file string) (*netNS, error) { + f, err := os.Open(file) + switch { + case err == nil: + return &netNS{f: f}, nil + case os.IsNotExist(err): + // Network namespaces are not enabled on this system. Use this signal + // to return errors elsewhere if the caller explicitly asks for a + // network namespace to be set. + return &netNS{disabled: true}, nil + default: + return nil, err + } +} + +// Close releases the handle to a network namespace. +func (n *netNS) Close() error { + return n.do(func() error { return n.f.Close() }) +} + +// FD returns a file descriptor which represents the network namespace. +func (n *netNS) FD() int { + if n.disabled { + // No reasonable file descriptor value in this case, so specify a + // non-existent one. + return -1 + } + + return int(n.f.Fd()) +} + +// Restore restores the original network namespace for the calling thread. +func (n *netNS) Restore() error { + return n.do(func() error { return n.Set(n.FD()) }) +} + +// Set sets a new network namespace for the current thread using fd. +func (n *netNS) Set(fd int) error { + return n.do(func() error { + return os.NewSyscallError("setns", unix.Setns(fd, unix.CLONE_NEWNET)) + }) +} + +// do runs fn if network namespaces are enabled on this system. +func (n *netNS) do(fn func() error) error { + if n.disabled { + return errNetNSDisabled + } + + return fn() +} diff --git a/vendor/github.com/mdlayher/socket/netns_others.go b/vendor/github.com/mdlayher/socket/netns_others.go new file mode 100644 index 000000000..4cceb3d04 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/netns_others.go @@ -0,0 +1,14 @@ +//go:build !linux +// +build !linux + +package socket + +import ( + "fmt" + "runtime" +) + +// withNetNS returns an error on non-Linux systems. +func withNetNS(_ int, _ func() (*Conn, error)) (*Conn, error) { + return nil, fmt.Errorf("socket: Linux network namespace support is not available on %s", runtime.GOOS) +} diff --git a/vendor/github.com/mdlayher/socket/setbuffer_linux.go b/vendor/github.com/mdlayher/socket/setbuffer_linux.go new file mode 100644 index 000000000..0d4aa4417 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/setbuffer_linux.go @@ -0,0 +1,24 @@ +//go:build linux +// +build linux + +package socket + +import "golang.org/x/sys/unix" + +// setReadBuffer wraps the SO_RCVBUF{,FORCE} setsockopt(2) options. +func (c *Conn) setReadBuffer(bytes int) error { + err := c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUFFORCE, bytes) + if err != nil { + err = c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF, bytes) + } + return err +} + +// setWriteBuffer wraps the SO_SNDBUF{,FORCE} setsockopt(2) options. +func (c *Conn) setWriteBuffer(bytes int) error { + err := c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUFFORCE, bytes) + if err != nil { + err = c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF, bytes) + } + return err +} diff --git a/vendor/github.com/mdlayher/socket/setbuffer_others.go b/vendor/github.com/mdlayher/socket/setbuffer_others.go new file mode 100644 index 000000000..72b36dbe3 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/setbuffer_others.go @@ -0,0 +1,16 @@ +//go:build !linux +// +build !linux + +package socket + +import "golang.org/x/sys/unix" + +// setReadBuffer wraps the SO_RCVBUF setsockopt(2) option. +func (c *Conn) setReadBuffer(bytes int) error { + return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_RCVBUF, bytes) +} + +// setWriteBuffer wraps the SO_SNDBUF setsockopt(2) option. +func (c *Conn) setWriteBuffer(bytes int) error { + return c.SetsockoptInt(unix.SOL_SOCKET, unix.SO_SNDBUF, bytes) +} diff --git a/vendor/github.com/mdlayher/socket/typ_cloexec_nonblock.go b/vendor/github.com/mdlayher/socket/typ_cloexec_nonblock.go new file mode 100644 index 000000000..40e834310 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/typ_cloexec_nonblock.go @@ -0,0 +1,12 @@ +//go:build !darwin +// +build !darwin + +package socket + +import "golang.org/x/sys/unix" + +const ( + // These operating systems support CLOEXEC and NONBLOCK socket options. + flagCLOEXEC = true + socketFlags = unix.SOCK_CLOEXEC | unix.SOCK_NONBLOCK +) diff --git a/vendor/github.com/mdlayher/socket/typ_none.go b/vendor/github.com/mdlayher/socket/typ_none.go new file mode 100644 index 000000000..9bbb1aab5 --- /dev/null +++ b/vendor/github.com/mdlayher/socket/typ_none.go @@ -0,0 +1,11 @@ +//go:build darwin +// +build darwin + +package socket + +const ( + // These operating systems do not support CLOEXEC and NONBLOCK socket + // options. + flagCLOEXEC = false + socketFlags = 0 +) diff --git a/vendor/github.com/mdlayher/vsock/.gitignore b/vendor/github.com/mdlayher/vsock/.gitignore new file mode 100644 index 000000000..8130d4158 --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/.gitignore @@ -0,0 +1,4 @@ +cover.out +vsock.test +cmd/vscp/vscp +cmd/vsockhttp/vsockhttp diff --git a/vendor/github.com/mdlayher/vsock/CHANGELOG.md b/vendor/github.com/mdlayher/vsock/CHANGELOG.md new file mode 100644 index 000000000..c64a797bc --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/CHANGELOG.md @@ -0,0 +1,53 @@ +# CHANGELOG + +# v1.2.1 + +- [Improvement]: updated dependencies, test with Go 1.20. + +# v1.2.0 + +**This is the first release of package vsock that only supports Go 1.18+. Users +on older versions of Go must use v1.1.1.** + +- [Improvement]: drop support for older versions of Go so we can begin using + modern versions of `x/sys` and other dependencies. + +## v1.1.1 + +**This is the last release of package vsock that supports Go 1.17 and below.** + +- [Bug Fix] [commit](https://github.com/mdlayher/vsock/commit/ead86435c244d5d6baad549a6df0557ada3f4401): + fix build on non-UNIX platforms such as Windows. This is a no-op change on + Linux but provides a friendlier experience for non-Linux users. + +## v1.1.0 + +- [New API] [commit](https://github.com/mdlayher/vsock/commit/44cd82dc5f7de644436f22236b111ab97fa9a14f): + `vsock.FileListener` can be used to create a `vsock.Listener` from an existing + `os.File`, which may be provided by systemd socket activation or another + external mechanism. + +## v1.0.1 + +- [Bug Fix] [commit](https://github.com/mdlayher/vsock/commit/99a6dccdebad21d1fa5f757d228d677ccb1412dc): + upgrade `github.com/mdlayher/socket` to handle non-blocking `connect(2)` + errors (called in `vsock.Dial`) properly by checking the `SO_ERROR` socket + option. Lock in this behavior with a new test. +- [Improvement] [commit](https://github.com/mdlayher/vsock/commit/375f3bbcc363500daf367ec511638a4655471719): + downgrade the version of `golang.org/x/net` in use to support Go 1.12. We + don't need the latest version for this package. + +## v1.0.0 + +**This is the first release of package vsock that only supports Go 1.12+. +Users on older versions of Go must use an unstable release.** + +- Initial stable commit! +- [API change]: the `vsock.Dial` and `vsock.Listen` constructors now accept an + optional `*vsock.Config` parameter to enable future expansion in v1.x.x + without prompting further breaking API changes. Because `vsock.Config` has no + options as of this release, `nil` may be passed in all call sites to fix + existing code upon upgrading to v1.0.0. +- [New API]: the `vsock.ListenContextID` function can be used to create a + `*vsock.Listener` which is bound to an explicit context ID address, rather + than inferring one automatically as `vsock.Listen` does. diff --git a/vendor/github.com/mdlayher/vsock/LICENSE.md b/vendor/github.com/mdlayher/vsock/LICENSE.md new file mode 100644 index 000000000..9fa6774b1 --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/LICENSE.md @@ -0,0 +1,9 @@ +# MIT License + +Copyright (C) 2017-2022 Matt Layher + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/mdlayher/vsock/README.md b/vendor/github.com/mdlayher/vsock/README.md new file mode 100644 index 000000000..b1ec4cfbe --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/README.md @@ -0,0 +1,21 @@ +# vsock [![Test Status](https://github.com/mdlayher/vsock/workflows/Linux%20Test/badge.svg)](https://github.com/mdlayher/vsock/actions) [![Go Reference](https://pkg.go.dev/badge/github.com/mdlayher/vsock.svg)](https://pkg.go.dev/github.com/mdlayher/vsock) [![Go Report Card](https://goreportcard.com/badge/github.com/mdlayher/vsock)](https://goreportcard.com/report/github.com/mdlayher/vsock) + +Package `vsock` provides access to Linux VM sockets (`AF_VSOCK`) for +communication between a hypervisor and its virtual machines. MIT Licensed. + +For more information about VM sockets, see my blog about +[Linux VM sockets in Go](https://mdlayher.com/blog/linux-vm-sockets-in-go/) or +the [QEMU wiki page on virtio-vsock](http://wiki.qemu-project.org/Features/VirtioVsock). + +## Stability + +See the [CHANGELOG](./CHANGELOG.md) file for a description of changes between +releases. + +This package has a stable v1 API and any future breaking changes will prompt +the release of a new major version. Features and bug fixes will continue to +occur in the v1.x.x series. + +This package only supports the two most recent major versions of Go, mirroring +Go's own release policy. Older versions of Go may lack critical features and bug +fixes which are necessary for this package to function correctly. diff --git a/vendor/github.com/mdlayher/vsock/conn_linux.go b/vendor/github.com/mdlayher/vsock/conn_linux.go new file mode 100644 index 000000000..6029d547e --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/conn_linux.go @@ -0,0 +1,62 @@ +//go:build linux +// +build linux + +package vsock + +import ( + "context" + + "github.com/mdlayher/socket" + "golang.org/x/sys/unix" +) + +// A conn is the net.Conn implementation for connection-oriented VM sockets. +// We can use socket.Conn directly on Linux to implement all of the necessary +// methods. +type conn = socket.Conn + +// dial is the entry point for Dial on Linux. +func dial(cid, port uint32, _ *Config) (*Conn, error) { + // TODO(mdlayher): Config default nil check and initialize. Pass options to + // socket.Config where necessary. + + c, err := socket.Socket(unix.AF_VSOCK, unix.SOCK_STREAM, 0, "vsock", nil) + if err != nil { + return nil, err + } + + sa := &unix.SockaddrVM{CID: cid, Port: port} + rsa, err := c.Connect(context.Background(), sa) + if err != nil { + _ = c.Close() + return nil, err + } + + // TODO(mdlayher): getpeername(2) appears to return nil in the GitHub CI + // environment, so in the event of a nil sockaddr, fall back to the previous + // method of synthesizing the remote address. + if rsa == nil { + rsa = sa + } + + lsa, err := c.Getsockname() + if err != nil { + _ = c.Close() + return nil, err + } + + lsavm := lsa.(*unix.SockaddrVM) + rsavm := rsa.(*unix.SockaddrVM) + + return &Conn{ + c: c, + local: &Addr{ + ContextID: lsavm.CID, + Port: lsavm.Port, + }, + remote: &Addr{ + ContextID: rsavm.CID, + Port: rsavm.Port, + }, + }, nil +} diff --git a/vendor/github.com/mdlayher/vsock/doc.go b/vendor/github.com/mdlayher/vsock/doc.go new file mode 100644 index 000000000..e158b1836 --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/doc.go @@ -0,0 +1,10 @@ +// Package vsock provides access to Linux VM sockets (AF_VSOCK) for +// communication between a hypervisor and its virtual machines. +// +// The types in this package implement interfaces provided by package net and +// may be used in applications that expect a net.Listener or net.Conn. +// +// - *Addr implements net.Addr +// - *Conn implements net.Conn +// - *Listener implements net.Listener +package vsock diff --git a/vendor/github.com/mdlayher/vsock/fd_linux.go b/vendor/github.com/mdlayher/vsock/fd_linux.go new file mode 100644 index 000000000..531e53f92 --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/fd_linux.go @@ -0,0 +1,36 @@ +package vsock + +import ( + "fmt" + "os" + + "golang.org/x/sys/unix" +) + +// contextID retrieves the local context ID for this system. +func contextID() (uint32, error) { + f, err := os.Open(devVsock) + if err != nil { + return 0, err + } + defer f.Close() + + return unix.IoctlGetUint32(int(f.Fd()), unix.IOCTL_VM_SOCKETS_GET_LOCAL_CID) +} + +// isErrno determines if an error a matches UNIX error number. +func isErrno(err error, errno int) bool { + switch errno { + case ebadf: + return err == unix.EBADF + case enotconn: + return err == unix.ENOTCONN + default: + panicf("vsock: isErrno called with unhandled error number parameter: %d", errno) + return false + } +} + +func panicf(format string, a ...interface{}) { + panic(fmt.Sprintf(format, a...)) +} diff --git a/vendor/github.com/mdlayher/vsock/listener_linux.go b/vendor/github.com/mdlayher/vsock/listener_linux.go new file mode 100644 index 000000000..50fa1b7a4 --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/listener_linux.go @@ -0,0 +1,133 @@ +//go:build linux +// +build linux + +package vsock + +import ( + "context" + "net" + "os" + "time" + + "github.com/mdlayher/socket" + "golang.org/x/sys/unix" +) + +var _ net.Listener = &listener{} + +// A listener is the net.Listener implementation for connection-oriented +// VM sockets. +type listener struct { + c *socket.Conn + addr *Addr +} + +// Addr and Close implement the net.Listener interface for listener. +func (l *listener) Addr() net.Addr { return l.addr } +func (l *listener) Close() error { return l.c.Close() } +func (l *listener) SetDeadline(t time.Time) error { return l.c.SetDeadline(t) } + +// Accept accepts a single connection from the listener, and sets up +// a net.Conn backed by conn. +func (l *listener) Accept() (net.Conn, error) { + c, rsa, err := l.c.Accept(context.Background(), 0) + if err != nil { + return nil, err + } + + savm := rsa.(*unix.SockaddrVM) + remote := &Addr{ + ContextID: savm.CID, + Port: savm.Port, + } + + return &Conn{ + c: c, + local: l.addr, + remote: remote, + }, nil +} + +// name is the socket name passed to package socket. +const name = "vsock" + +// listen is the entry point for Listen on Linux. +func listen(cid, port uint32, _ *Config) (*Listener, error) { + // TODO(mdlayher): Config default nil check and initialize. Pass options to + // socket.Config where necessary. + + c, err := socket.Socket(unix.AF_VSOCK, unix.SOCK_STREAM, 0, name, nil) + if err != nil { + return nil, err + } + + // Be sure to close the Conn if any of the system calls fail before we + // return the Conn to the caller. + + if port == 0 { + port = unix.VMADDR_PORT_ANY + } + + if err := c.Bind(&unix.SockaddrVM{CID: cid, Port: port}); err != nil { + _ = c.Close() + return nil, err + } + + if err := c.Listen(unix.SOMAXCONN); err != nil { + _ = c.Close() + return nil, err + } + + l, err := newListener(c) + if err != nil { + _ = c.Close() + return nil, err + } + + return l, nil +} + +// fileListener is the entry point for FileListener on Linux. +func fileListener(f *os.File) (*Listener, error) { + c, err := socket.FileConn(f, name) + if err != nil { + return nil, err + } + + l, err := newListener(c) + if err != nil { + _ = c.Close() + return nil, err + } + + return l, nil +} + +// newListener creates a Listener from a raw socket.Conn. +func newListener(c *socket.Conn) (*Listener, error) { + lsa, err := c.Getsockname() + if err != nil { + return nil, err + } + + // Now that the library can also accept arbitrary os.Files, we have to + // verify the address family so we don't accidentally create a + // *vsock.Listener backed by TCP or some other socket type. + lsavm, ok := lsa.(*unix.SockaddrVM) + if !ok { + // All errors should wrapped with os.SyscallError. + return nil, os.NewSyscallError("listen", unix.EINVAL) + } + + addr := &Addr{ + ContextID: lsavm.CID, + Port: lsavm.Port, + } + + return &Listener{ + l: &listener{ + c: c, + addr: addr, + }, + }, nil +} diff --git a/vendor/github.com/mdlayher/vsock/vsock.go b/vendor/github.com/mdlayher/vsock/vsock.go new file mode 100644 index 000000000..78763936a --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/vsock.go @@ -0,0 +1,435 @@ +package vsock + +import ( + "errors" + "fmt" + "io" + "net" + "os" + "strings" + "syscall" + "time" +) + +const ( + // Hypervisor specifies that a socket should communicate with the hypervisor + // process. Note that this is _not_ the same as a socket owned by a process + // running on the hypervisor. Most users should probably use Host instead. + Hypervisor = 0x0 + + // Local specifies that a socket should communicate with a matching socket + // on the same machine. This provides an alternative to UNIX sockets or + // similar and may be useful in testing VM sockets applications. + Local = 0x1 + + // Host specifies that a socket should communicate with processes other than + // the hypervisor on the host machine. This is the correct choice to + // communicate with a process running on a hypervisor using a socket dialed + // from a guest. + Host = 0x2 + + // Error numbers we recognize, copied here to avoid importing x/sys/unix in + // cross-platform code. + ebadf = 9 + enotconn = 107 + + // devVsock is the location of /dev/vsock. It is exposed on both the + // hypervisor and on virtual machines. + devVsock = "/dev/vsock" + + // network is the vsock network reported in net.OpError. + network = "vsock" + + // Operation names which may be returned in net.OpError. + opAccept = "accept" + opClose = "close" + opDial = "dial" + opListen = "listen" + opRawControl = "raw-control" + opRawRead = "raw-read" + opRawWrite = "raw-write" + opRead = "read" + opSet = "set" + opSyscallConn = "syscall-conn" + opWrite = "write" +) + +// TODO(mdlayher): plumb through socket.Config.NetNS if it makes sense. + +// Config contains options for a Conn or Listener. +type Config struct{} + +// Listen opens a connection-oriented net.Listener for incoming VM sockets +// connections. The port parameter specifies the port for the Listener. Config +// specifies optional configuration for the Listener. If config is nil, a +// default configuration will be used. +// +// To allow the server to assign a port automatically, specify 0 for port. The +// address of the server can be retrieved using the Addr method. +// +// Listen automatically infers the appropriate context ID for this machine by +// calling ContextID and passing that value to ListenContextID. Callers with +// advanced use cases (such as using the Local context ID) may wish to use +// ListenContextID directly. +// +// When the Listener is no longer needed, Close must be called to free +// resources. +func Listen(port uint32, cfg *Config) (*Listener, error) { + cid, err := ContextID() + if err != nil { + // No addresses available. + return nil, opError(opListen, err, nil, nil) + } + + return ListenContextID(cid, port, cfg) +} + +// ListenContextID is the same as Listen, but also accepts an explicit context +// ID parameter. This function is intended for advanced use cases and most +// callers should use Listen instead. +// +// See the documentation of Listen for more details. +func ListenContextID(contextID, port uint32, cfg *Config) (*Listener, error) { + l, err := listen(contextID, port, cfg) + if err != nil { + // No remote address available. + return nil, opError(opListen, err, &Addr{ + ContextID: contextID, + Port: port, + }, nil) + } + + return l, nil +} + +// FileListener returns a copy of the network listener corresponding to an open +// os.File. It is the caller's responsibility to close the Listener when +// finished. Closing the Listener does not affect the os.File, and closing the +// os.File does not affect the Listener. +// +// This function is intended for advanced use cases and most callers should use +// Listen instead. +func FileListener(f *os.File) (*Listener, error) { + l, err := fileListener(f) + if err != nil { + // No addresses available. + return nil, opError(opListen, err, nil, nil) + } + + return l, nil +} + +var _ net.Listener = &Listener{} + +// A Listener is a VM sockets implementation of a net.Listener. +type Listener struct { + l *listener +} + +// Accept implements the Accept method in the net.Listener interface; it waits +// for the next call and returns a generic net.Conn. The returned net.Conn will +// always be of type *Conn. +func (l *Listener) Accept() (net.Conn, error) { + c, err := l.l.Accept() + if err != nil { + return nil, l.opError(opAccept, err) + } + + return c, nil +} + +// Addr returns the listener's network address, a *Addr. The Addr returned is +// shared by all invocations of Addr, so do not modify it. +func (l *Listener) Addr() net.Addr { return l.l.Addr() } + +// Close stops listening on the VM sockets address. Already Accepted connections +// are not closed. +func (l *Listener) Close() error { + return l.opError(opClose, l.l.Close()) +} + +// SetDeadline sets the deadline associated with the listener. A zero time value +// disables the deadline. +func (l *Listener) SetDeadline(t time.Time) error { + return l.opError(opSet, l.l.SetDeadline(t)) +} + +// opError is a convenience for the function opError that also passes the local +// address of the Listener. +func (l *Listener) opError(op string, err error) error { + // No remote address for a Listener. + return opError(op, err, l.Addr(), nil) +} + +// Dial dials a connection-oriented net.Conn to a VM sockets listener. The +// context ID and port parameters specify the address of the listener. Config +// specifies optional configuration for the Conn. If config is nil, a default +// configuration will be used. +// +// If dialing a connection from the hypervisor to a virtual machine, the VM's +// context ID should be specified. +// +// If dialing from a VM to the hypervisor, Hypervisor should be used to +// communicate with the hypervisor process, or Host should be used to +// communicate with other processes on the host machine. +// +// When the connection is no longer needed, Close must be called to free +// resources. +func Dial(contextID, port uint32, cfg *Config) (*Conn, error) { + c, err := dial(contextID, port, cfg) + if err != nil { + // No local address, but we have a remote address we can return. + return nil, opError(opDial, err, nil, &Addr{ + ContextID: contextID, + Port: port, + }) + } + + return c, nil +} + +var ( + _ net.Conn = &Conn{} + _ syscall.Conn = &Conn{} +) + +// A Conn is a VM sockets implementation of a net.Conn. +type Conn struct { + c *conn + local *Addr + remote *Addr +} + +// Close closes the connection. +func (c *Conn) Close() error { + return c.opError(opClose, c.c.Close()) +} + +// CloseRead shuts down the reading side of the VM sockets connection. Most +// callers should just use Close. +func (c *Conn) CloseRead() error { + return c.opError(opClose, c.c.CloseRead()) +} + +// CloseWrite shuts down the writing side of the VM sockets connection. Most +// callers should just use Close. +func (c *Conn) CloseWrite() error { + return c.opError(opClose, c.c.CloseWrite()) +} + +// LocalAddr returns the local network address. The Addr returned is shared by +// all invocations of LocalAddr, so do not modify it. +func (c *Conn) LocalAddr() net.Addr { return c.local } + +// RemoteAddr returns the remote network address. The Addr returned is shared by +// all invocations of RemoteAddr, so do not modify it. +func (c *Conn) RemoteAddr() net.Addr { return c.remote } + +// Read implements the net.Conn Read method. +func (c *Conn) Read(b []byte) (int, error) { + n, err := c.c.Read(b) + if err != nil { + return n, c.opError(opRead, err) + } + + return n, nil +} + +// Write implements the net.Conn Write method. +func (c *Conn) Write(b []byte) (int, error) { + n, err := c.c.Write(b) + if err != nil { + return n, c.opError(opWrite, err) + } + + return n, nil +} + +// SetDeadline implements the net.Conn SetDeadline method. +func (c *Conn) SetDeadline(t time.Time) error { + return c.opError(opSet, c.c.SetDeadline(t)) +} + +// SetReadDeadline implements the net.Conn SetReadDeadline method. +func (c *Conn) SetReadDeadline(t time.Time) error { + return c.opError(opSet, c.c.SetReadDeadline(t)) +} + +// SetWriteDeadline implements the net.Conn SetWriteDeadline method. +func (c *Conn) SetWriteDeadline(t time.Time) error { + return c.opError(opSet, c.c.SetWriteDeadline(t)) +} + +// SyscallConn returns a raw network connection. This implements the +// syscall.Conn interface. +func (c *Conn) SyscallConn() (syscall.RawConn, error) { + rc, err := c.c.SyscallConn() + if err != nil { + return nil, c.opError(opSyscallConn, err) + } + + return &rawConn{ + rc: rc, + local: c.local, + remote: c.remote, + }, nil +} + +// opError is a convenience for the function opError that also passes the local +// and remote addresses of the Conn. +func (c *Conn) opError(op string, err error) error { + return opError(op, err, c.local, c.remote) +} + +// TODO(mdlayher): see if we can port smarter net.OpError with local/remote +// address error logic into socket.Conn's SyscallConn type to avoid the need for +// this wrapper. + +var _ syscall.RawConn = &rawConn{} + +// A rawConn is a syscall.RawConn that wraps an internal syscall.RawConn in order +// to produce net.OpError error values. +type rawConn struct { + rc syscall.RawConn + local, remote *Addr +} + +// Control implements the syscall.RawConn Control method. +func (rc *rawConn) Control(fn func(fd uintptr)) error { + return rc.opError(opRawControl, rc.rc.Control(fn)) +} + +// Control implements the syscall.RawConn Read method. +func (rc *rawConn) Read(fn func(fd uintptr) (done bool)) error { + return rc.opError(opRawRead, rc.rc.Read(fn)) +} + +// Control implements the syscall.RawConn Write method. +func (rc *rawConn) Write(fn func(fd uintptr) (done bool)) error { + return rc.opError(opRawWrite, rc.rc.Write(fn)) +} + +// opError is a convenience for the function opError that also passes the local +// and remote addresses of the rawConn. +func (rc *rawConn) opError(op string, err error) error { + return opError(op, err, rc.local, rc.remote) +} + +var _ net.Addr = &Addr{} + +// An Addr is the address of a VM sockets endpoint. +type Addr struct { + ContextID, Port uint32 +} + +// Network returns the address's network name, "vsock". +func (a *Addr) Network() string { return network } + +// String returns a human-readable representation of Addr, and indicates if +// ContextID is meant to be used for a hypervisor, host, VM, etc. +func (a *Addr) String() string { + var host string + + switch a.ContextID { + case Hypervisor: + host = fmt.Sprintf("hypervisor(%d)", a.ContextID) + case Local: + host = fmt.Sprintf("local(%d)", a.ContextID) + case Host: + host = fmt.Sprintf("host(%d)", a.ContextID) + default: + host = fmt.Sprintf("vm(%d)", a.ContextID) + } + + return fmt.Sprintf("%s:%d", host, a.Port) +} + +// fileName returns a file name for use with os.NewFile for Addr. +func (a *Addr) fileName() string { + return fmt.Sprintf("%s:%s", a.Network(), a.String()) +} + +// ContextID retrieves the local VM sockets context ID for this system. +// ContextID can be used to directly determine if a system is capable of using +// VM sockets. +// +// If the kernel module is unavailable, access to the kernel module is denied, +// or VM sockets are unsupported on this system, it returns an error. +func ContextID() (uint32, error) { + return contextID() +} + +// opError unpacks err if possible, producing a net.OpError with the input +// parameters in order to implement net.Conn. As a convenience, opError returns +// nil if the input error is nil. +func opError(op string, err error, local, remote net.Addr) error { + if err == nil { + return nil + } + + // TODO(mdlayher): this entire function is suspect and should probably be + // looked at carefully, especially with Go 1.13+ error wrapping. + // + // Eventually this *net.OpError logic should probably be ported into + // mdlayher/socket because similar checks are necessary to comply with + // nettest.TestConn. + + // Unwrap inner errors from error types. + // + // TODO(mdlayher): errors.Cause or similar in Go 1.13. + switch xerr := err.(type) { + // os.PathError produced by os.File method calls. + case *os.PathError: + // Although we could make use of xerr.Op here, we're passing it manually + // for consistency, since some of the Conn calls we are making don't + // wrap an os.File, which would return an Op for us. + // + // As a special case, if the error is related to access to the /dev/vsock + // device, we don't unwrap it, so the caller has more context as to why + // their operation actually failed than "permission denied" or similar. + if xerr.Path != devVsock { + err = xerr.Err + } + } + + switch { + case err == io.EOF, isErrno(err, enotconn): + // We may see a literal io.EOF as happens with x/net/nettest, but + // "transport not connected" also means io.EOF in Go. + return io.EOF + case err == os.ErrClosed, isErrno(err, ebadf), strings.Contains(err.Error(), "use of closed"): + // Different operations may return different errors that all effectively + // indicate a closed file. + // + // To rectify the differences, net.TCPConn uses an error with this text + // from internal/poll for the backing file already being closed. + err = errors.New("use of closed network connection") + default: + // Nothing to do, return this directly. + } + + // Determine source and addr using the rules defined by net.OpError's + // documentation: https://golang.org/pkg/net/#OpError. + var source, addr net.Addr + switch op { + case opClose, opDial, opRawRead, opRawWrite, opRead, opWrite: + if local != nil { + source = local + } + if remote != nil { + addr = remote + } + case opAccept, opListen, opRawControl, opSet, opSyscallConn: + if local != nil { + addr = local + } + } + + return &net.OpError{ + Op: op, + Net: network, + Source: source, + Addr: addr, + Err: err, + } +} diff --git a/vendor/github.com/mdlayher/vsock/vsock_others.go b/vendor/github.com/mdlayher/vsock/vsock_others.go new file mode 100644 index 000000000..5c1e88e39 --- /dev/null +++ b/vendor/github.com/mdlayher/vsock/vsock_others.go @@ -0,0 +1,45 @@ +//go:build !linux +// +build !linux + +package vsock + +import ( + "fmt" + "net" + "os" + "runtime" + "syscall" + "time" +) + +// errUnimplemented is returned by all functions on platforms that +// cannot make use of VM sockets. +var errUnimplemented = fmt.Errorf("vsock: not implemented on %s", runtime.GOOS) + +func fileListener(_ *os.File) (*Listener, error) { return nil, errUnimplemented } +func listen(_, _ uint32, _ *Config) (*Listener, error) { return nil, errUnimplemented } + +type listener struct{} + +func (*listener) Accept() (net.Conn, error) { return nil, errUnimplemented } +func (*listener) Addr() net.Addr { return nil } +func (*listener) Close() error { return errUnimplemented } +func (*listener) SetDeadline(_ time.Time) error { return errUnimplemented } + +func dial(_, _ uint32, _ *Config) (*Conn, error) { return nil, errUnimplemented } + +type conn struct{} + +func (*conn) Close() error { return errUnimplemented } +func (*conn) CloseRead() error { return errUnimplemented } +func (*conn) CloseWrite() error { return errUnimplemented } +func (*conn) Read(_ []byte) (int, error) { return 0, errUnimplemented } +func (*conn) Write(_ []byte) (int, error) { return 0, errUnimplemented } +func (*conn) SetDeadline(_ time.Time) error { return errUnimplemented } +func (*conn) SetReadDeadline(_ time.Time) error { return errUnimplemented } +func (*conn) SetWriteDeadline(_ time.Time) error { return errUnimplemented } +func (*conn) SyscallConn() (syscall.RawConn, error) { return nil, errUnimplemented } + +func contextID() (uint32, error) { return 0, errUnimplemented } + +func isErrno(_ error, _ int) bool { return false } diff --git a/vendor/golang.org/x/net/bpf/asm.go b/vendor/golang.org/x/net/bpf/asm.go new file mode 100644 index 000000000..15e21b181 --- /dev/null +++ b/vendor/golang.org/x/net/bpf/asm.go @@ -0,0 +1,41 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bpf + +import "fmt" + +// Assemble converts insts into raw instructions suitable for loading +// into a BPF virtual machine. +// +// Currently, no optimization is attempted, the assembled program flow +// is exactly as provided. +func Assemble(insts []Instruction) ([]RawInstruction, error) { + ret := make([]RawInstruction, len(insts)) + var err error + for i, inst := range insts { + ret[i], err = inst.Assemble() + if err != nil { + return nil, fmt.Errorf("assembling instruction %d: %s", i+1, err) + } + } + return ret, nil +} + +// Disassemble attempts to parse raw back into +// Instructions. Unrecognized RawInstructions are assumed to be an +// extension not implemented by this package, and are passed through +// unchanged to the output. The allDecoded value reports whether insts +// contains no RawInstructions. +func Disassemble(raw []RawInstruction) (insts []Instruction, allDecoded bool) { + insts = make([]Instruction, len(raw)) + allDecoded = true + for i, r := range raw { + insts[i] = r.Disassemble() + if _, ok := insts[i].(RawInstruction); ok { + allDecoded = false + } + } + return insts, allDecoded +} diff --git a/vendor/golang.org/x/net/bpf/constants.go b/vendor/golang.org/x/net/bpf/constants.go new file mode 100644 index 000000000..12f3ee835 --- /dev/null +++ b/vendor/golang.org/x/net/bpf/constants.go @@ -0,0 +1,222 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bpf + +// A Register is a register of the BPF virtual machine. +type Register uint16 + +const ( + // RegA is the accumulator register. RegA is always the + // destination register of ALU operations. + RegA Register = iota + // RegX is the indirection register, used by LoadIndirect + // operations. + RegX +) + +// An ALUOp is an arithmetic or logic operation. +type ALUOp uint16 + +// ALU binary operation types. +const ( + ALUOpAdd ALUOp = iota << 4 + ALUOpSub + ALUOpMul + ALUOpDiv + ALUOpOr + ALUOpAnd + ALUOpShiftLeft + ALUOpShiftRight + aluOpNeg // Not exported because it's the only unary ALU operation, and gets its own instruction type. + ALUOpMod + ALUOpXor +) + +// A JumpTest is a comparison operator used in conditional jumps. +type JumpTest uint16 + +// Supported operators for conditional jumps. +// K can be RegX for JumpIfX +const ( + // K == A + JumpEqual JumpTest = iota + // K != A + JumpNotEqual + // K > A + JumpGreaterThan + // K < A + JumpLessThan + // K >= A + JumpGreaterOrEqual + // K <= A + JumpLessOrEqual + // K & A != 0 + JumpBitsSet + // K & A == 0 + JumpBitsNotSet +) + +// An Extension is a function call provided by the kernel that +// performs advanced operations that are expensive or impossible +// within the BPF virtual machine. +// +// Extensions are only implemented by the Linux kernel. +// +// TODO: should we prune this list? Some of these extensions seem +// either broken or near-impossible to use correctly, whereas other +// (len, random, ifindex) are quite useful. +type Extension int + +// Extension functions available in the Linux kernel. +const ( + // extOffset is the negative maximum number of instructions used + // to load instructions by overloading the K argument. + extOffset = -0x1000 + // ExtLen returns the length of the packet. + ExtLen Extension = 1 + // ExtProto returns the packet's L3 protocol type. + ExtProto Extension = 0 + // ExtType returns the packet's type (skb->pkt_type in the kernel) + // + // TODO: better documentation. How nice an API do we want to + // provide for these esoteric extensions? + ExtType Extension = 4 + // ExtPayloadOffset returns the offset of the packet payload, or + // the first protocol header that the kernel does not know how to + // parse. + ExtPayloadOffset Extension = 52 + // ExtInterfaceIndex returns the index of the interface on which + // the packet was received. + ExtInterfaceIndex Extension = 8 + // ExtNetlinkAttr returns the netlink attribute of type X at + // offset A. + ExtNetlinkAttr Extension = 12 + // ExtNetlinkAttrNested returns the nested netlink attribute of + // type X at offset A. + ExtNetlinkAttrNested Extension = 16 + // ExtMark returns the packet's mark value. + ExtMark Extension = 20 + // ExtQueue returns the packet's assigned hardware queue. + ExtQueue Extension = 24 + // ExtLinkLayerType returns the packet's hardware address type + // (e.g. Ethernet, Infiniband). + ExtLinkLayerType Extension = 28 + // ExtRXHash returns the packets receive hash. + // + // TODO: figure out what this rxhash actually is. + ExtRXHash Extension = 32 + // ExtCPUID returns the ID of the CPU processing the current + // packet. + ExtCPUID Extension = 36 + // ExtVLANTag returns the packet's VLAN tag. + ExtVLANTag Extension = 44 + // ExtVLANTagPresent returns non-zero if the packet has a VLAN + // tag. + // + // TODO: I think this might be a lie: it reads bit 0x1000 of the + // VLAN header, which changed meaning in recent revisions of the + // spec - this extension may now return meaningless information. + ExtVLANTagPresent Extension = 48 + // ExtVLANProto returns 0x8100 if the frame has a VLAN header, + // 0x88a8 if the frame has a "Q-in-Q" double VLAN header, or some + // other value if no VLAN information is present. + ExtVLANProto Extension = 60 + // ExtRand returns a uniformly random uint32. + ExtRand Extension = 56 +) + +// The following gives names to various bit patterns used in opcode construction. + +const ( + opMaskCls uint16 = 0x7 + // opClsLoad masks + opMaskLoadDest = 0x01 + opMaskLoadWidth = 0x18 + opMaskLoadMode = 0xe0 + // opClsALU & opClsJump + opMaskOperand = 0x08 + opMaskOperator = 0xf0 +) + +const ( + // +---------------+-----------------+---+---+---+ + // | AddrMode (3b) | LoadWidth (2b) | 0 | 0 | 0 | + // +---------------+-----------------+---+---+---+ + opClsLoadA uint16 = iota + // +---------------+-----------------+---+---+---+ + // | AddrMode (3b) | LoadWidth (2b) | 0 | 0 | 1 | + // +---------------+-----------------+---+---+---+ + opClsLoadX + // +---+---+---+---+---+---+---+---+ + // | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | + // +---+---+---+---+---+---+---+---+ + opClsStoreA + // +---+---+---+---+---+---+---+---+ + // | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | + // +---+---+---+---+---+---+---+---+ + opClsStoreX + // +---------------+-----------------+---+---+---+ + // | Operator (4b) | OperandSrc (1b) | 1 | 0 | 0 | + // +---------------+-----------------+---+---+---+ + opClsALU + // +-----------------------------+---+---+---+---+ + // | TestOperator (4b) | 0 | 1 | 0 | 1 | + // +-----------------------------+---+---+---+---+ + opClsJump + // +---+-------------------------+---+---+---+---+ + // | 0 | 0 | 0 | RetSrc (1b) | 0 | 1 | 1 | 0 | + // +---+-------------------------+---+---+---+---+ + opClsReturn + // +---+-------------------------+---+---+---+---+ + // | 0 | 0 | 0 | TXAorTAX (1b) | 0 | 1 | 1 | 1 | + // +---+-------------------------+---+---+---+---+ + opClsMisc +) + +const ( + opAddrModeImmediate uint16 = iota << 5 + opAddrModeAbsolute + opAddrModeIndirect + opAddrModeScratch + opAddrModePacketLen // actually an extension, not an addressing mode. + opAddrModeMemShift +) + +const ( + opLoadWidth4 uint16 = iota << 3 + opLoadWidth2 + opLoadWidth1 +) + +// Operand for ALU and Jump instructions +type opOperand uint16 + +// Supported operand sources. +const ( + opOperandConstant opOperand = iota << 3 + opOperandX +) + +// An jumpOp is a conditional jump condition. +type jumpOp uint16 + +// Supported jump conditions. +const ( + opJumpAlways jumpOp = iota << 4 + opJumpEqual + opJumpGT + opJumpGE + opJumpSet +) + +const ( + opRetSrcConstant uint16 = iota << 4 + opRetSrcA +) + +const ( + opMiscTAX = 0x00 + opMiscTXA = 0x80 +) diff --git a/vendor/golang.org/x/net/bpf/doc.go b/vendor/golang.org/x/net/bpf/doc.go new file mode 100644 index 000000000..04ec1c8ab --- /dev/null +++ b/vendor/golang.org/x/net/bpf/doc.go @@ -0,0 +1,80 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +/* +Package bpf implements marshaling and unmarshaling of programs for the +Berkeley Packet Filter virtual machine, and provides a Go implementation +of the virtual machine. + +BPF's main use is to specify a packet filter for network taps, so that +the kernel doesn't have to expensively copy every packet it sees to +userspace. However, it's been repurposed to other areas where running +user code in-kernel is needed. For example, Linux's seccomp uses BPF +to apply security policies to system calls. For simplicity, this +documentation refers only to packets, but other uses of BPF have their +own data payloads. + +BPF programs run in a restricted virtual machine. It has almost no +access to kernel functions, and while conditional branches are +allowed, they can only jump forwards, to guarantee that there are no +infinite loops. + +# The virtual machine + +The BPF VM is an accumulator machine. Its main register, called +register A, is an implicit source and destination in all arithmetic +and logic operations. The machine also has 16 scratch registers for +temporary storage, and an indirection register (register X) for +indirect memory access. All registers are 32 bits wide. + +Each run of a BPF program is given one packet, which is placed in the +VM's read-only "main memory". LoadAbsolute and LoadIndirect +instructions can fetch up to 32 bits at a time into register A for +examination. + +The goal of a BPF program is to produce and return a verdict (uint32), +which tells the kernel what to do with the packet. In the context of +packet filtering, the returned value is the number of bytes of the +packet to forward to userspace, or 0 to ignore the packet. Other +contexts like seccomp define their own return values. + +In order to simplify programs, attempts to read past the end of the +packet terminate the program execution with a verdict of 0 (ignore +packet). This means that the vast majority of BPF programs don't need +to do any explicit bounds checking. + +In addition to the bytes of the packet, some BPF programs have access +to extensions, which are essentially calls to kernel utility +functions. Currently, the only extensions supported by this package +are the Linux packet filter extensions. + +# Examples + +This packet filter selects all ARP packets. + + bpf.Assemble([]bpf.Instruction{ + // Load "EtherType" field from the ethernet header. + bpf.LoadAbsolute{Off: 12, Size: 2}, + // Skip over the next instruction if EtherType is not ARP. + bpf.JumpIf{Cond: bpf.JumpNotEqual, Val: 0x0806, SkipTrue: 1}, + // Verdict is "send up to 4k of the packet to userspace." + bpf.RetConstant{Val: 4096}, + // Verdict is "ignore packet." + bpf.RetConstant{Val: 0}, + }) + +This packet filter captures a random 1% sample of traffic. + + bpf.Assemble([]bpf.Instruction{ + // Get a 32-bit random number from the Linux kernel. + bpf.LoadExtension{Num: bpf.ExtRand}, + // 1% dice roll? + bpf.JumpIf{Cond: bpf.JumpLessThan, Val: 2^32/100, SkipFalse: 1}, + // Capture. + bpf.RetConstant{Val: 4096}, + // Ignore. + bpf.RetConstant{Val: 0}, + }) +*/ +package bpf // import "golang.org/x/net/bpf" diff --git a/vendor/golang.org/x/net/bpf/instructions.go b/vendor/golang.org/x/net/bpf/instructions.go new file mode 100644 index 000000000..3cffcaa01 --- /dev/null +++ b/vendor/golang.org/x/net/bpf/instructions.go @@ -0,0 +1,726 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bpf + +import "fmt" + +// An Instruction is one instruction executed by the BPF virtual +// machine. +type Instruction interface { + // Assemble assembles the Instruction into a RawInstruction. + Assemble() (RawInstruction, error) +} + +// A RawInstruction is a raw BPF virtual machine instruction. +type RawInstruction struct { + // Operation to execute. + Op uint16 + // For conditional jump instructions, the number of instructions + // to skip if the condition is true/false. + Jt uint8 + Jf uint8 + // Constant parameter. The meaning depends on the Op. + K uint32 +} + +// Assemble implements the Instruction Assemble method. +func (ri RawInstruction) Assemble() (RawInstruction, error) { return ri, nil } + +// Disassemble parses ri into an Instruction and returns it. If ri is +// not recognized by this package, ri itself is returned. +func (ri RawInstruction) Disassemble() Instruction { + switch ri.Op & opMaskCls { + case opClsLoadA, opClsLoadX: + reg := Register(ri.Op & opMaskLoadDest) + sz := 0 + switch ri.Op & opMaskLoadWidth { + case opLoadWidth4: + sz = 4 + case opLoadWidth2: + sz = 2 + case opLoadWidth1: + sz = 1 + default: + return ri + } + switch ri.Op & opMaskLoadMode { + case opAddrModeImmediate: + if sz != 4 { + return ri + } + return LoadConstant{Dst: reg, Val: ri.K} + case opAddrModeScratch: + if sz != 4 || ri.K > 15 { + return ri + } + return LoadScratch{Dst: reg, N: int(ri.K)} + case opAddrModeAbsolute: + if ri.K > extOffset+0xffffffff { + return LoadExtension{Num: Extension(-extOffset + ri.K)} + } + return LoadAbsolute{Size: sz, Off: ri.K} + case opAddrModeIndirect: + return LoadIndirect{Size: sz, Off: ri.K} + case opAddrModePacketLen: + if sz != 4 { + return ri + } + return LoadExtension{Num: ExtLen} + case opAddrModeMemShift: + return LoadMemShift{Off: ri.K} + default: + return ri + } + + case opClsStoreA: + if ri.Op != opClsStoreA || ri.K > 15 { + return ri + } + return StoreScratch{Src: RegA, N: int(ri.K)} + + case opClsStoreX: + if ri.Op != opClsStoreX || ri.K > 15 { + return ri + } + return StoreScratch{Src: RegX, N: int(ri.K)} + + case opClsALU: + switch op := ALUOp(ri.Op & opMaskOperator); op { + case ALUOpAdd, ALUOpSub, ALUOpMul, ALUOpDiv, ALUOpOr, ALUOpAnd, ALUOpShiftLeft, ALUOpShiftRight, ALUOpMod, ALUOpXor: + switch operand := opOperand(ri.Op & opMaskOperand); operand { + case opOperandX: + return ALUOpX{Op: op} + case opOperandConstant: + return ALUOpConstant{Op: op, Val: ri.K} + default: + return ri + } + case aluOpNeg: + return NegateA{} + default: + return ri + } + + case opClsJump: + switch op := jumpOp(ri.Op & opMaskOperator); op { + case opJumpAlways: + return Jump{Skip: ri.K} + case opJumpEqual, opJumpGT, opJumpGE, opJumpSet: + cond, skipTrue, skipFalse := jumpOpToTest(op, ri.Jt, ri.Jf) + switch operand := opOperand(ri.Op & opMaskOperand); operand { + case opOperandX: + return JumpIfX{Cond: cond, SkipTrue: skipTrue, SkipFalse: skipFalse} + case opOperandConstant: + return JumpIf{Cond: cond, Val: ri.K, SkipTrue: skipTrue, SkipFalse: skipFalse} + default: + return ri + } + default: + return ri + } + + case opClsReturn: + switch ri.Op { + case opClsReturn | opRetSrcA: + return RetA{} + case opClsReturn | opRetSrcConstant: + return RetConstant{Val: ri.K} + default: + return ri + } + + case opClsMisc: + switch ri.Op { + case opClsMisc | opMiscTAX: + return TAX{} + case opClsMisc | opMiscTXA: + return TXA{} + default: + return ri + } + + default: + panic("unreachable") // switch is exhaustive on the bit pattern + } +} + +func jumpOpToTest(op jumpOp, skipTrue uint8, skipFalse uint8) (JumpTest, uint8, uint8) { + var test JumpTest + + // Decode "fake" jump conditions that don't appear in machine code + // Ensures the Assemble -> Disassemble stage recreates the same instructions + // See https://github.com/golang/go/issues/18470 + if skipTrue == 0 { + switch op { + case opJumpEqual: + test = JumpNotEqual + case opJumpGT: + test = JumpLessOrEqual + case opJumpGE: + test = JumpLessThan + case opJumpSet: + test = JumpBitsNotSet + } + + return test, skipFalse, 0 + } + + switch op { + case opJumpEqual: + test = JumpEqual + case opJumpGT: + test = JumpGreaterThan + case opJumpGE: + test = JumpGreaterOrEqual + case opJumpSet: + test = JumpBitsSet + } + + return test, skipTrue, skipFalse +} + +// LoadConstant loads Val into register Dst. +type LoadConstant struct { + Dst Register + Val uint32 +} + +// Assemble implements the Instruction Assemble method. +func (a LoadConstant) Assemble() (RawInstruction, error) { + return assembleLoad(a.Dst, 4, opAddrModeImmediate, a.Val) +} + +// String returns the instruction in assembler notation. +func (a LoadConstant) String() string { + switch a.Dst { + case RegA: + return fmt.Sprintf("ld #%d", a.Val) + case RegX: + return fmt.Sprintf("ldx #%d", a.Val) + default: + return fmt.Sprintf("unknown instruction: %#v", a) + } +} + +// LoadScratch loads scratch[N] into register Dst. +type LoadScratch struct { + Dst Register + N int // 0-15 +} + +// Assemble implements the Instruction Assemble method. +func (a LoadScratch) Assemble() (RawInstruction, error) { + if a.N < 0 || a.N > 15 { + return RawInstruction{}, fmt.Errorf("invalid scratch slot %d", a.N) + } + return assembleLoad(a.Dst, 4, opAddrModeScratch, uint32(a.N)) +} + +// String returns the instruction in assembler notation. +func (a LoadScratch) String() string { + switch a.Dst { + case RegA: + return fmt.Sprintf("ld M[%d]", a.N) + case RegX: + return fmt.Sprintf("ldx M[%d]", a.N) + default: + return fmt.Sprintf("unknown instruction: %#v", a) + } +} + +// LoadAbsolute loads packet[Off:Off+Size] as an integer value into +// register A. +type LoadAbsolute struct { + Off uint32 + Size int // 1, 2 or 4 +} + +// Assemble implements the Instruction Assemble method. +func (a LoadAbsolute) Assemble() (RawInstruction, error) { + return assembleLoad(RegA, a.Size, opAddrModeAbsolute, a.Off) +} + +// String returns the instruction in assembler notation. +func (a LoadAbsolute) String() string { + switch a.Size { + case 1: // byte + return fmt.Sprintf("ldb [%d]", a.Off) + case 2: // half word + return fmt.Sprintf("ldh [%d]", a.Off) + case 4: // word + if a.Off > extOffset+0xffffffff { + return LoadExtension{Num: Extension(a.Off + 0x1000)}.String() + } + return fmt.Sprintf("ld [%d]", a.Off) + default: + return fmt.Sprintf("unknown instruction: %#v", a) + } +} + +// LoadIndirect loads packet[X+Off:X+Off+Size] as an integer value +// into register A. +type LoadIndirect struct { + Off uint32 + Size int // 1, 2 or 4 +} + +// Assemble implements the Instruction Assemble method. +func (a LoadIndirect) Assemble() (RawInstruction, error) { + return assembleLoad(RegA, a.Size, opAddrModeIndirect, a.Off) +} + +// String returns the instruction in assembler notation. +func (a LoadIndirect) String() string { + switch a.Size { + case 1: // byte + return fmt.Sprintf("ldb [x + %d]", a.Off) + case 2: // half word + return fmt.Sprintf("ldh [x + %d]", a.Off) + case 4: // word + return fmt.Sprintf("ld [x + %d]", a.Off) + default: + return fmt.Sprintf("unknown instruction: %#v", a) + } +} + +// LoadMemShift multiplies the first 4 bits of the byte at packet[Off] +// by 4 and stores the result in register X. +// +// This instruction is mainly useful to load into X the length of an +// IPv4 packet header in a single instruction, rather than have to do +// the arithmetic on the header's first byte by hand. +type LoadMemShift struct { + Off uint32 +} + +// Assemble implements the Instruction Assemble method. +func (a LoadMemShift) Assemble() (RawInstruction, error) { + return assembleLoad(RegX, 1, opAddrModeMemShift, a.Off) +} + +// String returns the instruction in assembler notation. +func (a LoadMemShift) String() string { + return fmt.Sprintf("ldx 4*([%d]&0xf)", a.Off) +} + +// LoadExtension invokes a linux-specific extension and stores the +// result in register A. +type LoadExtension struct { + Num Extension +} + +// Assemble implements the Instruction Assemble method. +func (a LoadExtension) Assemble() (RawInstruction, error) { + if a.Num == ExtLen { + return assembleLoad(RegA, 4, opAddrModePacketLen, 0) + } + return assembleLoad(RegA, 4, opAddrModeAbsolute, uint32(extOffset+a.Num)) +} + +// String returns the instruction in assembler notation. +func (a LoadExtension) String() string { + switch a.Num { + case ExtLen: + return "ld #len" + case ExtProto: + return "ld #proto" + case ExtType: + return "ld #type" + case ExtPayloadOffset: + return "ld #poff" + case ExtInterfaceIndex: + return "ld #ifidx" + case ExtNetlinkAttr: + return "ld #nla" + case ExtNetlinkAttrNested: + return "ld #nlan" + case ExtMark: + return "ld #mark" + case ExtQueue: + return "ld #queue" + case ExtLinkLayerType: + return "ld #hatype" + case ExtRXHash: + return "ld #rxhash" + case ExtCPUID: + return "ld #cpu" + case ExtVLANTag: + return "ld #vlan_tci" + case ExtVLANTagPresent: + return "ld #vlan_avail" + case ExtVLANProto: + return "ld #vlan_tpid" + case ExtRand: + return "ld #rand" + default: + return fmt.Sprintf("unknown instruction: %#v", a) + } +} + +// StoreScratch stores register Src into scratch[N]. +type StoreScratch struct { + Src Register + N int // 0-15 +} + +// Assemble implements the Instruction Assemble method. +func (a StoreScratch) Assemble() (RawInstruction, error) { + if a.N < 0 || a.N > 15 { + return RawInstruction{}, fmt.Errorf("invalid scratch slot %d", a.N) + } + var op uint16 + switch a.Src { + case RegA: + op = opClsStoreA + case RegX: + op = opClsStoreX + default: + return RawInstruction{}, fmt.Errorf("invalid source register %v", a.Src) + } + + return RawInstruction{ + Op: op, + K: uint32(a.N), + }, nil +} + +// String returns the instruction in assembler notation. +func (a StoreScratch) String() string { + switch a.Src { + case RegA: + return fmt.Sprintf("st M[%d]", a.N) + case RegX: + return fmt.Sprintf("stx M[%d]", a.N) + default: + return fmt.Sprintf("unknown instruction: %#v", a) + } +} + +// ALUOpConstant executes A = A Val. +type ALUOpConstant struct { + Op ALUOp + Val uint32 +} + +// Assemble implements the Instruction Assemble method. +func (a ALUOpConstant) Assemble() (RawInstruction, error) { + return RawInstruction{ + Op: opClsALU | uint16(opOperandConstant) | uint16(a.Op), + K: a.Val, + }, nil +} + +// String returns the instruction in assembler notation. +func (a ALUOpConstant) String() string { + switch a.Op { + case ALUOpAdd: + return fmt.Sprintf("add #%d", a.Val) + case ALUOpSub: + return fmt.Sprintf("sub #%d", a.Val) + case ALUOpMul: + return fmt.Sprintf("mul #%d", a.Val) + case ALUOpDiv: + return fmt.Sprintf("div #%d", a.Val) + case ALUOpMod: + return fmt.Sprintf("mod #%d", a.Val) + case ALUOpAnd: + return fmt.Sprintf("and #%d", a.Val) + case ALUOpOr: + return fmt.Sprintf("or #%d", a.Val) + case ALUOpXor: + return fmt.Sprintf("xor #%d", a.Val) + case ALUOpShiftLeft: + return fmt.Sprintf("lsh #%d", a.Val) + case ALUOpShiftRight: + return fmt.Sprintf("rsh #%d", a.Val) + default: + return fmt.Sprintf("unknown instruction: %#v", a) + } +} + +// ALUOpX executes A = A X +type ALUOpX struct { + Op ALUOp +} + +// Assemble implements the Instruction Assemble method. +func (a ALUOpX) Assemble() (RawInstruction, error) { + return RawInstruction{ + Op: opClsALU | uint16(opOperandX) | uint16(a.Op), + }, nil +} + +// String returns the instruction in assembler notation. +func (a ALUOpX) String() string { + switch a.Op { + case ALUOpAdd: + return "add x" + case ALUOpSub: + return "sub x" + case ALUOpMul: + return "mul x" + case ALUOpDiv: + return "div x" + case ALUOpMod: + return "mod x" + case ALUOpAnd: + return "and x" + case ALUOpOr: + return "or x" + case ALUOpXor: + return "xor x" + case ALUOpShiftLeft: + return "lsh x" + case ALUOpShiftRight: + return "rsh x" + default: + return fmt.Sprintf("unknown instruction: %#v", a) + } +} + +// NegateA executes A = -A. +type NegateA struct{} + +// Assemble implements the Instruction Assemble method. +func (a NegateA) Assemble() (RawInstruction, error) { + return RawInstruction{ + Op: opClsALU | uint16(aluOpNeg), + }, nil +} + +// String returns the instruction in assembler notation. +func (a NegateA) String() string { + return fmt.Sprintf("neg") +} + +// Jump skips the following Skip instructions in the program. +type Jump struct { + Skip uint32 +} + +// Assemble implements the Instruction Assemble method. +func (a Jump) Assemble() (RawInstruction, error) { + return RawInstruction{ + Op: opClsJump | uint16(opJumpAlways), + K: a.Skip, + }, nil +} + +// String returns the instruction in assembler notation. +func (a Jump) String() string { + return fmt.Sprintf("ja %d", a.Skip) +} + +// JumpIf skips the following Skip instructions in the program if A +// Val is true. +type JumpIf struct { + Cond JumpTest + Val uint32 + SkipTrue uint8 + SkipFalse uint8 +} + +// Assemble implements the Instruction Assemble method. +func (a JumpIf) Assemble() (RawInstruction, error) { + return jumpToRaw(a.Cond, opOperandConstant, a.Val, a.SkipTrue, a.SkipFalse) +} + +// String returns the instruction in assembler notation. +func (a JumpIf) String() string { + return jumpToString(a.Cond, fmt.Sprintf("#%d", a.Val), a.SkipTrue, a.SkipFalse) +} + +// JumpIfX skips the following Skip instructions in the program if A +// X is true. +type JumpIfX struct { + Cond JumpTest + SkipTrue uint8 + SkipFalse uint8 +} + +// Assemble implements the Instruction Assemble method. +func (a JumpIfX) Assemble() (RawInstruction, error) { + return jumpToRaw(a.Cond, opOperandX, 0, a.SkipTrue, a.SkipFalse) +} + +// String returns the instruction in assembler notation. +func (a JumpIfX) String() string { + return jumpToString(a.Cond, "x", a.SkipTrue, a.SkipFalse) +} + +// jumpToRaw assembles a jump instruction into a RawInstruction +func jumpToRaw(test JumpTest, operand opOperand, k uint32, skipTrue, skipFalse uint8) (RawInstruction, error) { + var ( + cond jumpOp + flip bool + ) + switch test { + case JumpEqual: + cond = opJumpEqual + case JumpNotEqual: + cond, flip = opJumpEqual, true + case JumpGreaterThan: + cond = opJumpGT + case JumpLessThan: + cond, flip = opJumpGE, true + case JumpGreaterOrEqual: + cond = opJumpGE + case JumpLessOrEqual: + cond, flip = opJumpGT, true + case JumpBitsSet: + cond = opJumpSet + case JumpBitsNotSet: + cond, flip = opJumpSet, true + default: + return RawInstruction{}, fmt.Errorf("unknown JumpTest %v", test) + } + jt, jf := skipTrue, skipFalse + if flip { + jt, jf = jf, jt + } + return RawInstruction{ + Op: opClsJump | uint16(cond) | uint16(operand), + Jt: jt, + Jf: jf, + K: k, + }, nil +} + +// jumpToString converts a jump instruction to assembler notation +func jumpToString(cond JumpTest, operand string, skipTrue, skipFalse uint8) string { + switch cond { + // K == A + case JumpEqual: + return conditionalJump(operand, skipTrue, skipFalse, "jeq", "jneq") + // K != A + case JumpNotEqual: + return fmt.Sprintf("jneq %s,%d", operand, skipTrue) + // K > A + case JumpGreaterThan: + return conditionalJump(operand, skipTrue, skipFalse, "jgt", "jle") + // K < A + case JumpLessThan: + return fmt.Sprintf("jlt %s,%d", operand, skipTrue) + // K >= A + case JumpGreaterOrEqual: + return conditionalJump(operand, skipTrue, skipFalse, "jge", "jlt") + // K <= A + case JumpLessOrEqual: + return fmt.Sprintf("jle %s,%d", operand, skipTrue) + // K & A != 0 + case JumpBitsSet: + if skipFalse > 0 { + return fmt.Sprintf("jset %s,%d,%d", operand, skipTrue, skipFalse) + } + return fmt.Sprintf("jset %s,%d", operand, skipTrue) + // K & A == 0, there is no assembler instruction for JumpBitNotSet, use JumpBitSet and invert skips + case JumpBitsNotSet: + return jumpToString(JumpBitsSet, operand, skipFalse, skipTrue) + default: + return fmt.Sprintf("unknown JumpTest %#v", cond) + } +} + +func conditionalJump(operand string, skipTrue, skipFalse uint8, positiveJump, negativeJump string) string { + if skipTrue > 0 { + if skipFalse > 0 { + return fmt.Sprintf("%s %s,%d,%d", positiveJump, operand, skipTrue, skipFalse) + } + return fmt.Sprintf("%s %s,%d", positiveJump, operand, skipTrue) + } + return fmt.Sprintf("%s %s,%d", negativeJump, operand, skipFalse) +} + +// RetA exits the BPF program, returning the value of register A. +type RetA struct{} + +// Assemble implements the Instruction Assemble method. +func (a RetA) Assemble() (RawInstruction, error) { + return RawInstruction{ + Op: opClsReturn | opRetSrcA, + }, nil +} + +// String returns the instruction in assembler notation. +func (a RetA) String() string { + return fmt.Sprintf("ret a") +} + +// RetConstant exits the BPF program, returning a constant value. +type RetConstant struct { + Val uint32 +} + +// Assemble implements the Instruction Assemble method. +func (a RetConstant) Assemble() (RawInstruction, error) { + return RawInstruction{ + Op: opClsReturn | opRetSrcConstant, + K: a.Val, + }, nil +} + +// String returns the instruction in assembler notation. +func (a RetConstant) String() string { + return fmt.Sprintf("ret #%d", a.Val) +} + +// TXA copies the value of register X to register A. +type TXA struct{} + +// Assemble implements the Instruction Assemble method. +func (a TXA) Assemble() (RawInstruction, error) { + return RawInstruction{ + Op: opClsMisc | opMiscTXA, + }, nil +} + +// String returns the instruction in assembler notation. +func (a TXA) String() string { + return fmt.Sprintf("txa") +} + +// TAX copies the value of register A to register X. +type TAX struct{} + +// Assemble implements the Instruction Assemble method. +func (a TAX) Assemble() (RawInstruction, error) { + return RawInstruction{ + Op: opClsMisc | opMiscTAX, + }, nil +} + +// String returns the instruction in assembler notation. +func (a TAX) String() string { + return fmt.Sprintf("tax") +} + +func assembleLoad(dst Register, loadSize int, mode uint16, k uint32) (RawInstruction, error) { + var ( + cls uint16 + sz uint16 + ) + switch dst { + case RegA: + cls = opClsLoadA + case RegX: + cls = opClsLoadX + default: + return RawInstruction{}, fmt.Errorf("invalid target register %v", dst) + } + switch loadSize { + case 1: + sz = opLoadWidth1 + case 2: + sz = opLoadWidth2 + case 4: + sz = opLoadWidth4 + default: + return RawInstruction{}, fmt.Errorf("invalid load byte length %d", sz) + } + return RawInstruction{ + Op: cls | sz | mode, + K: k, + }, nil +} diff --git a/vendor/golang.org/x/net/bpf/setter.go b/vendor/golang.org/x/net/bpf/setter.go new file mode 100644 index 000000000..43e35f0ac --- /dev/null +++ b/vendor/golang.org/x/net/bpf/setter.go @@ -0,0 +1,10 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bpf + +// A Setter is a type which can attach a compiled BPF filter to itself. +type Setter interface { + SetBPF(filter []RawInstruction) error +} diff --git a/vendor/golang.org/x/net/bpf/vm.go b/vendor/golang.org/x/net/bpf/vm.go new file mode 100644 index 000000000..73f57f1f7 --- /dev/null +++ b/vendor/golang.org/x/net/bpf/vm.go @@ -0,0 +1,150 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bpf + +import ( + "errors" + "fmt" +) + +// A VM is an emulated BPF virtual machine. +type VM struct { + filter []Instruction +} + +// NewVM returns a new VM using the input BPF program. +func NewVM(filter []Instruction) (*VM, error) { + if len(filter) == 0 { + return nil, errors.New("one or more Instructions must be specified") + } + + for i, ins := range filter { + check := len(filter) - (i + 1) + switch ins := ins.(type) { + // Check for out-of-bounds jumps in instructions + case Jump: + if check <= int(ins.Skip) { + return nil, fmt.Errorf("cannot jump %d instructions; jumping past program bounds", ins.Skip) + } + case JumpIf: + if check <= int(ins.SkipTrue) { + return nil, fmt.Errorf("cannot jump %d instructions in true case; jumping past program bounds", ins.SkipTrue) + } + if check <= int(ins.SkipFalse) { + return nil, fmt.Errorf("cannot jump %d instructions in false case; jumping past program bounds", ins.SkipFalse) + } + case JumpIfX: + if check <= int(ins.SkipTrue) { + return nil, fmt.Errorf("cannot jump %d instructions in true case; jumping past program bounds", ins.SkipTrue) + } + if check <= int(ins.SkipFalse) { + return nil, fmt.Errorf("cannot jump %d instructions in false case; jumping past program bounds", ins.SkipFalse) + } + // Check for division or modulus by zero + case ALUOpConstant: + if ins.Val != 0 { + break + } + + switch ins.Op { + case ALUOpDiv, ALUOpMod: + return nil, errors.New("cannot divide by zero using ALUOpConstant") + } + // Check for unknown extensions + case LoadExtension: + switch ins.Num { + case ExtLen: + default: + return nil, fmt.Errorf("extension %d not implemented", ins.Num) + } + } + } + + // Make sure last instruction is a return instruction + switch filter[len(filter)-1].(type) { + case RetA, RetConstant: + default: + return nil, errors.New("BPF program must end with RetA or RetConstant") + } + + // Though our VM works using disassembled instructions, we + // attempt to assemble the input filter anyway to ensure it is compatible + // with an operating system VM. + _, err := Assemble(filter) + + return &VM{ + filter: filter, + }, err +} + +// Run runs the VM's BPF program against the input bytes. +// Run returns the number of bytes accepted by the BPF program, and any errors +// which occurred while processing the program. +func (v *VM) Run(in []byte) (int, error) { + var ( + // Registers of the virtual machine + regA uint32 + regX uint32 + regScratch [16]uint32 + + // OK is true if the program should continue processing the next + // instruction, or false if not, causing the loop to break + ok = true + ) + + // TODO(mdlayher): implement: + // - NegateA: + // - would require a change from uint32 registers to int32 + // registers + + // TODO(mdlayher): add interop tests that check signedness of ALU + // operations against kernel implementation, and make sure Go + // implementation matches behavior + + for i := 0; i < len(v.filter) && ok; i++ { + ins := v.filter[i] + + switch ins := ins.(type) { + case ALUOpConstant: + regA = aluOpConstant(ins, regA) + case ALUOpX: + regA, ok = aluOpX(ins, regA, regX) + case Jump: + i += int(ins.Skip) + case JumpIf: + jump := jumpIf(ins, regA) + i += jump + case JumpIfX: + jump := jumpIfX(ins, regA, regX) + i += jump + case LoadAbsolute: + regA, ok = loadAbsolute(ins, in) + case LoadConstant: + regA, regX = loadConstant(ins, regA, regX) + case LoadExtension: + regA = loadExtension(ins, in) + case LoadIndirect: + regA, ok = loadIndirect(ins, in, regX) + case LoadMemShift: + regX, ok = loadMemShift(ins, in) + case LoadScratch: + regA, regX = loadScratch(ins, regScratch, regA, regX) + case RetA: + return int(regA), nil + case RetConstant: + return int(ins.Val), nil + case StoreScratch: + regScratch = storeScratch(ins, regScratch, regA, regX) + case TAX: + regX = regA + case TXA: + regA = regX + default: + return 0, fmt.Errorf("unknown Instruction at index %d: %T", i, ins) + } + } + + return 0, nil +} diff --git a/vendor/golang.org/x/net/bpf/vm_instructions.go b/vendor/golang.org/x/net/bpf/vm_instructions.go new file mode 100644 index 000000000..0aa307c06 --- /dev/null +++ b/vendor/golang.org/x/net/bpf/vm_instructions.go @@ -0,0 +1,182 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bpf + +import ( + "encoding/binary" + "fmt" +) + +func aluOpConstant(ins ALUOpConstant, regA uint32) uint32 { + return aluOpCommon(ins.Op, regA, ins.Val) +} + +func aluOpX(ins ALUOpX, regA uint32, regX uint32) (uint32, bool) { + // Guard against division or modulus by zero by terminating + // the program, as the OS BPF VM does + if regX == 0 { + switch ins.Op { + case ALUOpDiv, ALUOpMod: + return 0, false + } + } + + return aluOpCommon(ins.Op, regA, regX), true +} + +func aluOpCommon(op ALUOp, regA uint32, value uint32) uint32 { + switch op { + case ALUOpAdd: + return regA + value + case ALUOpSub: + return regA - value + case ALUOpMul: + return regA * value + case ALUOpDiv: + // Division by zero not permitted by NewVM and aluOpX checks + return regA / value + case ALUOpOr: + return regA | value + case ALUOpAnd: + return regA & value + case ALUOpShiftLeft: + return regA << value + case ALUOpShiftRight: + return regA >> value + case ALUOpMod: + // Modulus by zero not permitted by NewVM and aluOpX checks + return regA % value + case ALUOpXor: + return regA ^ value + default: + return regA + } +} + +func jumpIf(ins JumpIf, regA uint32) int { + return jumpIfCommon(ins.Cond, ins.SkipTrue, ins.SkipFalse, regA, ins.Val) +} + +func jumpIfX(ins JumpIfX, regA uint32, regX uint32) int { + return jumpIfCommon(ins.Cond, ins.SkipTrue, ins.SkipFalse, regA, regX) +} + +func jumpIfCommon(cond JumpTest, skipTrue, skipFalse uint8, regA uint32, value uint32) int { + var ok bool + + switch cond { + case JumpEqual: + ok = regA == value + case JumpNotEqual: + ok = regA != value + case JumpGreaterThan: + ok = regA > value + case JumpLessThan: + ok = regA < value + case JumpGreaterOrEqual: + ok = regA >= value + case JumpLessOrEqual: + ok = regA <= value + case JumpBitsSet: + ok = (regA & value) != 0 + case JumpBitsNotSet: + ok = (regA & value) == 0 + } + + if ok { + return int(skipTrue) + } + + return int(skipFalse) +} + +func loadAbsolute(ins LoadAbsolute, in []byte) (uint32, bool) { + offset := int(ins.Off) + size := ins.Size + + return loadCommon(in, offset, size) +} + +func loadConstant(ins LoadConstant, regA uint32, regX uint32) (uint32, uint32) { + switch ins.Dst { + case RegA: + regA = ins.Val + case RegX: + regX = ins.Val + } + + return regA, regX +} + +func loadExtension(ins LoadExtension, in []byte) uint32 { + switch ins.Num { + case ExtLen: + return uint32(len(in)) + default: + panic(fmt.Sprintf("unimplemented extension: %d", ins.Num)) + } +} + +func loadIndirect(ins LoadIndirect, in []byte, regX uint32) (uint32, bool) { + offset := int(ins.Off) + int(regX) + size := ins.Size + + return loadCommon(in, offset, size) +} + +func loadMemShift(ins LoadMemShift, in []byte) (uint32, bool) { + offset := int(ins.Off) + + // Size of LoadMemShift is always 1 byte + if !inBounds(len(in), offset, 1) { + return 0, false + } + + // Mask off high 4 bits and multiply low 4 bits by 4 + return uint32(in[offset]&0x0f) * 4, true +} + +func inBounds(inLen int, offset int, size int) bool { + return offset+size <= inLen +} + +func loadCommon(in []byte, offset int, size int) (uint32, bool) { + if !inBounds(len(in), offset, size) { + return 0, false + } + + switch size { + case 1: + return uint32(in[offset]), true + case 2: + return uint32(binary.BigEndian.Uint16(in[offset : offset+size])), true + case 4: + return uint32(binary.BigEndian.Uint32(in[offset : offset+size])), true + default: + panic(fmt.Sprintf("invalid load size: %d", size)) + } +} + +func loadScratch(ins LoadScratch, regScratch [16]uint32, regA uint32, regX uint32) (uint32, uint32) { + switch ins.Dst { + case RegA: + regA = regScratch[ins.N] + case RegX: + regX = regScratch[ins.N] + } + + return regA, regX +} + +func storeScratch(ins StoreScratch, regScratch [16]uint32, regA uint32, regX uint32) [16]uint32 { + switch ins.Src { + case RegA: + regScratch[ins.N] = regA + case RegX: + regScratch[ins.N] = regX + } + + return regScratch +} diff --git a/vendor/golang.org/x/sync/errgroup/errgroup.go b/vendor/golang.org/x/sync/errgroup/errgroup.go new file mode 100644 index 000000000..b18efb743 --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/errgroup.go @@ -0,0 +1,132 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package errgroup provides synchronization, error propagation, and Context +// cancelation for groups of goroutines working on subtasks of a common task. +package errgroup + +import ( + "context" + "fmt" + "sync" +) + +type token struct{} + +// A Group is a collection of goroutines working on subtasks that are part of +// the same overall task. +// +// A zero Group is valid, has no limit on the number of active goroutines, +// and does not cancel on error. +type Group struct { + cancel func(error) + + wg sync.WaitGroup + + sem chan token + + errOnce sync.Once + err error +} + +func (g *Group) done() { + if g.sem != nil { + <-g.sem + } + g.wg.Done() +} + +// WithContext returns a new Group and an associated Context derived from ctx. +// +// The derived Context is canceled the first time a function passed to Go +// returns a non-nil error or the first time Wait returns, whichever occurs +// first. +func WithContext(ctx context.Context) (*Group, context.Context) { + ctx, cancel := withCancelCause(ctx) + return &Group{cancel: cancel}, ctx +} + +// Wait blocks until all function calls from the Go method have returned, then +// returns the first non-nil error (if any) from them. +func (g *Group) Wait() error { + g.wg.Wait() + if g.cancel != nil { + g.cancel(g.err) + } + return g.err +} + +// Go calls the given function in a new goroutine. +// It blocks until the new goroutine can be added without the number of +// active goroutines in the group exceeding the configured limit. +// +// The first call to return a non-nil error cancels the group's context, if the +// group was created by calling WithContext. The error will be returned by Wait. +func (g *Group) Go(f func() error) { + if g.sem != nil { + g.sem <- token{} + } + + g.wg.Add(1) + go func() { + defer g.done() + + if err := f(); err != nil { + g.errOnce.Do(func() { + g.err = err + if g.cancel != nil { + g.cancel(g.err) + } + }) + } + }() +} + +// TryGo calls the given function in a new goroutine only if the number of +// active goroutines in the group is currently below the configured limit. +// +// The return value reports whether the goroutine was started. +func (g *Group) TryGo(f func() error) bool { + if g.sem != nil { + select { + case g.sem <- token{}: + // Note: this allows barging iff channels in general allow barging. + default: + return false + } + } + + g.wg.Add(1) + go func() { + defer g.done() + + if err := f(); err != nil { + g.errOnce.Do(func() { + g.err = err + if g.cancel != nil { + g.cancel(g.err) + } + }) + } + }() + return true +} + +// SetLimit limits the number of active goroutines in this group to at most n. +// A negative value indicates no limit. +// +// Any subsequent call to the Go method will block until it can add an active +// goroutine without exceeding the configured limit. +// +// The limit must not be modified while any goroutines in the group are active. +func (g *Group) SetLimit(n int) { + if n < 0 { + g.sem = nil + return + } + if len(g.sem) != 0 { + panic(fmt.Errorf("errgroup: modify limit while %v goroutines in the group are still active", len(g.sem))) + } + g.sem = make(chan token, n) +} diff --git a/vendor/golang.org/x/sync/errgroup/go120.go b/vendor/golang.org/x/sync/errgroup/go120.go new file mode 100644 index 000000000..7d419d376 --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/go120.go @@ -0,0 +1,14 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build go1.20 +// +build go1.20 + +package errgroup + +import "context" + +func withCancelCause(parent context.Context) (context.Context, func(error)) { + return context.WithCancelCause(parent) +} diff --git a/vendor/golang.org/x/sync/errgroup/pre_go120.go b/vendor/golang.org/x/sync/errgroup/pre_go120.go new file mode 100644 index 000000000..1795c18ac --- /dev/null +++ b/vendor/golang.org/x/sync/errgroup/pre_go120.go @@ -0,0 +1,15 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build !go1.20 +// +build !go1.20 + +package errgroup + +import "context" + +func withCancelCause(parent context.Context) (context.Context, func(error)) { + ctx, cancel := context.WithCancel(parent) + return ctx, func(error) { cancel() } +} diff --git a/vendor/modules.txt b/vendor/modules.txt index afe4a207c..128f53db3 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -166,6 +166,12 @@ github.com/googleapis/gax-go/v2/internal # github.com/jmespath/go-jmespath v0.4.0 ## explicit; go 1.14 github.com/jmespath/go-jmespath +# github.com/mdlayher/socket v0.4.1 +## explicit; go 1.20 +github.com/mdlayher/socket +# github.com/mdlayher/vsock v1.2.1 +## explicit; go 1.20 +github.com/mdlayher/vsock # github.com/mitchellh/copystructure v1.2.0 ## explicit; go 1.15 github.com/mitchellh/copystructure @@ -225,6 +231,7 @@ golang.org/x/crypto/internal/alias golang.org/x/crypto/internal/poly1305 # golang.org/x/net v0.17.0 ## explicit; go 1.17 +golang.org/x/net/bpf golang.org/x/net/context golang.org/x/net/http/httpguts golang.org/x/net/http/httpproxy @@ -246,6 +253,7 @@ golang.org/x/oauth2/jws golang.org/x/oauth2/jwt # golang.org/x/sync v0.4.0 ## explicit; go 1.17 +golang.org/x/sync/errgroup golang.org/x/sync/semaphore # golang.org/x/sys v0.13.0 ## explicit; go 1.17