From cb934482ea7172d58210a7bbc6180975852dea01 Mon Sep 17 00:00:00 2001 From: Dennis Marttinen Date: Mon, 4 Jul 2022 16:51:49 +0300 Subject: [PATCH] feat: dhcpv4: send current hostname, fix spec compliance of renewals This adds support for automatically registering node hostnames in DNS by sending the current hostname to DHCP via option 12. If the current hostname is updated, issue a new DISCOVER to propagate the update to DHCP (updating the hostname on lease renewals is not universally supported by DHCP servers). This addition maintains the previous functionality where the node can also request its hostname from the DHCP server. The received hostname will be processed and prioritized as usual by the `network.HostnameSpecController`. This change set also contains fixes to make DHCP renewals compliant with RFC 2131, specifically avoiding sending the server identifier and requested IP address when issuing renewals using a previous offer. This also uncovered an issue in the upstream `insomniacslk/dhcp` library, for which a fix is now pending at https://github.com/insomniacslk/dhcp/pull/469. As upstream contributions seem to have stalled, I have temporarily added a replacement for the library to my own fork that carries the fix. Sending hostname updates have been tested against `dnsmasq` and the built-in DHCP + DNS services in Windows Server. Hostname retrieval from DHCP and edge cases with overridden hostnames from different configuration layers have been extensively tested against `dnsmasq`. Signed-off-by: Dennis Marttinen --- go.mod | 4 + .../pkg/controllers/network/operator/dhcp4.go | 331 ++++++++++++++---- .../network/operator/utils/utils.go | 22 ++ .../pkg/controllers/network/operator_spec.go | 2 +- pkg/machinery/resources/network/condition.go | 2 +- .../resources/network/hostname_spec.go | 2 +- 6 files changed, 300 insertions(+), 63 deletions(-) create mode 100644 internal/app/machined/pkg/controllers/network/operator/utils/utils.go diff --git a/go.mod b/go.mod index 6cb5978c1ef..66c3e053d52 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,10 @@ module github.com/siderolabs/talos go 1.20 replace ( + // Forked insomniacslk/dhcp with DHCP spec compliancy fixes + // Upstream PR: https://github.com/insomniacslk/dhcp/pull/469 + github.com/insomniacslk/dhcp => github.com/twelho/dhcp v0.0.0-20220911105712-2eae1fc34ef5 + // Use nested module. github.com/siderolabs/talos/pkg/machinery => ./pkg/machinery diff --git a/internal/app/machined/pkg/controllers/network/operator/dhcp4.go b/internal/app/machined/pkg/controllers/network/operator/dhcp4.go index e0fff4eb0fb..c89b3e01aa3 100644 --- a/internal/app/machined/pkg/controllers/network/operator/dhcp4.go +++ b/internal/app/machined/pkg/controllers/network/operator/dhcp4.go @@ -14,12 +14,15 @@ import ( "sync" "time" + "github.com/cosi-project/runtime/pkg/resource" + "github.com/cosi-project/runtime/pkg/state" "github.com/insomniacslk/dhcp/dhcpv4" "github.com/insomniacslk/dhcp/dhcpv4/nclient4" "github.com/siderolabs/gen/slices" "go.uber.org/zap" "go4.org/netipx" + "github.com/talos-systems/talos/internal/app/machined/pkg/controllers/network/operator/utils" "github.com/siderolabs/talos/internal/app/machined/pkg/runtime" "github.com/siderolabs/talos/pkg/machinery/nethelpers" "github.com/siderolabs/talos/pkg/machinery/resources/network" @@ -28,13 +31,14 @@ import ( // DHCP4 implements the DHCPv4 network operator. type DHCP4 struct { logger *zap.Logger + state state.State linkName string routeMetric uint32 skipHostnameRequest bool requestMTU bool - offer *dhcpv4.DHCPv4 + lease *nclient4.Lease mu sync.Mutex addresses []network.AddressSpecSpec @@ -46,9 +50,10 @@ type DHCP4 struct { } // NewDHCP4 creates DHCPv4 operator. -func NewDHCP4(logger *zap.Logger, linkName string, config network.DHCP4OperatorSpec, platform runtime.Platform) *DHCP4 { +func NewDHCP4(logger *zap.Logger, linkName string, config network.DHCP4OperatorSpec, platform runtime.Platform, state state.State) *DHCP4 { return &DHCP4{ logger: logger, + state: state, linkName: linkName, routeMetric: config.RouteMetric, skipHostnameRequest: config.SkipHostnameRequest, @@ -64,25 +69,134 @@ func (d *DHCP4) Prefix() string { return fmt.Sprintf("dhcp4/%s", d.linkName) } +// extractHostname extracts a hostname from the given resource if it is a valid network.HostnameStatus. +func extractHostname(res resource.Resource) string { + if res, ok := res.(*network.HostnameStatus); ok { + return res.TypedSpec().Hostname + } + + return "" +} + +// setupHostnameWatch returns the initial hostname and a channel that outputs all events related to hostname changes. +func (d *DHCP4) setupHostnameWatch(ctx context.Context) (string, <-chan state.Event, error) { + hostnameWatchCh := make(chan state.Event) + if err := d.state.Watch(ctx, resource.NewMetadata( + network.NamespaceName, + network.HostnameStatusType, + network.HostnameID, + resource.VersionUndefined, + ), hostnameWatchCh); err != nil { + return "", nil, err + } + + return extractHostname((<-hostnameWatchCh).Resource), hostnameWatchCh, nil +} + +// knownHostname checks if the given hostname has been defined by this operator. +func (d *DHCP4) knownHostname(hostname string) bool { + for i := range d.hostname { + if d.hostname[i].Hostname == hostname { + return true + } + } + + return false +} + +// waitForNetworkReady waits for the network to be ready and the leased address to +// be assigned to the associated so that unicast operations can bind successfully. +func (d *DHCP4) waitForNetworkReady(ctx context.Context) error { + // if an IP address has been registered, wait for the address association to be ready + if len(d.addresses) > 0 { + _, err := d.state.WatchFor(ctx, + resource.NewMetadata( + network.NamespaceName, + network.AddressStatusType, + network.AddressID(d.linkName, d.addresses[0].Address), + resource.VersionUndefined, + ), + state.WithPhases(resource.PhaseRunning), + ) + if err != nil { + return fmt.Errorf("failed to wait for the address association to be ready: %w", err) + } + } + + // wait for the network (address and connectivity) to be ready + if err := network.NewReadyCondition(d.state, network.AddressReady, network.ConnectivityReady).Wait(ctx); err != nil { + return fmt.Errorf("failed to wait for the network address and connectivity to be ready: %w", err) + } + + return nil +} + // Run the operator loop. // -//nolint:gocyclo,dupl +//nolint:gocyclo,cyclop,dupl func (d *DHCP4) Run(ctx context.Context, notifyCh chan<- struct{}) { const minRenewDuration = 5 * time.Second // protect from renewing too often renewInterval := minRenewDuration + hostname, hostnameWatchCh, err := d.setupHostnameWatch(ctx) + if err != nil && !errors.Is(err, context.Canceled) { + d.logger.Warn("failed to watch for hostname changes", zap.Error(err)) + } + for { - leaseTime, err := d.renew(ctx) + // track if we need to acquire a new lease + newLease := d.lease == nil + + // perform a lease request or renewal + leaseTime, err := d.requestRenew(ctx, hostname) if err != nil && !errors.Is(err, context.Canceled) { - d.logger.Warn("renew failed", zap.Error(err), zap.String("link", d.linkName)) + d.logger.Warn("request/renew failed", zap.Error(err), zap.String("link", d.linkName)) } + if newLease { + // notify the underlying controller about the new lease + if err == nil { + select { + case notifyCh <- struct{}{}: + case <-ctx.Done(): + return + } + } + + // wait for networking to be established before transitioning to unicast operations + if err = d.waitForNetworkReady(ctx); err != nil && !errors.Is(err, context.Canceled) { + d.logger.Warn("failed to wait for networking to become ready", zap.Error(err)) + } + } + + // DHCP hostname parroting protection: if e.g. `dnsmasq` receives a request that both sends + // a hostname and requests one, it will "parrot" the sent hostname back if no other name has + // been defined for the requesting host. That causes update anomalies, since removing a + // hostname defined previously by e.g. the configuration layer causes a copy of that + // hostname to live on in a spec defined by this operator, even though it isn't sourced from + // DHCP. + // + // To avoid this issue, never send and request a hostname in the same operation. When + // negotiating a new lease, send the current hostname when acquiring the lease, and follow + // up with a dedicated INFORM request to ask the server for a DHCP-defined hostname. When + // renewing a lease, we're free to always request a hostname (to detect server-side + // changes), since any changes to the node hostname will cause a lease invalidation and + // re-start the negotiation process. More details below. if err == nil { - select { - case notifyCh <- struct{}{}: - case <-ctx.Done(): - return + // request the node hostname from the DHCP server + err = d.requestHostname(ctx) + if err != nil && !errors.Is(err, context.Canceled) { + d.logger.Warn("hostname request failed", zap.Error(err), zap.String("link", d.linkName)) + } + + // notify the underlying controller about the received hostname and/or renewed lease + if err == nil { + select { + case notifyCh <- struct{}{}: + case <-ctx.Done(): + return + } } } @@ -96,10 +210,39 @@ func (d *DHCP4) Run(ctx context.Context, notifyCh chan<- struct{}) { renewInterval = minRenewDuration } - select { - case <-ctx.Done(): - return - case <-time.After(renewInterval): + for { + select { + case <-ctx.Done(): + return + case <-time.After(renewInterval): + case event := <-hostnameWatchCh: + // If the hostname resource was deleted entirely, we must still inform the DHCP + // server that the node has no hostname anymore. `extractHostname` will return a + // blank hostname for a Tombstone resource generated by a deletion event. + hostname = extractHostname(event.Resource) + + // If, on first invocation, the DHCP server has given a new hostname for the node, + // and the network.HostnameSpecController decides to apply it as a preferred + // hostname, this operator would unnecessarily drop the lease and restart DHCP + // discovery. Thus, if the selected hostname has been sourced from this operator, + // we don't need to do anything. + if d.knownHostname(hostname) { + continue + } + + // While updating the hostname together with a RENEW request works for dnsmasq, it + // doesn't work with the Windows Server DHCP + DNS. A hostname update via an + // INIT-REBOOT request also gets ignored. Thus, the only reliable way to update the + // hostname seems to be to forget the old release and initiate a new DISCOVER flow + // with the new hostname. RFC 2131 doesn't define any better way to do this, and + // since according to the spec the DISCOVER cannot be targeted at the previous + // lessor, the node may switch DHCP servers on hostname change. This is not that + // big of a concern though, since a single network should not have multiple + // competing DHCP servers in the first place. + d.lease = nil + } + + break } } } @@ -152,8 +295,33 @@ func (d *DHCP4) TimeServerSpecs() []network.TimeServerSpecSpec { return d.timeservers } +func (d *DHCP4) parseHostnameFromAck(ack *dhcpv4.DHCPv4) { + d.mu.Lock() + defer d.mu.Unlock() + + if ack.HostName() != "" && !d.skipHostnameRequest { + spec := network.HostnameSpecSpec{ + ConfigLayer: network.ConfigOperator, + } + + if err := spec.ParseFQDN(ack.HostName()); err == nil { + if ack.DomainName() != "" { + spec.Domainname = ack.DomainName() + } + + d.hostname = []network.HostnameSpecSpec{ + spec, + } + } else { + d.hostname = nil + } + } else { + d.hostname = nil + } +} + //nolint:gocyclo -func (d *DHCP4) parseAck(ack *dhcpv4.DHCPv4) { +func (d *DHCP4) parseNetworkConfigFromAck(ack *dhcpv4.DHCPv4) { d.mu.Lock() defer d.mu.Unlock() @@ -266,26 +434,6 @@ func (d *DHCP4) parseAck(ack *dhcpv4.DHCPv4) { d.resolvers = nil } - if ack.HostName() != "" && !d.skipHostnameRequest { - spec := network.HostnameSpecSpec{ - ConfigLayer: network.ConfigOperator, - } - - if err = spec.ParseFQDN(ack.HostName()); err == nil { - if ack.DomainName() != "" { - spec.Domainname = ack.DomainName() - } - - d.hostname = []network.HostnameSpecSpec{ - spec, - } - } else { - d.hostname = nil - } - } else { - d.hostname = nil - } - if len(ack.NTPServers()) > 0 { convertIP := func(ip net.IP) string { result, _ := netipx.FromStdIP(ip) @@ -304,10 +452,82 @@ func (d *DHCP4) parseAck(ack *dhcpv4.DHCPv4) { } } -func (d *DHCP4) renew(ctx context.Context) (time.Duration, error) { +func (d *DHCP4) newClient() (*nclient4.Client, error) { + var clientOpts []nclient4.ClientOpt + + // we have an existing lease, target the server with unicast + if d.lease != nil { + serverAddr, err := utils.ToUDPAddr(d.lease.Offer.ServerIPAddr, nclient4.ServerPort) + if err != nil { + return nil, err + } + + clientAddr, err := utils.ToUDPAddr(d.lease.Offer.YourIPAddr, nclient4.ClientPort) + if err != nil { + return nil, err + } + + // RFC 2131, section 4.3.2: + // DHCPREQUEST generated during RENEWING state: + // ... This message will be unicast, so no relay + // agents will be involved in its transmission. + clientOpts = append(clientOpts, + nclient4.WithServerAddr(serverAddr), + // WithUnicast must be specified manually, WithServerAddr is not enough + nclient4.WithUnicast(clientAddr), + ) + } + + // create a new client, the caller is responsible for closing it + return nclient4.New(d.linkName, clientOpts...) +} + +// requestHostname uses an INFORM request to request a hostname from DHCP as requesting +// it during a DISCOVER is not reliable when simultaneously sending the local hostname. +func (d *DHCP4) requestHostname(ctx context.Context) error { + opts := []dhcpv4.OptionCode{ + dhcpv4.OptionHostName, + dhcpv4.OptionDomainName, + } + + client, err := d.newClient() + if err != nil { + return err + } + + //nolint:errcheck + defer client.Close() + + d.logger.Debug("DHCP INFORM", zap.String("link", d.linkName)) + + // create an INFORM request for querying a hostname from DHCP + request, err := dhcpv4.NewInform(client.InterfaceAddr(), d.lease.Offer.YourIPAddr, + dhcpv4.WithOptionCopied(d.lease.Offer, dhcpv4.OptionServerIdentifier), + dhcpv4.WithRequestedOptions(opts...), + ) + if err != nil { + return err + } + + // send the request and wait for a response + ack, err := client.TransactAckNak(ctx, request) + if err != nil { + return err + } + + d.logger.Debug("DHCP ACK", zap.String("link", d.linkName), zap.String("dhcp", collapseSummary(ack.Summary()))) + + // parse the hostname from the response + d.parseHostnameFromAck(ack) + + return nil +} + +func (d *DHCP4) requestRenew(ctx context.Context, hostname string) (time.Duration, error) { opts := []dhcpv4.OptionCode{ dhcpv4.OptionClasslessStaticRoute, dhcpv4.OptionDomainNameServer, + // TODO(twelho): This is unused until network.ResolverSpec supports search domains dhcpv4.OptionDNSDomainSearchList, dhcpv4.OptionNTPServers, dhcpv4.OptionDomainName, @@ -322,50 +542,41 @@ func (d *DHCP4) renew(ctx context.Context) (time.Duration, error) { } mods := []dhcpv4.Modifier{dhcpv4.WithRequestedOptions(opts...)} - clientOpts := []nclient4.ClientOpt{} - if d.offer != nil { - // do not use broadcast, but send the packet to DHCP server directly - addr, err := net.ResolveUDPAddr("udp", d.offer.ServerIPAddr.String()+":67") - if err != nil { - return 0, err - } - - // by default it's set to 0.0.0.0 which actually breaks lease renew - d.offer.ClientIPAddr = d.offer.YourIPAddr - - clientOpts = append(clientOpts, nclient4.WithServerAddr(addr)) + // if the node has a hostname, always send it to the DHCP + // server with option 12 during lease acquisition and renewal + if len(hostname) > 0 { + mods = append(mods, dhcpv4.WithOption(dhcpv4.OptHostName(hostname))) } - cli, err := nclient4.New(d.linkName, clientOpts...) + client, err := d.newClient() if err != nil { return 0, err } //nolint:errcheck - defer cli.Close() - - var lease *nclient4.Lease + defer client.Close() - if d.offer != nil { - lease, err = cli.RequestFromOffer(ctx, d.offer, mods...) + if d.lease != nil { + d.logger.Debug("DHCP RENEW", zap.String("link", d.linkName)) + err = client.Renew(ctx, d.lease, mods...) } else { - lease, err = cli.Request(ctx, mods...) + d.logger.Debug("DHCP REQUEST", zap.String("link", d.linkName)) + d.lease, err = client.Request(ctx, mods...) } if err != nil { - // clear offer if request fails to start with discover sequence next time - d.offer = nil + // explicitly clear the lease on failure to start with the discovery sequence next time + d.lease = nil return 0, err } - d.logger.Debug("DHCP ACK", zap.String("link", d.linkName), zap.String("dhcp", collapseSummary(lease.ACK.Summary()))) + d.logger.Debug("DHCP ACK", zap.String("link", d.linkName), zap.String("dhcp", collapseSummary(d.lease.ACK.Summary()))) - d.offer = lease.Offer - d.parseAck(lease.ACK) + d.parseNetworkConfigFromAck(d.lease.ACK) - return lease.ACK.IPAddressLeaseTime(time.Minute * 30), nil + return d.lease.ACK.IPAddressLeaseTime(time.Minute * 30), nil } func collapseSummary(summary string) string { diff --git a/internal/app/machined/pkg/controllers/network/operator/utils/utils.go b/internal/app/machined/pkg/controllers/network/operator/utils/utils.go new file mode 100644 index 00000000000..29e00f45ad2 --- /dev/null +++ b/internal/app/machined/pkg/controllers/network/operator/utils/utils.go @@ -0,0 +1,22 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// Package utils provides common methods for operators. +package utils + +import ( + "fmt" + "net" + "net/netip" +) + +// ToUDPAddr combines the given net.IP and port to form a net.UDPAddr. +func ToUDPAddr(ip net.IP, port uint16) (*net.UDPAddr, error) { + addr, ok := netip.AddrFromSlice(ip) + if !ok { + return nil, fmt.Errorf("failed to parse %q as an IP address", ip) + } + + return net.UDPAddrFromAddrPort(netip.AddrPortFrom(addr, port)), nil +} diff --git a/internal/app/machined/pkg/controllers/network/operator_spec.go b/internal/app/machined/pkg/controllers/network/operator_spec.go index 984e79538ba..22469d9cb12 100644 --- a/internal/app/machined/pkg/controllers/network/operator_spec.go +++ b/internal/app/machined/pkg/controllers/network/operator_spec.go @@ -423,7 +423,7 @@ func (ctrl *OperatorSpecController) newOperator(logger *zap.Logger, spec *networ case network.OperatorDHCP4: logger = logger.With(zap.String("operator", "dhcp4")) - return operator.NewDHCP4(logger, spec.LinkName, spec.DHCP4, ctrl.V1alpha1Platform) + return operator.NewDHCP4(logger, spec.LinkName, spec.DHCP4, ctrl.V1alpha1Platform, ctrl.State) case network.OperatorDHCP6: logger = logger.With(zap.String("operator", "dhcp6")) diff --git a/pkg/machinery/resources/network/condition.go b/pkg/machinery/resources/network/condition.go index e12c5169ad6..93a691c0836 100644 --- a/pkg/machinery/resources/network/condition.go +++ b/pkg/machinery/resources/network/condition.go @@ -19,7 +19,7 @@ type ReadyCondition struct { checks []StatusCheck } -// NewReadyCondition builds a coondition which waits for the network to be ready. +// NewReadyCondition builds a condition which waits for the network to be ready. func NewReadyCondition(state state.State, checks ...StatusCheck) *ReadyCondition { return &ReadyCondition{ state: state, diff --git a/pkg/machinery/resources/network/hostname_spec.go b/pkg/machinery/resources/network/hostname_spec.go index d9ba8555786..4776e354430 100644 --- a/pkg/machinery/resources/network/hostname_spec.go +++ b/pkg/machinery/resources/network/hostname_spec.go @@ -25,7 +25,7 @@ type HostnameSpec = typed.Resource[HostnameSpecSpec, HostnameSpecExtension] // HostnameID is the ID of the singleton instance. const HostnameID resource.ID = "hostname" -// HostnameSpecSpec describes node nostname. +// HostnameSpecSpec describes node hostname. // //gotagsrewrite:gen type HostnameSpecSpec struct {