Skip to content

Commit

Permalink
fix: node identity flip
Browse files Browse the repository at this point in the history
The issue shows up in our tests as:

```
=== RUN   TestIntegration/api.DiscoverySuite/TestRegistries
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
    discovery.go:210: waiting for cluster affiliates to be discovered: 4 expected, 6 found
```

It should be a minor issue for non-KubeSpan'ed clusters (as members get
correctly de-duplicated), but might cause connectivity issues for
KubeSpan'ed clusters.

The issue comes from the short mount in the sequencer around
`loadConfig` step: as the mount time is short, it triggers a race in the
node identity controller when it tries to read existing identity from
`/system/state`, but as the partition is unmounted by the time it tries
to read, it assumes there's no identity and establishes a new one.

Eventually, it will write new identity back to disk, but that new
identity is different from the previous one, so it creates another entry
for itself in the discovery service.

A proper solution is a volume mount controller, but a temporary band aid
is to avoid broadcasting mount notification for this short `STATE` mount
via resources, so that controller isn't triggered.

Signed-off-by: Andrey Smirnov <[email protected]>
  • Loading branch information
smira committed Dec 16, 2024
1 parent 590c016 commit c9c6851
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package v1alpha1

import (
"strconv"
"time"

"github.com/siderolabs/go-pointer"
"github.com/siderolabs/go-procfs/procfs"
Expand Down Expand Up @@ -176,6 +177,7 @@ func (*Sequencer) Install(r runtime.Runtime) []runtime.Phase {
).Append(
"saveConfig",
SaveConfig,
Sleep(time.Second),
).Append(
"unmountState",
UnmountStatePartition,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,21 @@ func SaveConfig(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
}, "saveConfig"
}

// Sleep represents the Sleep task.
func Sleep(d time.Duration) func(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
return func(_ runtime.Sequence, _ any) (runtime.TaskExecutionFunc, string) {
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error {
select {
case <-time.After(d):
case <-ctx.Done():
return ctx.Err()
}

return nil
}, "sleep"
}
}

// MemorySizeCheck represents the MemorySizeCheck task.
func MemorySizeCheck(runtime.Sequence, any) (runtime.TaskExecutionFunc, string) {
return func(ctx context.Context, logger *log.Logger, r runtime.Runtime) error {
Expand Down Expand Up @@ -1646,7 +1661,7 @@ func MountStatePartition(required bool) func(seq runtime.Sequence, _ any) (runti
}
}

return mount.SystemPartitionMount(ctx, r, logger, constants.StatePartitionLabel)
return mount.SystemPartitionMount(ctx, r, logger, constants.StatePartitionLabel, !required)
}, "mountStatePartition"
}
}
Expand All @@ -1665,7 +1680,7 @@ func MountEphemeralPartition(runtime.Sequence, any) (runtime.TaskExecutionFunc,
return err
}

return mount.SystemPartitionMount(ctx, r, logger, constants.EphemeralPartitionLabel,
return mount.SystemPartitionMount(ctx, r, logger, constants.EphemeralPartitionLabel, false,
mountv2.WithProjectQuota(r.Config().Machine().Features().DiskQuotaSupportEnabled()))
}, "mountEphemeralPartition"
}
Expand Down
43 changes: 24 additions & 19 deletions internal/pkg/mount/system.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ func IdempotentSystemPartitionMounter(r runtime.Runtime) func(label string, opts
return nil
}

return SystemPartitionMount(context.Background(), r, log.Default(), label, opts...)
return SystemPartitionMount(context.Background(), r, log.Default(), label, false, opts...)
}
}

Expand All @@ -48,7 +48,7 @@ func IsSystemPartitionMounted(label string) bool {
}

// SystemPartitionMount mounts a system partition by the label.
func SystemPartitionMount(ctx context.Context, r runtime.Runtime, logger *log.Logger, label string, opts ...mountv2.NewPointOption) (err error) {
func SystemPartitionMount(ctx context.Context, r runtime.Runtime, logger *log.Logger, label string, silent bool, opts ...mountv2.NewPointOption) (err error) {
volumeStatus, err := safe.StateGetByID[*block.VolumeStatus](ctx, r.State().V1Alpha2().Resources(), label)
if err != nil {
return fmt.Errorf("error getting volume status %q: %w", label, err)
Expand Down Expand Up @@ -77,26 +77,29 @@ func SystemPartitionMount(ctx context.Context, r runtime.Runtime, logger *log.Lo
return err
}

// record mount as the resource
mountStatus := runtimeres.NewMountStatus(v1alpha1.NamespaceName, label)
mountStatus.TypedSpec().Source = volumeStatus.TypedSpec().MountLocation
mountStatus.TypedSpec().Target = volumeConfig.TypedSpec().Mount.TargetPath
mountStatus.TypedSpec().FilesystemType = volumeStatus.TypedSpec().Filesystem.String()
mountStatus.TypedSpec().Encrypted = volumeStatus.TypedSpec().EncryptionProvider != block.EncryptionProviderNone
// silent mounts skip resource notification to other components
if !silent {
// record mount as the resource
mountStatus := runtimeres.NewMountStatus(v1alpha1.NamespaceName, label)
mountStatus.TypedSpec().Source = volumeStatus.TypedSpec().MountLocation
mountStatus.TypedSpec().Target = volumeConfig.TypedSpec().Mount.TargetPath
mountStatus.TypedSpec().FilesystemType = volumeStatus.TypedSpec().Filesystem.String()
mountStatus.TypedSpec().Encrypted = volumeStatus.TypedSpec().EncryptionProvider != block.EncryptionProviderNone

if mountStatus.TypedSpec().Encrypted {
encryptionProviders := make(map[string]struct{})
if mountStatus.TypedSpec().Encrypted {
encryptionProviders := make(map[string]struct{})

for _, cfg := range volumeConfig.TypedSpec().Encryption.Keys {
encryptionProviders[cfg.Type.String()] = struct{}{}
}
for _, cfg := range volumeConfig.TypedSpec().Encryption.Keys {
encryptionProviders[cfg.Type.String()] = struct{}{}
}

mountStatus.TypedSpec().EncryptionProviders = maps.Keys(encryptionProviders)
}
mountStatus.TypedSpec().EncryptionProviders = maps.Keys(encryptionProviders)
}

// ignore the error if the MountStatus already exists, as many mounts are silently skipped with the flag SkipIfMounted
if err = r.State().V1Alpha2().Resources().Create(context.Background(), mountStatus); err != nil && !state.IsConflictError(err) {
return fmt.Errorf("error creating mount status resource: %w", err)
// ignore the error if the MountStatus already exists, as many mounts are silently skipped with the flag SkipIfMounted
if err = r.State().V1Alpha2().Resources().Create(context.Background(), mountStatus); err != nil && !state.IsConflictError(err) {
return fmt.Errorf("error creating mount status resource: %w", err)
}
}

mountpointsMutex.Lock()
Expand Down Expand Up @@ -127,7 +130,9 @@ func SystemPartitionUnmount(r runtime.Runtime, logger *log.Logger, label string)
}

if err = r.State().V1Alpha2().Resources().Destroy(context.Background(), runtimeres.NewMountStatus(v1alpha1.NamespaceName, label).Metadata()); err != nil {
return fmt.Errorf("error destroying mount status resource: %w", err)
if !state.IsNotFoundError(err) {
return fmt.Errorf("error destroying mount status resource: %w", err)
}
}

mountpointsMutex.Lock()
Expand Down

0 comments on commit c9c6851

Please sign in to comment.