Skip to content

Commit

Permalink
tests/boot-mirror: bump memory request to 8G on ppc64le and aarch64
Browse files Browse the repository at this point in the history
All our other root reprovisioning tests double the memory request on
ppc64le and aarch64 due to the larger page size. Do this for the boot
mirroring tests too and increase the memory request to 8G. With the
current 4G, the machine under test would sometimes hit a kernel panic
during the reboot right after the primary block device is detached.

Even with 8G, the panic still happens, albeit much more rarely. Rather
than bumping the memory even more, I've found that sleeping for a short
while (30 seconds) between detaching the device and rebooting avoids it.

Partially fixes: #2725
Partially fixes: #3360
  • Loading branch information
jlebon authored and dustymabe committed Sep 14, 2023
1 parent 75945a3 commit b1fff53
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 1 deletion.
1 change: 1 addition & 0 deletions mantle/kola/harness.go
Original file line number Diff line number Diff line change
Expand Up @@ -1205,6 +1205,7 @@ ExecStart=%s

// Architectures using 64k pages use slightly more memory, ask for more than requested
// to make sure that we don't run out of it. Currently ppc64le and aarch64 use 64k pages.
// See similar logic in boot-mirror.go and luks.go.
switch coreosarch.CurrentRpmArch() {
case "ppc64le", "aarch64":
if targetMeta.MinMemory <= 4096 {
Expand Down
2 changes: 1 addition & 1 deletion mantle/kola/tests/ignition/luks.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ func runTest(c cluster.TestCluster, tpm2 bool, threshold int, killTangAfterFirst
opts := platform.MachineOptions{
MinMemory: 4096,
}
// ppc64le and aarch64 use 64K pages
// ppc64le and aarch64 use 64K pages; see similar logic in harness.go and boot-mirror.go
switch coreosarch.CurrentRpmArch() {
case "ppc64le", "aarch64":
opts.MinMemory = 8192
Expand Down
17 changes: 17 additions & 0 deletions mantle/kola/tests/misc/boot-mirror.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,11 @@ func runBootMirrorTest(c cluster.TestCluster) {
MinMemory: 4096,
},
}
// ppc64le and aarch64 use 64K pages; see similar logic in harness.go and luks.go
switch coreosarch.CurrentRpmArch() {
case "ppc64le", "aarch64":
options.MinMemory = 8192
}
// FIXME: for QEMU tests kola currently assumes the host CPU architecture
// matches the one under test
userdata := bootmirror.Subst("LAYOUT", coreosarch.CurrentRpmArch())
Expand Down Expand Up @@ -147,6 +152,11 @@ func runBootMirrorLUKSTest(c cluster.TestCluster) {
MinMemory: 4096,
},
}
// ppc64le and aarch64 use 64K pages; see similar logic in harness.go and luks.go
switch coreosarch.CurrentRpmArch() {
case "ppc64le", "aarch64":
options.MinMemory = 8192
}
// FIXME: for QEMU tests kola currently assumes the host CPU architecture
// matches the one under test
userdata := bootmirrorluks.Subst("LAYOUT", coreosarch.CurrentRpmArch())
Expand Down Expand Up @@ -230,6 +240,13 @@ func detachPrimaryBlockDevice(c cluster.TestCluster, m platform.Machine) {
}); err != nil {
c.Fatalf("Failed to retrieve boot ID: %v", err)
}

// Give some time to the host before doing the reboot. Without it, we've noticed
// that rebooting too quickly after ripping out the primary device can trigger
// a kernel panic on ppc64le. This may be memory-related since the same panic
// happens more easily if memory is lowered to 4G.
time.Sleep(30 * time.Second)

err := m.Reboot()
if err != nil {
c.Fatalf("Failed to reboot the machine: %v", err)
Expand Down

0 comments on commit b1fff53

Please sign in to comment.