Skip to content

Commit

Permalink
test: enable remote tests (#5103)
Browse files Browse the repository at this point in the history
* test: WIP enable remote tests

* fix(in-cluster-building): make sync more reliable after redeploy of util pod

Before this fix we sometimes synced to or from the old Pod

Co-authored-by: Mikael Högqvist Tabor <[email protected]>

* improvement: allow [email protected] to impersonate service account for tests

This allows google artifact registry access in the garden-ci project.
Using the service account impersonation has the benefit that we can allow any google group to impersonate this service account, so we do not need to manage access manually:
This only works because we granted the google group "[email protected]" permission to impersonate this service account with the following command:
`gcloud iam service-accounts add-iam-policy-binding [email protected] --member=group:[email protected] --role=roles/iam.serviceAccountTokenCreator`
We could extend this in the future to allow access to the CI cluster in the same way, without the need to manage users manually.

Co-authored-by: twelvemo <[email protected]>

* fix(test): kaniko should pull the image

* test(kaniko): fix deploy of simple service

* test: run remote tests in circleci

* fix(test): buildkit should pull image

* chore: improved error handling when k8s websocket fails

Co-authored-by: Steffen Neubauer <[email protected]>

* Revert "fix(in-cluster-building): make sync more reliable after redeploy of util pod"

This reverts commit c34b8ec.

* test: fix remote deploy with local docker build

Co-authored-by: Steffen Neubauer <[email protected]>

* improvement(test): better error messages on failed impersonation

* test: publish to remote registry with tags

* fix: scope

* test: improve error message

* fix: retry websocket errors

* test: getContainerTestGarden cleanup

* test: fix kaniko-project-namespace should build a simple container

co-authored-by: mikael <[email protected]>

* fix: local credentials directory

* test: further fixes to e2e build/publish tests

* fix: verbose error rendering

* test: helpers to check and clean local and remote repos

 Co-authored-by: Steffen Neubauer <[email protected]>

* chore: fixed linter issues

* test: fixed kaniko build publish tests

Co-authored-by: twelvemo <[email protected]>

* test: kaniko custom image

* test: buildkit tests with deployment registry

* test: buildkit rootless tests with deployment registry

* fix: tags tests

* test: more cases for deploying with a remote deployment registry

* chore: review comments

* test: skipping tests with shared state

* chore: dont throw wrapped internal error for kaniko fails

- move error handling of impersonated client to point of creation

Co-authored-by: Steffen Neubauer <[email protected]>

* chore: fix linting issue

* chore: fixed gcloud error code constant

---------

Co-authored-by: Mikael Högqvist Tabor <[email protected]>
Co-authored-by: twelvemo <[email protected]>
Co-authored-by: Mikael Hoegqvist Tabor <[email protected]>
Co-authored-by: Tim Beyer <[email protected]>
  • Loading branch information
5 people authored Oct 2, 2023
1 parent 3c112d2 commit d2c79de
Show file tree
Hide file tree
Showing 35 changed files with 16,266 additions and 16,608 deletions.
28 changes: 27 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -721,7 +721,12 @@ jobs:
skipTests:
description: Integ test groups to skip
type: string
# Note: By default, we skip tests that only work for remote environments
default: "kaniko remote-only"
remoteCluster:
description: "Configure remote cluster"
type: boolean
default: false
environment:
<<: *shared-env-config
K8S_VERSION: <<parameters.kubernetesVersion>>
Expand Down Expand Up @@ -760,15 +765,25 @@ jobs:
# Work around annoying issue on recent minikubes where namespaces take a long time to generate default service account
kubectl create namespace container-default
sleep 10
- when:
condition: <<parameters.remoteCluster>>
steps:
- configure_remote_cluster
- run:
name: Integ tests
# Note: We skip tests that only work for remote environments
command: |
cd core
GARDEN_SKIP_TESTS="<<parameters.skipTests>>" npm run _integ
- run:
name: Deploy demo-project
command: ./bin/garden deploy --root examples/demo-project
- when:
condition: <<parameters.remoteCluster>>
steps:
# NOTE: currently we do not create a namespace in the remote ci cluster during remote integ tests.
# If we start doing that, we need to use the environment variable CIRCLE_BUILD_NUM in the tests.
- cleanup_remote_cluster:
filter: $CIRCLE_BUILD_NUM-remote

test-k3s:
machine:
Expand Down Expand Up @@ -1067,6 +1082,17 @@ workflows:
# project: vote-helm-modules
# requires: [build]

### REMOTE TESTS ###
# The remote tests (e.g. kaniko and buildkit) still require a local kubernetes cluster for some reason…
- test-minikube:
<<: *only-internal-prs
name: test-remote
requires: [build]
kubernetesVersion: "1.23.3"
minikubeVersion: "v1.25.2"
skipTests: "local-only"
remoteCluster: true

### VM TESTS ###

# Note: The below is a not-quite-random mix of local k8s variants and k8s versions. Doing a full matrix of every
Expand Down
5 changes: 2 additions & 3 deletions core/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
"@jsdevtools/readdir-enhanced": "^6.0.4",
"@kubernetes/client-node": "=0.18.0",
"@opentelemetry/api": "^1.4.1",
"@opentelemetry/core": "^1.14.0",
"@opentelemetry/exporter-trace-otlp-http": "^0.40.0",
"@opentelemetry/instrumentation-http": "^0.40.0",
"@opentelemetry/otlp-exporter-base": "^0.40.0",
Expand Down Expand Up @@ -173,7 +172,7 @@
"@commitlint/cli": "^17.6.5",
"@commitlint/config-conventional": "^17.6.5",
"@garden-io/platform-api-types": "1.914.0",
"@google-cloud/kms": "^3.7.0",
"@google-cloud/artifact-registry": "^3.0.1",
"@types/analytics-node": "^3.1.10",
"@types/async": "^3.2.18",
"@types/async-lock": "^1.4.0",
Expand All @@ -184,7 +183,6 @@
"@types/fs-extra": "^11.0.1",
"@types/glob": "^7.2.0",
"@types/global-agent": "^2.1.1",
"@types/google-cloud__kms": "^1.5.4",
"@types/got": "^9.6.12",
"@types/hapi__joi": "^17.1.9",
"@types/has-ansi": "^3.0.0",
Expand Down Expand Up @@ -244,6 +242,7 @@
"eslint-plugin-no-null": "^1.0.2",
"eslint-plugin-react": "^7.32.2",
"finalhandler": "^1.2.0",
"google-auth-library": "^9.0.0",
"is-subset": "^0.1.1",
"md5": "^2.3.0",
"mocha": "^10.2.0",
Expand Down
14 changes: 3 additions & 11 deletions core/src/exceptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,9 @@ export abstract class GardenError extends Error {
`

if (this.wrappedErrors) {
return dedent`
${errorDetails}
Wrapped errors:
${this.wrappedErrors?.map(
(e) => dedent`
${indentString(e.toString(verbose), 3).trim()}
`
)}
`
return `${errorDetails}\nWrapped errors:\n${this.wrappedErrors?.map(
(e) => `⮑ ${indentString(e.toString(verbose), 3).trim()}`
)}`
} else {
return errorDetails
}
Expand Down
2 changes: 1 addition & 1 deletion core/src/plugins/container/build.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ export const getContainerBuildStatus: BuildActionHandler<"getStatus", ContainerB
log,
}) => {
const outputs = action.getOutputs()
const identifier = await containerHelpers.imageExistsLocally(outputs.localImageId, log, ctx)
const { identifier } = (await containerHelpers.getLocalImageInfo(outputs.localImageId, log, ctx)) || {}

if (identifier) {
log.debug(`Image ${identifier} already exists`)
Expand Down
39 changes: 36 additions & 3 deletions core/src/plugins/container/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -233,15 +233,48 @@ const helpers = {
}
},

async imageExistsLocally(identifier: string, log: Log, ctx: PluginContext) {
// Verifies the existance of a local image with the given identifier
// and returns the identifier with a list of corresponding image IDs
async getLocalImageInfo(
identifier: string,
log: Log,
ctx: PluginContext
): Promise<{ identifier: string; imageIds: string[] } | undefined> {
const result = await helpers.dockerCli({
cwd: ctx.projectRoot,
args: ["images", identifier, "-q"],
log,
ctx,
})
const exists = result.stdout!.length > 0
return exists ? identifier : null

if (result.stdout!.length === 0) {
return undefined
}

const imageIds = result.stdout.split("\n")
return { identifier, imageIds }
},

// Remove all images for a given identifier
async removeLocalImage(identifier: string, log: Log, ctx: PluginContext) {
const { imageIds } = (await containerHelpers.getLocalImageInfo(identifier, log, ctx)) || {}

if (!imageIds) {
return undefined
}

const result = await helpers.dockerCli({
cwd: ctx.projectRoot,
args: ["rmi", "--force", imageIds.join(" ").trim()],
log,
ctx,
})

if (result.stdout!.length === 0) {
return undefined
}

return { identifier, imageIds }
},

/**
Expand Down
44 changes: 0 additions & 44 deletions core/src/plugins/kubernetes/container/build/build.ts

This file was deleted.

35 changes: 17 additions & 18 deletions core/src/plugins/kubernetes/container/build/common.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@

import AsyncLock from "async-lock"
import { ContainerBuildAction, ContainerRegistryConfig } from "../../../container/moduleConfig"
import { getRunningDeploymentPod } from "../../util"
import { buildSyncVolumeName, dockerAuthSecretKey, k8sUtilImageName, rsyncPortName } from "../../constants"
import { KubeApi } from "../../api"
import { KubernetesPluginContext, KubernetesProvider } from "../../config"
import { PodRunner, PodRunnerError } from "../../run"
import { PluginContext } from "../../../../plugin-context"
import { hashString, sleep } from "../../../../util/util"
import { GardenError, InternalError, RuntimeError } from "../../../../exceptions"
import { InternalError, RuntimeError } from "../../../../exceptions"
import { Log } from "../../../../logger/log-entry"
import { prepareDockerAuth } from "../../init"
import { prepareSecrets } from "../../secrets"
Expand All @@ -31,6 +29,8 @@ import { k8sGetContainerBuildActionOutputs } from "../handlers"
import { Resolved } from "../../../../actions/types"
import { stringifyResources } from "../util"
import { getKubectlExecDestination } from "../../sync"
import { getRunningDeploymentPod } from "../../util"
import { buildSyncVolumeName, dockerAuthSecretKey, k8sUtilImageName, rsyncPortName } from "../../constants"

export const utilContainerName = "util"
export const utilRsyncPort = 8730
Expand Down Expand Up @@ -223,28 +223,26 @@ export async function skopeoBuildStatus({
})
return { state: "ready", outputs, detail: {} }
} catch (err) {
if (!(err instanceof GardenError)) {
throw err
}

if (err instanceof PodRunnerError) {
const res = err.details.result

// Non-zero exit code can both mean the manifest is not found, and any other unexpected error
if (res?.exitCode !== 0 && !skopeoManifestUnknown(res?.stderr)) {
const output = res?.allLogs || err.message

throw new RuntimeError({
message: `Unable to query registry for image status: Command "${skopeoCommand.join(
" "
)}" failed. This is the output:\n${output}`,
})
if (res?.exitCode !== 0 && skopeoManifestUnknown(res?.stderr)) {
// This would happen when the image does not exist, i.e. not ready
return { state: "not-ready", outputs, detail: {} }
}

const output = res?.allLogs || err.message

throw new RuntimeError({
message: `Unable to query registry for image status: Command "${skopeoCommand.join(
" "
)}" failed. This is the output:\n${output}`,
wrappedErrors: [err],
})
}

// TODO: Do we really want to return state: "unknown" with no details on any other error?
// NOTE(steffen): I'd have expected us to throw here
return { state: "unknown", outputs, detail: {} }
throw err
}
}

Expand All @@ -259,6 +257,7 @@ export function skopeoManifestUnknown(errMsg: string | null | undefined): boolea
return (
errMsg.includes("manifest unknown") ||
errMsg.includes("name unknown") ||
errMsg.includes("Failed to fetch") ||
/(artifact|repository) [^ ]+ not found/.test(errMsg)
)
}
Expand Down
15 changes: 11 additions & 4 deletions core/src/plugins/kubernetes/container/publish.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ import { BuildActionHandler } from "../../../plugin/action-types"
import { containerHelpers } from "../../container/helpers"

export const k8sPublishContainerBuild: BuildActionHandler<"publish", ContainerBuildAction> = async (params) => {
const { ctx, action, log } = params
const { ctx, action, log, tag } = params
const k8sCtx = ctx as KubernetesPluginContext
const provider = k8sCtx.provider

Expand All @@ -33,13 +33,20 @@ export const k8sPublishContainerBuild: BuildActionHandler<"publish", ContainerBu
await pullBuild({ ctx: k8sCtx, action, log, localId, remoteId })
}

log.info({ msg: `Publishing image ${remoteId}...` })
// optionally use the tag instead of the garden version, this requires that we tag the image locally
// before publishing to the remote registry

let remotePublishId = tag ? `${action.getOutput("deploymentImageName")}:${tag}` : remoteId

await containerHelpers.dockerCli({ cwd: action.getBuildPath(), args: ["tag", localId, remotePublishId], log, ctx })

log.info({ msg: `Publishing image ${remotePublishId}...` })
// TODO: stream output to log if at debug log level
await containerHelpers.dockerCli({ cwd: action.getBuildPath(), args: ["push", remoteId], log, ctx })
await containerHelpers.dockerCli({ cwd: action.getBuildPath(), args: ["push", remotePublishId], log, ctx })

return {
state: "ready",
detail: { published: true, message: `Published ${remoteId}` },
detail: { published: true, message: `Published ${remotePublishId}` },
// TODO-0.13.1
outputs: {},
}
Expand Down
14 changes: 4 additions & 10 deletions core/src/plugins/kubernetes/container/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,17 @@ import {
import { V1ResourceRequirements, V1SecurityContext } from "@kubernetes/client-node"
import { ConfigurationError } from "../../../exceptions"
import { Resolved } from "../../../actions/types"
import { containerHelpers } from "../../container/helpers"
import { kilobytesToString, megabytesToString, millicpuToString } from "../util"

export function getDeployedImageId(action: Resolved<ContainerRuntimeAction>, provider: KubernetesProvider): string {
const explicitImage = action.getSpec().image
const build = action.getResolvedBuildAction<Resolved<ContainerBuildAction>>()

if (explicitImage) {
// if there is a build, we had a configured dockerfile and should use that image
if (build) {
return build.getOutput("deployment-image-id")
} else if (explicitImage) {
return explicitImage
} else if (build) {
// TODO-0.13.0: we can get this off the BuildAction when static outputs are implemented
return containerHelpers.getBuildDeploymentImageId(
build.name,
undefined,
build.moduleVersion(),
provider.config.deploymentRegistry
)
} else {
throw new ConfigurationError({
message: `${action.longDescription()} specifies neither a \`build\` nor \`spec.image\``,
Expand Down
Loading

0 comments on commit d2c79de

Please sign in to comment.