# test-telemetry-setup.yaml
# Test workflow that imitates the behavior of RAPIDS project PR workflows.
# It runs for pull requests and can also be started manually.
on: [pull_request, workflow_dispatch]

# Every `run` step in this workflow executes under bash.
defaults:
  run:
    shell: bash

# Workflow-wide environment: the git ref that triggered this run.
env:
  SHARED_ACTIONS_REF: ${{ github.ref }}
jobs:
  # Seeds the telemetry environment for this run and publishes the service name
  # as a job output so that a later job can verify it was not altered.
  base-env-setup:
    runs-on: ubuntu-latest
    # These will be stashed. The names are not arbitrary. They match special OpenTelemetry names
    # or names that are hard-coded in actions/scripts downstream.
    env:
      SHARED_ACTIONS_REPO: rapidsai/shared-actions
      SHARED_ACTIONS_REF: ${{ github.ref }}
      # This should stay the same throughout this workflow, but child workflows will each
      # have their own OTEL_SERVICE_NAME. It is generally the job name, including any matrix
      # elements. This is what distinguishes one job trace from another, so it is important
      # that it be distinct between jobs.
      OTEL_SERVICE_NAME: test-telemetry plus something
      # TODO: this should be set as an org-wide variable
      OTEL_EXPORTER_OTLP_ENDPOINT: https://tempo.gha-runners.nvidia.com:4318
      OTEL_EXPORTER_OTLP_PROTOCOL: "http/protobuf"
      OTEL_RESOURCE_ATTRIBUTES: "git.repository=${{ github.repository }},git.ref=${{ github.ref }},git.sha=${{ github.sha }},git.job_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
    outputs:
      service-name: ${{ steps.export.outputs.service_name }}
    steps:
      - name: Compute traceparent and stash telemetry-related env vars
        uses: rapidsai/shared-actions/telemetry-dispatch-stash-base-env-vars@telemetry-dispatch-actions
      - name: Export service name so we can check it below
        id: export
        # The redirect target is quoted so the output-file path is never word-split.
        run: echo "service_name=${OTEL_SERVICE_NAME}" >> "${GITHUB_OUTPUT}"

  # Calls the reusable child workflow from shared-actions, passing this workflow's secrets along.
  child-workflow:
    needs: base-env-setup
    secrets: inherit
    uses: rapidsai/shared-actions/.github/workflows/test-child-workflow.yaml@telemetry-dispatch-actions

  # Writes the telemetry summary, then fetches the resulting trace and checks its contents.
  # continue-on-error keeps a failure of this job from failing the workflow run.
  summarize-top-level:
    runs-on: ubuntu-latest
    continue-on-error: true
    needs:
      - base-env-setup
      - child-workflow
    steps:
      - name: Load base env vars, including OTEL_SERVICE_NAME
        uses: rapidsai/shared-actions/telemetry-dispatch-load-base-env-vars@telemetry-dispatch-actions
        with:
          load_service_name: "true"
      - name: Check if service name took on an unexpected value
        run: |
          echo "(should be the value set to the OTEL_SERVICE_NAME env var in base-env-setup job)"
          [ "${OTEL_SERVICE_NAME}" = "${{ needs.base-env-setup.outputs.service-name }}" ] || exit 1
      - name: Telemetry summarize
        uses: rapidsai/shared-actions/telemetry-dispatch-write-summary@telemetry-dispatch-actions
        with:
          cert_concat: "${{ secrets.OTEL_EXPORTER_OTLP_CA_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE }};${{ secrets.OTEL_EXPORTER_OTLP_CLIENT_KEY }}"
      - name: Check if service name was altered during telemetry summary
        run: |
          echo "(should be the value set to the OTEL_SERVICE_NAME env var in base-env-setup job)"
          [ "${OTEL_SERVICE_NAME}" = "${{ needs.base-env-setup.outputs.service-name }}" ] || exit 1
      - name: Query the Tempo HTTP API and check that our trace is present and has expected properties
        # NOTE(review): TRACEPARENT and the files under /tmp/certs are presumably provided by
        # the earlier telemetry steps in this job -- confirm against those actions.
        run: |
          # The trace ID is the second dash-separated field of TRACEPARENT.
          TRACE_ID="$(cut -d '-' -f 2 <<< "$TRACEPARENT")"
          echo "Trace ID is: ${TRACE_ID}"
          # Build the query URL from the OTLP endpoint, swapping port 4318 for 3200.
          TRACE_URL="${OTEL_EXPORTER_OTLP_ENDPOINT/4318/3200}/api/traces/${TRACE_ID}"
          echo "Trace URL is: ${TRACE_URL}"
          # -S keeps curl quiet on success but prints a message if the transfer fails.
          curl \
            --cert /tmp/certs/client.crt.pem --key /tmp/certs/client.key.pem --cacert /tmp/certs/ca.crt.pem \
            -GsS "${TRACE_URL}" > trace_record.json
      - name: Upload trace record
        uses: actions/upload-artifact@v4
        with:
          name: trace-record
          path: trace_record.json
      - name: Validate span metadata
        # These are not returned in any particular order. The span kind is the only one
        # that we can reliably expect to be the same.
        run: |
          span_kind="$(jq -r '.batches[0].scopeSpans[0].spans[0].kind' trace_record.json)"
          echo "Checking if span kind is as expected"
          # Keep the expansion inside a single pair of quotes so it is not word-split or globbed.
          echo "Span kind is: ${span_kind}"
          [ "${span_kind}" = "SPAN_KIND_CLIENT" ] || exit 1
          echo "Verify that job names (also called service name) are correct"
          # Collect the distinct service.name values across all batches as a compact JSON array.
          job_names="$(jq -c '[.batches[].resource.attributes[] | select(.key == "service.name") | .value.stringValue] | unique' trace_record.json)"
          [ "$job_names" = '["child-workflow / Jobby McJobface (with)","child-workflow / Jobby McJobface (without)","test-telemetry plus something"]' ] || exit 1