diff --git a/.ecrc b/.ecrc
new file mode 100644
index 0000000..bd7e3a2
--- /dev/null
+++ b/.ecrc
@@ -0,0 +1,20 @@
+# This file is the top-most EditorConfig file
+root = true
+
+# All Files
+[*]
+charset = utf-8
+indent_style = space
+indent_size = 2
+end_of_line = lf
+insert_final_newline = true
+trim_trailing_whitespace = true
+
+# Makefiles
+[{Makefile,**.mk}]
+indent_style = tab
+
+# Python Files
+[*.py]
+indent_style = space
+indent_size = 4
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..d3a1a5a
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,24 @@
+---
+version: 2
+updates:
+  - package-ecosystem: 'npm'
+    directory: '/'
+    schedule:
+      interval: 'weekly'
+      day: 'saturday'
+    versioning-strategy: 'increase'
+    labels:
+      - 'dependencies'
+      - 'automerge'
+    open-pull-requests-limit: 5
+    pull-request-branch-name:
+      separator: '-'
+    commit-message:
+      # cause a release for non-dev-deps
+      prefix: fix(deps)
+      # no release for dev-deps
+      prefix-development: chore(dev-deps)
+    ignore:
+      - dependency-name: '@salesforce/dev-scripts'
+      - dependency-name: '*'
+        update-types: ['version-update:semver-major']
diff --git a/.github/issue-labeler.yml b/.github/issue-labeler.yml
new file mode 100644
index 0000000..c56a158
--- /dev/null
+++ b/.github/issue-labeler.yml
@@ -0,0 +1,59 @@
+---
+# The regular expressions below are matched against issue titles and bodies to apply labels.
+
+brokers:
+  - '(broker|brokers|kafka|rabbitmq|nats|nats-streaming)'
+
+cloud:
+  - '(cloud|aws|azure|gcp|google|alibaba|tencent|oracle|digitalocean|linode|vultr|upcloud|scaleway|hetzner|ovh|cloudflare)'
+
+aws:
+  - '(aws|amazon|ec2|s3|lambda|cloudfront|route53|eks|ecr|ecs|fargate|cloudwatch|cloudtrail|cloudformation|cfn|cognito|iam|sts|sqs|sns)'
+
+azure:
+  - '(azure|azurerm|aks|acr|app-service|cosmosdb|keyvault|vm|virtual-machine|vnet|virtual-network|storage|blob|table|queue|file|cdn|frontdoor)'
+
+gcp:
+  - '(gcp|google|gke|gce|gcs|cloud-storage|cloud-functions|cloud-run|cloud-sql|cloud-spanner|cloud-pubsub|cloud-iot|cloud-tpu|cloud-ml|cloud-ai)'
+
+grafana:
+  - '(grafana|grafana-labs|grafana-cloud|grafana-enterprise|grafana-agent|grafana-loki|raintank|metrictank)'
+
+kubernetes:
+  - '(kube|k8s|kubernetes|cadvisor|kubeadm|kubectl|kube-?proxy|kube-scheduler|kube-controller|kube-?apiserver|kubelet)'
+
+loki:
+  - '(loki|gel|log|logql|enterprise(-| )?log|promtail)'
+
+mimir:
+  - '(mimir|gem|prometheus|promql|enterprise(-| )?metric)'
+
+ci-cd:
+  - '(lint|tag|release|deploy|ci|cd|pipeline|workflow|github-actions)'
+
+openshift:
+  - '(openshift|oc|oadm|oadmin|ocp|okd|openshift-origin)'
+
+otel:
+  - '(otel|instrument|telemetry|trace|tracing|opentelemetry|jaeger|zipkin)'
+
+system:
+  - '(system|linux|unix|macos|windows|freebsd|openbsd|netbsd|dragonfly|solaris|aix|esxi|vmware|ubuntu|debian|centos|rhel|fedora|suse|raspbian|raspberry)'
+
+pyroscope:
+  - '(pyroscope|profile|profiling|heap|goroutine|goroutines|flamegraph|flame)'
+
+tempo:
+  - '(tempo|trace|tracing|jaeger|zipkin|opentelemetry)'
+
+windows:
+  - '(windows|win|wmi|perfmon|eventlog|event-log|eventviewer|event-viewer)'
+
+utils:
+  - '(utils)'
+
+logs:
+  - '(logs)'
+
+metrics:
+  - '(metrics)'
diff --git a/.github/pr-labeler.yml b/.github/pr-labeler.yml
new file mode 100644
index 0000000..7cad3c8
--- /dev/null
+++ b/.github/pr-labeler.yml
@@ -0,0 +1,125 @@
+---
+# Use the following command to test globs locally:
+# compgen -G ''
+
+# Add 'brokers' label to any changes within 'modules' that reference 'brokers'
+brokers:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**brokers**'
+
+# Add 'cloud' label to any changes within 'modules' that reference 'cloud'
+cloud:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**cloud**'
+
+# Add 'aws' label to any changes within 'modules' that reference 'aws'
+aws:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/cloud/**aws**'
+
+# Add 'azure' label to any changes within 'modules' that reference 'azure'
+azure:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/cloud/**azure**'
+
+# Add 'gcp' label to any changes within 'modules' that reference 'gcp'
+gcp:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/cloud/**gcp**'
+
+# Add 'grafana' label to any changes within 'modules' that reference 'grafana'
+grafana:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/cloud/**grafana**'
+
+# Add 'collectors' label to any changes within 'modules' that reference 'collectors'
+collectors:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**collectors**'
+
+# Add 'ci-cd' label to any changes to CI/CD and repository tooling files
+ci-cd:
+  - changed-files:
+      - any-glob-to-any-file: '.*' # dotfiles
+      - any-glob-to-any-file: 'Makefile'
+      - any-glob-to-any-file: 'package.json'
+      - any-glob-to-any-file: 'tools/**/*'
+
+# Add 'kubernetes' label to any changes within 'modules' that reference 'kubernetes'
+kubernetes:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**kubernetes**'
+
+# Add 'networking' label to any changes within 'modules' that reference 'networking'
+networking:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**networking**'
+
+# Add 'source-control' label to any changes within 'modules' that reference 'source-control'
+source-control:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**source-control**'
+
+# Add 'system' label to any changes within 'modules' that reference 'system'
+system:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**system**'
+
+# Add 'ui' label to any changes within 'modules' that reference 'ui'
+ui:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**ui**'
+
+# Add 'utils' label to any changes within 'modules' that reference 'utils'
+utils:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**utils**'
+
+# Add 'loki' label to any changes within 'modules' that reference 'loki'
+loki:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**loki**'
+
+# Add 'mimir' label to any changes within 'modules' that reference 'mimir'
+mimir:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**mimir**'
+
+# Add 'logs' label to any changes within 'modules' that reference 'logs'
+logs:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**logs**'
+
+# Add 'metrics' label to any changes within 'modules' that reference 'metrics'
+metrics:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**metrics**'
+
+# Add 'otel' label to any changes within 'modules' that reference 'otel'
+otel:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**otel**'
+
+# Add 'pyroscope' label to any changes within 'modules' that reference 'pyroscope' or 'profil'
+pyroscope:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**pyroscope**'
+      - any-glob-to-any-file: 'modules/**/**profil**'
+
+# Add 'tempo' label to any changes within 'modules' that reference 'tempo' or 'trac'
+tempo:
+  - changed-files:
+      - any-glob-to-any-file: 'modules/**/**tempo**'
+      - any-glob-to-any-file: 'modules/**/**trac**'
+
+# Add 'unix' label to any changes within 'modules' that reference 'linux' or
'unix' or 'node-exporter' +unix: + - changed-files: + - any-glob-to-any-file: 'modules/**/**linux**' + - any-glob-to-any-file: 'modules/**/**unix**' + - any-glob-to-any-file: 'modules/**/**node-exporter**' + +# Add 'windows' label to any changes within 'modules' that references 'windows' +windows: + - changed-files: + - any-glob-to-any-file: 'modules/**/**windows**' diff --git a/.github/workflows/auto-approve-dependabot.yml b/.github/workflows/auto-approve-dependabot.yml new file mode 100644 index 0000000..3776741 --- /dev/null +++ b/.github/workflows/auto-approve-dependabot.yml @@ -0,0 +1,27 @@ +--- +name: Dependabot Auto-Approve +# yamllint disable-line rule:truthy +on: + # run when a PR is opened or updated to main + pull_request: + branches: ["main"] + +permissions: + pull-requests: write + +jobs: + dependabot: + runs-on: ubuntu-latest + if: github.actor == 'dependabot[bot]' + steps: + - name: Dependabot metadata + id: metadata + uses: dependabot/fetch-metadata@v1 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + + - name: Approve a PR + run: gh pr review --approve "$PR_URL" + env: + PR_URL: ${{github.event.pull_request.html_url}} + GH_TOKEN: ${{secrets.GITHUB_TOKEN}} diff --git a/.github/workflows/auto-merge.yml b/.github/workflows/auto-merge.yml new file mode 100644 index 0000000..5b9aaa1 --- /dev/null +++ b/.github/workflows/auto-merge.yml @@ -0,0 +1,30 @@ +--- +name: Auto-Merge +# yamllint disable-line rule:truthy +on: + pull_request: + types: + - labeled + - unlabeled + - synchronize + - opened + - edited + - ready_for_review + - reopened + - unlocked + pull_request_review: + types: + - submitted + check_suite: + types: + - completed + status: {} +jobs: + automerge: + runs-on: ubuntu-latest + steps: + - id: automerge + name: automerge + uses: "pascalgn/automerge-action@v0.16.2" + env: + GITHUB_TOKEN: "${{ secrets.GITHUB_TOKEN }}" diff --git a/.github/workflows/auto-tag.yml b/.github/workflows/auto-tag.yml new file mode 100644 index 0000000..95363ff --- /dev/null +++ b/.github/workflows/auto-tag.yml @@ -0,0 +1,28 @@ +--- +name: Bump version +on: + pull_request: + types: + - closed + branches: + - main + +jobs: + build: + if: github.event.pull_request.merged == true + runs-on: ubuntu-22.04 + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.merge_commit_sha }} + fetch-depth: '0' + + - name: Bump version and push tag + uses: anothrNick/github-tag-action@v1 # Don't use @master or @v1 unless you're happy to test the latest version + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # if you don't want to set write permissions use a PAT token + WITH_V: true + DEFAULT_BUMP: patch + INITIAL_VERSION: 0.1.0 diff --git a/.github/workflows/issue-labeler.yml b/.github/workflows/issue-labeler.yml new file mode 100644 index 0000000..b13a0e5 --- /dev/null +++ b/.github/workflows/issue-labeler.yml @@ -0,0 +1,22 @@ +--- +name: Issue Labeler + +# yamllint disable-line rule:truthy +on: + issues: + types: [opened, edited] + +permissions: + issues: write + contents: read + +jobs: + triage: + runs-on: ubuntu-latest + steps: + - uses: github/issue-labeler@v3.4 + with: + configuration-path: .github/issue-labeler.yml + include-title: 1 + include-body: 1 + repo-token: ${{ github.token }} diff --git a/.github/workflows/pr-labeler.yml b/.github/workflows/pr-labeler.yml new file mode 100644 index 0000000..396a569 --- /dev/null +++ b/.github/workflows/pr-labeler.yml @@ -0,0 +1,18 @@ +--- +name: Pull Request Labeler + +# yamllint disable-line 
rule:truthy +on: + - pull_request_target + +jobs: + triage: + permissions: + contents: read + pull-requests: write + runs-on: ubuntu-latest + steps: + - uses: actions/labeler@v5 + with: + # automatically add labels to pull requests based on the files changed + configuration-path: .github/pr-labeler.yml diff --git a/.github/workflows/reviewdog.yml b/.github/workflows/reviewdog.yml new file mode 100644 index 0000000..fbe7481 --- /dev/null +++ b/.github/workflows/reviewdog.yml @@ -0,0 +1,138 @@ +--- + name: ReviewDog + # yamllint disable-line rule:truthy + on: + pull_request: + branches: ["main"] + + jobs: + markdownlint: + name: runner / markdownlint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: markdownlint + uses: reviewdog/action-markdownlint@v0 + with: + github_token: ${{ secrets.GITHUB_TOKEN }} + reporter: github-check + + misspell: + name: runner / misspell + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: reviewdog/action-misspell@v1 + with: + github_token: ${{ secrets.github_token }} + locale: US + reporter: github-check + level: warning + path: | + ./modules + pattern: | + *.md + + alex: + name: runner / alex + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: reviewdog/action-alex@v1 + with: + github_token: ${{ secrets.github_token }} + reporter: github-check + # GitHub Status Check won't become failure with warning. + level: warning + + shellcheck: + name: runner / shellcheck + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: reviewdog/action-shellcheck@v1 + with: + github_token: ${{ secrets.github_token }} + reporter: github-check + + gitleaks: + name: runner / gitleaks + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: reviewdog/action-gitleaks@v1 + with: + github_token: ${{ secrets.github_token }} + reporter: github-check + + actionlint: + name: runner / actionlint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: reviewdog/action-actionlint@v1 + with: + github_token: ${{ secrets.github_token }} + reporter: github-check + + eclint: + name: runner / eclint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: reviewdog/action-eclint@v1 + with: + github_token: ${{ secrets.github_token }} + reporter: github-check + + textlint: + name: runner / textlint + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: reviewdog/action-setup@v1 + - run: npm install + - env: + REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + npx textlint --format checkstyle --config "$(pwd)/.textlintrc" modules/**/*.md | \ + reviewdog -f=checkstyle -name="textlint" -reporter=github-check -level=info + + grafana-agent: + name: runner / agent + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: reviewdog/action-setup@v1 + - run: | + GITHUB_ORG="grafana" && \ + GITHUB_REPO="agent" && \ + ARCHIVE="grafana-agent" && \ + BINARY="grafana-agent" && \ + BIN_PATH="/usr/local/bin" && \ + TMP_PATH="/tmp" && \ + AGENT_VERSION=$(wget -q -O - "https://api.github.com/repos/$GITHUB_ORG/$GITHUB_REPO/releases/latest" \ + | grep -m 1 tag_name \ + | cut -d '"' -f 4 | cut -c2-) && \ + HOST_OS=$(uname -a | awk '{print tolower($1)}') && \ + HOST_ARCH=$(arch | sed 's/x86_64/amd64/') && \ + DOWNLOAD_URL="https://github.com/$GITHUB_ORG/$GITHUB_REPO/releases/download" && \ + DOWNLOAD_URL="$DOWNLOAD_URL/v$AGENT_VERSION/$ARCHIVE-$HOST_OS-$HOST_ARCH.zip" + echo "Downloading $DOWNLOAD_URL to $TMP_PATH/$ARCHIVE.zip" && \ + 
wget -q -O "$TMP_PATH/$ARCHIVE.zip" "$DOWNLOAD_URL" && \ + echo "Unzipping: $TMP_PATH/$ARCHIVE.zip" && \ + ( cd "$TMP_PATH" || exit ; unzip -qq -o -d "$TMP_PATH" "$TMP_PATH/$BINARY.zip" ) && \ + rm -rf "$TMP_PATH/$ARCHIVE.zip" && \ + chmod a+x "$TMP_PATH/$BINARY-$HOST_OS-$HOST_ARCH" && + echo "Moving: $TMP_PATH/$ARCHIVE-$HOST_OS-$HOST_ARCH/$BINARY to $BIN_PATH/$BINARY" && \ + sudo mv "$TMP_PATH/$BINARY-$HOST_OS-$HOST_ARCH" "$BIN_PATH/$BINARY" && \ + rm -rf "$TMP_PATH/$ARCHIVE-$HOST_OS-$HOST_ARCH" + + echo "" + # shellcheck disable=SC2034 + AGENT_MODE="flow" && grafana-agent --help + - env: + REVIEWDOG_GITHUB_API_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + ./tools/lint-river.sh --format=checkstyle | \ + reviewdog -f=checkstyle -name="grafana-agent" -reporter=github-pr-check diff --git a/.github/workflows/stale-branches.yml b/.github/workflows/stale-branches.yml new file mode 100644 index 0000000..4be26bb --- /dev/null +++ b/.github/workflows/stale-branches.yml @@ -0,0 +1,25 @@ +--- +name: Stale Branches + +# yamllint disable-line rule:truthy +on: + schedule: + # Run every Monday to Friday at 6:00 AM + - cron: '0 6 * * 1-5' + +permissions: + issues: write + contents: write + +jobs: + stale_branches: + runs-on: ubuntu-latest + steps: + - name: Stale Branches + uses: crs-k/stale-branches@v3.0.0 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + days-before-stale: 60 + days-before-delete: 90 + comment-updates: true + tag-committer: true diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..026d910 --- /dev/null +++ b/.gitignore @@ -0,0 +1,230 @@ +# MacOS +# --------------------------------------------------- +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + + +# Windows +# --------------------------------------------------- +# Windows thumbnail cache files +Thumbs.db +Thumbs.db:encryptable +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + + +# MS Office +# --------------------------------------------------- +*.tmp + +# Word temporary +~$*.doc* + +# Word Auto Backup File +Backup of *.doc* + +# Excel temporary +~$*.xls* + +# Excel Backup File +*.xlk + +# PowerPoint temporary +~$*.ppt* + +# Visio autosave temporary files +*.~vsd* + + +# VS Code +# --------------------------------------------------- +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +*.code-workspace + +# Local History for Visual Studio Code +.history/ +*.app +.snapshots/* + + +# NodeJS +# --------------------------------------------------- +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* +.pnpm-debug.log* + +# Diagnostic reports (https://nodejs.org/api/report.html) +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Directory for instrumented libs generated by jscoverage/JSCover +lib-cov + +# Coverage directory used by tools like istanbul +coverage +*.lcov + +# nyc test coverage +.nyc_output + +# Grunt 
intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) +.grunt + +# Bower dependency directory (https://bower.io/) +bower_components + +# node-waf configuration +.lock-wscript + +# Compiled binary addons (https://nodejs.org/api/addons.html) +build/Release + +# Dependency directories +node_modules/ +jspm_packages/ + +# Snowpack dependency directory (https://snowpack.dev/) +web_modules/ + +# TypeScript cache +*.tsbuildinfo + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Optional stylelint cache +.stylelintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# dotenv environment variable files +.env +.env.development.local +.env.test.local +.env.production.local +.env.local + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next +out + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +# Comment in the public line in if your project uses Gatsby and not Next.js +# https://nextjs.org/blog/next-9-1#public-directory-support +# public + +# vuepress build output +.vuepress/dist + +# vuepress v2.x temp and cache directory +.temp +.cache + +# Docusaurus cache and generated files +.docusaurus + +# Serverless directories +.serverless/ + +# FuseBox cache +.fusebox/ + +# DynamoDB Local files +.dynamodb/ + +# TernJS port file +.tern-port + +# Stores VSCode versions used for testing VSCode extensions +.vscode-test + +# yarn v2 +.yarn/cache +.yarn/unplugged +.yarn/build-state.yml +.yarn/install-state.gz +.pnp.* + +alertmanager-auth.yaml diff --git a/.markdownlint.yml b/.markdownlint.yml new file mode 100644 index 0000000..a7d9921 --- /dev/null +++ b/.markdownlint.yml @@ -0,0 +1,261 @@ +--- +# Example markdownlint YAML configuration with all properties set to their default value + +# Default state for all rules +default: true + +# Path to configuration file to extend +extends: null + +# MD001/heading-increment/header-increment - Heading levels should only increment by one level at a time +MD001: true + +# MD002/first-heading-h1/first-header-h1 - First heading should be a top-level heading +MD002: + # Heading level + level: 1 + +# MD003/heading-style/header-style - Heading style +MD003: + # Heading style + style: "consistent" + +# MD004/ul-style - Unordered list style +MD004: + # List style + style: "consistent" + +# MD005/list-indent - Inconsistent indentation for list items at the same level +MD005: true + +# MD006/ul-start-left - Consider starting bulleted lists at the beginning of the line +MD006: true + +# MD007/ul-indent - Unordered list indentation +MD007: + # Spaces for indent + indent: 4 + # Whether to indent the first level of the list + start_indented: false + # Spaces for first level indent (when start_indented is set) + start_indent: 2 + +# MD009/no-trailing-spaces - Trailing spaces +MD009: + # Spaces for line break + br_spaces: 2 + # Allow spaces for empty lines in list items + list_item_empty_lines: false + # Include unnecessary breaks + strict: false + +# MD010/no-hard-tabs - Hard tabs +MD010: + # Include code blocks + code_blocks: true + # Fenced code languages to ignore + ignore_code_languages: [] + # Number of spaces for each hard tab + spaces_per_tab: 2 + +# MD011/no-reversed-links - Reversed link syntax +MD011: true + +# MD012/no-multiple-blanks - Multiple consecutive blank lines +MD012: + 
# Consecutive blank lines + maximum: 1 + +# MD013/line-length - Line length +MD013: + # Number of characters + line_length: 500 + # Number of characters for headings + heading_line_length: 80 + # Number of characters for code blocks + code_block_line_length: 500 + # Include code blocks + code_blocks: true + # Include tables + tables: false + # Include headings + headings: true + # Include headings + headers: true + # Strict length checking + strict: false + # Stern length checking + stern: false + +# MD014/commands-show-output - Dollar signs used before commands without showing output +MD014: false + +# MD018/no-missing-space-atx - No space after hash on atx style heading +MD018: true + +# MD019/no-multiple-space-atx - Multiple spaces after hash on atx style heading +MD019: true + +# MD020/no-missing-space-closed-atx - No space inside hashes on closed atx style heading +MD020: true + +# MD021/no-multiple-space-closed-atx - Multiple spaces inside hashes on closed atx style heading +MD021: true + +# MD022/blanks-around-headings/blanks-around-headers - Headings should be surrounded by blank lines +MD022: + # Blank lines above heading + lines_above: 1 + # Blank lines below heading + lines_below: 1 + +# MD023/heading-start-left/header-start-left - Headings must start at the beginning of the line +MD023: true + +# MD024/no-duplicate-heading/no-duplicate-header - Multiple headings with the same content +MD024: false + +# MD025/single-title/single-h1 - Multiple top-level headings in the same document +MD025: + # Heading level + level: 1 + # RegExp for matching title in front matter + front_matter_title: "^\\s*title\\s*[:=]" + +# MD026/no-trailing-punctuation - Trailing punctuation in heading +MD026: + # Punctuation characters not allowed at end of headings + punctuation: ".,;:!。,;:!" 
+ +# MD027/no-multiple-space-blockquote - Multiple spaces after blockquote symbol +MD027: true + +# MD028/no-blanks-blockquote - Blank line inside blockquote +MD028: true + +# MD029/ol-prefix - Ordered list item prefix +MD029: + # List style + style: "one_or_ordered" + +# MD030/list-marker-space - Spaces after list markers +MD030: + # Spaces for single-line unordered list items + ul_single: 3 + # Spaces for single-line ordered list items + ol_single: 2 + # Spaces for multi-line unordered list items + ul_multi: 3 + # Spaces for multi-line ordered list items + ol_multi: 2 + +# MD031/blanks-around-fences - Fenced code blocks should be surrounded by blank lines +MD031: + # Include list items + list_items: true + +# MD032/blanks-around-lists - Lists should be surrounded by blank lines +MD032: true + +# MD033/no-inline-html - Inline HTML +MD033: + # Allowed elements + allowed_elements: + - div + - br + - hr + +# MD034/no-bare-urls - Bare URL used +MD034: true + +# MD035/hr-style - Horizontal rule style +MD035: + # Horizontal rule style + style: "consistent" + +# MD036/no-emphasis-as-heading/no-emphasis-as-header - Emphasis used instead of a heading +MD036: false + +# MD037/no-space-in-emphasis - Spaces inside emphasis markers +MD037: true + +# MD038/no-space-in-code - Spaces inside code span elements +MD038: true + +# MD039/no-space-in-links - Spaces inside link text +MD039: true + +# MD040/fenced-code-language - Fenced code blocks should have a language specified +MD040: + # List of languages + allowed_languages: [] + # Require language only + language_only: false + +# MD041/first-line-heading/first-line-h1 - First line in a file should be a top-level heading +MD041: + # Heading level + level: 1 + # RegExp for matching title in front matter + front_matter_title: "^\\s*title\\s*[:=]" + +# MD042/no-empty-links - No empty links +MD042: true + +# MD043/required-headings/required-headers - Required heading structure +MD043: + # List of headings + headings: + - "*" + # List of headings + headers: [] + # Match case of headings + match_case: false + +# MD044/proper-names - Proper names should have the correct capitalization +MD044: + # List of proper names + names: [] + # Include code blocks + code_blocks: true + # Include HTML elements + html_elements: true + +# MD045/no-alt-text - Images should have alternate text (alt text) +MD045: true + +# MD046/code-block-style - Code block style +MD046: + # Block style + style: "consistent" + +# MD047/single-trailing-newline - Files should end with a single newline character +MD047: true + +# MD048/code-fence-style - Code fence style +MD048: + # Code fence style + style: "consistent" + +# MD049/emphasis-style - Emphasis style should be consistent +MD049: + # Emphasis style should be consistent + style: "consistent" + +# MD050/strong-style - Strong style should be consistent +MD050: + # Strong style should be consistent + style: "consistent" + +# MD051/link-fragments - Link fragments should be valid +MD051: false + +# MD052/reference-links-images - Reference links and images should use a label that is defined +MD052: true + +# MD053/link-image-reference-definitions - Link and image reference definitions should be needed +MD053: + # Ignored definitions + ignored_definitions: [ + "//" + ] diff --git a/.shellcheckrc b/.shellcheckrc new file mode 100644 index 0000000..9eac5ba --- /dev/null +++ b/.shellcheckrc @@ -0,0 +1,7 @@ +# Allow opening any 'source'd file, even if not specified as input +external-sources=true + +# some files are sourced which can make some areas 
appear unreachable +disable=SC2317 +disable=SC2250 +disable=SC2312 diff --git a/.textlintrc b/.textlintrc new file mode 100644 index 0000000..2c0266a --- /dev/null +++ b/.textlintrc @@ -0,0 +1,187 @@ +{ + "rules": { + "common-misspellings": true, + "no-todo": true, + "terminology": { + "defaultTerms": false, + "terms": [ + "Grafana", + ["GrafanaLabs", "Grafana Labs"], + ["GrafanaCloud", "Grafana Cloud"], + "Mimir", + "Loki", + "Phlare", + "Tempo", + "Faro", + "Raintank", + "Prometheus", + "PromQL", + ["(E|e)xamplars", "$1xemplars"], + ["(D|d)atasource", "$1ata source"], + "CData", + "Google", + "Amazon", + "RedHat", + "Azure", + "Airbnb", + "Android", + "AppleScript", + "AppVeyor", + "AVA", + "BrowserStack", + "Browsersync", + "Codecov", + "CodePen", + "CodeSandbox", + "DefinitelyTyped", + "EditorConfig", + "ESLint", + "GitHub", + "GraphQL", + "iOS", + "JavaScript", + "JetBrains", + "jQuery", + "LinkedIn", + "Lodash", + "MacBook", + "Markdown", + "OpenType", + "PayPal", + "PhpStorm", + "RubyMine", + "Sass", + "SemVer", + "TypeScript", + "UglifyJS", + "Wasm", + "WebAssembly", + "WebStorm", + "WordPress", + "YouTube", + ["Common[ .]js", "CommonJS"], + ["JSDocs?", "JSDoc"], + ["Nodejs", "Node.js"], + ["React[ .]js", "React"], + ["SauceLabs", "Sauce Labs"], + ["StackOverflow", "Stack Overflow"], + ["styled ?components", "styled-components"], + ["HTTP[ /]2(?:\\.0)?", "HTTP/2"], + ["OS X", "macOS"], + ["Mac ?OS", "macOS"], + ["a npm", "an npm"], + "ECMAScript", + ["ES2015", "ES6"], + ["ES7", "ES2016"], + "3D", + ["3-D", "3D"], + "Ajax", + "API", + ["API['’]?s", "APIs"], + "CSS", + "GIF", + " HTML ", + "HTTPS", + "IoT", + "I/O", + ["I-O", "I/O"], + "JPEG", + "MIME", + "OK", + "PaaS", + " PDF ", + "PNG", + "SaaS", + "URL", + ["URL['’]?s", "URLs"], + ["an URL", "a URL"], + ["wi[- ]?fi", "Wi-Fi"], + "McKenzie", + "McConnell", + [" id", " ID"], + ["id['’]?s", "IDs"], + ["backwards compatible", "backward compatible"], + ["build system(s?)", "build tool$1"], + ["CLI tool(s?)", "command-line tool$1"], + ["he or she", "they"], + ["he/she", "they"], + ["\\(s\\)he", "they"], + ["repo\\b", "repository"], + ["smartphone(s?)", "mobile phone$1"], + ["web[- ]?site(s?)", "site$1"], + ["auto[- ]complete", "autocomplete"], + ["auto[- ]format", "autoformat"], + ["auto[- ]fix", "autofix"], + ["auto[- ]fixing", "autofixing"], + ["back[- ]end(\\w*)", "backend$1"], + ["bug[- ]fix(es?)", "bugfix$1"], + ["change[- ]log(s?)", "changelog$1"], + ["check[- ]box(es?)", "checkbox$1"], + ["code[- ]base(es?)", "codebase$1"], + ["co[- ]locate(d?)", "colocate$1"], + ["end[- ]point(s?)", "endpoint$1"], + ["e[- ]mail(s?)", "email$1"], + ["file[- ]name(s?)", "filename$1"], + ["front[- ]end(\\w*)", "frontend$1"], + ["hack[- ]a[- ]thon(s?)", "hackathon$1"], + ["host[- ]name(s?)", "hostname$1"], + ["hot[- ]key(s?)", "hotkey$1"], + ["life[- ]cycle", "lifecycle"], + ["life[- ]stream(s?)", "lifestream$1"], + ["lock[- ]file(s?)", "lockfile$1"], + ["mark-up", "markup"], + ["meta[- ]data", "metadata"], + ["micro[- ]service(s?)", "microservice$1"], + ["name[- ]space(s?)", "namespace$1"], + ["pre[- ]condition(s?)", "precondition$1"], + ["pre[- ]defined", "predefined"], + ["pre[- ]release(s?)", "prerelease$1"], + ["re[- ]write", "rewrite"], + ["run[- ]time", "runtime"], + ["screen[- ]shot(s?)", "screenshot$1"], + ["screen[- ]?snap(s?)", "screenshot$1"], + ["sub[- ]class((?:es|ing)?)", "subclass$1"], + ["sub[- ]tree(s?)", "subtree$1"], + ["time[- ]stamp(s?)", "timestamp$1"], + ["touch[- ]screen(s?)", "touchscreen$1"], + ["user[- ]name(s?)", 
"username$1"], + ["walk[- ]through", "walkthrough"], + ["white[- ]space", "whitespace"], + ["wild[- ]card(s?)", "wildcard$1"], + ["css-?in-?js", "CSS in JS"], + ["code-?review(s?)", "code review$1"], + ["code-?splitting", "code splitting"], + ["end-?user(s?)", "end user$1"], + ["file-?type(s?)", "file type$1"], + ["micro-?frontend(s?)", "micro frontend$1"], + ["open-?source(ed?)", "open source$1"], + ["regexp?(s?)", "regular expression$1"], + ["style-?guide(s?)", "style guide$1"], + ["tree-?shaking", "tree shaking"], + ["source-?map(s?)", "source map$1"], + ["style-?sheet(s?)", "style sheet$1"], + ["user-?base", "user base"], + ["web-?page(s?)", "web page$1"], + ["built ?in", "built-in"], + ["client ?side", "client-side"], + ["command ?line", "command-line"], + ["end ?to ?end", "end-to-end"], + ["error ?prone", "error-prone"], + ["higher ?order", "higher-order"], + ["key[/ ]?value", "key-value"], + ["server ?side", "server-side"], + ["two ?steps?", "two-step"], + ["2 ?steps?", "two-step"], + ["(\\w+[^.?!]\\)? )base64", "$1base64"], + ["(\\w+[^.?!]\\)? )internet", "$1internet"], + ["(\\w+[^.?!]\\)? )stylelint", "$1stylelint"], + ["(\\w+[^.?!]\\)? )webpack", "$1webpack"], + ["(\\w+[^.?!]\\)? )npm", "$1npm"], + ["environemnt(s?)", "environment$1"], + ["pacakge(s?)", "package$1"], + ["tilda", "tilde"], + ["falsey", "falsy"] + ] + } + } +} diff --git a/.yamllint.yml b/.yamllint.yml new file mode 100644 index 0000000..d80f10f --- /dev/null +++ b/.yamllint.yml @@ -0,0 +1,17 @@ +--- +yaml-files: + - "*.yaml" + - "*.yml" + - ".yamllint" + +ignore: + - node_modules + - .git + - .github + +extends: default + +rules: + line-length: + max: 150 + level: warning diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..12380d5 --- /dev/null +++ b/Makefile @@ -0,0 +1,68 @@ +.DEFAULT_GOAL:= lint +PATH := ./node_modules/.bin:$(PATH) +SHELL := /bin/bash +args = $(filter-out $@, $(MAKECMDGOALS)) +.PHONY: all setup install clean reinstall lint lint-sh lint-shell lint-md lint-markdown lint-txt lint-text lint-yaml lint-yml lint-editorconfig lint-ec lint-alex lint-misspell lint-actionlint lint-river + +default: all + +all: install + +#################################################################### +# Installation / Setup # +#################################################################### +setup: + @./tools/setup.sh + +install: + yarn install + +# remove the build and log folders +clean: + rm -rf node_modules + +# reinstall the node_modules and start with a fresh node build +reinstall: clean install + +#################################################################### +# Linting # +#################################################################### +lint: lint-shell lint-markdown lint-text lint-yaml lint-editorconfig lint-alex lint-misspell lint-actionlint lint-river + +# Note "|| true" is added to locally make lint can be ran and all linting is preformmed, regardless of exit code + +# Shell Linting +lint-sh lint-shell: + @./tools/lint-shell.sh || true + +# Markdown Linting +lint-md lint-markdown: + @./tools/lint-markdown.sh || true + +# Text Linting +lint-txt lint-text: + @./tools/lint-text.sh || true + +# Yaml Linting +lint-yml lint-yaml: + @./tools/lint-yaml.sh || true + +# Editorconfig Linting +lint-ec lint-editorconfig: + @./tools/lint-editorconfig.sh || true + +# Alex Linting +lint-alex: + @./tools/lint-alex.sh || true + +# Misspell Linting +lint-misspell: + @./tools/lint-misspell.sh || true + +# Actionlint Linting +lint-actionlint: + @./tools/lint-actionlint.sh || true + +# River 
Linting +lint-river: + @./tools/lint-river.sh || true diff --git a/README.md b/README.md index 6e6f96f..530353f 100644 --- a/README.md +++ b/README.md @@ -1,35 +1,28 @@ -# flow-modules +# Flow Modules -

-[image: Grafana Agent logo]
-
-## Stability promise
-
-The default branch is `unstable` and is considered to never be used or any promise of backwards compatibility. Instead any tags or commits will be unchanging.
+
+[image: Grafana logo]
+[image: Grafana Agent logo]
+**NOTE: This is only to be used for Modules, not Modules Classic.**
 
-[Modules](https://grafana.com/docs/agent/latest/flow/concepts/modules/) are a
-way to create Grafana Agent [Flow](https://grafana.com/docs/agent/latest/flow/)
-configurations which can be loaded as a component. Modules are a great way to
-parameterize a configuration to create reusable pipelines.
+[Modules](https://grafana.com/docs/agent/latest/flow/concepts/modules/) are a way to create Grafana Agent [Flow](https://grafana.com/docs/agent/latest/flow/) configurations which can be loaded as a component. Modules are a great way to parameterize a configuration to create reusable pipelines.
 
-## Contents
-- modules: A library of usable modules out of the box
-- example: A practical example shown for each module loader plus without modules for comparison
-- util: Utilities for managing modules in this repo
-
-## Modules
+## Submitting modules
 
-| Name | Description | Agent Version |
-| ---- | ----------- | ------------- |
+Create a folder for the module under the `./modules` directory in the appropriate category. Each module must have a `README.md` that provides the following information:
 
-## Submitting modules
+- Components
+- Brief description
+- Applicable Agent Versions
+- Declared arguments and exports
+- Example
 
-Add modules to the `modules` folder. Each module must have a README.MD that provides the following information:
-* Name
-* Brief description
-* Applicable Agent Versions
-* Declare arguments and exports
-* Example
+## Referencing Modules
 
+Whenever a new module is submitted and a pull request is merged to the `main` branch, a tag is automatically created and published; by default this is a patch bump.
+Modules can be referenced directly from this git repository using the `import.git` or `import.http` components. It is recommended to always reference a tagged version, not the `main` branch, as in the sketch below.
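+For example, a minimal sketch of pinning an import to a tag (the tag `v0.1.0` below is hypothetical; substitute a real tag from this repository):
+
+```river
+import.git "node_exporter" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  // pin to an immutable tag rather than the moving `main` branch
+  revision       = "v0.1.0"
+  path           = "modules/system/node-exporter/metrics.river"
+  pull_frequency = "15m"
+}
+```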
diff --git a/modules/README.md b/modules/README.md
new file mode 100644
index 0000000..ba47f8e
--- /dev/null
+++ b/modules/README.md
@@ -0,0 +1,39 @@
+# Modules
+
+- [broker](./broker/)
+  - [rabbitmq](./broker/rabbitmq/)
+- [cloud](./cloud/)
+  - [grafana](./cloud/grafana/)
+    - [cloud](./cloud/grafana/cloud/)
+- [collector](./collector/)
+  - [agent](./collector/agent/)
+  - [push-gateway](./collector/push-gateway/)
+- [databases](./databases/)
+  - [kv](./databases/kv/)
+    - [etcd](./databases/kv/etcd/)
+    - [memcached](./databases/kv/memcached/)
+    - [redis](./databases/kv/redis/)
+  - [sql](./databases/sql/)
+    - [mysql](./databases/sql/mysql/)
+    - [postgres](./databases/sql/postgres/)
+  - [timeseries](./databases/timeseries/)
+    - [loki](./databases/timeseries/loki/)
+    - [mimir](./databases/timeseries/mimir/)
+    - [pyroscope](./databases/timeseries/pyroscope/)
+    - [tempo](./databases/timeseries/tempo/)
+- [kubernetes](./kubernetes/)
+  - [annotations](./kubernetes/annotations/)
+  - [cert-manager](./kubernetes/cert-manager/)
+  - [core](./kubernetes/core/)
+  - [konnectivity-agent](./kubernetes/konnectivity-agent/)
+  - [kube-state-metrics](./kubernetes/kube-state-metrics/)
+  - [opencost](./kubernetes/opencost/)
+- [networking](./networking/)
+  - [consul](./networking/consul/)
+  - [haproxy](./networking/haproxy/)
+- [source-control](./source-control/)
+  - [gitlab](./source-control/gitlab/)
+- [system](./system/)
+  - [node-exporter](./system/node-exporter/)
+- [ui](./ui/)
+  - [grafana](./ui/grafana/)
diff --git a/modules/broker/README.md b/modules/broker/README.md
new file mode 100644
index 0000000..640d15e
--- /dev/null
+++ b/modules/broker/README.md
@@ -0,0 +1,3 @@
+# Broker Modules
+
+- [rabbitmq](./rabbitmq/)
diff --git a/modules/broker/rabbitmq/README.md b/modules/broker/rabbitmq/README.md
new file mode 100644
index 0000000..78c84b5
--- /dev/null
+++ b/modules/broker/rabbitmq/README.md
@@ -0,0 +1,168 @@
+# RabbitMQ Module
+
+Handles scraping RabbitMQ metrics.
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component does not perform any scraping, and it is not required for Kubernetes: a custom service discovery can be used instead and its targets passed to `rabbitmq.scrape`.
+
+#### Arguments
+
+| Name              | Required | Default                                                   | Description                                                                     |
+| :---------------- | :------- | :-------------------------------------------------------- | :------------------------------------------------------------------------------ |
+| `namespaces`      | _no_     | `[]`                                                      | The namespaces to look for targets in; the default (`[]`) means all namespaces   |
+| `label_selectors` | _no_     | `["app.kubernetes.io/name=prometheus-rabbitmq-exporter"]` | The label selectors to use to find matching targets                              |
+| `port_name`       | _no_     | `rabbitmq-exporter`                                       | The name of the port to scrape metrics from                                      |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label       | Description                                                                                                                                        |
+| :---------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app`       | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app`                                                                    |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component`                                                   |
+| `container` | The name of the container the metrics are scraped from                                                                                               |
+| `namespace` | The namespace the target was found in                                                                                                                |
+| `pod`       | The full name of the pod                                                                                                                             |
+| `source`    | Constant value of `kubernetes`, denoting where the results came from; this can be useful for LBAC                                                    |
+| `workload`  | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, e.g. `ReplicaSet/my-app`  |
+
+---
+
+### `local`
+
+#### Arguments
+
+| Name   | Required | Default | Description                     |
+| :----- | :------- | :------ | :------------------------------ |
+| `port` | _no_     | `9419`  | The port to scrape metrics from |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label    | Description                                                                                 |
+| :------- | :------------------------------------------------------------------------------------------ |
+| `source` | Constant value of `local`, denoting where the results came from; this can be useful for LBAC |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name              | Required | Default                   | Description                                                                                                                                        |
+| :---------------- | :------- | :------------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets`         | _yes_    | `list(map(string))`       | List of targets to scrape                                                                                                                              |
+| `forward_to`      | _yes_    | `list(MetricsReceiver)`   | Where scraped metrics should be forwarded to                                                                                                           |
+| `job_label`       | _no_     | `integrations/rabbitmq`   | The job label to add for all metrics                                                                                                                   |
+| `keep_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to keep                                                                                                                |
+| `drop_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to drop                                                                                                                |
+| `scrape_interval` | _no_     | `60s`                     | How often to scrape metrics from the targets                                                                                                           |
+| `scrape_timeout`  | _no_     | `10s`                     | How long before a scrape times out                                                                                                                     |
+| `max_cache_size`  | _no_     | `100000`                  | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate.     |
+| `clustering`      | _no_     | `false`                   | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled                                         |
+
+#### Labels
+
+The following labels are automatically added to scraped metrics.
+
+| Label | Description                                    |
+| :---- | :--------------------------------------------- |
+| `job` | Set to the value of `argument.job_label.value` |
+
+---
+
+## Usage
+
+### `kubernetes`
+
+The following example will scrape all RabbitMQ instances in the cluster.
+
+```river
+import.git "rabbitmq" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/broker/rabbitmq/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+rabbitmq.kubernetes "targets" {}
+
+// scrape the targets
+rabbitmq.scrape "metrics" {
+  targets    = rabbitmq.kubernetes.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
+
+### `local`
+
+The following example will scrape RabbitMQ for metrics on the local machine.
+
+```river
+import.git "rabbitmq" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/broker/rabbitmq/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+rabbitmq.local "targets" {}
+
+// scrape the targets
+rabbitmq.scrape "metrics" {
+  targets    = rabbitmq.local.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
diff --git a/modules/broker/rabbitmq/metrics.river b/modules/broker/rabbitmq/metrics.river
new file mode 100644
index 0000000..488a57a
--- /dev/null
+++ b/modules/broker/rabbitmq/metrics.river
@@ -0,0 +1,238 @@
+/*
+Module: job-rabbitmq
+Description: Scrapes rabbitmq
+
+Note: Every argument except for "forward_to" is optional and has a defined default value. However, the values for these
+      arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... ").
+      This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax
+      does not override the value passed in, whereas coalesce() returns the first non-null value.
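+
+      For example, with a hypothetical "port" argument:
+
+        port = coalesce(argument.port.value, "9419")
+
+      This yields "9419" when the consumer passes null, and the consumer's value otherwise.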
+*/
+declare "kubernetes" {
+  // arguments for kubernetes discovery
+  argument "namespaces" {
+    comment  = "The namespaces to look for targets in (default: [] is all namespaces)"
+    optional = true
+  }
+
+  argument "field_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+    comment  = "The field selectors to use to find matching targets (default: [])"
+    optional = true
+  }
+
+  argument "label_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+    comment  = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=prometheus-rabbitmq-exporter\"])"
+    optional = true
+  }
+
+  argument "port_name" {
+    comment  = "The name of the port to scrape metrics from (default: rabbitmq-exporter)"
+    optional = true
+  }
+
+  // rabbitmq service discovery for all of the pods
+  discovery.kubernetes "rabbitmq" {
+    role = "pod"
+
+    selectors {
+      role  = "pod"
+      field = join(coalesce(argument.field_selectors.value, []), ",")
+      label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=prometheus-rabbitmq-exporter"]), ",")
+    }
+
+    namespaces {
+      names = coalesce(argument.namespaces.value, [])
+    }
+  }
+
+  // rabbitmq relabelings (pre-scrape)
+  discovery.relabel "kubernetes" {
+    targets = discovery.kubernetes.rabbitmq.targets
+
+    // keep only the specified metrics port name, and pods that are Running and ready
+    rule {
+      source_labels = [
+        "__meta_kubernetes_pod_container_port_name",
+        "__meta_kubernetes_pod_phase",
+        "__meta_kubernetes_pod_ready",
+        "__meta_kubernetes_pod_container_init",
+      ]
+      separator = "@"
+      regex     = coalesce(argument.port_name.value, "rabbitmq-exporter") + "@Running@true@false"
+      action    = "keep"
+    }
+
+    // set the namespace label
+    rule {
+      source_labels = ["__meta_kubernetes_namespace"]
+      target_label  = "namespace"
+    }
+
+    // set the pod label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_name"]
+      target_label  = "pod"
+    }
+
+    // set the container label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_container_name"]
+      target_label  = "container"
+    }
+
+    // set a workload label
+    rule {
+      source_labels = [
+        "__meta_kubernetes_pod_controller_kind",
+        "__meta_kubernetes_pod_controller_name",
+      ]
+      separator    = "/"
+      target_label = "workload"
+    }
+    // remove the hash from the ReplicaSet
+    rule {
+      source_labels = ["workload"]
+      regex         = "(ReplicaSet/.+)-.+"
+      target_label  = "workload"
+    }
+
+    // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:"
+    rule {
+      action = "replace"
+      source_labels = [
+        "__meta_kubernetes_pod_label_app_kubernetes_io_name",
+        "__meta_kubernetes_pod_label_k8s_app",
+        "__meta_kubernetes_pod_label_app",
+      ]
+      separator    = ";"
+      regex        = "^(?:;*)?([^;]+).*$"
+      replacement  = "$1"
+      target_label = "app"
+    }
+
+    // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:"
+    rule {
+      action = "replace"
+      source_labels = [
+        "__meta_kubernetes_pod_label_app_kubernetes_io_component",
+        "__meta_kubernetes_pod_label_k8s_component",
+        "__meta_kubernetes_pod_label_component",
+      ]
+      regex        = "^(?:;*)?([^;]+).*$"
+      replacement  = "$1"
+      target_label = "component"
+    }
+
+    // set a source label
+    rule {
+      action       = "replace"
+      replacement  = "kubernetes"
+      target_label = "source"
+    }
+  }
+
+  export "output" {
+    value = discovery.relabel.kubernetes.output
+  }
+}
+
+declare "local" {
+  argument "port" {
+    comment  = "The port to use (default: 9419)"
+    optional = true
+  }
+
+  // arguments for local (static)
+  discovery.relabel "local" {
+    targets = [
+      {
+        // note the ":" in the format string so the result is a host:port address
+        "__address__" = "localhost" + format(":%s", coalesce(argument.port.value, "9419")),
+        "source"      = "local",
+      },
+    ]
+  }
+
+  export "output" {
+    value = discovery.relabel.local.output
+  }
+}
+
+declare "scrape" {
+  argument "targets" {
+    comment = "Must be a list() of targets"
+  }
+
+  argument "forward_to" {
+    comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to"
+  }
+
+  argument "job_label" {
+    comment  = "The job label to add for all rabbitmq metrics (default: integrations/rabbitmq)"
+    optional = true
+  }
+
+  argument "keep_metrics" {
+    comment  = "A regular expression of metrics to keep (default: see below)"
+    optional = true
+  }
+
+  argument "drop_metrics" {
+    comment  = "A regular expression of metrics to drop (default: see below)"
+    optional = true
+  }
+
+  argument "scrape_interval" {
+    comment  = "How often to scrape metrics from the targets (default: 60s)"
+    optional = true
+  }
+
+  argument "scrape_timeout" {
+    comment  = "How long before a scrape times out (default: 10s)"
+    optional = true
+  }
+
+  argument "max_cache_size" {
+    comment  = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate."
+    optional = true
+  }
+
+  argument "clustering" {
+    // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/
+    comment  = "Whether or not clustering should be enabled (default: false)"
+    optional = true
+  }
+
+  // rabbitmq scrape job
+  prometheus.scrape "rabbitmq" {
+    job_name        = coalesce(argument.job_label.value, "integrations/rabbitmq")
+    forward_to      = [prometheus.relabel.rabbitmq.receiver]
+    targets         = argument.targets.value
+    scrape_interval = coalesce(argument.scrape_interval.value, "60s")
+    scrape_timeout  = coalesce(argument.scrape_timeout.value, "10s")
+
+    clustering {
+      enabled = coalesce(argument.clustering.value, false)
+    }
+  }
+
+  // rabbitmq metric relabelings (post-scrape)
+  prometheus.relabel "rabbitmq" {
+    forward_to     = argument.forward_to.value
+    max_cache_size = coalesce(argument.max_cache_size.value, 100000)
+
+    // drop metrics that match the drop_metrics regex
+    rule {
+      source_labels = ["__name__"]
+      regex         = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)")
+      action        = "drop"
+    }
+
+    // keep only metrics that match the keep_metrics regex
+    rule {
+      source_labels = ["__name__"]
+      regex         = coalesce(argument.keep_metrics.value, "(up|rabbitmq_(commands_total|connections_total|current_(bytes|connections|items)|items_(evicted_total|total)|max_connections|read_bytes_total|up|uptime_seconds|version|written_bytes_total))")
+      action        = "keep"
+    }
+  }
+}
diff --git a/modules/cloud/README.md b/modules/cloud/README.md
new file mode 100644
index 0000000..fa9e425
--- /dev/null
+++ b/modules/cloud/README.md
@@ -0,0 +1,4 @@
+# Cloud Modules
+
+- [grafana](./grafana/)
+  - [cloud](./grafana/cloud/)
diff --git a/modules/cloud/aws/.gitkeep b/modules/cloud/aws/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/modules/cloud/azure/.gitkeep b/modules/cloud/azure/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/modules/cloud/gcp/.gitkeep b/modules/cloud/gcp/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/modules/cloud/grafana/cloud/README.md b/modules/cloud/grafana/cloud/README.md
new file mode 100644
index 0000000..dabca9c
--- /dev/null
+++ b/modules/cloud/grafana/cloud/README.md
@@ -0,0 +1,70 @@
+# Grafana Cloud Auto-Configuration Module
+
+Module to interact with Grafana Cloud.
+
+## Components
+
+- [`stack`](#stack)
+
+### `stack`
+
+Module to automatically configure receivers for Grafana Cloud.
+
+To create a token:
+
+1. Navigate to the [Grafana Cloud Portal](https://grafana.com/profile/org)
+2. Go to either the `Access Policies` or `API Keys` page, located in the `Security` section
+3. Create an Access Policy or API token with the correct permissions
+
+The token must have permissions to read stack information. The setup of these permissions depends on the type of token:
+
+- Access Policies need the `stacks:read` scope
+- API Keys need at least the `MetricsPublisher` role
+
+#### Arguments
+
+| Name         | Required | Default | Description                                        |
+| :----------- | :------- | :------ | :-------------------------------------------------- |
+| `stack_name` | _yes_    | `N/A`   | Name of your stack as shown in the account console  |
+| `token`      | _yes_    | `N/A`   | Access policy token or API key                      |
+
+#### Exports
+
+| Name       | Type                     | Description                                                                                                                  |
+| ---------- | ------------------------ | ------------------------------------------------------------------------------------------------------------------------------ |
+| `metrics`  | `prometheus.Interceptor` | A value that other components can use to send metrics data to.                                                                 |
+| `logs`     | `loki.LogsReceiver`      | A value that other components can use to send logs data to.                                                                    |
+| `traces`   | `otelcol.Consumer`       | A value that other components can use to send trace data to.                                                                   |
+| `profiles` | `write.fanOutClient`     | A value that other components can use to send profiling data to.                                                               |
+| `info`     | `object`                 | Decoded representation of the [Stack info endpoint](https://grafana.com/docs/grafana-cloud/api-reference/cloud-api/#stacks).   |
+
+---
+
+## Usage
+
+### `stack`
+
+```river
+import.git "grafana_cloud" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/cloud/grafana/cloud/module.river"
+  pull_frequency = "15m"
+}
+
+// get the receivers
+grafana_cloud.stack "receivers" {
+  stack_name = "DashyMcDashFace"
+  token      = "XXXXXXXXXXXXX"
+}
+
+// scrape metrics and write to grafana cloud
+prometheus.scrape "default" {
+  targets = [
+    {"__address__" = "127.0.0.1:12345"},
+  ]
+  forward_to = [
+    grafana_cloud.stack.receivers.metrics,
+  ]
+}
+```
diff --git a/modules/cloud/grafana/cloud/module.river b/modules/cloud/grafana/cloud/module.river
new file mode 100644
index 0000000..1b52573
--- /dev/null
+++ b/modules/cloud/grafana/cloud/module.river
@@ -0,0 +1,86 @@
+
+declare "stack" {
+  argument "stack_name" {
+    comment = "The name of the grafana cloud stack to get the configuration from."
+  }
+
+  argument "token" {
+    comment = "The token to authenticate with the Grafana Cloud API."
+  }
+
+  // Get the configuration from the Grafana Cloud API
+  remote.http "config" {
+    url = "https://grafana.com/api/instances/" + argument.stack_name.value
+    client {
+      bearer_token = argument.token.value
+    }
+    poll_frequency = "24h"
+  }
+
+  // Setup the prometheus remote write receiver
+  prometheus.remote_write "default" {
+    endpoint {
+      url = json_decode(remote.http.config.content)["hmInstancePromUrl"] + "/api/prom/push"
+
+      basic_auth {
+        username = json_decode(remote.http.config.content)["hmInstancePromId"]
+        password = argument.token.value
+      }
+    }
+  }
+
+  // Setup the loki write receiver
+  loki.write "default" {
+    endpoint {
+      url = json_decode(remote.http.config.content)["hlInstanceUrl"] + "/loki/api/v1/push"
+
+      basic_auth {
+        username = json_decode(remote.http.config.content)["hlInstanceId"]
+        password = argument.token.value
+      }
+    }
+  }
+
+  // Setup the traces receiver
+  otelcol.auth.basic "default" {
+    username = json_decode(remote.http.config.content)["htInstanceId"]
+    password = argument.token.value
+  }
+
+  otelcol.exporter.otlp "default" {
+    client {
+      endpoint = json_decode(remote.http.config.content)["htInstanceUrl"] + ":443"
+      auth     = otelcol.auth.basic.default.handler
+    }
+  }
+
+  // Setup the pyroscope write receiver
+  pyroscope.write "default" {
+    endpoint {
+      url = json_decode(remote.http.config.content)["hpInstanceUrl"]
+
+      basic_auth {
+        username = json_decode(remote.http.config.content)["hpInstanceId"]
+        password = argument.token.value
+      }
+    }
+  }
+
+  // Export the receivers
+  export "metrics" {
+    value = prometheus.remote_write.default.receiver
+  }
+  export "logs" {
+    value = loki.write.default.receiver
+  }
+  export "traces" {
+    value = otelcol.exporter.otlp.default.input
+  }
+  export "profiles" {
+    value = pyroscope.write.default.receiver
+  }
+  export "info" {
+    value = json_decode(remote.http.config.content)
+  }
+}
diff --git a/modules/collector/README.md b/modules/collector/README.md
new file mode 100644
index 0000000..f403d8a
--- /dev/null
+++ b/modules/collector/README.md
@@ -0,0 +1,4 @@
+# Collector Modules
+
+- [agent](./agent/)
+- [push-gateway](./push-gateway/)
diff --git a/modules/collector/agent/README.md b/modules/collector/agent/README.md
new file mode 100644
index 0000000..08ed5d9
--- /dev/null
+++ b/modules/collector/agent/README.md
@@ -0,0 +1,167 @@
+# Agent Module
+
+Handles scraping Grafana Agent metrics.
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component performs no scraping itself, and it is not required for Kubernetes: a custom service discovery can be defined and its targets passed to `agent.scrape`.
+
+#### Arguments
+
+| Name              | Required | Default                                    | Description                                                                 |
+| :---------------- | :------- | :----------------------------------------- | :--------------------------------------------------------------------------- |
+| `namespaces`      | _no_     | `[]`                                       | The namespaces to look for targets in, the default (`[]`) is all namespaces  |
+| `field_selectors` | _no_     | `[]`                                       | The field selectors to use to find matching targets                          |
+| `label_selectors` | _no_     | `["app.kubernetes.io/name=grafana-agent"]` | The label selectors to use to find matching targets                          |
+| `port_name`       | _no_     | `http-metrics`                             | The name of the port to scrape metrics from                                  |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label       | Description                                                                                                                                          |
+| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app`       | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app`                                                                    |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component`                                                   |
+| `container` | The name of the container, usually `grafana-agent`                                                                                                   |
+| `namespace` | The namespace the target was found in                                                                                                                |
+| `pod`       | The full name of the pod                                                                                                                             |
+| `source`    | Constant value of `kubernetes`, denoting where the results came from; this can be useful for LBAC                                                    |
+| `workload`  | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, e.g. `ReplicaSet/my-app`  |
+
+---
+
+### `local`
+
+#### Arguments
+
+| Name   | Required | Default | Description                     |
+| :----- | :------- | :------ | :------------------------------ |
+| `port` | _no_     | `12345` | The port to scrape metrics from |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label    | Description                                                                                  |
+| :------- | :-------------------------------------------------------------------------------------------- |
+| `source` | Constant value of `local`, denoting where the results came from; this can be useful for LBAC  |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name              | Required | Default                   | Description                                                                                                                                         |
+| :---------------- | :------- | :------------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets`         | _yes_    | `N/A`                     | A `list(map(string))` of targets to scrape                                                                                                          |
+| `forward_to`      | _yes_    | `N/A`                     | A `list(MetricsReceiver)` to forward scraped metrics to                                                                                             |
+| `job_label`       | _no_     | `integrations/agent`      | The job label to add for all metrics                                                                                                                |
+| `keep_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to keep                                                                                                             |
+| `drop_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to drop                                                                                                             |
+| `scrape_interval` | _no_     | `60s`                     | How often to scrape metrics from the targets                                                                                                        |
+| `scrape_timeout`  | _no_     | `10s`                     | How long before a scrape times out                                                                                                                  |
+| `max_cache_size`  | _no_     | `100000`                  | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate.  |
+| `clustering`      | _no_     | `false`                   | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled                                      |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description                                    |
+| :---- | :--------------------------------------------- |
+| `job` | Set to the value of `argument.job_label.value` |
+
+---
+
+## Usage
+
+### `kubernetes`
+
+The following example will scrape all agents in the cluster.
+
+```river
+import.git "agent" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/collector/agent/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+agent.kubernetes "targets" {}
+
+// scrape the targets
+agent.scrape "metrics" {
+  targets = agent.kubernetes.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
+
+### `local`
+
+The following example will scrape the agent for metrics on the local machine.
+
+```river
+import.git "agent" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/collector/agent/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+agent.local "targets" {}
+
+// scrape the targets
+agent.scrape "metrics" {
+  targets = agent.local.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
diff --git a/modules/collector/agent/metrics.river b/modules/collector/agent/metrics.river
new file mode 100644
index 0000000..3116aac
--- /dev/null
+++ b/modules/collector/agent/metrics.river
@@ -0,0 +1,304 @@
+/*
+Module: job-agent
+Description: Scrapes grafana agent
+
+Note: Every argument except for "forward_to" is optional and has a defined default value.
+      However, the values for these arguments are not defined using the default = " ... " argument syntax, but rather using
+      coalesce(argument.value, " ... "). This is because if the argument passed in from a consuming module is set to null,
+      the default = " ... " syntax does not override the value passed in, whereas coalesce() returns the first non-null value.
+*/
+declare "kubernetes" {
+  // arguments for kubernetes discovery
+  argument "namespaces" {
+    comment = "The namespaces to look for targets in (default: [] is all namespaces)"
+    optional = true
+  }
+
+  argument "field_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+    comment = "The field selectors to use to find matching targets (default: [])"
+    optional = true
+  }
+
+  argument "label_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+    comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=grafana-agent\"])"
+    optional = true
+  }
+
+  argument "port_name" {
+    comment = "The name of the port to scrape metrics from (default: http-metrics)"
+    optional = true
+  }
+
+  // grafana agent service discovery for all of the pods
+  discovery.kubernetes "agent" {
+    role = "pod"
+
+    selectors {
+      role  = "pod"
+      field = join(coalesce(argument.field_selectors.value, []), ",")
+      label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=grafana-agent"]), ",")
+    }
+
+    namespaces {
+      names = coalesce(argument.namespaces.value, [])
+    }
+  }
+
+  // grafana agent relabelings (pre-scrape)
+  discovery.relabel "kubernetes" {
+    targets = discovery.kubernetes.agent.targets
+
+    // keep only the specified metrics port name, and pods that are Running and ready
+    rule {
+      source_labels = [
+        "__meta_kubernetes_pod_container_port_name",
+        "__meta_kubernetes_pod_phase",
+        "__meta_kubernetes_pod_ready",
+        "__meta_kubernetes_pod_container_init",
+      ]
+      separator = "@"
+      regex     = coalesce(argument.port_name.value, "http-metrics") + "@Running@true@false"
+      action    = "keep"
+    }
+
+    // set the namespace label
+    rule {
+      source_labels = ["__meta_kubernetes_namespace"]
+      target_label  = "namespace"
+    }
+
+    // set the pod label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_name"]
+      target_label  = "pod"
+    }
+
+    // set the container label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_container_name"]
+      target_label  = "container"
+    }
+
+    // set a workload label
+    rule {
+      source_labels = [
+        "__meta_kubernetes_pod_controller_kind",
+        "__meta_kubernetes_pod_controller_name",
+      ]
+      separator    = "/"
+      target_label = "workload"
+    }
+    // remove the hash from the ReplicaSet
+    rule {
+      source_labels = ["workload"]
+      regex         = "(ReplicaSet/.+)-.+"
+      target_label  = "workload"
+    }
+
+    // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:"
+    rule {
+      action = "replace"
+      source_labels = [
+        "__meta_kubernetes_pod_label_app_kubernetes_io_name",
+        "__meta_kubernetes_pod_label_k8s_app",
+        "__meta_kubernetes_pod_label_app",
+      ]
+      separator    = ";"
+      regex        = "^(?:;*)?([^;]+).*$"
+      replacement  = "$1"
+      target_label = "app"
+    }
+
+    // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:"
+    rule {
+      action = "replace"
+      source_labels = [
+        "__meta_kubernetes_pod_label_app_kubernetes_io_component",
+        "__meta_kubernetes_pod_label_k8s_component",
+        "__meta_kubernetes_pod_label_component",
+      ]
+      regex = "^(?:;*)?([^;]+).*$"
+      replacement  = "$1"
+      target_label = "component"
+    }
+
+    // set a source label
+    rule {
+      action       = "replace"
+      replacement  = "kubernetes"
+      target_label = "source"
+    }
+  }
+
+  export "output" {
+    value = discovery.relabel.kubernetes.output
+  }
+}
+
+declare "local" {
+  argument "port" {
+    comment = "The port to use (default: 12345)"
+    optional = true
+  }
+
+  // arguments for local (static)
+  discovery.relabel "local" {
+    targets = [
+      {
+        "__address__" = format("localhost:%s", coalesce(argument.port.value, "12345")),
+        "source"      = "local",
+      },
+    ]
+  }
+
+  export "output" {
+    value = discovery.relabel.local.output
+  }
+}
+
+declare "scrape" {
+  argument "targets" {
+    comment = "Must be a list() of targets"
+  }
+
+  argument "forward_to" {
+    comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to"
+  }
+
+  argument "job_label" {
+    comment = "The job label to add for all grafana-agent metrics (default: integrations/agent)"
+    optional = true
+  }
+
+  argument "keep_metrics" {
+    comment = "A regular expression of metrics to keep (default: see below)"
+    optional = true
+  }
+
+  argument "drop_metrics" {
+    comment = "A regular expression of metrics to drop (default: see below)"
+    optional = true
+  }
+
+  argument "scrape_interval" {
+    comment = "How often to scrape metrics from the targets (default: 60s)"
+    optional = true
+  }
+
+  argument "scrape_timeout" {
+    comment = "How long before a scrape times out (default: 10s)"
+    optional = true
+  }
+
+  argument "max_cache_size" {
+    comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate."
+    optional = true
+  }
+
+  argument "clustering" {
+    // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/
+    comment = "Whether or not clustering should be enabled (default: false)"
+    optional = true
+  }
+
+  // grafana agent scrape job
+  prometheus.scrape "agent" {
+    job_name        = coalesce(argument.job_label.value, "integrations/agent")
+    forward_to      = [prometheus.relabel.agent.receiver]
+    targets         = argument.targets.value
+    scrape_interval = coalesce(argument.scrape_interval.value, "60s")
+    scrape_timeout  = coalesce(argument.scrape_timeout.value, "10s")
+
+    clustering {
+      enabled = coalesce(argument.clustering.value, false)
+    }
+  }
+
+  // grafana-agent metric relabelings (post-scrape)
+  prometheus.relabel "agent" {
+    forward_to     = argument.forward_to.value
+    max_cache_size = coalesce(argument.max_cache_size.value, 100000)
+
+    // drop metrics that match the drop_metrics regex
+    rule {
+      source_labels = ["__name__"]
+      regex         = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)")
+      action        = "drop"
+    }
+
+    // keep only metrics that match the keep_metrics regex
+    rule {
+      source_labels = ["__name__"]
+      regex         = coalesce(argument.keep_metrics.value,
"(up|log_.+|(agent_(build_info|tcp_connections|wal_(samples_appended_total|storage_active_series))|go_(gc_duration_seconds_count|goroutines|memstats_heap_inuse_bytes)|process_(cpu_seconds_total|start_time_seconds)|prometheus_(remote_storage_(enqueue_retries_total|highest_timestamp_in_seconds|queue_highest_sent_timestamp_seconds|samples_(dropped_total|failed_total|pending|retried_total|total)|sent_batch_duration_seconds_(bucket|count|sum)|shard_(capacity|s(_desired|_max|_min))|succeeded_samples_total)|sd_discovered_targets|target_(interval_length_seconds_(count|sum)|scrapes_(exceeded_sample_limit_total|sample_(duplicate_timestamp_total|out_of_bounds_total|out_of_order_total)))|target_sync_length_seconds_sum|wal_watcher_current_segment)|traces_(exporter_send_failed_spans|exporter_sent_spans|loadbalancer_(backend_outcome|num_backends)|receiver_(accepted_spans|refused_spans))))") + action = "keep" + } + + // remove the component_id label from any metric that starts with log_bytes or log_lines, these are custom metrics that are generated + // as part of the log annotation modules in this repo + rule { + action = "replace" + source_labels = ["__name__"] + regex = "^log_(bytes|lines).+" + replacement = "" + target_label = "component_id" + } + + // set the namespace label to that of the exported_namespace + rule { + action = "replace" + source_labels = ["__name__", "exported_namespace"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "namespace" + } + + // set the pod label to that of the exported_pod + rule { + action = "replace" + source_labels = ["__name__", "exported_pod"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "pod" + } + + // set the container label to that of the exported_container + rule { + action = "replace" + source_labels = ["__name__", "exported_container"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "container" + } + + // set the job label to that of the exported_job + rule { + action = "replace" + source_labels = ["__name__", "exported_job"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "job" + } + + // set the instance label to that of the exported_instance + rule { + action = "replace" + source_labels = ["__name__", "exported_instance"] + separator = "@" + regex = "^log_(bytes|lines).+@(.+)" + replacement = "$2" + target_label = "instance" + } + + rule { + action = "labeldrop" + regex = "exported_(namespace|pod|container|job|instance)" + } + } +} diff --git a/modules/collector/push-gateway/README.md b/modules/collector/push-gateway/README.md new file mode 100644 index 0000000..ee233ff --- /dev/null +++ b/modules/collector/push-gateway/README.md @@ -0,0 +1,99 @@ +# Push Gateway Module + +Handles scraping Grafana Push Gateway metrics. 
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component performs no scraping itself, and it is not required for Kubernetes: a custom service discovery can be defined and its targets passed to `push_gateway.scrape`.
+
+#### Arguments
+
+| Name              | Required | Default                                             | Description                                                                 |
+| :---------------- | :------- | :--------------------------------------------------- | :--------------------------------------------------------------------------- |
+| `namespaces`      | _no_     | `[]`                                                  | The namespaces to look for targets in, the default (`[]`) is all namespaces  |
+| `field_selectors` | _no_     | `[]`                                                  | The field selectors to use to find matching targets                          |
+| `label_selectors` | _no_     | `["app.kubernetes.io/name=prometheus-pushgateway"]`   | The label selectors to use to find matching targets                          |
+| `port_name`       | _no_     | `http`                                                | The name of the port to scrape metrics from                                  |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label     | Description                                                                                                                   |
+| :-------- | :------------------------------------------------------------------------------------------------------------------------------ |
+| `service` | The name of the service the endpoint/pod is associated with, derived from the metadata label `__meta_kubernetes_service_name`    |
+| `source`  | Constant value of `kubernetes`, denoting where the results came from; this can be useful for LBAC                                |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name              | Required | Default                    | Description                                                                                                                                         |
+| :---------------- | :------- | :------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets`         | _yes_    | `N/A`                      | A `list(map(string))` of targets to scrape                                                                                                          |
+| `forward_to`      | _yes_    | `N/A`                      | A `list(MetricsReceiver)` to forward scraped metrics to                                                                                             |
+| `job_label`       | _no_     | `integrations/pushgateway` | The job label to add for all metrics                                                                                                                |
+| `keep_metrics`    | _no_     | [see code](metrics.river)  | A regular expression of metrics to keep                                                                                                             |
+| `drop_metrics`    | _no_     | [see code](metrics.river)  | A regular expression of metrics to drop                                                                                                             |
+| `scrape_interval` | _no_     | `60s`                      | How often to scrape metrics from the targets                                                                                                        |
+| `scrape_timeout`  | _no_     | `10s`                      | How long before a scrape times out                                                                                                                  |
+| `max_cache_size`  | _no_     | `100000`                   | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate.  |
+| `clustering`      | _no_     | `false`                    | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled                                      |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description                                    |
+| :---- | :--------------------------------------------- |
+| `job` | Set to the value of `argument.job_label.value` |
+
+---
+
+## Usage
+
+The following example will scrape all push gateways in the cluster.
+
+```river
+import.git "push_gateway" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/collector/push-gateway/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+push_gateway.kubernetes "targets" {}
+
+// scrape the targets
+push_gateway.scrape "metrics" {
+  targets = push_gateway.kubernetes.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
diff --git a/modules/collector/push-gateway/metrics.river b/modules/collector/push-gateway/metrics.river
new file mode 100644
index 0000000..a335b50
--- /dev/null
+++ b/modules/collector/push-gateway/metrics.river
@@ -0,0 +1,150 @@
+/*
+Module: job-push-gateway
+Description: Scrapes push-gateway
+
+Note: Every argument except for "forward_to" is optional and has a defined default value. However, the values for these
+      arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... ").
+      This is because if the argument passed in from a consuming module is set to null, the default = " ... " syntax does
+      not override the value passed in, whereas coalesce() returns the first non-null value.
+*/
+declare "kubernetes" {
+  // arguments for kubernetes discovery
+  argument "namespaces" {
+    comment = "The namespaces to look for targets in (default: [] is all namespaces)"
+    optional = true
+  }
+
+  argument "field_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+    comment = "The field selectors to use to find matching targets (default: [])"
+    optional = true
+  }
+
+  argument "label_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+    comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=prometheus-pushgateway\"])"
+    optional = true
+  }
+
+  argument "port_name" {
+    comment = "The name of the port to scrape metrics from (default: http)"
+    optional = true
+  }
+
+  // push gateway service discovery for all of the services
+  discovery.kubernetes "push_gateway" {
+    role = "service"
+
+    selectors {
+      role  = "service"
+      field = join(coalesce(argument.field_selectors.value, []), ",")
+      label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=prometheus-pushgateway"]), ",")
+    }
+
+    namespaces {
+      names = coalesce(argument.namespaces.value, [])
+    }
+  }
+
+  // push gateway relabelings (pre-scrape)
+  discovery.relabel "push_gateway" {
+    targets = discovery.kubernetes.push_gateway.targets
+
+    // only keep targets with a matching port name
+    rule {
+      source_labels = ["__meta_kubernetes_service_port_name"]
+      regex         = coalesce(argument.port_name.value, "http")
+      action        = "keep"
+    }
+
+    // set the service label
+    rule {
+      source_labels = ["__meta_kubernetes_service_name"]
+      target_label  = "service"
+    }
+
+    // set a source label
+    rule {
+      action       = "replace"
+      replacement  = "kubernetes"
+      target_label = "source"
+    }
+  }
+
+  export "output" {
+    value = discovery.relabel.push_gateway.output
+  }
+}
+
+declare "scrape" {
+  argument "targets" {
+    comment = "Must be a list() of targets"
+  }
+
+  argument "forward_to" {
+    comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to"
+  }
+
+  argument "job_label" {
+    comment = "The job label to add for all push gateway metrics (default: integrations/pushgateway)"
+    optional = true
+  }
+
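+  // note: in the prometheus.relabel component below, drop_metrics is applied before
+  // keep_metrics, so a series must survive both regexes to be forwarded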
argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // kube-state-metrics scrape job + prometheus.scrape "push_gateway" { + job_name = coalesce(argument.job_label.value, "integrations/pushgateway") + forward_to = [prometheus.relabel.push_gateway.receiver] + targets = discovery.relabel.push_gateway.output + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // push_gateway metric relabelings (post-scrape) + prometheus.relabel "push_gateway" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + } +} diff --git a/modules/databases/README.md b/modules/databases/README.md new file mode 100644 index 0000000..dadc08d --- /dev/null +++ b/modules/databases/README.md @@ -0,0 +1,14 @@ +# Database Modules + +- [kv](./kv/) + - [etcd](./kv/etcd/) + - [memcached](./kv/memcached/) + - [redis](./kv/redis/) +- [sql](./sql/) + - [mysql](./sql/mysql/) + - [postgres](./sql/postgres/) +- [timeseries](./timeseries/) + - [loki](./timeseries/loki/) + - [mimir](./timeseries/mimir/) + - [pyroscope](./timeseries/pyroscope/) + - [tempo](./timeseries/tempo/) diff --git a/modules/databases/kv/etcd/README.md b/modules/databases/kv/etcd/README.md new file mode 100644 index 0000000..935b404 --- /dev/null +++ b/modules/databases/kv/etcd/README.md @@ -0,0 +1,166 @@ +# Etcd Module + +Handles scraping Etcd metrics. 
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component performs no scraping itself, and it is not required for Kubernetes: a custom service discovery can be defined and its targets passed to `etcd.scrape`.
+
+#### Arguments
+
+| Name              | Required | Default                                | Description                                                                 |
+| :---------------- | :------- | :-------------------------------------- | :--------------------------------------------------------------------------- |
+| `namespaces`      | _no_     | `[]`                                     | The namespaces to look for targets in, the default (`[]`) is all namespaces  |
+| `field_selectors` | _no_     | `[]`                                     | The field selectors to use to find matching targets                          |
+| `label_selectors` | _no_     | `["app.kubernetes.io/component=etcd"]`   | The label selectors to use to find matching targets                          |
+| `port_name`       | _no_     | `metrics`                                | The name of the port to scrape metrics from                                  |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label       | Description                                                                                                                                          |
+| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app`       | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app`                                                                    |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component`                                                   |
+| `container` | The name of the container, usually `etcd`                                                                                                            |
+| `namespace` | The namespace the target was found in                                                                                                                |
+| `pod`       | The full name of the pod                                                                                                                             |
+| `source`    | Constant value of `kubernetes`, denoting where the results came from; this can be useful for LBAC                                                    |
+| `workload`  | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, e.g. `ReplicaSet/my-app`  |
+
+---
+
+### `local`
+
+#### Arguments
+
+| Name   | Required | Default | Description                     |
+| :----- | :------- | :------ | :------------------------------ |
+| `port` | _no_     | `2379`  | The port to scrape metrics from |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label    | Description                                                                                  |
+| :------- | :-------------------------------------------------------------------------------------------- |
+| `source` | Constant value of `local`, denoting where the results came from; this can be useful for LBAC  |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name              | Required | Default                   | Description                                                                                                                                         |
+| :---------------- | :------- | :------------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets`         | _yes_    | `N/A`                     | A `list(map(string))` of targets to scrape                                                                                                          |
+| `forward_to`      | _yes_    | `N/A`                     | A `list(MetricsReceiver)` to forward scraped metrics to                                                                                             |
+| `job_label`       | _no_     | `integrations/etcd`       | The job label to add for all metrics                                                                                                                |
+| `keep_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to keep                                                                                                             |
+| `drop_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to drop                                                                                                             |
+| `scrape_interval` | _no_     | `60s`                     | How often to scrape metrics from the targets                                                                                                        |
+| `scrape_timeout`  | _no_     | `10s`                     | How long before a scrape times out                                                                                                                  |
+| `max_cache_size`  | _no_     | `100000`                  | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate.  |
+| `clustering`      | _no_     | `false`                   | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled                                      |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description                                    |
+| :---- | :--------------------------------------------- |
+| `job` | Set to the value of `argument.job_label.value` |
+
+---
+
+## Usage
+
+### `kubernetes`
+
+The following example will scrape all etcd instances in the cluster.
+
+```river
+import.git "etcd" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/databases/kv/etcd/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+etcd.kubernetes "targets" {}
+
+// scrape the targets
+etcd.scrape "metrics" {
+  targets = etcd.kubernetes.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
+
+### `local`
+
+The following example will scrape etcd for metrics on the local machine.
+
+```river
+import.git "etcd" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/databases/kv/etcd/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+etcd.local "targets" {}
+
+// scrape the targets
+etcd.scrape "metrics" {
+  targets = etcd.local.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
diff --git a/modules/databases/kv/etcd/metrics.river b/modules/databases/kv/etcd/metrics.river
new file mode 100644
index 0000000..db60c2d
--- /dev/null
+++ b/modules/databases/kv/etcd/metrics.river
@@ -0,0 +1,238 @@
+/*
+Module: job-etcd
+Description: Scrapes etcd
+
+Note: Every argument except for "forward_to" is optional and has a defined default value.
+      However, the values for these arguments are not defined using the default = " ... " argument syntax, but rather using
+      coalesce(argument.value, " ... "). This is because if the argument passed in from a consuming module is set to null,
+      the default = " ... " syntax does not override the value passed in, whereas coalesce() returns the first non-null value.
+*/
+declare "kubernetes" {
+  // arguments for kubernetes discovery
+  argument "namespaces" {
+    comment = "The namespaces to look for targets in (default: [] is all namespaces)"
+    optional = true
+  }
+
+  argument "field_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+    comment = "The field selectors to use to find matching targets (default: [])"
+    optional = true
+  }
+
+  argument "label_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+    comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/component=etcd\"])"
+    optional = true
+  }
+
+  argument "port_name" {
+    comment = "The name of the port to scrape metrics from (default: metrics)"
+    optional = true
+  }
+
+  // etcd service discovery for all of the pods
+  discovery.kubernetes "etcd" {
+    role = "pod"
+
+    selectors {
+      role  = "pod"
+      field = join(coalesce(argument.field_selectors.value, []), ",")
+      label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/component=etcd"]), ",")
+    }
+
+    namespaces {
+      names = coalesce(argument.namespaces.value, [])
+    }
+  }
+
+  // etcd relabelings (pre-scrape)
+  discovery.relabel "kubernetes" {
+    targets = discovery.kubernetes.etcd.targets
+
+    // keep only the specified metrics port name, and pods that are Running and ready
+    rule {
+      source_labels = [
+        "__meta_kubernetes_pod_container_port_name",
+        "__meta_kubernetes_pod_phase",
+        "__meta_kubernetes_pod_ready",
+        "__meta_kubernetes_pod_container_init",
+      ]
+      separator = "@"
+      regex     = coalesce(argument.port_name.value, "metrics") + "@Running@true@false"
+      action    = "keep"
+    }
+
+    // set the namespace label
+    rule {
+      source_labels = ["__meta_kubernetes_namespace"]
+      target_label  = "namespace"
+    }
+
+    // set the pod label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_name"]
+      target_label  = "pod"
+    }
+
+    // set the container label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_container_name"]
+      target_label  = "container"
+    }
+
+    // set a workload label
+    rule {
+      source_labels = [
+        "__meta_kubernetes_pod_controller_kind",
+        "__meta_kubernetes_pod_controller_name",
+      ]
+      separator    = "/"
+      target_label = "workload"
+    }
+    // remove the hash from the ReplicaSet
+    rule {
+      source_labels = ["workload"]
+      regex         = "(ReplicaSet/.+)-.+"
+      target_label  = "workload"
+    }
+
+    // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:"
+    rule {
+      action = "replace"
+      source_labels = [
+        "__meta_kubernetes_pod_label_app_kubernetes_io_name",
+        "__meta_kubernetes_pod_label_k8s_app",
+        "__meta_kubernetes_pod_label_app",
+      ]
+      separator    = ";"
+      regex        = "^(?:;*)?([^;]+).*$"
+      replacement  = "$1"
+      target_label = "app"
+    }
+
+    // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:"
+    rule {
+      action = "replace"
+      source_labels = [
+        "__meta_kubernetes_pod_label_app_kubernetes_io_component",
+        "__meta_kubernetes_pod_label_k8s_component",
+        "__meta_kubernetes_pod_label_component",
+      ]
+      regex        = "^(?:;*)?([^;]+).*$"
+      replacement  = "$1"
+      target_label = "component"
+    }
"component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare "local" { + argument "port" { + comment = "The port to use (default: 9150)" + optional = true + } + + // arguments for local (static) + discovery.relabel "local" { + targets = [ + { + "__address__" = "localhost" + format("%s", coalesce(argument.port.value, "9150")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all etcd metric (default: integrations/etcd)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // etcd scrape job + prometheus.scrape "etcd" { + job_name = coalesce(argument.job_label.value, "integrations/etcd") + forward_to = [prometheus.relabel.etcd.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // etcd metric relabelings (post-scrape) + prometheus.relabel "etcd" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(up|etcd_(commands_total|connections_total|current_(bytes|connections|items)|items_(evicted_total|total)|max_connections|read_bytes_total|up|uptime_seconds|version|written_bytes_total))") + action = "keep" + } + } +} diff --git a/modules/databases/kv/memcached/README.md b/modules/databases/kv/memcached/README.md new file mode 100644 index 0000000..e32285d --- /dev/null +++ b/modules/databases/kv/memcached/README.md @@ -0,0 +1,167 @@ +# Memcached Module + +Handles scraping Memcached metrics. 
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component performs no scraping itself, and it is not required for Kubernetes: a custom service discovery can be defined and its targets passed to `memcached.scrape`.
+
+#### Arguments
+
+| Name              | Required | Default                                | Description                                                                 |
+| :---------------- | :------- | :-------------------------------------- | :--------------------------------------------------------------------------- |
+| `namespaces`      | _no_     | `[]`                                     | The namespaces to look for targets in, the default (`[]`) is all namespaces  |
+| `field_selectors` | _no_     | `[]`                                     | The field selectors to use to find matching targets                          |
+| `label_selectors` | _no_     | `["app.kubernetes.io/name=memcached"]`   | The label selectors to use to find matching targets                          |
+| `port_name`       | _no_     | `metrics`                                | The name of the port to scrape metrics from                                  |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label       | Description                                                                                                                                          |
+| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app`       | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app`                                                                    |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component`                                                   |
+| `container` | The name of the container                                                                                                                            |
+| `namespace` | The namespace the target was found in                                                                                                                |
+| `pod`       | The full name of the pod                                                                                                                             |
+| `source`    | Constant value of `kubernetes`, denoting where the results came from; this can be useful for LBAC                                                    |
+| `workload`  | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, e.g. `ReplicaSet/my-app`  |
+
+---
+
+### `local`
+
+#### Arguments
+
+| Name   | Required | Default | Description                     |
+| :----- | :------- | :------ | :------------------------------ |
+| `port` | _no_     | `9150`  | The port to scrape metrics from |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label    | Description                                                                                  |
+| :------- | :-------------------------------------------------------------------------------------------- |
+| `source` | Constant value of `local`, denoting where the results came from; this can be useful for LBAC  |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name              | Required | Default                   | Description                                                                                                                                         |
+| :---------------- | :------- | :------------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets`         | _yes_    | `N/A`                     | A `list(map(string))` of targets to scrape                                                                                                          |
+| `forward_to`      | _yes_    | `N/A`                     | A `list(MetricsReceiver)` to forward scraped metrics to                                                                                             |
+| `job_label`       | _no_     | `integrations/memcached`  | The job label to add for all metrics                                                                                                                |
+| `keep_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to keep                                                                                                             |
+| `drop_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to drop                                                                                                             |
+| `scrape_interval` | _no_     | `60s`                     | How often to scrape metrics from the targets                                                                                                        |
+| `scrape_timeout`  | _no_     | `10s`                     | How long before a scrape times out                                                                                                                  |
+| `max_cache_size`  | _no_     | `100000`                  | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate.  |
+| `clustering`      | _no_     | `false`                   | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled                                      |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description                                    |
+| :---- | :--------------------------------------------- |
+| `job` | Set to the value of `argument.job_label.value` |
+
+---
+
+## Usage
+
+### `kubernetes`
+
+The following example will scrape all memcached instances in the cluster.
+
+```river
+import.git "memcached" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/databases/kv/memcached/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+memcached.kubernetes "targets" {}
+
+// scrape the targets
+memcached.scrape "metrics" {
+  targets = memcached.kubernetes.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
+
+### `local`
+
+The following example will scrape memcached for metrics on the local machine.
+
+```river
+import.git "memcached" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/databases/kv/memcached/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+memcached.local "targets" {}
+
+// scrape the targets
+memcached.scrape "metrics" {
+  targets = memcached.local.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
diff --git a/modules/databases/kv/memcached/metrics.river b/modules/databases/kv/memcached/metrics.river
new file mode 100644
index 0000000..63051f2
--- /dev/null
+++ b/modules/databases/kv/memcached/metrics.river
@@ -0,0 +1,238 @@
+/*
+Module: job-memcached
+Description: Scrapes memcached
+
+Note: Every argument except for "forward_to" is optional and has a defined default value. However, the values for these
+      arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... ").
+      This is because if the argument passed in from a consuming module is set to null, the default = " ... " syntax does
+      not override the value passed in, whereas coalesce() returns the first non-null value.
+*/
+declare "kubernetes" {
+  // arguments for kubernetes discovery
+  argument "namespaces" {
+    comment = "The namespaces to look for targets in (default: [] is all namespaces)"
+    optional = true
+  }
+
+  argument "field_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+    comment = "The field selectors to use to find matching targets (default: [])"
+    optional = true
+  }
+
+  argument "label_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+    comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=memcached\"])"
+    optional = true
+  }
+
+  argument "port_name" {
+    comment = "The name of the port to scrape metrics from (default: metrics)"
+    optional = true
+  }
+
+  // memcached service discovery for all of the pods
+  discovery.kubernetes "memcached" {
+    role = "pod"
+
+    selectors {
+      role  = "pod"
+      field = join(coalesce(argument.field_selectors.value, []), ",")
+      label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=memcached"]), ",")
+    }
+
+    namespaces {
+      names = coalesce(argument.namespaces.value, [])
+    }
+  }
+
+  // memcached relabelings (pre-scrape)
+  discovery.relabel "kubernetes" {
+    targets = discovery.kubernetes.memcached.targets
+
+    // keep only the specified metrics port name, and pods that are Running and ready
+    rule {
+      source_labels = [
+        "__meta_kubernetes_pod_container_port_name",
+        "__meta_kubernetes_pod_phase",
+        "__meta_kubernetes_pod_ready",
+        "__meta_kubernetes_pod_container_init",
+      ]
+      separator = "@"
+      regex     = coalesce(argument.port_name.value, "metrics") + "@Running@true@false"
+      action    = "keep"
+    }
+
+    // set the namespace label
+    rule {
+      source_labels = ["__meta_kubernetes_namespace"]
+      target_label  = "namespace"
+    }
+
+    // set the pod label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_name"]
+      target_label  = "pod"
+    }
+
+    // set the container label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_container_name"]
+      target_label  = "container"
+    }
+
+    // set a workload label
+    rule {
+      source_labels = [
"__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare "local" { + argument "port" { + comment = "The port to use (default: 9150)" + optional = true + } + + // arguments for local (static) + discovery.relabel "local" { + targets = [ + { + "__address__" = "localhost" + format("%s", coalesce(argument.port.value, "9150")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all memcached metric (default: integrations/memcached)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." 
+    optional = true
+  }
+
+  argument "clustering" {
+    // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/
+    comment = "Whether or not clustering should be enabled (default: false)"
+    optional = true
+  }
+
+  // memcached scrape job
+  prometheus.scrape "memcached" {
+    job_name        = coalesce(argument.job_label.value, "integrations/memcached")
+    forward_to      = [prometheus.relabel.memcached.receiver]
+    targets         = argument.targets.value
+    scrape_interval = coalesce(argument.scrape_interval.value, "60s")
+    scrape_timeout  = coalesce(argument.scrape_timeout.value, "10s")
+
+    clustering {
+      enabled = coalesce(argument.clustering.value, false)
+    }
+  }
+
+  // memcached metric relabelings (post-scrape)
+  prometheus.relabel "memcached" {
+    forward_to     = argument.forward_to.value
+    max_cache_size = coalesce(argument.max_cache_size.value, 100000)
+
+    // drop metrics that match the drop_metrics regex
+    rule {
+      source_labels = ["__name__"]
+      regex         = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)")
+      action        = "drop"
+    }
+
+    // keep only metrics that match the keep_metrics regex
+    rule {
+      source_labels = ["__name__"]
+      regex         = coalesce(argument.keep_metrics.value, "(up|memcached_(commands_total|connections_total|current_(bytes|connections|items)|items_(evicted_total|total)|max_connections|read_bytes_total|up|uptime_seconds|version|written_bytes_total))")
+      action        = "keep"
+    }
+  }
+}
diff --git a/modules/databases/kv/redis/README.md b/modules/databases/kv/redis/README.md
new file mode 100644
index 0000000..59e928e
--- /dev/null
+++ b/modules/databases/kv/redis/README.md
@@ -0,0 +1,167 @@
+# Redis Module
+
+Handles scraping Redis metrics.
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component performs no scraping itself, and it is not required for Kubernetes: a custom service discovery can be defined and its targets passed to `redis.scrape`.
+
+#### Arguments
+
+| Name              | Required | Default                                                | Description                                                                 |
+| :---------------- | :------- | :------------------------------------------------------ | :--------------------------------------------------------------------------- |
+| `namespaces`      | _no_     | `[]`                                                     | The namespaces to look for targets in, the default (`[]`) is all namespaces  |
+| `field_selectors` | _no_     | `[]`                                                     | The field selectors to use to find matching targets                          |
+| `label_selectors` | _no_     | `["app.kubernetes.io/name=prometheus-redis-exporter"]`   | The label selectors to use to find matching targets                          |
+| `port_name`       | _no_     | `redis-exporter`                                         | The name of the port to scrape metrics from                                  |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label       | Description                                                                                                                                          |
+| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app`       | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app`                                                                    |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component`                                                   |
+| `container` | The name of the container, usually `redis-exporter`                                                                                                  |
+| `namespace` | The namespace the target was found in                                                                                                                |
+| `pod`       | The full name of the pod                                                                                                                             |
+| `source`    | Constant value of `kubernetes`, denoting where the results came from; this can be useful for LBAC                                                    |
+| `workload`  | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, e.g. `ReplicaSet/my-app`  |
+
+---
+
+### `local`
+
+#### Arguments
+
+| Name   | Required | Default | Description                     |
+| :----- | :------- | :------ | :------------------------------ |
+| `port` | _no_     | `9121`  | The port to scrape metrics from |
+
+#### Exports
+
+| Name     | Type                | Description                |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label    | Description                                                                                  |
+| :------- | :-------------------------------------------------------------------------------------------- |
+| `source` | Constant value of `local`, denoting where the results came from; this can be useful for LBAC  |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name              | Required | Default                   | Description                                                                                                                                         |
+| :---------------- | :------- | :------------------------ | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets`         | _yes_    | `N/A`                     | A `list(map(string))` of targets to scrape                                                                                                          |
+| `forward_to`      | _yes_    | `N/A`                     | A `list(MetricsReceiver)` to forward scraped metrics to                                                                                             |
+| `job_label`       | _no_     | `integrations/redis`      | The job label to add for all metrics                                                                                                                |
+| `keep_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to keep                                                                                                             |
+| `drop_metrics`    | _no_     | [see code](metrics.river) | A regular expression of metrics to drop                                                                                                             |
+| `scrape_interval` | _no_     | `60s`                     | How often to scrape metrics from the targets                                                                                                        |
+| `scrape_timeout`  | _no_     | `10s`                     | How long before a scrape times out                                                                                                                  |
+| `max_cache_size`  | _no_     | `100000`                  | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate.  |
+| `clustering`      | _no_     | `false`                   | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled                                      |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description                                    |
+| :---- | :--------------------------------------------- |
+| `job` | Set to the value of `argument.job_label.value` |
+
+---
+
+## Usage
+
+### `kubernetes`
+
+The following example will scrape all redis instances in the cluster.
+
+```river
+import.git "redis" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/databases/kv/redis/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+redis.kubernetes "targets" {}
+
+// scrape the targets
+redis.scrape "metrics" {
+  targets = redis.kubernetes.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
+
+### `local`
+
+The following example will scrape redis for metrics on the local machine.
+
+```river
+import.git "redis" {
+  repository     = "https://github.com/grafana/flow-modules.git"
+  revision       = "main"
+  path           = "modules/databases/kv/redis/metrics.river"
+  pull_frequency = "15m"
+}
+
+// get the targets
+redis.local "targets" {}
+
+// scrape the targets
+redis.scrape "metrics" {
+  targets = redis.local.targets.output
+  forward_to = [
+    prometheus.remote_write.default.receiver,
+  ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+  endpoint {
+    url = "http://mimir:9009/api/v1/push"
+
+    basic_auth {
+      username = "example-user"
+      password = "example-password"
+    }
+  }
+}
+```
diff --git a/modules/databases/kv/redis/metrics.river b/modules/databases/kv/redis/metrics.river
new file mode 100644
index 0000000..d6c3453
--- /dev/null
+++ b/modules/databases/kv/redis/metrics.river
@@ -0,0 +1,238 @@
+/*
+Module: job-redis
+Description: Scrapes redis
+
+Note: Every argument except for "forward_to" is optional and has a defined default value. However, the values for these
+      arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... ").
+      This is because if the argument passed in from a consuming module is set to null, the default = " ... " syntax does
+      not override the value passed in, whereas coalesce() returns the first non-null value.
+*/
+declare "kubernetes" {
+  // arguments for kubernetes discovery
+  argument "namespaces" {
+    comment = "The namespaces to look for targets in (default: [] is all namespaces)"
+    optional = true
+  }
+
+  argument "field_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+    comment = "The field selectors to use to find matching targets (default: [])"
+    optional = true
+  }
+
+  argument "label_selectors" {
+    // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+    comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=prometheus-redis-exporter\"])"
+    optional = true
+  }
+
+  argument "port_name" {
+    comment = "The name of the port to scrape metrics from (default: redis-exporter)"
+    optional = true
+  }
+
+  // redis service discovery for all of the pods
+  discovery.kubernetes "redis" {
+    role = "pod"
+
+    selectors {
+      role  = "pod"
+      field = join(coalesce(argument.field_selectors.value, []), ",")
+      label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=prometheus-redis-exporter"]), ",")
+    }
+
+    namespaces {
+      names = coalesce(argument.namespaces.value, [])
+    }
+  }
+
+  // redis relabelings (pre-scrape)
+  discovery.relabel "kubernetes" {
+    targets = discovery.kubernetes.redis.targets
+
+    // keep only the specified metrics port name, and pods that are Running and ready
+    rule {
+      source_labels = [
+        "__meta_kubernetes_pod_container_port_name",
+        "__meta_kubernetes_pod_phase",
+        "__meta_kubernetes_pod_ready",
+        "__meta_kubernetes_pod_container_init",
+      ]
+      separator = "@"
+      regex     = coalesce(argument.port_name.value, "redis-exporter") + "@Running@true@false"
+      action    = "keep"
+    }
+
+    // set the namespace label
+    rule {
+      source_labels = ["__meta_kubernetes_namespace"]
+      target_label  = "namespace"
+    }
+
+    // set the pod label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_name"]
+      target_label  = "pod"
+    }
+
+    // set the container label
+    rule {
+      source_labels = ["__meta_kubernetes_pod_container_name"]
+      target_label  = "container"
+    }
+
+    // set a workload label
+    rule {
+      source_labels = [
+        "__meta_kubernetes_pod_controller_kind",
+ "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare "local" { + argument "port" { + comment = "The port to use (default: 9121)" + optional = true + } + + // arguments for local (static) + discovery.relabel "local" { + targets = [ + { + "__address__" = "localhost" + format("%s", coalesce(argument.port.value, "9150")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all redis metric (default: integrations/redis)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." 
+ optional = true
+ }
+
+ argument "clustering" {
+ // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/
+ comment = "Whether or not clustering should be enabled (default: false)"
+ optional = true
+ }
+
+ // redis scrape job
+ prometheus.scrape "redis" {
+ job_name = coalesce(argument.job_label.value, "integrations/redis")
+ forward_to = [prometheus.relabel.redis.receiver]
+ targets = argument.targets.value
+ scrape_interval = coalesce(argument.scrape_interval.value, "60s")
+ scrape_timeout = coalesce(argument.scrape_timeout.value, "10s")
+
+ clustering {
+ enabled = coalesce(argument.clustering.value, false)
+ }
+ }
+
+ // redis metric relabelings (post-scrape)
+ prometheus.relabel "redis" {
+ forward_to = argument.forward_to.value
+ max_cache_size = coalesce(argument.max_cache_size.value, 100000)
+
+ // drop metrics that match the drop_metrics regex
+ rule {
+ source_labels = ["__name__"]
+ regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)")
+ action = "drop"
+ }
+
+ // keep only metrics that match the keep_metrics regex
+ rule {
+ source_labels = ["__name__"]
+ regex = coalesce(argument.keep_metrics.value, "(up|redis_(blocked_clients|cluster_slots_(fail|pfail)|cluster_state|commands_(duration_seconds_total|total)|connected_(clients|slaves)|db_keys(_expiring)?|evicted_keys_total|keyspace_(hits|misses)_total|master_last_io_seconds_ago|memory_(fragmentation_ratio|max_bytes|used_(bytes|rss_bytes))|total_system_memory_bytes|up))")
+ action = "keep"
+ }
+ }
+}
diff --git a/modules/databases/sql/mysql/README.md b/modules/databases/sql/mysql/README.md
new file mode 100644
index 0000000..09a79c4
--- /dev/null
+++ b/modules/databases/sql/mysql/README.md
@@ -0,0 +1,167 @@
+# MySQL Module
+
+Handles scraping MySQL metrics.
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component does not perform any scraping itself and is not required for Kubernetes; a custom service discovery can be defined and its targets passed to `mysql.scrape`.
+
+#### Arguments
+
+| Name | Required | Default | Description |
+| :---------------- | :------- | :------------------------------------------------------ | :--------------------------------------------------------------------------- |
+| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces |
+| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets |
+| `label_selectors` | _no_ | `["app.kubernetes.io/name=prometheus-mysql-exporter"]` | The label selectors to use to find matching targets |
+| `port_name` | _no_ | `mysql-exporter` | The name of the port to scrape metrics from |
+
+#### Exports
+
+| Name | Type | Description |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description |
+| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` |
+| `container` | The name of the container |
+| `namespace` | The namespace the target was found in |
+| `pod` | The full name of the pod |
+| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC |
+| `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e. `ReplicaSet/my-app` |
+
+---
+
+### `local`
+
+#### Arguments
+
+| Name | Optional | Default | Description |
+| :----- | :------- | :------ | :------------------------------ |
+| `port` | `true` | `9104` | The port to scrape metrics from |
+
+#### Exports
+
+| Name | Type | Description |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description |
+| :------- | :-------------------------------------------------------------------------------------------- |
+| `source` | Constant value of `local`, denoting where the results came from, this can be useful for LBAC |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name | Required | Default | Description |
+| :---------------- | :------- | :----------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets` | _yes_ | `list(map(string))` | List of targets to scrape |
+| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to |
+| `job_label` | _no_ | `integrations/mysql` | The job label to add for all mysql metrics |
+| `keep_metrics` | _no_ | [see code](metrics.river#L228) | A regular expression of metrics to keep |
+| `drop_metrics` | _no_ | [see code](metrics.river#L235) | A regular expression of metrics to drop |
+| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets |
+| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out |
+| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. |
+| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled |
+
+#### Labels
+
+The following labels are automatically added to scraped metrics.
+
+| Label | Description |
+| :---- | :------------------------------------------------------------------ |
+| `job` | Set to the value of `job_label`, defaulting to `integrations/mysql` |
+
+---
+
+## Usage
+
+### `kubernetes`
+
+The following example will scrape all mysql instances in the cluster.
+
+```river
+import.git "mysql" {
+ repository = "https://github.com/grafana/flow-modules.git"
+ revision = "main"
+ path = "modules/databases/sql/mysql/metrics.river"
+ pull_frequency = "15m"
+}
+
+// get the targets
+mysql.kubernetes "targets" {}
+
+// scrape the targets
+mysql.scrape "metrics" {
+ targets = mysql.kubernetes.targets.output
+ forward_to = [
+ prometheus.remote_write.default.receiver,
+ ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+ endpoint {
+ url = "http://mimir:9009/api/v1/push"
+
+ basic_auth {
+ username = "example-user"
+ password = "example-password"
+ }
+ }
+}
+```
+
+### `local`
+
+The following example will scrape mysql for metrics on the local machine.
+
+```river
+import.git "mysql" {
+ repository = "https://github.com/grafana/flow-modules.git"
+ revision = "main"
+ path = "modules/databases/sql/mysql/metrics.river"
+ pull_frequency = "15m"
+}
+
+// get the targets
+mysql.local "targets" {}
+
+// scrape the targets
+mysql.scrape "metrics" {
+ targets = mysql.local.targets.output
+ forward_to = [
+ prometheus.remote_write.default.receiver,
+ ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+ endpoint {
+ url = "http://mimir:9009/api/v1/push"
+
+ basic_auth {
+ username = "example-user"
+ password = "example-password"
+ }
+ }
+}
+```
diff --git a/modules/databases/sql/mysql/metrics.river b/modules/databases/sql/mysql/metrics.river
new file mode 100644
index 0000000..3a8529a
--- /dev/null
+++ b/modules/databases/sql/mysql/metrics.river
@@ -0,0 +1,239 @@
+/*
+Module: job-mysql
+Description: Scrapes mysql
+
+Note: Every argument except for "forward_to" is optional and has a defined default value. However, the values for these
+ arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... ").
+ This is because if the argument passed in from a consuming module is set to null, the default = " ... " syntax does not
+ override the value passed in, whereas coalesce() will return the first non-null value.
+*/
+declare "kubernetes" {
+ // arguments for kubernetes discovery
+ argument "namespaces" {
+ comment = "The namespaces to look for targets in (default: [] is all namespaces)"
+ optional = true
+ }
+
+ argument "field_selectors" {
+ // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+ comment = "The field selectors to use to find matching targets (default: [])"
+ optional = true
+ }
+
+ argument "label_selectors" {
+ // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+ comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=prometheus-mysql-exporter\"])"
+ optional = true
+ }
+
+ argument "port_name" {
+ comment = "The name of the port to scrape metrics from (default: mysql-exporter)"
+ optional = true
+ }
+
+ // mysql service discovery for all of the pods
+ discovery.kubernetes "mysql" {
+ role = "pod"
+
+ selectors {
+ role = "pod"
+ field = join(coalesce(argument.field_selectors.value, []), ",")
+ label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=prometheus-mysql-exporter"]), ",")
+ }
+
+ namespaces {
+ names = coalesce(argument.namespaces.value, [])
+ }
+ }
+
+ // mysql relabelings (pre-scrape)
+ discovery.relabel "kubernetes" {
+ targets = discovery.kubernetes.mysql.targets
+
+ // keep only the specified metrics port name, and pods that are Running and ready
+ rule {
+ source_labels = [
+ "__meta_kubernetes_pod_container_port_name",
+ "__meta_kubernetes_pod_phase",
+ "__meta_kubernetes_pod_ready",
+ "__meta_kubernetes_pod_container_init",
+ ]
+ separator = "@"
+ regex = coalesce(argument.port_name.value, "mysql-exporter") + "@Running@true@false"
+ action = "keep"
+ }
+
+ // set the namespace label
+ rule {
+ source_labels = ["__meta_kubernetes_namespace"]
+ target_label = "namespace"
+ }
+
+ // set the pod label
+ rule {
+ source_labels = ["__meta_kubernetes_pod_name"]
+ target_label = "pod"
+ }
+
+ // set the container label
+ rule {
+ source_labels = ["__meta_kubernetes_pod_container_name"]
+ target_label = "container"
+ }
+
+ // set a workload label
+ rule {
+ source_labels = [
"__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare "local" { + + argument "port_name" { + comment = "The port to use (default: 9104)" + optional = true + } + + // arguments for local (static) + discovery.relabel "local" { + targets = [ + { + "__address__" = "localhost" + format("%s", coalesce(argument.port.value, "9104")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all mysql metric (default: integrations/mysql)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." 
+ optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // mysql scrape job + prometheus.scrape "mysql" { + job_name = coalesce(argument.job_label.value, "integrations/mysql") + forward_to = [prometheus.relabel.mysql.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // mysql metric relabelings (post-scrape) + prometheus.relabel "mysql" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(mysql_(global_status_(aborted_(clients|connects)|buffer_pool_pages|bytes_(received|sent)|commands_total|created_tmp_(disk_tables|files|tables)|handlers_total|innodb_(log_waits|mem_(adaptive_hash|dictionary)|num_open_files|page_size)|max_used_connections|open_(files|table_definitions|tables)|opened_(files|table_definitions|tables)|qcache_(free_memory|hits|inserts|lowmem_prunes|not_cached|queries_in_cache)|queries|questions|select_(full_(join|range_join)|range(|_check)|scan)|slow_queries|sort_(merge_passes|range|rows|scan)|table_locks_(immediate|waited)|table_open_cache_(hits|misses|overflows)|threads_(cached|connected|created|running)|uptime|wsrep_(local_(recv_queue|state)|ready))|global_variables_(innodb_(additional_mem_pool_size|buffer_pool_size|log_buffer_size)|key_buffer_size|max_connections|open_files_limit|query_cache_size|table_(definition_cache|open_cache)|thread_cache_size|tokudb_cache_size|wsrep_desync)|heartbeat_(now|stored)_timestamp_seconds|info_schema_processlist_threads|slave_status_(seconds_behind_master|slave_(io|sql)_running|sql_delay)|up))") + action = "keep" + } + } +} diff --git a/modules/databases/sql/postgres/README.md b/modules/databases/sql/postgres/README.md new file mode 100644 index 0000000..6a925e7 --- /dev/null +++ b/modules/databases/sql/postgres/README.md @@ -0,0 +1,167 @@ +# Postgres Module + +Handles scraping Postgres metrics. 
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component does not perform any scraping itself and is not required for Kubernetes; a custom service discovery can be defined and its targets passed to `postgres.scrape`.
+
+#### Arguments
+
+| Name | Required | Default | Description |
+| :---------------- | :------- | :--------------------------------------------------------- | :--------------------------------------------------------------------------- |
+| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces |
+| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets |
+| `label_selectors` | _no_ | `["app.kubernetes.io/name=prometheus-postgres-exporter"]` | The label selectors to use to find matching targets |
+| `port_name` | _no_ | `postgres-exporter` | The name of the port to scrape metrics from |
+
+#### Exports
+
+| Name | Type | Description |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description |
+| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` |
+| `container` | The name of the container |
+| `namespace` | The namespace the target was found in |
+| `pod` | The full name of the pod |
+| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC |
+| `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e. `ReplicaSet/my-app` |
+
+---
+
+### `local`
+
+#### Arguments
+
+| Name | Optional | Default | Description |
+| :----- | :------- | :------ | :------------------------------ |
+| `port` | `true` | `9187` | The port to scrape metrics from |
+
+#### Exports
+
+| Name | Type | Description |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description |
+| :------- | :-------------------------------------------------------------------------------------------- |
+| `source` | Constant value of `local`, denoting where the results came from, this can be useful for LBAC |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name | Required | Default | Description |
+| :---------------- | :------- | :----------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets` | _yes_ | `list(map(string))` | List of targets to scrape |
+| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to |
+| `job_label` | _no_ | `integrations/postgres_exporter` | The job label to add for all postgres metrics |
+| `keep_metrics` | _no_ | [see code](metrics.river#L228) | A regular expression of metrics to keep |
+| `drop_metrics` | _no_ | [see code](metrics.river#L235) | A regular expression of metrics to drop |
+| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets |
+| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out |
+| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. |
+| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled |
+
+#### Labels
+
+The following labels are automatically added to scraped metrics.
+
+| Label | Description |
+| :---- | :--------------------------------------------------------------------------------- |
+| `job` | Set to the value of `job_label`, defaulting to `integrations/postgres_exporter` |
+
+---
+
+## Usage
+
+### `kubernetes`
+
+The following example will scrape all postgres instances in the cluster.
+
+```river
+import.git "postgres" {
+ repository = "https://github.com/grafana/flow-modules.git"
+ revision = "main"
+ path = "modules/databases/sql/postgres/metrics.river"
+ pull_frequency = "15m"
+}
+
+// get the targets
+postgres.kubernetes "targets" {}
+
+// scrape the targets
+postgres.scrape "metrics" {
+ targets = postgres.kubernetes.targets.output
+ forward_to = [
+ prometheus.remote_write.default.receiver,
+ ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+ endpoint {
+ url = "http://mimir:9009/api/v1/push"
+
+ basic_auth {
+ username = "example-user"
+ password = "example-password"
+ }
+ }
+}
+```
+
+### `local`
+
+The following example will scrape postgres for metrics on the local machine.
+
+```river
+import.git "postgres" {
+ repository = "https://github.com/grafana/flow-modules.git"
+ revision = "main"
+ path = "modules/databases/sql/postgres/metrics.river"
+ pull_frequency = "15m"
+}
+
+// get the targets
+postgres.local "targets" {}
+
+// scrape the targets
+postgres.scrape "metrics" {
+ targets = postgres.local.targets.output
+ forward_to = [
+ prometheus.remote_write.default.receiver,
+ ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+ endpoint {
+ url = "http://mimir:9009/api/v1/push"
+
+ basic_auth {
+ username = "example-user"
+ password = "example-password"
+ }
+ }
+}
+```
diff --git a/modules/databases/sql/postgres/metrics.river b/modules/databases/sql/postgres/metrics.river
new file mode 100644
index 0000000..c7849dc
--- /dev/null
+++ b/modules/databases/sql/postgres/metrics.river
@@ -0,0 +1,239 @@
+/*
+Module: job-postgres
+Description: Scrapes postgres
+
+Note: Every argument except for "forward_to" is optional and has a defined default value. However, the values for these
+ arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... ").
+ This is because if the argument passed in from a consuming module is set to null, the default = " ... " syntax does not
+ override the value passed in, whereas coalesce() will return the first non-null value.
+*/
+declare "kubernetes" {
+ // arguments for kubernetes discovery
+ argument "namespaces" {
+ comment = "The namespaces to look for targets in (default: [] is all namespaces)"
+ optional = true
+ }
+
+ argument "field_selectors" {
+ // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+ comment = "The field selectors to use to find matching targets (default: [])"
+ optional = true
+ }
+
+ argument "label_selectors" {
+ // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+ comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=prometheus-postgres-exporter\"])"
+ optional = true
+ }
+
+ argument "port_name" {
+ comment = "The name of the port to scrape metrics from (default: postgres-exporter)"
+ optional = true
+ }
+
+ // postgres service discovery for all of the pods
+ discovery.kubernetes "postgres" {
+ role = "pod"
+
+ selectors {
+ role = "pod"
+ field = join(coalesce(argument.field_selectors.value, []), ",")
+ label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=prometheus-postgres-exporter"]), ",")
+ }
+
+ namespaces {
+ names = coalesce(argument.namespaces.value, [])
+ }
+ }
+
+ // postgres relabelings (pre-scrape)
+ discovery.relabel "kubernetes" {
+ targets = discovery.kubernetes.postgres.targets
+
+ // keep only the specified metrics port name, and pods that are Running and ready
+ rule {
+ source_labels = [
+ "__meta_kubernetes_pod_container_port_name",
+ "__meta_kubernetes_pod_phase",
+ "__meta_kubernetes_pod_ready",
+ "__meta_kubernetes_pod_container_init",
+ ]
+ separator = "@"
+ regex = coalesce(argument.port_name.value, "postgres-exporter") + "@Running@true@false"
+ action = "keep"
+ }
+
+ // set the namespace label
+ rule {
+ source_labels = ["__meta_kubernetes_namespace"]
+ target_label = "namespace"
+ }
+
+ // set the pod label
+ rule {
+ source_labels = ["__meta_kubernetes_pod_name"]
+ target_label = "pod"
+ }
+
+ // set the container label
+ rule {
+ source_labels = ["__meta_kubernetes_pod_container_name"]
+ target_label = "container"
+ }
+
+ // set a workload label
+ rule {
+ source_labels = [
+ "__meta_kubernetes_pod_controller_kind",
+ "__meta_kubernetes_pod_controller_name",
+ ]
+ separator = "/"
+ target_label = "workload"
+ }
+ // remove the hash from the ReplicaSet
+ rule {
+ source_labels = ["workload"]
+ regex = "(ReplicaSet/.+)-.+"
+ target_label = "workload"
+ }
+
+ // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:"
+ rule {
+ action = "replace"
+ source_labels = [
+ "__meta_kubernetes_pod_label_app_kubernetes_io_name",
+ "__meta_kubernetes_pod_label_k8s_app",
+ "__meta_kubernetes_pod_label_app",
+ ]
+ separator = ";"
+ regex = "^(?:;*)?([^;]+).*$"
+ replacement = "$1"
+ target_label = "app"
+ }
+
+ // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:"
+ rule {
+ action = "replace"
+ source_labels = [
+ "__meta_kubernetes_pod_label_app_kubernetes_io_component",
+ "__meta_kubernetes_pod_label_k8s_component",
+ "__meta_kubernetes_pod_label_component",
+ ]
+ regex = "^(?:;*)?([^;]+).*$"
+ replacement = "$1"
+ target_label = "component"
+ }
+
+ // set a source label
+ rule {
+ action = "replace"
+ replacement = "kubernetes"
+ target_label = "source"
+ }
+ }
+
+ export "output" {
+ value = discovery.relabel.kubernetes.output
+ }
+}
+
+declare "local" {
+
+ argument "port" {
+ comment = "The port to use (default: 9187)"
+ optional = true
+ }
+
+ // arguments for local (static)
+ discovery.relabel "local" {
+ targets = [
+ {
+ "__address__" = format("localhost:%s", coalesce(argument.port.value, "9187")),
+ "source" = "local",
+ },
+ ]
+ }
+
+ export "output" {
+ value = discovery.relabel.local.output
+ }
+}
+
+declare "scrape" {
+ argument "targets" {
+ comment = "Must be a list() of targets"
+ }
+
+ argument "forward_to" {
+ comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to"
+ }
+
+ argument "job_label" {
+ comment = "The job label to add for all postgres metrics (default: integrations/postgres_exporter)"
+ optional = true
+ }
+
+ argument "keep_metrics" {
+ comment = "A regular expression of metrics to keep (default: see below)"
+ optional = true
+ }
+
+ argument "drop_metrics" {
+ comment = "A regular expression of metrics to drop (default: see below)"
+ optional = true
+ }
+
+ argument "scrape_interval" {
+ comment = "How often to scrape metrics from the targets (default: 60s)"
+ optional = true
+ }
+
+ argument "scrape_timeout" {
+ comment = "How long before a scrape times out (default: 10s)"
+ optional = true
+ }
+
+ argument "max_cache_size" {
+ comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate."
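+ // e.g. a single target exposing ~20,000 series suggests a cache of 40,000-100,000 entries (2x-5x);
+ // the 100000 default is an illustrative starting point, not a hard limit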
+ optional = true
+ }
+
+ argument "clustering" {
+ // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/
+ comment = "Whether or not clustering should be enabled (default: false)"
+ optional = true
+ }
+
+ // postgres scrape job
+ prometheus.scrape "postgres" {
+ job_name = coalesce(argument.job_label.value, "integrations/postgres_exporter")
+ forward_to = [prometheus.relabel.postgres.receiver]
+ targets = argument.targets.value
+ scrape_interval = coalesce(argument.scrape_interval.value, "60s")
+ scrape_timeout = coalesce(argument.scrape_timeout.value, "10s")
+
+ clustering {
+ enabled = coalesce(argument.clustering.value, false)
+ }
+ }
+
+ // postgres metric relabelings (post-scrape)
+ prometheus.relabel "postgres" {
+ forward_to = argument.forward_to.value
+ max_cache_size = coalesce(argument.max_cache_size.value, 100000)
+
+ // drop metrics that match the drop_metrics regex
+ rule {
+ source_labels = ["__name__"]
+ regex = coalesce(argument.drop_metrics.value, "(^(go)_.+$)")
+ action = "drop"
+ }
+
+ // keep only metrics that match the keep_metrics regex
+ rule {
+ source_labels = ["__name__"]
+ regex = coalesce(argument.keep_metrics.value, "(up|pg_(settings_(max_connections|superuser_reserved_connections)|stat_(activity_(count|max_tx_duration)|bgwriter_buffers_(alloc|backend(_fsync)?|checkpoint|clean)|database_(blks_(hit|read)|conflicts|deadlocks|numbackends|tup_(deleted|fetched|inserted|returned|updated)|xact_(commit|rollback)))|up))")
+ action = "keep"
+ }
+ }
+}
diff --git a/modules/databases/timeseries/loki/README.md b/modules/databases/timeseries/loki/README.md
new file mode 100644
index 0000000..e9a147f
--- /dev/null
+++ b/modules/databases/timeseries/loki/README.md
@@ -0,0 +1,166 @@
+# Loki Module
+
+Handles scraping Loki metrics.
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component does not perform any scraping itself and is not required for Kubernetes; a custom service discovery can be defined and its targets passed to `loki.scrape`.
+
+#### Arguments
+
+| Name | Required | Default | Description |
+| :---------------- | :------- | :-------------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces |
+| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets |
+| `label_selectors` | _no_ | `["app.kubernetes.io/name=loki"]` | The label selectors to use to find matching targets. **Note:** for Grafana Enterprise Logs this should be `["app.kubernetes.io/name=enterprise-logs"]` |
+| `port_name` | _no_ | `http-metrics` | The name of the port to scrape metrics from |
+
+#### Exports
+
+| Name | Type | Description |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description |
+| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` |
+| `container` | The name of the container |
+| `namespace` | The namespace the target was found in |
+| `pod` | The full name of the pod |
+| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC |
+| `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e. `ReplicaSet/my-app` |
+
+---
+
+### `local`
+
+#### Arguments
+
+| Name | Optional | Default | Description |
+| :----- | :------- | :------ | :------------------------------ |
+| `port` | `true` | `3100` | The port to scrape metrics from |
+
+#### Exports
+
+| Name | Type | Description |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description |
+| :------- | :-------------------------------------------------------------------------------------------- |
+| `source` | Constant value of `local`, denoting where the results came from, this can be useful for LBAC |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name | Required | Default | Description |
+| :---------------- | :------- | :----------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets` | _yes_ | `list(map(string))` | List of targets to scrape |
+| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to |
+| `job_label` | _no_ | `integrations/loki` | The job label to add for all metrics |
+| `keep_metrics` | _no_ | [see code](metrics.river#L228) | A regular expression of metrics to keep |
+| `drop_metrics` | _no_ | [see code](metrics.river#L235) | A regular expression of metrics to drop |
+| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets |
+| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out |
+| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. |
+| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled |
+
+#### Labels
+
+The following labels are automatically added to scraped metrics.
+
+| Label | Description |
+| :---- | :-------------------------------------------------------------------------------- |
+| `job` | For kubernetes scrapes, this label is set to `{{namespace}}/{{controller_name}}` |
+
+---
+
+## Usage
+
+### `kubernetes`
+
+The following example will scrape all Loki instances in the cluster.
+
+```river
+import.git "loki" {
+ repository = "https://github.com/grafana/flow-modules.git"
+ revision = "main"
+ path = "modules/databases/timeseries/loki/metrics.river"
+ pull_frequency = "15m"
+}
+
+// get the targets
+loki.kubernetes "targets" {}
+
+// scrape the targets
+loki.scrape "metrics" {
+ targets = loki.kubernetes.targets.output
+ forward_to = [
+ prometheus.remote_write.default.receiver,
+ ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+ endpoint {
+ url = "http://mimir:9009/api/v1/push"
+
+ basic_auth {
+ username = "example-user"
+ password = "example-password"
+ }
+ }
+}
+```
+
+### `local`
+
+The following example will scrape Loki for metrics on the local machine.
+
+```river
+import.git "loki" {
+ repository = "https://github.com/grafana/flow-modules.git"
+ revision = "main"
+ path = "modules/databases/timeseries/loki/metrics.river"
+ pull_frequency = "15m"
+}
+
+// get the targets
+loki.local "targets" {}
+
+// scrape the targets
+loki.scrape "metrics" {
+ targets = loki.local.targets.output
+ forward_to = [
+ prometheus.remote_write.default.receiver,
+ ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+ endpoint {
+ url = "http://mimir:9009/api/v1/push"
+
+ basic_auth {
+ username = "example-user"
+ password = "example-password"
+ }
+ }
+}
+```
diff --git a/modules/databases/timeseries/loki/metrics.river b/modules/databases/timeseries/loki/metrics.river
new file mode 100644
index 0000000..d186521
--- /dev/null
+++ b/modules/databases/timeseries/loki/metrics.river
@@ -0,0 +1,252 @@
+/*
+Module: job-loki
+Description: Scrapes loki
+
+Note: Every argument except for "forward_to" is optional and has a defined default value. However, the values for these
+ arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... ").
+ This is because if the argument passed in from a consuming module is set to null, the default = " ... " syntax does not
+ override the value passed in, whereas coalesce() will return the first non-null value.
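+
+ For example (an illustrative sketch, not part of this module):
+   argument "scrape_interval" { optional = true }
+   scrape_interval = coalesce(argument.scrape_interval.value, "60s")  // a passed-in null still resolves to "60s"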
+*/
+declare "kubernetes" {
+ // arguments for kubernetes discovery
+ argument "namespaces" {
+ comment = "The namespaces to look for targets in (default: [] is all namespaces)"
+ optional = true
+ }
+
+ argument "field_selectors" {
+ // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+ comment = "The field selectors to use to find matching targets (default: [])"
+ optional = true
+ }
+
+ argument "label_selectors" {
+ // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+ comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=loki\"])"
+ optional = true
+ }
+
+ argument "port_name" {
+ comment = "The name of the port to scrape metrics from (default: http-metrics)"
+ optional = true
+ }
+
+ // loki service discovery for all of the pods
+ discovery.kubernetes "loki" {
+ role = "pod"
+
+ selectors {
+ role = "pod"
+ field = join(coalesce(argument.field_selectors.value, []), ",")
+ label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=loki"]), ",")
+ }
+
+ namespaces {
+ names = coalesce(argument.namespaces.value, [])
+ }
+ }
+
+ // loki relabelings (pre-scrape)
+ discovery.relabel "kubernetes" {
+ targets = discovery.kubernetes.loki.targets
+
+ // keep only the specified metrics port name, and pods that are Running and ready
+ rule {
+ source_labels = [
+ "__meta_kubernetes_pod_container_port_name",
+ "__meta_kubernetes_pod_phase",
+ "__meta_kubernetes_pod_ready",
+ "__meta_kubernetes_pod_container_init",
+ ]
+ separator = "@"
+ regex = coalesce(argument.port_name.value, "http-metrics") + "@Running@true@false"
+ action = "keep"
+ }
+
+ // set the namespace label
+ rule {
+ source_labels = ["__meta_kubernetes_namespace"]
+ target_label = "namespace"
+ }
+
+ // set the pod label
+ rule {
+ source_labels = ["__meta_kubernetes_pod_name"]
+ target_label = "pod"
+ }
+
+ // set the container label
+ rule {
+ source_labels = ["__meta_kubernetes_pod_container_name"]
+ target_label = "container"
+ }
+
+ // set a workload label
+ rule {
+ source_labels = [
+ "__meta_kubernetes_pod_controller_kind",
+ "__meta_kubernetes_pod_controller_name",
+ ]
+ separator = "/"
+ target_label = "workload"
+ }
+ // remove the hash from the ReplicaSet
+ rule {
+ source_labels = ["workload"]
+ regex = "(ReplicaSet/.+)-.+"
+ target_label = "workload"
+ }
+
+ // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:"
+ rule {
+ action = "replace"
+ source_labels = [
+ "__meta_kubernetes_pod_label_app_kubernetes_io_name",
+ "__meta_kubernetes_pod_label_k8s_app",
+ "__meta_kubernetes_pod_label_app",
+ ]
+ separator = ";"
+ regex = "^(?:;*)?([^;]+).*$"
+ replacement = "$1"
+ target_label = "app"
+ }
+
+ // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:"
+ rule {
+ action = "replace"
+ source_labels = [
+ "__meta_kubernetes_pod_label_app_kubernetes_io_component",
+ "__meta_kubernetes_pod_label_k8s_component",
+ "__meta_kubernetes_pod_label_component",
+ ]
+ regex = "^(?:;*)?([^;]+).*$"
+ replacement = "$1"
+ target_label = "component"
+ }
+
+ // set a source label
+ rule {
+ action = "replace"
+ replacement = "kubernetes"
+ target_label = "source"
+ }
+ }
+
+ export "output" {
+ value = discovery.relabel.kubernetes.output
+ }
+}
+
+declare "local" {
+
+ argument "port" {
+ comment = "The port to use (default: 3100)"
+ optional = true
+ }
+
+ // arguments for local (static)
+ discovery.relabel "local" {
"local" { + targets = [ + { + "__address__" = "localhost" + format("%s", coalesce(argument.port.value, "3000")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all loki metric (default: integrations/loki)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // loki scrape job + prometheus.scrape "loki" { + job_name = coalesce(argument.job_label.value, "integrations/loki") + forward_to = [prometheus.relabel.loki.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // loki metric relabelings (post-scrape) + prometheus.relabel "loki" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // set the job label to be namespace/pod as this is what the cloud integration expects + rule { + action = "replace" + source_labels = [ + "namespace", + "workload", + ] + separator = "/" + regex = "(.+)/.+/(.+)" + replacement = "$1/$2" + target_label = "job" + } + } +} diff --git a/modules/databases/timeseries/mimir/README.md b/modules/databases/timeseries/mimir/README.md new file mode 100644 index 0000000..703e986 --- /dev/null +++ b/modules/databases/timeseries/mimir/README.md @@ -0,0 +1,167 @@ +# Mimir Module + +Handles scraping Mimir metrics. 
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component does not perform any scraping itself and is not required for Kubernetes; a custom service discovery can be defined and its targets passed to `mimir.scrape`.
+
+#### Arguments
+
+| Name | Required | Default | Description |
+| :---------------- | :------- | :--------------------------------- | :--------------------------------------------------------------------------- |
+| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces |
+| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets |
+| `label_selectors` | _no_ | `["app.kubernetes.io/name=mimir"]` | The label selectors to use to find matching targets |
+| `port_name` | _no_ | `http-metrics` | The name of the port to scrape metrics from |
+
+#### Exports
+
+| Name | Type | Description |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description |
+| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` |
+| `container` | The name of the container |
+| `namespace` | The namespace the target was found in |
+| `pod` | The full name of the pod |
+| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC |
+| `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e. `ReplicaSet/my-app` |
+
+---
+
+### `local`
+
+#### Arguments
+
+| Name | Optional | Default | Description |
+| :----- | :------- | :------ | :------------------------------ |
+| `port` | `true` | `8080` | The port to scrape metrics from |
+
+#### Exports
+
+| Name | Type | Description |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description |
+| :------- | :-------------------------------------------------------------------------------------------- |
+| `source` | Constant value of `local`, denoting where the results came from, this can be useful for LBAC |
+
+---
+
+### `scrape`
+
+#### Arguments
+
+| Name | Required | Default | Description |
+| :---------------- | :------- | :----------------------------- | :---------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `targets` | _yes_ | `list(map(string))` | List of targets to scrape |
+| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to |
+| `job_label` | _no_ | `integrations/mimir` | The job label to add for all metrics |
+| `keep_metrics` | _no_ | [see code](metrics.river#L228) | A regular expression of metrics to keep |
+| `drop_metrics` | _no_ | [see code](metrics.river#L235) | A regular expression of metrics to drop |
+| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets |
+| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out |
+| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. |
+| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled |
+
+#### Labels
+
+The following labels are automatically added to scraped metrics.
+
+| Label | Description |
+| :---- | :-------------------------------------------------------------------------------- |
+| `job` | For kubernetes scrapes, this label is set to `{{namespace}}/{{controller_name}}` |
+
+---
+
+## Usage
+
+### `kubernetes`
+
+The following example will scrape all Mimir instances in the cluster.
+
+```river
+import.git "mimir" {
+ repository = "https://github.com/grafana/flow-modules.git"
+ revision = "main"
+ path = "modules/databases/timeseries/mimir/metrics.river"
+ pull_frequency = "15m"
+}
+
+// get the targets
+mimir.kubernetes "targets" {}
+
+// scrape the targets
+mimir.scrape "metrics" {
+ targets = mimir.kubernetes.targets.output
+ forward_to = [
+ prometheus.remote_write.default.receiver,
+ ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+ endpoint {
+ url = "http://mimir:9009/api/v1/push"
+
+ basic_auth {
+ username = "example-user"
+ password = "example-password"
+ }
+ }
+}
+```
+
+### `local`
+
+The following example will scrape Mimir for metrics on the local machine.
+
+```river
+import.git "mimir" {
+ repository = "https://github.com/grafana/flow-modules.git"
+ revision = "main"
+ path = "modules/databases/timeseries/mimir/metrics.river"
+ pull_frequency = "15m"
+}
+
+// get the targets
+mimir.local "targets" {}
+
+// scrape the targets
+mimir.scrape "metrics" {
+ targets = mimir.local.targets.output
+ forward_to = [
+ prometheus.remote_write.default.receiver,
+ ]
+}
+
+// write the metrics
+prometheus.remote_write "default" {
+ endpoint {
+ url = "http://mimir:9009/api/v1/push"
+
+ basic_auth {
+ username = "example-user"
+ password = "example-password"
+ }
+ }
+}
+```
diff --git a/modules/databases/timeseries/mimir/metrics.river b/modules/databases/timeseries/mimir/metrics.river
new file mode 100644
index 0000000..eacc056
--- /dev/null
+++ b/modules/databases/timeseries/mimir/metrics.river
@@ -0,0 +1,252 @@
+/*
+Module: job-mimir
+Description: Scrapes mimir
+
+Note: Every argument except for "forward_to" is optional and has a defined default value. However, the values for these
+ arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... ").
+ This is because if the argument passed in from a consuming module is set to null, the default = " ... " syntax does not
+ override the value passed in, whereas coalesce() will return the first non-null value.
+*/
+declare "kubernetes" {
+ // arguments for kubernetes discovery
+ argument "namespaces" {
+ comment = "The namespaces to look for targets in (default: [] is all namespaces)"
+ optional = true
+ }
+
+ argument "field_selectors" {
+ // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/
+ comment = "The field selectors to use to find matching targets (default: [])"
+ optional = true
+ }
+
+ argument "label_selectors" {
+ // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/
+ comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=mimir\"])"
+ optional = true
+ }
+
+ argument "port_name" {
+ comment = "The name of the port to scrape metrics from (default: http-metrics)"
+ optional = true
+ }
+
+ // mimir service discovery for all of the pods
+ discovery.kubernetes "mimir" {
+ role = "pod"
+
+ selectors {
+ role = "pod"
+ field = join(coalesce(argument.field_selectors.value, []), ",")
+ label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=mimir"]), ",")
+ }
+
+ namespaces {
+ names = coalesce(argument.namespaces.value, [])
+ }
+ }
+
+ // mimir relabelings (pre-scrape)
+ discovery.relabel "kubernetes" {
+ targets = discovery.kubernetes.mimir.targets
+
+ // keep only the specified metrics port name, and pods that are Running and ready
+ rule {
+ source_labels = [
+ "__meta_kubernetes_pod_container_port_name",
+ "__meta_kubernetes_pod_phase",
+ "__meta_kubernetes_pod_ready",
+ "__meta_kubernetes_pod_container_init",
+ ]
+ separator = "@"
+ regex = coalesce(argument.port_name.value, "http-metrics") + "@Running@true@false"
+ action = "keep"
+ }
+
+ // set the namespace label
+ rule {
+ source_labels = ["__meta_kubernetes_namespace"]
+ target_label = "namespace"
+ }
+
+ // set the pod label
+ rule {
+ source_labels = ["__meta_kubernetes_pod_name"]
+ target_label = "pod"
+ }
+
+ // set the container label
+ rule {
+ source_labels = ["__meta_kubernetes_pod_container_name"]
+ target_label = "container"
+ }
+
+ // set a workload label
+ rule {
+ source_labels = [
+ "__meta_kubernetes_pod_controller_kind",
"__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare "local" { + + argument "port_name" { + comment = "The port to use (default: 8080)" + optional = true + } + + // arguments for local (static) + discovery.relabel "local" { + targets = [ + { + "__address__" = "localhost" + format("%s", coalesce(argument.port.value, "8080")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all metrics (default: integrations/mimir)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." 
+ optional = true
+ }
+
+ argument "clustering" {
+ // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/
+ comment = "Whether or not clustering should be enabled (default: false)"
+ optional = true
+ }
+
+ // mimir scrape job
+ prometheus.scrape "mimir" {
+ job_name = coalesce(argument.job_label.value, "integrations/mimir")
+ forward_to = [prometheus.relabel.mimir.receiver]
+ targets = argument.targets.value
+ scrape_interval = coalesce(argument.scrape_interval.value, "60s")
+ scrape_timeout = coalesce(argument.scrape_timeout.value, "10s")
+
+ clustering {
+ enabled = coalesce(argument.clustering.value, false)
+ }
+ }
+
+ // mimir metric relabelings (post-scrape)
+ prometheus.relabel "mimir" {
+ forward_to = argument.forward_to.value
+ max_cache_size = coalesce(argument.max_cache_size.value, 100000)
+
+ // drop metrics that match the drop_metrics regex
+ rule {
+ source_labels = ["__name__"]
+ regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)")
+ action = "drop"
+ }
+
+ // keep only metrics that match the keep_metrics regex
+ rule {
+ source_labels = ["__name__"]
+ regex = coalesce(argument.keep_metrics.value, "(.+)")
+ action = "keep"
+ }
+
+ // set the job label to namespace/controller_name, as this is what the cloud integration expects
+ rule {
+ action = "replace"
+ source_labels = [
+ "namespace",
+ "workload",
+ ]
+ separator = "/"
+ regex = "(.+)/.+/(.+)"
+ replacement = "$1/$2"
+ target_label = "job"
+ }
+ }
+}
diff --git a/modules/databases/timeseries/pyroscope/README.md b/modules/databases/timeseries/pyroscope/README.md
new file mode 100644
index 0000000..7dc5991
--- /dev/null
+++ b/modules/databases/timeseries/pyroscope/README.md
@@ -0,0 +1,167 @@
+# Pyroscope Module
+
+Handles scraping Pyroscope metrics.
+
+## Components
+
+- [`kubernetes`](#kubernetes)
+- [`local`](#local)
+- [`scrape`](#scrape)
+
+### `kubernetes`
+
+Handles discovery of Kubernetes targets and exports them. This component does not perform any scraping itself and is not required for Kubernetes; a custom service discovery can be defined and its targets passed to `pyroscope.scrape`.
+
+#### Arguments
+
+| Name | Required | Default | Description |
+| :---------------- | :------- | :------------------------------------- | :--------------------------------------------------------------------------- |
+| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces |
+| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets |
+| `label_selectors` | _no_ | `["app.kubernetes.io/name=pyroscope"]` | The label selectors to use to find matching targets |
+| `port_name` | _no_ | `http-metrics` | The name of the port to scrape metrics from |
+
+#### Exports
+
+| Name | Type | Description |
+| :------- | :------------------ | :------------------------- |
+| `output` | `list(map(string))` | List of discovered targets |
+
+#### Labels
+
+The following labels are automatically added to exported targets.
+
+| Label | Description |
+| :---------- | :--------------------------------------------------------------------------------------------------------------------------------------------------- |
+| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` |
+| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` |
+| `container` | The name of the container |
+| `namespace` | The namespace the target was found in |
| + | `pod` | The full name of the pod | + | `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | + | `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e. `ReplicaSet/my-app` | + +--- + +### `local` + +#### Arguments + +| Name | Required | Default | Description | +| :----- | :------- | :------ | :------------------------------ | +| `port` | _no_ | `4040` | The port to scrape metrics from | + +#### Exports + +| Name | Type | Description | +| :------- | :------------------ | :------------------------- | +| `output` | `list(map(string))` | List of discovered targets | + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :------- | :-------------------------------------------------------------------------------------------- | +| `source` | Constant value of `local`, denoting where the results came from, this can be useful for LBAC | + +--- + +### `scrape` + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :------------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `targets` | _yes_ | `list(map(string))` | List of targets to scrape | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `job_label` | _no_ | `integrations/pyroscope` | The job label to add for all Pyroscope metrics | +| `keep_metrics` | _no_ | [see code](metrics.river) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Labels + +N/A + +--- + +## Usage + +### `kubernetes` + +The following example will scrape all Pyroscope instances in the cluster. + +```river +import.git "pyroscope" { + repository = "https://github.com/grafana/flow-modules.git" + revision = "main" + path = "modules/databases/timeseries/pyroscope/metrics.river" + pull_frequency = "15m" +} + +// get the targets +pyroscope.kubernetes "targets" {} + +// scrape the targets +pyroscope.scrape "metrics" { + targets = pyroscope.kubernetes.targets.output + forward_to = [ + prometheus.remote_write.default.receiver, + ] +} + +// write the metrics +prometheus.remote_write "default" { + endpoint { + url = "http://mimir:9009/api/v1/push" + + basic_auth { + username = "example-user" + password = "example-password" + } + } +} +```
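+ +The optional discovery arguments can be combined to narrow what is discovered. The following is a minimal sketch; the namespace and selector values here are hypothetical: + +```river +pyroscope.kubernetes "targets" { + // hypothetical namespace and selector values + namespaces = ["observability"] + label_selectors = ["app.kubernetes.io/name=pyroscope"] + port_name = "http-metrics" +} +```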
+ +### `local` + +The following example will scrape Pyroscope for metrics on the local machine. + +```river +import.git "pyroscope" { + repository = "https://github.com/grafana/flow-modules.git" + revision = "main" + path = "modules/databases/timeseries/pyroscope/metrics.river" + pull_frequency = "15m" +} + +// get the targets +pyroscope.local "targets" {} + +// scrape the targets +pyroscope.scrape "metrics" { + targets = pyroscope.local.targets.output + forward_to = [ + prometheus.remote_write.default.receiver, + ] +} + +// write the metrics +prometheus.remote_write "default" { + endpoint { + url = "http://mimir:9009/api/v1/push" + + basic_auth { + username = "example-user" + password = "example-password" + } + } +} +``` diff --git a/modules/databases/timeseries/pyroscope/metrics.river b/modules/databases/timeseries/pyroscope/metrics.river new file mode 100644 index 0000000..0330fa9 --- /dev/null +++ b/modules/databases/timeseries/pyroscope/metrics.river @@ -0,0 +1,252 @@ +/* +Module: job-pyroscope +Description: Scrapes pyroscope + +Note: Every argument except for "forward_to" is optional and has a defined default value. However, the default values for these + arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax + does not override the value passed in, whereas coalesce() returns the first non-null value. +*/ +declare "kubernetes" { + // arguments for kubernetes discovery + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ + comment = "The field selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=pyroscope\"])" + optional = true + } + + argument "port_name" { + comment = "The name of the port to scrape metrics from (default: http-metrics)" + optional = true + } + + // pyroscope service discovery for all of the pods + discovery.kubernetes "pyroscope" { + role = "pod" + + selectors { + role = "pod" + field = join(coalesce(argument.field_selectors.value, []), ",") + label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=pyroscope"]), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // pyroscope relabelings (pre-scrape) + discovery.relabel "kubernetes" { + targets = discovery.kubernetes.pyroscope.targets + + // keep only the specified metrics port name, and pods that are Running and ready + rule { + source_labels = [ + "__meta_kubernetes_pod_container_port_name", + "__meta_kubernetes_pod_phase", + "__meta_kubernetes_pod_ready", + "__meta_kubernetes_pod_container_init", + ] + separator = "@" + regex = coalesce(argument.port_name.value, "http-metrics") + "@Running@true@false" + action = "keep" + } + + // set the namespace label + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + // set the pod label + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + // set the container label + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + // set a workload label + rule {
source_labels = [ + "__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare "local" { + + argument "port" { + comment = "The port to use (default: 4040)" + optional = true + } + + // arguments for local (static) + discovery.relabel "local" { + targets = [ + { + "__address__" = format("localhost:%s", coalesce(argument.port.value, "4040")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all pyroscope metrics (default: integrations/pyroscope)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate."
+ optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // pyroscope scrape job + prometheus.scrape "pyroscope" { + job_name = coalesce(argument.job_label.value, "integrations/pyroscope") + forward_to = [prometheus.relabel.pyroscope.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // pyroscope metric relabelings (post-scrape) + prometheus.relabel "pyroscope" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // set the job label to be namespace/workload as this is what the cloud integration expects + rule { + action = "replace" + source_labels = [ + "namespace", + "workload", + ] + separator = "/" + regex = "(.+)/.+/(.+)" + replacement = "$1/$2" + target_label = "job" + } + } +} diff --git a/modules/databases/timeseries/tempo/README.md b/modules/databases/timeseries/tempo/README.md new file mode 100644 index 0000000..fcb6a6d --- /dev/null +++ b/modules/databases/timeseries/tempo/README.md @@ -0,0 +1,167 @@ +# Tempo Module + +Handles scraping Tempo metrics. + +## Components + +- [`kubernetes`](#kubernetes) +- [`local`](#local) +- [`scrape`](#scrape) + +### `kubernetes` + +Handles discovery of Kubernetes targets and exports them. This component does not perform any scraping at all, and it is not required for Kubernetes: a custom service discovery and list of targets can be defined and passed to `tempo.scrape` instead. + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :--------------------------------- | :-------------------------------------------------------------------------- | +| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces | +| `label_selectors` | _no_ | `["app.kubernetes.io/name=tempo"]` | The label selectors to use to find matching targets | +| `port_name` | _no_ | `http-metrics` | The name of the port to scrape metrics from | + +#### Exports + +| Name | Type | Description | +| :------- | :------------------ | :------------------------- | +| `output` | `list(map(string))` | List of discovered targets | + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :---------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` | +| `container` | The name of the container, usually `tempo` | +| `namespace` | The namespace the target was found in.
| + | `pod` | The full name of the pod | + | `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | + | `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e. `ReplicaSet/my-app` | + +--- + +### `local` + +#### Arguments + +| Name | Required | Default | Description | +| :----- | :------- | :------ | :------------------------------ | +| `port` | _no_ | `3200` | The port to scrape metrics from | + +#### Exports + +| Name | Type | Description | +| :------- | :------------------ | :------------------------- | +| `output` | `list(map(string))` | List of discovered targets | + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :------- | :-------------------------------------------------------------------------------------------- | +| `source` | Constant value of `local`, denoting where the results came from, this can be useful for LBAC | + +--- + +### `scrape` + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :------------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `targets` | _yes_ | `list(map(string))` | List of targets to scrape | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `job_label` | _no_ | `integrations/tempo` | The job label to add for all metrics | +| `keep_metrics` | _no_ | [see code](metrics.river) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Labels + +N/A + +--- + +## Usage + +### `kubernetes` + +The following example will scrape all Tempo instances in the cluster. + +```river +import.git "tempo" { + repository = "https://github.com/grafana/flow-modules.git" + revision = "main" + path = "modules/databases/timeseries/tempo/metrics.river" + pull_frequency = "15m" +} + +// get the targets +tempo.kubernetes "targets" {} + +// scrape the targets +tempo.scrape "metrics" { + targets = tempo.kubernetes.targets.output + forward_to = [ + prometheus.remote_write.default.receiver, + ] +} + +// write the metrics +prometheus.remote_write "default" { + endpoint { + url = "http://mimir:9009/api/v1/push" + + basic_auth { + username = "example-user" + password = "example-password" + } + } +} +```
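+ +The optional `scrape` arguments can also be set explicitly rather than relying on the defaults. A sketch using hypothetical values for the interval, keep regex, and clustering flag: + +```river +tempo.scrape "metrics" { + targets = tempo.kubernetes.targets.output + forward_to = [prometheus.remote_write.default.receiver] + // hypothetical overrides of the optional arguments + scrape_interval = "30s" + keep_metrics = "(up|tempo_.+)" + clustering = true +} +```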
+ +### `local` + +The following example will scrape Tempo for metrics on the local machine. + +```river +import.git "tempo" { + repository = "https://github.com/grafana/flow-modules.git" + revision = "main" + path = "modules/databases/timeseries/tempo/metrics.river" + pull_frequency = "15m" +} + +// get the targets +tempo.local "targets" {} + +// scrape the targets +tempo.scrape "metrics" { + targets = tempo.local.targets.output + forward_to = [ + prometheus.remote_write.default.receiver, + ] +} + +// write the metrics +prometheus.remote_write "default" { + endpoint { + url = "http://mimir:9009/api/v1/push" + + basic_auth { + username = "example-user" + password = "example-password" + } + } +} +``` diff --git a/modules/databases/timeseries/tempo/metrics.river b/modules/databases/timeseries/tempo/metrics.river new file mode 100644 index 0000000..6307a33 --- /dev/null +++ b/modules/databases/timeseries/tempo/metrics.river @@ -0,0 +1,252 @@ +/* +Module: job-tempo +Description: Scrapes tempo + +Note: Every argument except for "forward_to" is optional and has a defined default value. However, the default values for these + arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax + does not override the value passed in, whereas coalesce() returns the first non-null value. +*/ +declare "kubernetes" { + // arguments for kubernetes discovery + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ + comment = "The field selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=tempo\"])" + optional = true + } + + argument "port_name" { + comment = "The name of the port to scrape metrics from (default: http-metrics)" + optional = true + } + + // tempo service discovery for all of the pods + discovery.kubernetes "tempo" { + role = "pod" + + selectors { + role = "pod" + field = join(coalesce(argument.field_selectors.value, []), ",") + label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=tempo"]), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // tempo relabelings (pre-scrape) + discovery.relabel "kubernetes" { + targets = discovery.kubernetes.tempo.targets + + // keep only the specified metrics port name, and pods that are Running and ready + rule { + source_labels = [ + "__meta_kubernetes_pod_container_port_name", + "__meta_kubernetes_pod_phase", + "__meta_kubernetes_pod_ready", + "__meta_kubernetes_pod_container_init", + ] + separator = "@" + regex = coalesce(argument.port_name.value, "http-metrics") + "@Running@true@false" + action = "keep" + } + + // set the namespace label + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + // set the pod label + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + // set the container label + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + // set a workload label + rule { + source_labels = [ + "__meta_kubernetes_pod_controller_kind", +
"__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare "local" { + + argument "port_name" { + comment = "The port to use (default: 3200)" + optional = true + } + + // arguments for local (static) + discovery.relabel "local" { + targets = [ + { + "__address__" = "localhost" + format("%s", coalesce(argument.port.value, "3200")), + "source" = "local", + }, + ] + } + + export "output" { + value = discovery.relabel.local.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all tempo metric (default: integrations/tempo)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." 
+ optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // tempo scrape job + prometheus.scrape "tempo" { + job_name = coalesce(argument.job_label.value, "integrations/tempo") + forward_to = [prometheus.relabel.tempo.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // tempo metric relabelings (post-scrape) + prometheus.relabel "tempo" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + action = "keep" + } + + // set the job label to be namespace/workload as this is what the cloud integration expects + rule { + action = "replace" + source_labels = [ + "namespace", + "workload", + ] + separator = "/" + regex = "(.+)/.+/(.+)" + replacement = "$1/$2" + target_label = "job" + } + } +} diff --git a/modules/kubernetes/README.md b/modules/kubernetes/README.md new file mode 100644 index 0000000..c7ce462 --- /dev/null +++ b/modules/kubernetes/README.md @@ -0,0 +1,8 @@ +# Kubernetes Modules + +- [annotations](./annotations/) +- [cert-manager](./cert-manager/) +- [core](./core/) +- [konnectivity-agent](./konnectivity-agent/) +- [kube-state-metrics](./kube-state-metrics/) +- [opencost](./opencost/) diff --git a/modules/kubernetes/annotations/README.md b/modules/kubernetes/annotations/README.md new file mode 100644 index 0000000..e1e95d7 --- /dev/null +++ b/modules/kubernetes/annotations/README.md @@ -0,0 +1,359 @@ +# Kubernetes Annotation Modules + +**Modules:** + +- [`metrics.river`](#metricsriver) +- [`probes.river`](#probesriver) + +## `metrics.river` + +This module is meant to be used to automatically scrape targets based on a certain role and set of annotations. This module can be consumed +multiple times with different roles. The supported roles are: + +- pod +- service +- endpoints + +Typically, if mimicking the behavior of the prometheus-operator ServiceMonitor functionality, you would use role="endpoints"; if +mimicking the PodMonitor functionality, you would use role="pod". It is important to understand that with endpoints, +the target is typically going to be a pod, and whatever annotations are set on the service are automatically propagated to the +endpoints. This is why the "endpoints" role is used: it scrapes the pod, but also considers the service annotations. Using +role="endpoints" will scrape each endpoint associated with the service. If role="service" is used, only the service itself is scraped, +hitting just one of the endpoints associated with the service. + +This is where you must consider your scraping strategy. For example, if you scrape a service like "kube-state-metrics" using +role="endpoints" you should only have a single replica of the kube-state-metrics pod; if you have multiple replicas, you should use +role="service" or a separate non-annotation job completely.
Scraping a service instead of endpoints is typically a rare use case, but +it is supported. + +There are other considerations for using annotation based scraping as well, namely the metric relabeling rules that happen post-scrape. If +you have a target that you want to apply many relabelings to, or one with a very large metrics response payload, performance wise it is +better to have a separate job for that target rather than using annotations, as every target goes through the same relabeling. +Typical deployment strategies/options would be: + +**Option #1 (recommended):** + +- Annotation Scraping for role="endpoints" +- Separate Jobs for specific service scrapes (i.e. kube-state-metrics, node-exporter, etc.) or large metric payloads +- Separate Jobs for K8s API scraping (i.e. cadvisor, kube-apiserver, kube-scheduler, etc.) + +**Option #2:** + +- Annotation Scraping for `role="pod"` +- Annotation Scraping for `role="service"` (i.e. kube-state-metrics, node-exporter, etc.) +- Separate Jobs for specific use cases or large metric payloads +- Separate Jobs for K8s API scraping (i.e. cadvisor, kube-apiserver, kube-scheduler, etc.) + +At no point should you use role="endpoints" and role="pod" together, as this will result in duplicate targets being scraped, thus +generating duplicate metrics. If you want to scrape both the pod and the service, use Option #2. + +Each port attached to a service/pod/endpoint is an eligible target, and oftentimes it will have multiple ports. +There may be instances when you want to scrape all ports, or some ports and not others. To support this +the following annotations are available: + +```yaml +metrics.grafana.com/scrape: true +``` + +The default scraping scheme is http; this can be specified as a single value which overrides the scheme being used for all +ports attached to the target: + +```yaml +metrics.grafana.com/scheme: https +``` + +The default path to scrape is /metrics; this can be specified as a single value which overrides the scrape path being used +for all ports attached to the target: + +```yaml +metrics.grafana.com/path: /metrics/some_path +``` + +The default port to scrape is the target port; this can be specified as a single value which overrides the scrape port being +used for all ports attached to the target. Note that even if a target has multiple ports, the relabel_config targets are +deduped before scraping: + +```yaml +metrics.grafana.com/port: 8080 +``` + +The default interval to scrape is 1m; this can be specified as a single value which overrides the scrape interval being used +for all ports attached to the target: + +```yaml +metrics.grafana.com/interval: 5m +``` + +The default timeout for scraping is 10s; this can be specified as a single value which overrides the scrape timeout being +used for all ports attached to the target: + +```yaml +metrics.grafana.com/timeout: 30s +``` + +The default job is namespace/{{ service name }} or namespace/{{ controller_name }} depending on the role; there may be instances +in which a different job name is required because of a set of dashboards, rules, etc.
To support this there is a job annotation +which overrides the default value: + +```yaml +metrics.grafana.com/job: integrations/kubernetes/kube-state-metrics +``` + +### `kubernetes` + +Handles discovery of Kubernetes targets and exports them. This component does not perform any scraping at all, and it is not required for Kubernetes: a custom service discovery and list of targets can be defined and passed to the `scrape` component instead. + +#### Arguments + +| Name | Required | Default | Description | +| :-------------------------- | :------- | :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `role` | _no_ | `endpoints` | The role to use when looking for targets to scrape via annotations, can be: endpoints, service, pod | +| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces | +| `field_selectors` | _no_ | `[]` | The [field selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/) to use to find matching targets | +| `label_selectors` | _no_ | `[]` | The [label selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) to use to find matching targets | +| `annotation` | _no_ | `metrics.grafana.com` | The domain to use when looking for annotations, Kubernetes selectors do not support a logical `OR`, if multiple types of annotations are needed, this module should be invoked multiple times. | +| `tenant` | _no_ | `.*` | The tenant to write metrics to. This does not have to be the tenantId, this is the value to look for in the `{{argument.annotation.value}}/tenant` annotation i.e. (`metrics.grafana.com/tenant`), and this can be a regular expression. It is recommended to use a default i.e. `primary\|`, which would match the primary tenant or an empty string meaning the tenant is not set. | +| `scrape_port_named_metrics` | _no_ | `false` | Whether or not to automatically scrape targets that have a port with `.*metrics.*` in the name | + +#### Exports + +| Name | Type | Description | +| :------- | :------------------ | :------------------------- | +| `output` | `list(map(string))` | List of discovered targets | + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :---------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` | +| `container` | The name of the container | +| `namespace` | The namespace the target was found in. | +| `pod` | The full name of the pod | +| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | +| `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e.
`ReplicaSet/my-app` | + +### `scrape` + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :------------------------ | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `targets` | _yes_ | `list(map(string))` | List of targets to scrape | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `keep_metrics` | _no_ | [see code](metrics.river) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Labels + +N/A
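+ +Because Kubernetes selectors do not support a logical `OR`, scraping more than one annotation domain means invoking the module once per domain. A sketch, assuming `prometheus.io` annotations are also in use (the import name is taken from the usage example below): + +```river +// discover targets annotated with metrics.grafana.com/* +metric_annotations.kubernetes "grafana" { + annotation = "metrics.grafana.com" +} + +// discover targets annotated with prometheus.io/* +metric_annotations.kubernetes "prometheus" { + annotation = "prometheus.io" +} + +// scrape both sets of discovered targets +metric_annotations.scrape "metrics" { + targets = concat( + metric_annotations.kubernetes.grafana.output, + metric_annotations.kubernetes.prometheus.output, + ) + forward_to = [prometheus.remote_write.default.receiver] +} +``` + +---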
+## `probes.river` + +This module is meant to be used to automatically probe targets based on a certain role and set of annotations. This module can be consumed +multiple times with different roles. The supported roles are: + +- service +- ingress + +Each port attached to a service is an eligible target, and oftentimes a service will have multiple ports. +There may be instances when you want to probe all ports, or some ports and not others. To support this +the following annotations are available: + +Only services with the probe annotation set to true are probed; this can be a single value, i.e. probe all ports for +the service: + +```yaml +probes.grafana.com/probe: true +``` + +The default probing scheme is ""; this can be specified as a single value which overrides it. If using the HTTP prober, +specify "http" or "https": + +```yaml +probes.grafana.com/scheme: https +``` + +The default path to probe is /metrics; this can be specified as a single value which overrides +the probe path being used for all ports attached to the service: + +```yaml +probes.grafana.com/path: /metrics/some_path +``` + +The default module to use for probing is "unknown", as the available modules are defined in your blackbox exporter +configuration file; this can be specified as a single value which overrides the probe module being used for all ports +attached to the service: + +```yaml +probes.grafana.com/module: http_2xx +``` + +The default port to probe is the service port; this can be specified as a single value which +overrides the probe port being used for all ports attached to the service. Note that even if a service has +multiple ports, the relabel_config targets are deduped before probing: + +```yaml +probes.grafana.com/port: 8080 +``` + +The value to set for the job label; by default this is "integrations/blackbox_exporter" if not specified: + +```yaml +probes.grafana.com/job: blackbox-exporter +``` + +The default interval to probe is 1m; this can be specified as a single value which overrides +the probe interval being used for all ports attached to the service: + +```yaml +probes.grafana.com/interval: 5m +``` + +The default timeout for probing is 10s; this can be specified as a single value which overrides +the probe timeout being used for all ports attached to the service: + +```yaml +probes.grafana.com/timeout: 30s +``` + +### `kubernetes` + +Handles discovery of Kubernetes targets and exports them. This component does not perform any probing at all, and it is not required for Kubernetes: a custom service discovery and list of targets can be defined and passed to the `scrape` component instead. + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :-------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `role` | _no_ | `service` | The role to use when looking for targets to probe via annotations, can be: service, ingress | +| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces | +| `field_selectors` | _no_ | `[]` | The [field selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/) to use to find matching targets | +| `label_selectors` | _no_ | `[]` | The [label selectors](https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/) to use to find matching targets | +| `annotation` | _no_ | `probes.grafana.com` | The domain to use when looking for annotations, Kubernetes selectors do not support a logical `OR`, if multiple types of annotations are needed, this module should be invoked multiple times. | +| `tenant` | _no_ | `.*` | The tenant to write metrics to.
This does not have to be the tenantId, this is the value to look for in the `{{argument.annotation.value}}/tenant` annotation i.e. (`probes.grafana.com/tenant`), and this can be a regular expression. It is recommended to use a default i.e. `primary\|`, which would match the primary tenant or an empty string meaning the tenant is not set. | +| `blackbox_url` | _no_ | `""` | The address of the blackbox exporter to use (without the protocol), only the hostname and port i.e. `blackbox-prometheus-blackbox-exporter.default.svc.cluster.local:9115` | + +#### Exports + +| Name | Type | Description | +| :------- | :------------------ | :------------------------- | +| `output` | `list(map(string))` | List of discovered targets | + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :---------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` | +| `namespace` | The namespace the target was found in. | +| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | +| `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e. `ReplicaSet/my-app` | + +### `scrape` + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :----------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `targets` | _yes_ | `list(map(string))` | List of targets to scrape | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `keep_metrics` | _no_ | [see code](probes.river) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](probes.river) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Labels + +N/A + +--- + +## Usage + +### `metrics` + +The following example will scrape all metric annotation instances in the cluster.
+ +```river +import.git "metric_annotations" { + repository = "https://github.com/grafana/flow-modules.git" + revision = "main" + path = "modules/kubernetes/annotations/metrics.river" + pull_frequency = "15m" +} + +// get the targets +metric_annotations.kubernetes "targets" { + annotation = "metrics.grafana.com" +} + +// scrape the annotations +metric_annotations.scrape "metrics" { + targets = metric_annotations.kubernetes.targets.output + forward_to = [ + prometheus.remote_write.default.receiver, + ] +} + +// write the metrics +prometheus.remote_write "default" { + endpoint { + url = "http://mimir:9009/api/v1/push" + + basic_auth { + username = "example-user" + password = "example-password" + } + } +} +``` + +### `probes` + +The following example will scrape all probe annotation instances in the cluster. + +```river +import.git "probe_annotations" { + repository = "https://github.com/grafana/flow-modules.git" + revision = "main" + path = "modules/kubernetes/annotations/probes.river" + pull_frequency = "15m" +} + +// get the targets +probe_annotations.kubernetes "targets" { + annotation = "probes.grafana.com" +} + +// scrape the annotations +probe_annotations.scrape "probes" { + targets = probe_annotations.kubernetes.targets.output + forward_to = [ + prometheus.remote_write.default.receiver, + ] +} + +// write the metrics +prometheus.remote_write "default" { + endpoint { + url = "http://mimir:9009/api/v1/push" + + basic_auth { + username = "example-user" + password = "example-password" + } + } +} +```
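+ +When probing, the `blackbox_url` argument points the generated targets at a blackbox exporter. A minimal sketch, reusing the exporter address shown in the arguments table above: + +```river +probe_annotations.kubernetes "targets" { + annotation = "probes.grafana.com" + // hostname and port of the blackbox exporter, without the protocol + blackbox_url = "blackbox-prometheus-blackbox-exporter.default.svc.cluster.local:9115" +} +```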
diff --git a/modules/kubernetes/annotations/metrics.river b/modules/kubernetes/annotations/metrics.river new file mode 100644 index 0000000..2445aa7 --- /dev/null +++ b/modules/kubernetes/annotations/metrics.river @@ -0,0 +1,604 @@ +/* +Module: job-annotation-scrape +Description: Scrapes targets for metrics based on annotations + +Note: Every argument except for "forward_to" and "role" is optional and has a defined default value. However, the default values for these + arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax + does not override the value passed in, whereas coalesce() returns the first non-null value. + +Kubernetes Annotation Auto-Scraping +------------------------------------------------------------------------------------------------------------------------------------ +This module is meant to be used to automatically scrape targets based on a certain role and set of annotations. This module can be consumed +multiple times with different roles. The supported roles are: + + - pod + - service + - endpoints + +Typically, if mimicking the behavior of the prometheus-operator ServiceMonitor functionality, you would use role="endpoints"; if +mimicking the PodMonitor functionality, you would use role="pod". It is important to understand that with endpoints, +the target is typically going to be a pod, and whatever annotations are set on the service are automatically propagated to the +endpoints. This is why the "endpoints" role is used: it scrapes the pod, but also considers the service annotations. Using +role="endpoints" will scrape each endpoint associated with the service. If role="service" is used, only the service itself is scraped, +hitting just one of the endpoints associated with the service. + +This is where you must consider your scraping strategy. For example, if you scrape a service like "kube-state-metrics" using +role="endpoints" you should only have a single replica of the kube-state-metrics pod; if you have multiple replicas, you should use +role="service" or a separate non-annotation job completely. Scraping a service instead of endpoints is typically a rare use case, but +it is supported. + +There are other considerations for using annotation based scraping as well, namely the metric relabeling rules that happen post-scrape. If +you have a target that you want to apply many relabelings to, or one with a very large metrics response payload, performance wise it is +better to have a separate job for that target rather than using annotations, as every target goes through the same relabeling. +Typical deployment strategies/options would be: + +Option #1 (recommended): + - Annotation Scraping for role="endpoints" + - Separate Jobs for specific service scrapes (i.e. kube-state-metrics, node-exporter, etc.) or large metric payloads + - Separate Jobs for K8s API scraping (i.e. cadvisor, kube-apiserver, kube-scheduler, etc.) + +Option #2: + - Annotation Scraping for role="pod" + - Annotation Scraping for role="service" (i.e. kube-state-metrics, node-exporter, etc.) + - Separate Jobs for specific use cases or large metric payloads + - Separate Jobs for K8s API scraping (i.e. cadvisor, kube-apiserver, kube-scheduler, etc.) + +At no point should you use role="endpoints" and role="pod" together, as this will result in duplicate targets being scraped, thus +generating duplicate metrics. If you want to scrape both the pod and the service, use Option #2. + +Each port attached to a service/pod/endpoint is an eligible target, and oftentimes it will have multiple ports. +There may be instances when you want to scrape all ports, or some ports and not others. To support this +the following annotations are available: + + metrics.grafana.com/scrape: true + +the default scraping scheme is http; this can be specified as a single value which overrides the scheme being used for all +ports attached to the target: + + metrics.grafana.com/scheme: https + +the default path to scrape is /metrics; this can be specified as a single value which overrides the scrape path being used +for all ports attached to the target: + + metrics.grafana.com/path: /metrics/some_path + +the default port to scrape is the target port; this can be specified as a single value which overrides the scrape port being +used for all ports attached to the target. Note that even if a target has multiple ports, the relabel_config targets are +deduped before scraping: + + metrics.grafana.com/port: 8080 + +the default interval to scrape is 1m; this can be specified as a single value which overrides the scrape interval being used +for all ports attached to the target: + + metrics.grafana.com/interval: 5m + +the default timeout for scraping is 10s; this can be specified as a single value which overrides the scrape timeout being +used for all ports attached to the target: + + metrics.grafana.com/timeout: 30s + +the default job is namespace/{{ service name }} or namespace/{{ controller_name }} depending on the role; there may be instances +in which a different job name is required because of a set of dashboards, rules, etc.
To support this there is a job annotation +which overrides the default value: + + metrics.grafana.com/job: integrations/kubernetes/kube-state-metrics +*/ +declare "kubernetes" { + argument "role" { + comment = "The role to use when looking for targets to scrape via annotations, can be: endpoints, service, pod (default: endpoints)" + } + + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ + comment = "The field selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [])" + optional = true + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, this module should be invoked multiple times + // i.e. metrics.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: metrics.grafana.com)" + default = "metrics.grafana.com" + optional = true + } + + argument "tenant" { + comment = "The tenant to write metrics to. This does not have to be the tenantId, this is the value to look for in the metrics.grafana.com/tenant annotation, and this can be a regex." + optional = true + default = ".*" + } + + argument "scrape_port_named_metrics" { + comment = "Whether or not to automatically scrape endpoints that have a port with 'metrics' in the name" + optional = true + default = false + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__pod_role" { + comment = "Most annotation targets are a service or pod, and that is all you want; however, if the role is endpoints you want the pod" + optional = true + default = replace(coalesce(argument.role.value, "endpoints"), "endpoints", "pod") + } + + argument "__service_role" { + comment = "Most annotation targets are a service or pod, and that is all you want; however, if the role is endpoints we also want to consider service annotations" + optional = true + default = replace(coalesce(argument.role.value, "endpoints"), "endpoints", "service") + } + + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters."
+ default = replace(replace(replace(coalesce(argument.annotation.value, "metrics.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + // annotations service discovery + discovery.kubernetes "annotations" { + role = coalesce(argument.role.value, "endpoints") + + selectors { + role = coalesce(argument.role.value, "endpoints") + field = join(coalesce(argument.field_selectors.value, []), ",") + label = join(coalesce(argument.label_selectors.value, []), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + discovery.relabel "annotations" { + targets = discovery.kubernetes.annotations.targets + + /**************************************************************************************************************** + * Handle Targets to Keep or Drop + ****************************************************************************************************************/ + // allow resources to declare their metrics scraped or not + // Example Annotation: + // metrics.grafana.com/scrape: false + // + // the label prometheus.io/service-monitor: "false" is a common label for headless services, when performing endpoint + // service discovery, if there is both a load-balanced service and headless service, this can result in duplicate + // scrapes if the name of the service is attached as a label. any targets with this label or annotation set should be dropped + rule { + action = "replace" + replacement = "false" + target_label = "__tmp_scrape" + } + + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_scrape", + "__meta_kubernetes_" + argument.__service_role.value + "_annotation_" + argument.__sd_annotation.value + "_scrape", + "__meta_kubernetes_" + argument.role.value + "_label_prometheus_io_service_monitor", + ] + separator = ";" + // only allow empty or true, otherwise defaults to false + regex = "^(?:;*)?(true)(;|true)*$" + replacement = "$1" + target_label = "__tmp_scrape" + } + + // add a __tmp_scrape_port_named_metrics from the argument.scrape_port_named_metrics + rule { + replacement = format("%t", argument.scrape_port_named_metrics.value) + target_label = "__tmp_scrape_port_named_metrics" + } + + // only keep targets that have scrape: true or "metrics" in the port name if the argument scrape_port_named_metrics + rule { + action = "keep" + source_labels = [ + "__tmp_scrape", + "__tmp_scrape_port_named_metrics", + // endpoints is the role and most meta labels started with "endpoints", however the port name is an exception and starts with "endpoint" + "__meta_kubernetes_" + replace(coalesce(argument.role.value, "endpoints"), "endpoints", "endpoint") + "_port_name", + ] + separator = ";" + regex = "^(true;.*|(|true);true;(.*metrics.*))$" + } + + // only keep targets where the pod is running or the pod_phase is empty and is not an init container. 
This will only exist for role="pod" or + // potentially role="endpoints", if it is a service the value is empty and thus allowed to pass, if it is an endpoint but not associated to a + // pod but rather a static IP or hostname, that could be outside of kubernetes allow endpoints to declare what tenant their metrics should be + // written to + rule { + action = "keep" + source_labels = ["__meta_kubernetes_pod_phase"] + regex = "^(?i)(Running|)$" + } + rule { + action = "keep" + source_labels = ["__meta_kubernetes_pod_ready"] + regex = "^(true|)$" + } + // if the container is an init container, drop it + rule { + action = "drop" + source_labels = ["__meta_kubernetes_pod_container_init"] + regex = "^(true)$" + } + + // allow resources to declare their metrics the tenant their metrics should be sent to, + // Example Annotation: + // metrics.grafana.com/tenant: primary + // + // Note: This does not necessarily have to be the actual tenantId, it can be a friendly name as well that is simply used + // to determine if the metrics should be gathered for the current tenant + rule { + action = "keep" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_tenant", + "__meta_kubernetes_" + argument.__service_role.value + "_annotation_" + argument.__sd_annotation.value + "_tenant", + ] + regex = "^(" + argument.tenant.value + ")$" + } + + /**************************************************************************************************************** + * Handle Setting Scrape Metadata i.e. path, port, interval etc. + ****************************************************************************************************************/ + // allow resources to declare the protocol to use when collecting metrics, the default value is "http", + // Example Annotation: + // metrics.grafana.com/scheme: http + rule { + action = "replace" + replacement = "http" + target_label = "__scheme__" + } + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_scheme", + "__meta_kubernetes_" + argument.__service_role.value + "_annotation_" + argument.__sd_annotation.value + "_scheme", + ] + separator = ";" + regex = "^(?:;*)?(https?).*$" + replacement = "$1" + target_label = "__scheme__" + } + + // allow resources to declare the port to use when collecting metrics, the default value is the discovered port from + // Example Annotation: + // metrics.grafana.com/port: 9090 + rule { + action = "replace" + source_labels = [ + "__address__", + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_port", + "__meta_kubernetes_" + argument.__service_role.value + "_annotation_" + argument.__sd_annotation.value + "_port", + ] + separator = ";" + regex = "^([^:]+)(?::\\d+)?;(\\d+)$" + replacement = "$1:$2" + target_label = "__address__" + } + + // allow resources to declare their the path to use when collecting their metrics, the default value is "/metrics", + // Example Annotation: + // metrics.grafana.com/path: /metrics/foo + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_path", + "__meta_kubernetes_" + argument.__service_role.value + "_annotation_" + argument.__sd_annotation.value + "_path", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "__metrics_path__" + } + + // allow resources to declare how 
often their metrics should be collected, the default value is 1m, + // the following duration formats are supported (s|m|ms|h|d): + // Example Annotation: + // metrics.grafana.com/interval: 5m + rule { + action = "replace" + replacement = coalesce(argument.scrape_interval.value, "60s") + target_label = "__scrape_interval__" + } + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_interval", + "__meta_kubernetes_" + argument.__service_role.value + "_annotation_" + argument.__sd_annotation.value + "_interval", + ] + separator = ";" + regex = "^(?:;*)?(\\d+(s|m|ms|h|d)).*$" + replacement = "$1" + target_label = "__scrape_interval__" + } + + // allow resources to declare the timeout of the scrape request, the default value is 10s, + // the following duration formats are supported (s|m|ms|h|d): + // Example Annotation: + // metrics.grafana.com/timeout: 30s + rule { + action = "replace" + replacement = coalesce(argument.scrape_timeout.value, "10s") + target_label = "__scrape_timeout__" + } + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_timeout", + "__meta_kubernetes_" + argument.__service_role.value + "_annotation_" + argument.__sd_annotation.value + "_timeout", + ] + separator = ";" + regex = "^(?:;*)?(\\d+(s|m|ms|h|d)).*$" + replacement = "$1" + target_label = "__scrape_timeout__" + } + + /**************************************************************************************************************** + * Handle Setting Common Labels + ****************************************************************************************************************/ + // set the namespace label + rule { + action = "replace" + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + // set the target name label i.e. service name, pod name, etc. 
+ // if the role is endpoints, the first non-empty value is used, which would be __meta_kubernetes_pod_name; if the pod name is empty + // then the endpoint name is used + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.__pod_role.value + "_name", + "__meta_kubernetes_" + argument.role.value + "_name", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = argument.__pod_role.value + } + + // set a default job label to be the namespace/pod_controller_name or namespace/service_name + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_namespace", + "__meta_kubernetes_pod_controller_name", + argument.__pod_role.value, + ] + separator = ";" + regex = "^([^;]+)(?:;*)?([^;]+).*$" + replacement = "$1/$2" + target_label = "job" + } + + // if the controller is a ReplicaSet, drop the hash from the end of the ReplicaSet + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_controller_type", + "__meta_kubernetes_namespace", + "__meta_kubernetes_pod_controller_name", + ] + separator = ";" + regex = "^(?:ReplicaSet);([^;]+);([^;]+)-.+$" + replacement = "$1/$2" + target_label = "job" + } + + // allow resources to declare the job label value to use when collecting their metrics, the default value is "", + // Example Annotation: + // metrics.grafana.com/job: integrations/kubernetes/cadvisor + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_job", + "__meta_kubernetes_" + argument.__service_role.value + "_annotation_" + argument.__sd_annotation.value + "_job", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "job" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_label_app_kubernetes_io_name", + "__meta_kubernetes_" + argument.role.value + "_label_k8s_app", + "__meta_kubernetes_" + argument.role.value + "_label_app", + "__meta_kubernetes_" + argument.__pod_role.value + "_label_app_kubernetes_io_name", + "__meta_kubernetes_" + argument.__pod_role.value + "_label_k8s_app", + "__meta_kubernetes_" + argument.__pod_role.value + "_label_app", + "__meta_kubernetes_" + argument.__service_role.value + "_label_app_kubernetes_io_name", + "__meta_kubernetes_" + argument.__service_role.value + "_label_k8s_app", + "__meta_kubernetes_" + argument.__service_role.value + "_label_app", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the app component if specified as metadata labels "component:" or "app.kubernetes.io/component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_label_app_kubernetes_io_component", + "__meta_kubernetes_" + argument.role.value + "_label_component", + "__meta_kubernetes_" + argument.__pod_role.value + "_label_app_kubernetes_io_component", + "__meta_kubernetes_" + argument.__pod_role.value + "_label_component", + "__meta_kubernetes_" + argument.__service_role.value + "_label_app_kubernetes_io_component", + "__meta_kubernetes_" + argument.__service_role.value + "_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a workload label if the resource is a pod + // example: grafana-agent-68nv9 becomes DaemonSet/grafana-agent + rule { + source_labels =
[ + "__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = ";" + regex = "(.+);(.+)" + replacement = "$1/$2" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.annotations.output + } +} + +declare "metrics" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" + } + + argument "keep_metrics" { + comment = "A regex of metrics to keep (default: (.+))" + optional = true + } + + argument "drop_metrics" { + comment = "A regex of metrics to drop (default: \"\")" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // only keep http targets + discovery.relabel "http_annotations" { + targets = argument.targets.value + + rule { + action = "keep" + source_labels = ["__scheme__"] + regex = "http" + } + } + + // scrape http only targets + prometheus.scrape "http_annotations" { + job_name = "annotation-metrics-http" + forward_to = [prometheus.relabel.annotations.receiver] + targets = discovery.relabel.http_annotations.output + scheme = "http" + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // only keep https targets + discovery.relabel "https_annotations" { + targets = argument.targets.value + + rule { + action = "keep" + source_labels = ["__scheme__"] + regex = "https" + } + } + + // scrape https only targets + prometheus.scrape "https_annotations" { + job_name = "annotation-metrics-https" + forward_to = [prometheus.relabel.annotations.receiver] + targets = discovery.relabel.https_annotations.output + scheme = "https" + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + + tls_config { + ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify = false + server_name = "kubernetes" + } + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + + } + + // perform generic relabeling using keep_metrics and drop_metrics + prometheus.relabel "annotations" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // keep only metrics that match the keep_metrics regex + rule { + action = "keep" + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + } + + // drop metrics that match the drop_metrics regex + rule
{ + action = "drop" + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "") + } + } + +} diff --git a/modules/kubernetes/annotations/probes.river b/modules/kubernetes/annotations/probes.river new file mode 100644 index 0000000..92402ed --- /dev/null +++ b/modules/kubernetes/annotations/probes.river @@ -0,0 +1,523 @@ +/* +Module: job-probes +Description: Automatically probes services and ingresses based on annotations + +Note: Every argument except for "forward_to", "role" and "blackbox_url" is optional and does have a defined default value. However, the values for these + arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax will not + override the value passed in, whereas coalesce() will return the first non-null value. + +Kubernetes Service Auto-Probing +------------------------------------------------------------------------------------------------------------------------------------ +This module is meant to be used to automatically probe targets based on a certain role and set of annotations. This module can be consumed +multiple times with different roles. The supported roles are: + + - service + - ingress + +Each port attached to a service is an eligible target, and oftentimes a service will have multiple ports. +There may be instances when you want to probe all ports or some ports and not others. To support this +the following annotations are available: + +only probe services with probe set to true, this can be single valued i.e. probe all ports for +the service: + +probes.grafana.com/probe: true + +the default probing scheme is "", this can be specified as a single value which would override it; +if using the HTTP prober specify "http" or "https": + +probes.grafana.com/scheme: https + +the default path to probe is /metrics, this can be specified as a single value which would override +the probe path being used for all ports attached to the service: + +probes.grafana.com/path: /metrics/some_path + +the default module to use for probing is "unknown", as the modules are defined in your blackbox exporter +configuration file; this can be specified as a single value which would override the probe module being used for all ports +attached to the service: + +probes.grafana.com/module: http_2xx + +the default port to probe is the service port, this can be specified as a single value which would +override the probe port being used for all ports attached to the service, note that even if a service has +multiple targets, the relabel_config targets are deduped before scraping: + +probes.grafana.com/port: 8080 + +the value to set for the job label, by default this would be "integrations/blackbox_exporter" if not specified: + +probes.grafana.com/job: blackbox-exporter + +the default interval to probe is 1m, this can be specified as a single value which would override +the probe interval being used for all ports attached to the service: + +probes.grafana.com/interval: 5m + +the default timeout for scraping is 10s, this can be specified as a single value which would override +the probe timeout being used for all ports attached to the service: + +probes.grafana.com/timeout: 30s
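+ +as a complete example, a service that should be probed through a hypothetical "http_2xx" module (it must exist in your +blackbox exporter configuration) on port 8080 at /healthz would combine these annotations, a sketch with assumed values: + +probes.grafana.com/probe: true +probes.grafana.com/module: http_2xx +probes.grafana.com/port: 8080 +probes.grafana.com/path: /healthz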
*/ +declare "kubernetes" { + argument "role" { + comment = "The role to use when looking for targets to scrape via annotations, can be: service or ingress (default: service)" + } + + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ + comment = "The field selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [])" + optional = true + } + + argument "annotation" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ + // k8s selectors do not support a logical OR, if multiple types of annotations are needed, this module should be invoked multiple times + // i.e. probes.grafana.com, then again for prometheus.io + comment = "The annotation namespace to use (default: probes.grafana.com)" + default = "probes.grafana.com" + optional = true + } + + argument "tenant" { + comment = "The tenant to write metrics to. This does not have to be the tenantId, this is the value to look for in the probes.grafana.com/tenant annotation, and this can be a regex." + optional = true + default = ".*" + } + + argument "blackbox_url" { + comment = "The address of the blackbox exporter to use (without the protocol), only the hostname and port i.e. blackbox-prometheus-blackbox-exporter.default.svc.cluster.local:9115" + } + + argument "scrape_interval" { + comment = "How often to probe the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a probe times out (default: 10s)" + optional = true + } + + /* + Hidden Arguments + These arguments are used to set reusable variables to avoid repeating logic + */ + argument "__sd_annotation" { + optional = true + comment = "The logic is used to transform the annotation argument into a valid label name by removing unsupported characters." + // e.g. "probes.grafana.com" becomes "probes_grafana_com" + default = replace(replace(replace(coalesce(argument.annotation.value, "probes.grafana.com"),".", "_"),"/", "_"),"-", "_") + } + + // annotations service discovery + discovery.kubernetes "probes" { + role = coalesce(argument.role.value, "service") + + selectors { + role = coalesce(argument.role.value, "service") + field = join(coalesce(argument.field_selectors.value, []), ",") + label = join(coalesce(argument.label_selectors.value, []), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + discovery.relabel "probes" { + targets = discovery.kubernetes.probes.targets + + /**************************************************************************************************************** + * Handle Targets to Keep or Drop + ****************************************************************************************************************/ + // allow resources to declare whether or not they should be probed + // Example Annotation: + // probes.grafana.com/probe: false + // + // the label prometheus.io/service-monitor: "false" is a common label for headless services, if it is set to false, + // do not probe the target + rule { + action = "keep" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_probe", + "__meta_kubernetes_" + argument.role.value + "_label_prometheus_io_service_monitor", + ] + regex = "^true;(|true)$" + }
This will only exist for role="pod" or + // potentially role="endpoints", if it is a service the value is empty and thus allowed to pass, if it is an endpoint but not associated to a + // pod but rather a static IP or hostname, that could be outside of kubernetes allow endpoints to declare what tenant their metrics should be + // written to + rule { + action = "keep" + source_labels = ["__meta_kubernetes_pod_phase"] + regex = "^(?i)(Running|)$" + } + rule { + action = "keep" + source_labels = ["__meta_kubernetes_pod_ready"] + regex = "^(true|)$" + } + // if the container is an init container, drop it + rule { + action = "drop" + source_labels = ["__meta_kubernetes_pod_container_init"] + regex = "^(true)$" + } + + // allow resources to declare their metrics the tenant their metrics should be sent to, + // Example Annotation: + // probes.grafana.com/tenant: primary + // + // Note: This does not necessarily have to be the actual tenantId, it can be a friendly name as well that is simply used + // to determine if the metrics should be gathered for the current tenant + rule { + action = "keep" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_tenant", + ] + regex = "^(" + argument.tenant.value + ")$" + } + + /**************************************************************************************************************** + * Handle Setting Scrape Metadata i.e. path, port, interval etc. + ****************************************************************************************************************/ + // allow resources to declare the protocol to use when collecting metrics, the default value is "http", this is the scheme + // of the target address, not the scheme to use for blackbox exporter + // Example Annotation: + // probes.grafana.com/scheme: http + rule { + action = "replace" + replacement = "http" + target_label = "__scheme__" + } + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_scheme", + "__meta_kubernetes_ingress_scheme", // this would only exist for ingresses and can be used instead of setting the annotation + ] + separator = ";" + regex = "^(?:;*)?(https?).*$" + replacement = "$1" + target_label = "__scheme__" + } + + // allow resources to declare the port to use when collecting metrics, the default value is the discovered port from + // Example Annotation: + // probes.grafana.com/port: 9090 + rule { + action = "replace" + source_labels = [ + "__address__", + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_port", + "__meta_kubernetes_" + argument.role.value + "_port_number", + ] + separator = ";" + regex = "^([^:]+)(?::\\d+)?(?:;*)?([^;]+).*" + replacement = "$1:$2" + target_label = "__address__" + } + + // allow resources to declare their the path to use when collecting their metrics, the default value is "/metrics", + // Example Annotation: + // probes.grafana.com/path: /metrics/foo + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_path", + "__meta_kubernetes_ingress_path", // this would only exist for ingresses and can be used instead of setting the annotation + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "__metrics_path__" + } + + // set the target address to probe + rule { + action = "replace" + source_labels = [ + 
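+ // example: a target with __scheme__="http", __address__="10.10.1.20:8080" and __metrics_path__="/healthz" produces + // __param_target="http://10.10.1.20:8080/healthz", which the blackbox exporter at blackbox_url is then asked to probe; + // the following rules construct this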
"__scheme__", + "__address__", + "__metrics_path__", + ] + separator = ";" + regex = "(.*);(.+);(.+)" + replacement = "${1}://${2}${3}" + target_label = "__param_target" + } + + // allow resources to declare their the module to use when probing, the default value is "unknown", + // Example Annotation: + // probes.grafana.com/module: http_2xx + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_module", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "__param_module" + } + + // allow resources to declare how often their metrics should be collected, the default value is 1m, + // the following duration formats are supported (s|m|ms|h|d): + // Example Annotation: + // probes.grafana.com/interval: 5m + rule { + action = "replace" + replacement = coalesce(argument.scrape_interval.value, "60s") + target_label = "__scrape_interval__" + } + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_interval", + ] + separator = ";" + regex = "^(?:;*)?(\\d+(s|m|ms|h|d)).*$" + replacement = "$1" + target_label = "__scrape_interval__" + } + + // allow resources to declare the timeout of the scrape request, the default value is 10s, + // the following duration formats are supported (s|m|ms|h|d): + // Example Annotation: + // probes.grafana.com/timeout: 30s + rule { + action = "replace" + replacement = coalesce(argument.scrape_timeout.value, "10s") + target_label = "__scrape_timeout__" + } + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_timeout", + ] + separator = ";" + regex = "^(?:;*)?(\\d+(s|m|ms|h|d)).*$" + replacement = "$1" + target_label = "__scrape_timeout__" + } + + /**************************************************************************************************************** + * Handle Setting Common Labels + ****************************************************************************************************************/ + // set the instance label to the target + rule { + action = "replace" + source_labels = ["__param_target"] + target_label = "instance" + } + + // ensure the __metrics_path is set to /probe + rule{ + action = "replace" + replacement = "/probe" + target_label = "__metrics_path__" + } + + // set the __address__ to send the scrape request to be the probing exporter service address that has been deployed + rule{ + action = "replace" + replacement = argument.blackbox_url.value + target_label = "__address__" + } + + // set the namespace label + rule { + action = "replace" + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + // set the target name label i.e. service name, ingress name, etc. 
+ rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_name", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = argument.role.value + } + + // set a default job label to be the namespace/service_name or namespace/ingress_name + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_namespace", + argument.role.value, + ] + separator = ";" + regex = "^([^;]+)(?:;*)?([^;]+).*$" + replacement = "$1/$2" + target_label = "job" + } + + // allow resources to declare the job label value to use when collecting their metrics, the default value is "", + // Example Annotation: + // probes.grafana.com/job: my-service/ready-probe + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_annotation_" + argument.__sd_annotation.value + "_job", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "job" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_label_app_kubernetes_io_name", + "__meta_kubernetes_" + argument.role.value + "_label_k8s_app", + "__meta_kubernetes_" + argument.role.value + "_label_app", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the app component if specified as metadata labels "component:" or "app.kubernetes.io/component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_" + argument.role.value + "_label_app_kubernetes_io_component", + "__meta_kubernetes_" + argument.role.value + "_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + } + + export "output" { + value = discovery.relabel.probes.output + } +} + + +declare "probe" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" { + comment = "Must be a list(MetricsReceiver) where collected metrics should be forwarded to" + } + + argument "keep_metrics" { + comment = "A regex of metrics to keep (default: (.+))" + optional = true + } + + argument "drop_metrics" { + comment = "A regex of metrics to drop (default: \"\")" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate."
+ optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // only keep http targets + discovery.relabel "http_annotation_probes" { + targets = argument.targets.value + + rule { + action = "keep" + source_labels = ["__scheme__"] + regex = "http" + } + } + + // scrape http only targets + prometheus.scrape "http_probe" { + job_name = "annotation-probe-http" + forward_to = [prometheus.relabel.probes.receiver] + targets = discovery.relabel.http_annotation_probes.output + scheme = "http" + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // only keep https targets + discovery.relabel "https_annotation_probes" { + targets = argument.targets.value + + rule { + action = "keep" + source_labels = ["__scheme__"] + regex = "https" + } + } + + // scrape https only targets + prometheus.scrape "https_probe" { + job_name = "annotation-probe-https" + forward_to = [prometheus.relabel.probes.receiver] + targets = discovery.relabel.https_annotation_probes.output + scheme = "https" + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token" + + tls_config { + ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt" + insecure_skip_verify = false + server_name = "kubernetes" + } + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + + } + + // perform generic relabeling using keep_metrics and drop_metrics + prometheus.relabel "probes" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // keep only metrics that match the keep_metrics regex + rule { + action = "keep" + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(.+)") + } + + // drop metrics that match the drop_metrics regex + rule { + action = "drop" + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "") + } + } +} diff --git a/modules/kubernetes/cert-manager/README.md b/modules/kubernetes/cert-manager/README.md new file mode 100644 index 0000000..32627d8 --- /dev/null +++ b/modules/kubernetes/cert-manager/README.md @@ -0,0 +1,142 @@ +# Cert-Manager Module + +Handles scraping Cert-Manager metrics.
+ +## Components + +- [`kubernetes`](#kubernetes) +- [`scrape`](#scrape) + +### `kubernetes` + +Handles discovery of Kubernetes targets and exports them. This component does not perform any scraping; it is also not required on Kubernetes, as a custom service discovery and list of targets can be defined and passed to `cert_manager.scrape` instead. + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :---------------------------------------- | :-------------------------------------------------------------------------- | +| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces | +| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets | +| `label_selectors` | _no_ | `["app.kubernetes.io/name=cert-manager"]` | The label selectors to use to find matching targets | +| `port_name` | _no_ | `http-metrics` | The name of the port to scrape metrics from | + +#### Exports + +| Name | Type | Description | +| :------- | :------------------ | :------------------------- | +| `output` | `list(map(string))` | List of discovered targets | + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :---------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` | +| `container` | The name of the container, usually `cert-manager` | +| `namespace` | The namespace the target was found in. | +| `pod` | The full name of the pod | +| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | +| `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e. `ReplicaSet/my-app` | + +--- + +### `scrape` + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :---------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `targets` | _yes_ | `list(map(string))` | List of targets to scrape | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `job_label` | _no_ | `integrations/cert-manager` | The job label to add for all metrics | +| `keep_metrics` | _no_ | [see code](metrics.river#L228) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river#L235) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Labels + +The following labels are automatically added to exported targets.
+ +| Label | Description | +| :---- | :-----------| +| `job` | Set to the value of `argument.job_label.value` | + +--- + +## Usage + +### `kubernetes` + +The following example will scrape all cert-manager instances in the cluster. + +```river +import.git "cert_manager" { + repository = "https://github.com/grafana/flow-modules.git" + revision = "main" + path = "modules/kubernetes/cert-manager/metrics.river" + pull_frequency = "15m" +} + +// get the targets +cert_manager.kubernetes "targets" {} + +// scrape the targets +cert_manager.scrape "metrics" { + targets = cert_manager.kubernetes.targets.output + forward_to = [ + prometheus.remote_write.default.receiver, + ] +} + +// write the metrics +prometheus.remote_write "default" { + endpoint { + url = "http://mimir:9009/api/v1/push" + + basic_auth { + username = "example-user" + password = "example-password" + } + } +} +```
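+ +The discovery component also accepts the optional arguments documented above; for example, to limit discovery to a single namespace (a sketch; the namespace name is hypothetical): + +```river +cert_manager.kubernetes "targets" { + namespaces = ["cert-manager"] +} +```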
diff --git a/modules/kubernetes/cert-manager/metrics.river b/modules/kubernetes/cert-manager/metrics.river new file mode 100644 index 0000000..fde0767 --- /dev/null +++ b/modules/kubernetes/cert-manager/metrics.river @@ -0,0 +1,217 @@ +/* +Module: job-cert-manager +Description: Scrapes cert-manager + +Note: Every argument except for "forward_to" is optional and does have a defined default value. However, the values for these + arguments are not defined using the default = " ... " argument syntax, but rather using coalesce(argument.value, " ... "). + This is because if the argument passed in from another consuming module is set to null, the default = " ... " syntax will not + override the value passed in, whereas coalesce() will return the first non-null value. +*/ +declare "kubernetes" { + // arguments for kubernetes discovery + argument "namespaces" { + comment = "The namespaces to look for targets in (default: [] is all namespaces)" + optional = true + } + + argument "field_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/field-selectors/ + comment = "The field selectors to use to find matching targets (default: [])" + optional = true + } + + argument "label_selectors" { + // Docs: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ + comment = "The label selectors to use to find matching targets (default: [\"app.kubernetes.io/name=cert-manager\"])" + optional = true + } + + argument "port_name" { + comment = "The name of the port to scrape metrics from (default: http-metrics)" + optional = true + } + + // cert-manager service discovery for all of the pods + discovery.kubernetes "cert_manager" { + role = "pod" + + selectors { + role = "pod" + field = join(coalesce(argument.field_selectors.value, []), ",") + label = join(coalesce(argument.label_selectors.value, ["app.kubernetes.io/name=cert-manager"]), ",") + } + + namespaces { + names = coalesce(argument.namespaces.value, []) + } + } + + // cert-manager relabelings (pre-scrape) + discovery.relabel "kubernetes" { + targets = discovery.kubernetes.cert_manager.targets + + // keep only the specified metrics port name, and pods that are Running and ready + rule { + source_labels = [ + "__meta_kubernetes_pod_container_port_name", + "__meta_kubernetes_pod_phase", + "__meta_kubernetes_pod_ready", + "__meta_kubernetes_pod_container_init", + ] + separator = "@" + regex = coalesce(argument.port_name.value, "http-metrics") + "@Running@true@false" + action = "keep" + } + + // set the namespace label + rule { + source_labels = ["__meta_kubernetes_namespace"] + target_label = "namespace" + } + + // set the pod label + rule { + source_labels = ["__meta_kubernetes_pod_name"] + target_label = "pod" + } + + // set the container label + rule { + source_labels = ["__meta_kubernetes_pod_container_name"] + target_label = "container" + } + + // set a workload label + rule { + source_labels = [ + "__meta_kubernetes_pod_controller_kind", + "__meta_kubernetes_pod_controller_name", + ] + separator = "/" + target_label = "workload" + } + // remove the hash from the ReplicaSet + rule { + source_labels = ["workload"] + regex = "(ReplicaSet/.+)-.+" + target_label = "workload" + } + + // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_name", + "__meta_kubernetes_pod_label_k8s_app", + "__meta_kubernetes_pod_label_app", + ] + separator = ";" + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "app" + } + + // set the component if specified as metadata labels "component:" or "app.kubernetes.io/component:" or "k8s-component:" + rule { + action = "replace" + source_labels = [ + "__meta_kubernetes_pod_label_app_kubernetes_io_component", + "__meta_kubernetes_pod_label_k8s_component", + "__meta_kubernetes_pod_label_component", + ] + regex = "^(?:;*)?([^;]+).*$" + replacement = "$1" + target_label = "component" + } + + // set a source label + rule { + action = "replace" + replacement = "kubernetes" + target_label = "source" + } + } + + export "output" { + value = discovery.relabel.kubernetes.output + } +} + +declare "scrape" { + argument "targets" { + comment = "Must be a list() of targets" + } + + argument "forward_to" {
comment = "Must be a list(MetricsReceiver) where collected logs should be forwarded to" + } + + argument "job_label" { + comment = "The job label to add for all cert-manager metric (default: integrations/cert-manager)" + optional = true + } + + argument "keep_metrics" { + comment = "A regular expression of metrics to keep (default: see below)" + optional = true + } + + argument "drop_metrics" { + comment = "A regular expression of metrics to drop (default: see below)" + optional = true + } + + argument "scrape_interval" { + comment = "How often to scrape metrics from the targets (default: 60s)" + optional = true + } + + argument "scrape_timeout" { + comment = "How long before a scrape times out (default: 10s)" + optional = true + } + + argument "max_cache_size" { + comment = "The maximum number of elements to hold in the relabeling cache (default: 100000). This should be at least 2x-5x your largest scrape target or samples appended rate." + optional = true + } + + argument "clustering" { + // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/ + comment = "Whether or not clustering should be enabled (default: false)" + optional = true + } + + // cert-manager scrape job + prometheus.scrape "cert_manager" { + job_name = coalesce(argument.job_label.value, "integrations/cert-manager") + forward_to = [prometheus.relabel.cert-manager.receiver] + targets = argument.targets.value + scrape_interval = coalesce(argument.scrape_interval.value, "60s") + scrape_timeout = coalesce(argument.scrape_timeout.value, "10s") + + clustering { + enabled = coalesce(argument.clustering.value, false) + } + } + + // cert-manager metric relabelings (post-scrape) + prometheus.relabel "cert_manager" { + forward_to = argument.forward_to.value + max_cache_size = coalesce(argument.max_cache_size.value, 100000) + + // drop metrics that match the drop_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)") + action = "drop" + } + + // keep only metrics that match the keep_metrics regex + rule { + source_labels = ["__name__"] + regex = coalesce(argument.keep_metrics.value, "(up|(certmanager_(certificate_(expiration_timestamp_seconds|ready_status)|clock_time_seconds|controller_sync_call_count|http_acme_client_request_(count|duration_seconds_(count|sum)))|container_(cpu_(cfs_(periods|throttled_periods)_total|usage_seconds_total)|memory_usage_bytes|network_(receive|transmit)_bytes_total)|kube_pod_container_resource_(limits|requests)_(cpu_cores|memory_bytes)))") + action = "keep" + } + } +} diff --git a/modules/kubernetes/core/README.md b/modules/kubernetes/core/README.md new file mode 100644 index 0000000..cbe5df2 --- /dev/null +++ b/modules/kubernetes/core/README.md @@ -0,0 +1,284 @@ +# Agent Module + +Handles scraping Grafana Agent metrics. + +## Components + +- [`cadvisor`](#cadvisor) +- [`resources`](#resources) +- [`kubelet`](#kubelet) +- [`apiserver`](#apiserver) +- [`probes`](#probes) +- [`kube_dns`](#kube_dns) + +### `cadvisor` + +Handles scraping and collecting kubelet [cAdvisor](https://github.com/google/cadvisor/blob/master/docs/storage/prometheus.md) metrics from each worker in the cluster. 
+ +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :--------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets | +| `label_selectors` | _no_ | `[]` | The label selectors to use to find matching targets | +| `job_label` | _no_ | `integrations/kubernetes/cadvisor` | The job label to add for all metrics | +| `keep_metrics` | _no_ | [see code](metrics.river#L156) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river#L149) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Exports + +N/A + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :------- | :------------------------------------------------------------------------------------------------ | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `job` | Set to the value of `argument.job_label.value` | +| `node` | Derived from the metadata label `__meta_kubernetes_node_name` | +| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | + +--- + +### `resources` + +Handles scraping and collecting kubelet [resource](https://kubernetes.io/docs/tasks/debug/debug-cluster/resource-metrics-pipeline/) metrics from each worker in the cluster. + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :--------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets | +| `label_selectors` | _no_ | `["app.kubernetes.io/component=konnectivity"]` | The label selectors to use to find matching targets | +| `job_label` | _no_ | `integrations/agent` | The job label to add for all metrics | +| `keep_metrics` | _no_ | [see code](metrics.river#L373) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river#L380) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Exports + +N/A + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :------- | :------------------------------------------------------------------------------------------------ | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `job` | Set to the value of `argument.job_label.value` | +| `node` | Derived from the metadata label `__meta_kubernetes_node_name` | +| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | + +--- + +### `kubelet` + +Handles scraping and collecting [kubelet](https://kubernetes.io/docs/reference/instrumentation/metrics/) metrics from each worker in the cluster. + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :--------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets | +| `label_selectors` | _no_ | `["app.kubernetes.io/component=konnectivity"]` | The label selectors to use to find matching targets | +| `job_label` | _no_ | `integrations/agent` | The job label to add for all metrics | +| `keep_metrics` | _no_ | [see code](metrics.river#L532) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river#L525) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Exports + +N/A + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :------- | :------------------------------------------------------------------------------------------------ | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `job` | Set to the value of `argument.job_label.value` | +| `node` | Derived from the metadata label `__meta_kubernetes_node_name` | +| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | + +--- + +### `apiserver` + +Handles scraping and collecting [kube-apiserver](https://kubernetes.io/docs/concepts/overview/components/#kube-apiserver) metrics from the default kubernetes service.
+ +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :--------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces | +| `port_name` | _no_ | `https` | The name of the port to scrape metrics from | +| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets | +| `label_selectors` | _no_ | `["app.kubernetes.io/component=konnectivity"]` | The label selectors to use to find matching targets | +| `job_label` | _no_ | `integrations/agent` | The job label to add for all metrics | +| `keep_metrics` | _no_ | [see code](metrics.river#L715) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river#L698) | A regular expression of metrics to drop | +| `drop_les` | _no_ | [see code](metrics.river#L708) | A regular expression of histogram bucket (`le`) values to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Exports + +N/A + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :---------- | :------------------------------------------------------------------------------------------------ | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `job` | Set to the value of `argument.job_label.value` | +| `namespace` | The namespace the target was found in. | +| `service` | Derived from the metadata label `__meta_kubernetes_service_name` | +| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | + +--- + +### `probes` + +Handles scraping and collecting Kubernetes Probe metrics from each worker in the cluster. + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :--------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces | +| `port_name` | _no_ | `https` | The name of the port to scrape metrics from | +| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets | +| `label_selectors` | _no_ | `["app.kubernetes.io/component=konnectivity"]` | The label selectors to use to find matching targets | +| `job_label` | _no_ | `integrations/agent` | The job label to add for all metrics | +| `keep_metrics` | _no_ | [see code](metrics.river#L867) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river#L860) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Exports + +N/A + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :------- | :------------------------------------------------------------------------------------------------ | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `job` | Set to the value of `argument.job_label.value` | +| `node` | Derived from the metadata label `__meta_kubernetes_node_name` | +| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | + +--- + +### `kube_dns` + +Handles scraping and collecting [CoreDNS/KubeDNS](https://coredns.io/plugins/metrics/) metrics from each pod. + +#### Arguments + +| Name | Required | Default | Description | +| :---------------- | :------- | :--------------------------------------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `forward_to` | _yes_ | `list(MetricsReceiver)` | Where scraped metrics should be forwarded to | +| `namespaces` | _no_ | `[]` | The namespaces to look for targets in, the default (`[]`) is all namespaces | +| `port_name` | _no_ | `https` | The name of the port to scrape metrics from | +| `field_selectors` | _no_ | `[]` | The field selectors to use to find matching targets | +| `label_selectors` | _no_ | `["app.kubernetes.io/component=konnectivity"]` | The label selectors to use to find matching targets | +| `job_label` | _no_ | `integrations/agent` | The job label to add for all metrics | +| `keep_metrics` | _no_ | [see code](metrics.river#L867) | A regular expression of metrics to keep | +| `drop_metrics` | _no_ | [see code](metrics.river#L860) | A regular expression of metrics to drop | +| `scrape_interval` | _no_ | `60s` | How often to scrape metrics from the targets | +| `scrape_timeout` | _no_ | `10s` | How long before a scrape times out | +| `max_cache_size` | _no_ | `100000` | The maximum number of elements to hold in the relabeling cache. This should be at least 2x-5x your largest scrape target or samples appended rate. | +| `clustering` | _no_ | `false` | Whether or not [clustering](https://grafana.com/docs/agent/latest/flow/concepts/clustering/) should be enabled | + +#### Exports + +N/A + +#### Labels + +The following labels are automatically added to exported targets. + +| Label | Description | +| :---------- | :-------------------------------------------------------------------------------------------------------------------------------------------------- | +| `app` | Derived from the pod label value of `app.kubernetes.io/name`, `k8s-app`, or `app` | +| `component` | Derived from the pod label value of `app.kubernetes.io/component`, `k8s-component`, or `component` | +| `container` | The name of the container, usually `coredns` | +| `namespace` | The namespace the target was found in. | +| `pod` | The full name of the pod | +| `service` | The name of the service the endpoint/pod is associated with, derived from the metadata label `__meta_kubernetes_service_name` | +| `source` | Constant value of `kubernetes`, denoting where the results came from, this can be useful for LBAC | +| `workload` | Kubernetes workload, a combination of `__meta_kubernetes_pod_controller_kind` and `__meta_kubernetes_pod_controller_name`, i.e. `ReplicaSet/my-app` | + +--- + +## Usage + +The following example will scrape all of the Kubernetes core components in the cluster.
+ +```river +import.git "k8s" { + repository = "https://github.com/grafana/flow-modules.git" + revision = "main" + path = "modules/kubernetes/core/metrics.river" + pull_frequency = "15m" +} + +k8s.cadvisor "scrape" { + forward_to = [prometheus.remote_write.local.receiver] +} +k8s.resources "scrape" { + forward_to = [prometheus.remote_write.local.receiver] +} +k8s.apiserver "scrape" { + forward_to = [prometheus.remote_write.local.receiver] +} +k8s.probes "scrape" { + forward_to = [prometheus.remote_write.local.receiver] +} +k8s.kube_dns "scrape" { + forward_to = [prometheus.remote_write.local.receiver] +} +k8s.kubelet "scrape" { + forward_to = [prometheus.remote_write.local.receiver] +} + +// write the metrics +prometheus.remote_write "local" { + endpoint { + url = "http://mimir:9009/api/v1/push" + + basic_auth { + username = "example-user" + password = "example-password" + } + } +} +```
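+ +Each component also accepts the optional arguments documented above; for example, to change the scrape interval and enable clustering for cAdvisor (a sketch using only the documented arguments): + +```river +k8s.cadvisor "scrape" { + forward_to = [prometheus.remote_write.local.receiver] + scrape_interval = "120s" + clustering = true +} +```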
+    optional = true
+  }
+  argument "clustering" {
+    // Docs: https://grafana.com/docs/agent/latest/flow/concepts/clustering/
+    comment = "Whether or not clustering should be enabled (default: false)"
+    optional = true
+  }
+
+  export "output" {
+    value = discovery.relabel.cadvisor.output
+  }
+
+  // cadvisor service discovery for all of the nodes
+  discovery.kubernetes "cadvisor" {
+    role = "node"
+
+    selectors {
+      role = "node"
+      field = join(coalesce(argument.field_selectors.value, []), ",")
+      label = join(coalesce(argument.label_selectors.value, []), ",")
+    }
+  }
+
+  // cadvisor relabelings (pre-scrape)
+  discovery.relabel "cadvisor" {
+    targets = discovery.kubernetes.cadvisor.targets
+
+    // set the address to use the kubernetes service dns name
+    rule {
+      target_label = "__address__"
+      replacement = "kubernetes.default.svc.cluster.local:443"
+    }
+
+    // set the metrics path to use the proxy path to the node's cadvisor metrics endpoint
+    rule {
+      source_labels = ["__meta_kubernetes_node_name"]
+      regex = "(.+)"
+      replacement = "/api/v1/nodes/${1}/proxy/metrics/cadvisor"
+      target_label = "__metrics_path__"
+    }
+
+    // set the node label
+    rule {
+      source_labels = ["__meta_kubernetes_node_name"]
+      target_label = "node"
+    }
+
+    // set the app name if specified as metadata labels "app:" or "app.kubernetes.io/name:" or "k8s-app:"
+    rule {
+      action = "replace"
+      source_labels = [
+        "__meta_kubernetes_node_label_app_kubernetes_io_name",
+        "__meta_kubernetes_node_label_k8s_app",
+        "__meta_kubernetes_node_label_app",
+      ]
+      separator = ";"
+      regex = "^(?:;*)?([^;]+).*$"
+      replacement = "$1"
+      target_label = "app"
+    }
+
+    // set a source label
+    rule {
+      action = "replace"
+      replacement = "kubernetes"
+      target_label = "source"
+    }
+  }
+
+  // cadvisor scrape job
+  prometheus.scrape "cadvisor" {
+    job_name = coalesce(argument.job_label.value, "integrations/kubernetes/cadvisor")
+    forward_to = [prometheus.relabel.cadvisor.receiver]
+    targets = discovery.relabel.cadvisor.output
+    scheme = "https"
+    scrape_interval = coalesce(argument.scrape_interval.value, "60s")
+    scrape_timeout = coalesce(argument.scrape_timeout.value, "10s")
+    bearer_token_file = "/var/run/secrets/kubernetes.io/serviceaccount/token"
+
+    tls_config {
+      ca_file = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"
+      insecure_skip_verify = false
+      server_name = "kubernetes"
+    }
+
+    clustering {
+      enabled = coalesce(argument.clustering.value, false)
+    }
+  }
+
+  // cadvisor metric relabelings (post-scrape)
+  prometheus.relabel "cadvisor" {
+    forward_to = argument.forward_to.value
+    max_cache_size = coalesce(argument.max_cache_size.value, 100000)
+
+    // drop metrics that match the drop_metrics regex
+    rule {
+      source_labels = ["__name__"]
+      regex = coalesce(argument.drop_metrics.value, "(^(go|process)_.+$)")
+      action = "drop"
+    }
+
+    // keep only metrics that match the keep_metrics regex
+    rule {
+      source_labels = ["__name__"]
+      regex = coalesce(argument.keep_metrics.value, "(up|container_(cpu_(cfs_(periods|throttled_periods)_total|usage_seconds_total)|fs_(reads|writes)(_bytes)?_total|memory_(cache|rss|swap|working_set_bytes)|network_(receive|transmit)_(bytes|packets(_dropped)?_total))|machine_memory_bytes)")
+      action = "keep"
+    }
+
+    // Drop empty container labels, addressing https://github.com/google/cadvisor/issues/2688
+    rule {
+      source_labels = ["__name__","container"]
+      separator = "@"
+      regex = "(container_cpu_.*|container_fs_.*|container_memory_.*)@"
+      action = "drop"
+    }
+
+    // Drop empty image labels, addressing
https://github.com/google/cadvisor/issues/2688 + rule { + source_labels = ["__name__","image"] + separator = "@" + regex = "(container_cpu_.*|container_fs_.*|container_memory_.*|container_network_.*)@" + action = "drop" + } + + // Normalizing unimportant labels (not deleting to continue satisfying