diff --git a/api/src/index.ts b/api/src/index.ts
index 7c5c1f829..c3b0537ba 100644
--- a/api/src/index.ts
+++ b/api/src/index.ts
@@ -14,6 +14,7 @@ import { cors } from "hono/cors";
 import { serve } from "@hono/node-server";
 import { legacyRouter } from "./routers/legacyRouter";
 import { sentry } from "@hono/sentry";
+import { internalRouter } from "./routers/internalRouter";
 
 const appHono = new Hono();
 appHono.use(
@@ -70,6 +71,7 @@ appHono.route("/", apiRouter);
 appHono.route("/user", userRouter);
 appHono.route("/web3-index", web3IndexRouter);
 appHono.route("/dashboard", dashboardRouter);
+appHono.route("/internal", internalRouter);
 
 appHono.get("/status", (c) => {
   const version = packageJson.version;
diff --git a/api/src/routers/internalRouter.ts b/api/src/routers/internalRouter.ts
new file mode 100644
index 000000000..3e850ebdd
--- /dev/null
+++ b/api/src/routers/internalRouter.ts
@@ -0,0 +1,211 @@
+import { Provider } from "@shared/dbSchemas/akash";
+import { chainDb } from "@src/db/dbConnection";
+import { isValidBech32Address } from "@src/utils/addresses";
+import { round } from "@src/utils/math";
+import { Hono } from "hono";
+import * as semver from "semver";
+import { QueryTypes } from "sequelize";
+
+/** Router for the internal (non-public) debugging and analytics endpoints. */
+export const internalRouter = new Hono();
+
+/** Response key used for providers whose akash version is null. */
+const nullVersionName = "";
+
+/** Response key used for gpu nodes whose vendor is null. */
+const nullVendorName = "";
+
+/** One entry of the `/provider-versions` response. */
+type ProviderVersionStats = {
+  version: string;
+  count: number;
+  ratio: number;
+  providers: string[];
+};
+
+/** One row of the `/gpu` query; the gpu columns come from a LEFT JOIN and may be null. */
+type GpuNodeRow = {
+  hostUri: string;
+  name: string;
+  allocatable: number;
+  allocated: number;
+  modelId: string | null;
+  vendor: string | null;
+  modelName: string | null;
+  interface: string | null;
+  memorySize: string | null;
+};
+
+/** Gpu counts aggregated per (model, ram, interface) in the `/gpu` response. */
+type GpuModelStats = {
+  model: string | null;
+  ram: string | null;
+  interface: string | null;
+  allocatable: number;
+  allocated: number;
+};
+
+/**
+ * Orders version strings from newest to oldest.
+ *
+ * `semver.rcompare` throws on strings that are not valid semver, so those are never
+ * passed to it: they sort after every valid version, in plain string order.
+ */
+function compareVersionsDesc(a: string, b: string): number {
+  const isValidA = semver.valid(a) !== null;
+  const isValidB = semver.valid(b) !== null;
+
+  if (isValidA && isValidB) return semver.rcompare(a, b);
+  if (isValidA !== isValidB) return isValidA ? -1 : 1;
+
+  return a < b ? -1 : a > b ? 1 : 0;
+}
+
+/**
+ * GET /provider-versions
+ *
+ * Responds with an object keyed by akash version. Each entry holds the host uris of
+ * the online providers on that version, their count and their share of all returned
+ * providers. Valid versions come first (newest to oldest), the null version comes last.
+ */
+internalRouter.get("/provider-versions", async (c) => {
+  const providers = await Provider.findAll({
+    attributes: ["hostUri", "akashVersion"],
+    where: {
+      isOnline: true
+    },
+    group: ["hostUri", "akashVersion"]
+  });
+
+  // Single pass: the Map keeps versions in first-seen order, the Set dedupes host uris.
+  const hostUrisByVersion = new Map<string, Set<string>>();
+
+  for (const provider of providers) {
+    const version = provider.akashVersion ?? nullVersionName;
+    const hostUris = hostUrisByVersion.get(version);
+
+    if (hostUris) {
+      hostUris.add(provider.hostUri);
+    } else {
+      hostUrisByVersion.set(version, new Set([provider.hostUri]));
+    }
+  }
+
+  const results = Array.from(
+    hostUrisByVersion,
+    ([version, hostUris]): ProviderVersionStats => ({
+      version,
+      count: hostUris.size,
+      ratio: round(hostUris.size / providers.length, 2),
+      providers: Array.from(hostUris)
+    })
+  );
+
+  const knownVersions = results.filter((x) => x.version !== nullVersionName).sort((a, b) => compareVersionsDesc(a.version, b.version));
+  const unknownVersions = results.filter((x) => x.version === nullVersionName);
+
+  // Insert in display order: this becomes the key order of the serialized object.
+  const sorted: Record<string, ProviderVersionStats> = {};
+  for (const stats of [...knownVersions, ...unknownVersions]) {
+    sorted[stats.version] = stats;
+  }
+
+  return c.json(sorted);
+});
+
+/**
+ * GET /gpu
+ *
+ * Responds with the allocatable/allocated gpu counts of online providers: a grand
+ * total plus a per-vendor breakdown by model, ram and interface.
+ *
+ * Optional query parameters: `provider` (akash address or host uri), `vendor`, `model`
+ * and `memory_size`. Responds with 400 when `provider` is neither a valid bech32
+ * address nor a parseable url.
+ */
+internalRouter.get("/gpu", async (c) => {
+  const provider = c.req.query("provider");
+  // An empty value (e.g. `?vendor=`) means "no filter", just like an empty `provider` below.
+  const vendor = c.req.query("vendor") || null;
+  const model = c.req.query("model") || null;
+  const memory_size = c.req.query("memory_size") || null;
+
+  let provider_address: string | null = null;
+  let provider_hosturi: string | null = null;
+
+  if (provider) {
+    if (isValidBech32Address(provider)) {
+      provider_address = provider;
+    } else if (URL.canParse(provider)) {
+      provider_hosturi = provider;
+    } else {
+      return c.json({ error: "Invalid provider parameter, should be a valid akash address or host uri" }, 400);
+    }
+  }
+
+  // NOTE(review): neither DISTINCT ON has an ORDER BY, so the row kept per host uri and
+  // per node is not deterministic - confirm that this is acceptable.
+  const gpuNodes = (await chainDb.query(
+    `
+    WITH snapshots AS (
+      SELECT DISTINCT ON("hostUri")
+        ps.id AS id,
+        "hostUri",
+        p."owner"
+      FROM provider p
+      INNER JOIN "providerSnapshot" ps ON ps.id=p."lastSnapshotId"
+      WHERE p."isOnline" IS TRUE
+    )
+    SELECT s."hostUri", n."name", n."gpuAllocatable" AS allocatable, n."gpuAllocated" AS allocated, gpu."modelId", gpu.vendor, gpu.name AS "modelName", gpu.interface, gpu."memorySize"
+    FROM snapshots s
+    INNER JOIN "providerSnapshotNode" n ON n."snapshotId"=s.id AND n."gpuAllocatable" > 0
+    LEFT JOIN (
+      SELECT DISTINCT ON (gpu."snapshotNodeId") gpu.*
+      FROM "providerSnapshotNodeGPU" gpu
+    ) gpu ON gpu."snapshotNodeId" = n.id
+    WHERE
+      (:vendor IS NULL OR gpu.vendor = :vendor)
+      AND (:model IS NULL OR gpu.name = :model)
+      AND (:memory_size IS NULL OR gpu."memorySize" = :memory_size)
+      AND (:provider_address IS NULL OR s."owner" = :provider_address)
+      AND (:provider_hosturi IS NULL OR s."hostUri" = :provider_hosturi)
+`,
+    {
+      type: QueryTypes.SELECT,
+      replacements: { vendor, model, memory_size, provider_address, provider_hosturi }
+    }
+  )) as GpuNodeRow[];
+
+  const total = { allocatable: 0, allocated: 0 };
+  const details: Record<string, GpuModelStats[]> = {};
+
+  for (const gpuNode of gpuNodes) {
+    total.allocatable += gpuNode.allocatable;
+    total.allocated += gpuNode.allocated;
+
+    const vendorName = gpuNode.vendor ?? nullVendorName;
+    // Own-property lookup: the `in` operator would also match inherited keys such as "constructor".
+    let vendorModels = Object.prototype.hasOwnProperty.call(details, vendorName) ? details[vendorName] : undefined;
+    if (!vendorModels) {
+      vendorModels = [];
+      details[vendorName] = vendorModels;
+    }
+
+    const existing = vendorModels.find((x) => x.model === gpuNode.modelName && x.interface === gpuNode.interface && x.ram === gpuNode.memorySize);
+
+    if (existing) {
+      existing.allocatable += gpuNode.allocatable;
+      existing.allocated += gpuNode.allocated;
+    } else {
+      vendorModels.push({
+        model: gpuNode.modelName,
+        ram: gpuNode.memorySize,
+        interface: gpuNode.interface,
+        allocatable: gpuNode.allocatable,
+        allocated: gpuNode.allocated
+      });
+    }
+  }
+
+  return c.json({ gpus: { total, details } });
+});
diff --git a/doc/Internal_Endpoints.md b/doc/Internal_Endpoints.md
new file mode 100644
index 000000000..b9d443a57
--- /dev/null
+++ b/doc/Internal_Endpoints.md
@@ -0,0 +1,100 @@
+# Internal endpoints that are not part of the public api
+
+Those endpoints are used for debugging and analytics purposes.
+
+- [GPU Stats](#gpu-stats) - Distribution of gpu vendor/model
+- [Provider Versions](#provider-versions) - See what akash version providers are running
+
+## GPU Stats
+
+Url: https://api.cloudmos.io/internal/gpu
+
+Returns a summary of the gpus on the network.
+
+### Example Response
+
+```
+{
+  "gpus": {
+    "total": {
+      "allocatable": 2,
+      "allocated": 0
+    },
+    "details": {
+      "nvidia": [
+        {
+          "model": "t4",
+          "ram": "16Gi",
+          "interface": "PCIe",
+          "allocatable": 1,
+          "allocated": 0
+        },
+        {
+          "model": "rtx3060ti",
+          "ram": "8Gi",
+          "interface": "PCIe",
+          "allocatable": 1,
+          "allocated": 0
+        }
+      ]
+    }
+  }
+}
+```
+
+### Query parameters for filtering
+
+---
+
+| Param | Description |
+| ----------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
+| provider | Either a provider address (ex: `akash175llqyjvxfle9qwt740vm46772dzaznpzgm576`) or a host uri (ex: `https://provider.akashprovid.com:8443`) |
+| vendor | Ex: `nvidia` |
+| model | Ex: `t4` |
+| memory_size | Ex: `16Gi` |
+
+All query parameters can be combined, ex:
+`https://api.cloudmos.io/internal/gpu?provider=akash175llqyjvxfle9qwt740vm46772dzaznpzgm576&vendor=nvidia&model=rtx3060ti&memory_size=8Gi`
+
+## Provider Versions
+
+Url: https://api.cloudmos.io/internal/provider-versions
+
+Returns a list of versions and the providers that are currently on that version. The `""` (empty) version corresponds to providers where the version could not be determined. The `/version` endpoint was broken for a long time, but is now fixed in [v0.5.0-rc11](https://github.com/akash-network/provider/releases/tag/v0.5.0-rc11)
+
+### Example Response
+
+```
+{
+  "0.5.0-rc16": {
+    "version": "0.5.0-rc16",
+    "count": 3,
+    "ratio": 0.04,
+    "providers": [
+      "https://provider.moonbys.cloud:8443",
+      "https://provider.akashprovid.com:8443",
+      "https://provider.akashtesting.xyz:8443"
+    ]
+  },
+  "0.5.0-rc15": {
+    "version": "0.5.0-rc15",
+    "count": 1,
+    "ratio": 0.01,
+    "providers": [
+      "https://provider.akash.pro:8443"
+    ]
+  },
+  "": {
+    "version": "",
+    "count": 80,
+    "ratio": 0.95,
+    "providers": [
+      "https://provider.macptrading.com:8443",
+      "https://provider.digitaler-friedhof.com:8443",
+      "https://provider.qioi.io:8443",
+      "https://provider.bluepeer.io:8443",
+      ...
+    ]
+  }
+}
+```