Skip to content

Commit

Permalink
rsc: Add more dashboards and improve others (#1619)
Browse files Browse the repository at this point in the history
  • Loading branch information
V-FEXrt authored Aug 2, 2024
1 parent af316b3 commit 0cd9ac8
Show file tree
Hide file tree
Showing 4 changed files with 126 additions and 11 deletions.
2 changes: 1 addition & 1 deletion rust/rsc/.config.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"server_address": "0.0.0.0:3002",
"connection_pool_timeout": 60,
"standalone": false,
"active_store": "e9c2dac1-3882-442f-b8a4-1fc04582a003",
"active_store": "1f4a1ee7-d20f-4031-afe8-9dd002b4b0cf",
"log_directory": null,
"blob_eviction": {
"tick_rate": 60,
Expand Down
50 changes: 46 additions & 4 deletions rust/rsc/src/bin/rsc/dashboard.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use crate::types::{
DashboardStatsLostOpportunityJob, DashboardStatsMostReusedJob, DashboardStatsOldestJob,
DashboardStatsResponse, DashboardStatsSizeRuntimeValueJob,
DashboardStatsBlobUseByStore, DashboardStatsLostOpportunityJob, DashboardStatsMostReusedJob,
DashboardStatsOldestJob, DashboardStatsResponse, DashboardStatsSizeRuntimeValueJob,
};
use axum::Json;
use rsc::database;
Expand All @@ -16,9 +16,11 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
savings: 0,
oldest_jobs: Vec::new(),
most_reused_jobs: Vec::new(),
most_time_saved_jobs: Vec::new(),
lost_opportunity_jobs: Vec::new(),
most_space_efficient_jobs: Vec::new(),
most_space_use_jobs: Vec::new(),
blob_use_by_store: Vec::new(),
};

let job_count = match database::count_jobs(db.as_ref()).await {
Expand Down Expand Up @@ -98,6 +100,24 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
}
};

let most_time_saved_jobs = match database::most_time_saved_jobs(db.as_ref()).await {
Ok(items) => {
let mut out = Vec::new();
for item in items {
out.push(DashboardStatsMostReusedJob {
label: item.label,
reuses: item.reuses,
savings: item.savings,
});
}
out
}
Err(err) => {
tracing::error! {%err, "Failed to lookup most time saved jobs"};
return Json(empty);
}
};

let lost_opportunity_jobs = match database::lost_opportuinty_jobs(db.as_ref()).await {
Ok(items) => {
let mut out = Vec::new();
Expand All @@ -107,6 +127,7 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
reuses: item.reuses,
misses: item.misses,
real_savings: item.real_savings,
lost_savings: item.lost_savings,
potential_savings: item.potential_savings,
});
}
Expand All @@ -126,7 +147,7 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
label: item.label,
runtime: item.runtime,
disk_usage: item.disk_usage,
ms_saved_per_byte: item.ms_saved_per_byte,
ns_saved_per_byte: item.ns_saved_per_byte,
});
}
out
Expand All @@ -145,7 +166,7 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
label: item.label,
runtime: item.runtime,
disk_usage: item.disk_usage,
ms_saved_per_byte: item.ms_saved_per_byte,
ns_saved_per_byte: item.ns_saved_per_byte,
});
}
out
Expand All @@ -156,15 +177,36 @@ pub async fn stats(db: Arc<DatabaseConnection>) -> Json<DashboardStatsResponse>
}
};

let blob_use_by_store = match database::blob_use_by_store(db.as_ref()).await {
Ok(items) => {
let mut out = Vec::new();
for item in items {
out.push(DashboardStatsBlobUseByStore {
store_id: item.store_id.to_string(),
store_type: item.store_type,
refs: item.refs,
blob_count: item.blob_count,
});
}
out
}
Err(err) => {
tracing::error! {%err, "Failed to lookup blob use by store"};
return Json(empty);
}
};

Json(DashboardStatsResponse {
job_count,
blob_count,
size,
savings,
oldest_jobs,
most_reused_jobs,
most_time_saved_jobs,
lost_opportunity_jobs,
most_space_efficient_jobs,
most_space_use_jobs,
blob_use_by_store,
})
}
13 changes: 12 additions & 1 deletion rust/rsc/src/bin/rsc/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ pub struct DashboardStatsLostOpportunityJob {
pub reuses: i32,
pub misses: i32,
pub real_savings: i64,
pub lost_savings: i64,
pub potential_savings: i64,
}

Expand All @@ -201,7 +202,15 @@ pub struct DashboardStatsSizeRuntimeValueJob {
pub label: String,
pub runtime: i64,
pub disk_usage: i64,
pub ms_saved_per_byte: i64,
pub ns_saved_per_byte: i64,
}

/// Per-blob-store usage summary exposed by the dashboard stats JSON response.
/// Populated from `database::blob_use_by_store` rows (see database.rs).
#[derive(Debug, Serialize, Deserialize)]
pub struct DashboardStatsBlobUseByStore {
// Store UUID, rendered as a string for JSON serialization.
pub store_id: String,
// Store backend type — taken from the `blob_store.type` column.
pub store_type: String,
// How many times blobs in this store are referenced (output files + job stdout/stderr).
pub refs: i64,
// Total number of blobs currently held by this store.
pub blob_count: i64,
}

#[derive(Debug, Serialize, Deserialize)]
Expand All @@ -212,7 +221,9 @@ pub struct DashboardStatsResponse {
pub savings: i64,
pub oldest_jobs: Vec<DashboardStatsOldestJob>,
pub most_reused_jobs: Vec<DashboardStatsMostReusedJob>,
pub most_time_saved_jobs: Vec<DashboardStatsMostReusedJob>,
pub lost_opportunity_jobs: Vec<DashboardStatsLostOpportunityJob>,
pub most_space_efficient_jobs: Vec<DashboardStatsSizeRuntimeValueJob>,
pub most_space_use_jobs: Vec<DashboardStatsSizeRuntimeValueJob>,
pub blob_use_by_store: Vec<DashboardStatsBlobUseByStore>,
}
72 changes: 67 additions & 5 deletions rust/rsc/src/database.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,50 @@ pub async fn read_test_blob_stores<T: ConnectionTrait>(
.await
}

/// Query-result row for `blob_use_by_store`: per-store blob counts and
/// reference counts, decoded directly from the raw SQL statement below.
#[derive(Debug, FromQueryResult)]
pub struct BlobUseByStore {
// `blob_store.id` — converted to a string at the API layer.
pub store_id: Uuid,
// `blob_store.type` backend identifier.
pub store_type: String,
// Count of references to this store's blobs (output_file + job stdout/stderr joins).
pub refs: i64,
// Count of rows in `blob` belonging to this store.
pub blob_count: i64,
}

/// Aggregates, per blob store, how many blobs it holds (`blob_count`) and how
/// many times those blobs are referenced (`refs`) by output files or by job
/// stdout/stderr blobs.
///
/// NOTE(review): both CTEs use INNER JOINs, so a store whose blobs have zero
/// references — or a store with no blobs at all — produces no row here.
/// Confirm the dashboard is expected to omit such stores rather than show 0.
pub async fn blob_use_by_store<T: ConnectionTrait>(db: &T) -> Result<Vec<BlobUseByStore>, DbErr> {
BlobUseByStore::find_by_statement(Statement::from_string(
DbBackend::Postgres,
r#"
WITH
reference_count AS (
SELECT b.store_id, count(b.store_id) refs
FROM blob b
INNER JOIN (
SELECT blob_id id FROM output_file
UNION ALL SELECT stdout_blob_id FROM job
UNION ALL SELECT stderr_blob_id FROM job
) rbi
on b.id = rbi.id
GROUP BY b.store_id
),
blob_count AS (
SELECT bs.id, bs.type, bbs.count as blob_count
FROM blob_store bs
INNER JOIN (
SELECT store_id, count(store_id)
FROM blob
GROUP BY store_id
) bbs
ON bbs.store_id = bs.id
)
SELECT b.id store_id, b.type store_type, r.refs, b.blob_count
FROM reference_count r
INNER JOIN blob_count b
ON r.store_id = b.id
"#,
))
// Raw statement (not an entity query) so we can use CTEs; rows decode via FromQueryResult.
.all(db)
.await
}

// ---------- Update ----------

// ---------- Delete ----------
Expand Down Expand Up @@ -296,12 +340,29 @@ pub async fn most_reused_jobs<T: ConnectionTrait>(db: &T) -> Result<Vec<MostReus
.await
}

/// Returns the top 30 jobs ranked by total time saved, where savings is
/// computed as cache hits (`job_history.hits`) times the job's runtime.
///
/// Reuses the `MostReusedJob` row type (label / reuses / savings) rather than
/// defining a new one, since the columns are identical.
/// NOTE(review): `savings` is in the same unit as `job.runtime` — presumably
/// seconds (elsewhere runtime is scaled by 1e9 to get ns); confirm with schema.
pub async fn most_time_saved_jobs<T: ConnectionTrait>(db: &T) -> Result<Vec<MostReusedJob>, DbErr> {
MostReusedJob::find_by_statement(Statement::from_string(
DbBackend::Postgres,
r#"
SELECT j.label, h.hits as reuses, CAST(round(h.hits * j.runtime) as BIGINT) as savings
FROM job_history h
INNER JOIN job j
ON j.hash = h.hash
ORDER BY savings DESC
LIMIT 30
"#,
))
.all(db)
.await
}

/// Query-result row for `lost_opportuinty_jobs` (note: function name is
/// misspelled — "opportunity" — but renaming would break callers, e.g. the
/// dashboard handler): per-job cache hit/miss counts and the derived
/// real / lost / potential time savings.
#[derive(Debug, FromQueryResult)]
pub struct LostOpportunityJobs {
// Job label shown in the dashboard.
pub label: String,
// Cache hits for this job's hash (`job_history.hits`).
pub reuses: i32,
// Cache misses excluding the first (unavoidable) run: `misses - 1` in SQL.
pub misses: i32,
// Time actually saved: hits * runtime.
pub real_savings: i64,
// Time lost to avoidable misses: (misses - 1) * runtime.
pub lost_savings: i64,
// Upper bound: (hits + misses - 1) * runtime.
pub potential_savings: i64,
}

Expand All @@ -316,11 +377,12 @@ pub async fn lost_opportuinty_jobs<T: ConnectionTrait>(
h.hits as reuses,
h.misses - 1 as misses,
CAST(round(h.hits * j.runtime) as BIGINT) as real_savings,
CAST(round((h.misses - 1) * j.runtime) as BIGINT) as lost_savings,
CAST(round((h.hits + h.misses - 1) * j.runtime) as BIGINT) as potential_savings
FROM job_history h
INNER JOIN job j
ON j.hash = h.hash
ORDER BY potential_savings DESC
ORDER BY lost_savings DESC
LIMIT 30;
"#,
))
Expand All @@ -333,7 +395,7 @@ pub struct SizeRuntimeValueJob {
pub label: String,
pub runtime: i64,
pub disk_usage: i64,
pub ms_saved_per_byte: i64,
pub ns_saved_per_byte: i64,
}

pub async fn most_space_efficient_jobs<T: ConnectionTrait>(
Expand All @@ -346,10 +408,10 @@ pub async fn most_space_efficient_jobs<T: ConnectionTrait>(
j.label,
CAST(round(j.runtime) as BIGINT) as runtime,
j.size as disk_usage,
CAST(round(j.runtime / (j.size) * 1000) as BIGINT) as ms_saved_per_byte
CAST(round(j.runtime / (j.size) * 1000000000) as BIGINT) as ns_saved_per_byte
FROM job j
WHERE size IS NOT NULL
ORDER BY ms_saved_per_byte DESC
ORDER BY ns_saved_per_byte DESC
LIMIT 30;
"#,
))
Expand All @@ -367,7 +429,7 @@ pub async fn most_space_use_jobs<T: ConnectionTrait>(
j.label,
CAST(round(j.runtime) as BIGINT) as runtime,
j.size as disk_usage,
CAST(round(j.runtime / (j.size) * 1000) as BIGINT) as ms_saved_per_byte
CAST(round(j.runtime / (j.size) * 1000000000) as BIGINT) as ns_saved_per_byte
FROM job j
WHERE size IS NOT NULL
ORDER BY disk_usage DESC
Expand Down

0 comments on commit 0cd9ac8

Please sign in to comment.