From 4535f2c02744307273a27b577c13beb7ac1ae8ea Mon Sep 17 00:00:00 2001 From: Devanathan Sabapathy Date: Wed, 1 May 2024 16:04:44 +0000 Subject: [PATCH 1/4] modifying sql by removing pg tables in the query --- core/sql/aborted_queries.sql | 32 ++-- core/sql/cluster_level_metrics.sql | 132 ++++++++--------- core/sql/latency_distribution.sql | 105 +++++++------- core/sql/query_distribution.sql | 209 +++++++++++++-------------- core/sql/query_metrics.sql | 32 ++-- core/sql/statement_types.sql | 85 ++++++----- core/sql/sys_external_query_data.sql | 11 +- core/sql/sys_load_history.sql | 36 ++--- core/sql/sys_query_history.sql | 11 +- 9 files changed, 325 insertions(+), 328 deletions(-) diff --git a/core/sql/aborted_queries.sql b/core/sql/aborted_queries.sql index f18a80bd6..c45787cf1 100644 --- a/core/sql/aborted_queries.sql +++ b/core/sql/aborted_queries.sql @@ -1,20 +1,18 @@ -SELECT -q.user_id as "userid", -TRIM(u.usename) AS usename -,case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" -,date_trunc('hour', q.start_time) as "period" -,q.transaction_id as "xid" -,q.query_id as "query" -,q.query_text::char(50) as "querytxt" -,q.queue_time / 1000000.00 as "queue_s" -,q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric -,case when q.status = 'failed' then 1 else 0 end "aborted" -,q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric +/*AbortedQueries*/ +SELECT q.user_id as "userid" + , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" + , date_trunc('hour', q.start_time) as "period" + , q.transaction_id as "xid" + , q.query_id as "query" + , q.query_text::char(50) as "querytxt" + , q.queue_time / 1000000.00 as "queue_s" + , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric + , case when q.status = 'failed' then 1 else 0 end "aborted" + , q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric FROM sys_query_history q - LEFT JOIN pg_user u ON u.usesysid = q.user_id WHERE q.user_id > 1 - AND q.start_time >={{START_TIME}} - AND q.start_time <={{END_TIME}} - AND q.query_text LIKE '%replay_start%' - AND q.status = 'failed' + AND q.start_time >={{START_TIME}} + AND q.start_time <={{END_TIME}} + AND q.query_text LIKE '%replay_start%' + AND q.status = 'failed' ORDER BY total_elapsed_s DESC; diff --git a/core/sql/cluster_level_metrics.sql b/core/sql/cluster_level_metrics.sql index 007725f5f..f3703472a 100644 --- a/core/sql/cluster_level_metrics.sql +++ b/core/sql/cluster_level_metrics.sql @@ -1,70 +1,70 @@ +/*ClusterLevelMetrics*/ WITH queries AS -( - select - q.user_id as "userid" - ,date_trunc('hour', q.start_time) as "period" - ,q.transaction_id as "xid" - ,q.query_id as "query" - ,q.query_text::char(50) as "querytxt" - ,q.queue_time / 1000000.00 as "queue_s" - ,q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric - ,case when q.status = 'failed' then 1 else 0 end "aborted" - ,q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric - FROM sys_query_history q - WHERE q.user_id > 1 - AND q.start_time >= {{START_TIME}} - AND q.start_time <= {{END_TIME}} - AND q.query_text LIKE '%replay_start%' - AND q.status != 'failed' -), -elapsed_time AS -( - SELECT 'Query Latency' AS measure_type, - COUNT(*) AS query_count, - ROUND(PERCENTILE_CONT (0.25) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p25_s, - ROUND(PERCENTILE_CONT (0.50) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p50_s, - ROUND(PERCENTILE_CONT (0.75) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p75_s, - ROUND(PERCENTILE_CONT (0.90) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p90_s, - ROUND(PERCENTILE_CONT (0.95) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p95_s, - ROUND(PERCENTILE_CONT (0.99) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p99_s, - MAX(total_elapsed_s) AS max_s, - AVG(total_elapsed_s) AS avg_s, - stddev(total_elapsed_s) AS std_s - FROM queries - GROUP BY 1 -), -exec_time AS -( - SELECT 'Execution Time' AS measure_type, - COUNT(*) AS query_count, - ROUND(PERCENTILE_CONT (0.25) WITHIN GROUP(ORDER BY exec_time_s),2) AS p25_s, - ROUND(PERCENTILE_CONT (0.50) WITHIN GROUP(ORDER BY exec_time_s),2) AS p50_s, - ROUND(PERCENTILE_CONT (0.75) WITHIN GROUP(ORDER BY exec_time_s),2) AS p75_s, - ROUND(PERCENTILE_CONT (0.90) WITHIN GROUP(ORDER BY exec_time_s),2) AS p90_s, - ROUND(PERCENTILE_CONT (0.95) WITHIN GROUP(ORDER BY exec_time_s),2) AS p95_s, - ROUND(PERCENTILE_CONT (0.99) WITHIN GROUP(ORDER BY exec_time_s),2) AS p99_s, - MAX(exec_time_s) AS max_s, - AVG(exec_time_s) AS avg_s, - stddev(exec_time_s) AS std_s - FROM queries - GROUP BY 1 -), -queue_time AS -( - SELECT 'Queue Time' AS measure_type, - COUNT(*) AS query_count, - ROUND(PERCENTILE_CONT (0.25) WITHIN GROUP(ORDER BY queue_s),2) AS p25_s, - ROUND(PERCENTILE_CONT (0.50) WITHIN GROUP(ORDER BY queue_s),2) AS p50_s, - ROUND(PERCENTILE_CONT (0.75) WITHIN GROUP(ORDER BY queue_s),2) AS p75_s, - ROUND(PERCENTILE_CONT (0.90) WITHIN GROUP(ORDER BY queue_s),2) AS p90_s, - ROUND(PERCENTILE_CONT (0.95) WITHIN GROUP(ORDER BY queue_s),2) AS p95_s, - ROUND(PERCENTILE_CONT (0.99) WITHIN GROUP(ORDER BY queue_s),2) AS p99_s, - MAX(queue_s) AS max_s, - AVG(queue_s) AS avg_s, - stddev(queue_s) AS std_s - FROM queries - GROUP BY 1 -) + ( + select q.user_id as "userid" + , date_trunc('hour', q.start_time) as "period" + , q.transaction_id as "xid" + , q.query_id as "query" + , q.query_text::char(50) as "querytxt" + , q.queue_time / 1000000.00 as "queue_s" + , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric + , case when q.status = 'failed' then 1 else 0 end "aborted" + , q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric + FROM sys_query_history q + WHERE q.user_id > 1 + AND q.start_time >= {{START_TIME}} + AND q.start_time <= {{END_TIME}} + AND q.query_text LIKE '%replay_start%' + AND q.status != 'failed' + ), + elapsed_time AS + ( + SELECT 'Query Latency' AS measure_type, + COUNT(*) AS query_count, + ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p25_s, + ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p50_s, + ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p75_s, + ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p90_s, + ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p95_s, + ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p99_s, + MAX(total_elapsed_s) AS max_s, + AVG(total_elapsed_s) AS avg_s, + stddev(total_elapsed_s) AS std_s + FROM queries + GROUP BY 1 + ), + exec_time AS + ( + SELECT 'Execution Time' AS measure_type, + COUNT(*) AS query_count, + ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p25_s, + ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p50_s, + ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p75_s, + ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p90_s, + ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p95_s, + ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p99_s, + MAX(exec_time_s) AS max_s, + AVG(exec_time_s) AS avg_s, + stddev(exec_time_s) AS std_s + FROM queries + GROUP BY 1 + ), + queue_time AS + ( + SELECT 'Queue Time' AS measure_type, + COUNT(*) AS query_count, + ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY queue_s), 2) AS p25_s, + ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY queue_s), 2) AS p50_s, + ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY queue_s), 2) AS p75_s, + ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY queue_s), 2) AS p90_s, + ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY queue_s), 2) AS p95_s, + ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY queue_s), 2) AS p99_s, + MAX(queue_s) AS max_s, + AVG(queue_s) AS avg_s, + stddev(queue_s) AS std_s + FROM queries + GROUP BY 1 + ) SELECT measure_type, query_count, p25_s, diff --git a/core/sql/latency_distribution.sql b/core/sql/latency_distribution.sql index 11de11d1d..d2a1e219d 100644 --- a/core/sql/latency_distribution.sql +++ b/core/sql/latency_distribution.sql @@ -1,61 +1,62 @@ +/*LatencyDistribution*/ WITH queries AS -( - SELECT q.query_id - ,q.elapsed_time / 1000000.00 as total_elapsed_s - FROM sys_query_history q - WHERE q.user_id > 1 - AND q.start_time >= {{START_TIME}} - AND q.start_time <= {{END_TIME}} - AND q.query_text LIKE '%replay_start%' - AND status != 'failed' -) -, -pct AS -( - SELECT ROUND(PERCENTILE_CONT (0.98) WITHIN GROUP(ORDER BY q1.total_elapsed_s),2) AS p98_s, - COUNT(*) AS query_count, - MAX(q1.total_elapsed_s) max_s, - MIN(q1.total_elapsed_s) min_s, - MIN(CASE WHEN q1.total_elapsed_s = 0.00 THEN NULL ELSE q1.total_elapsed_s END) min_2s - FROM queries q1 -), -bucket_count AS -( - SELECT CASE - WHEN query_count > 100 THEN 40 - ELSE 5 - END AS b_count - FROM pct -), -buckets AS -( - SELECT (min_2s +((n)*(p98_s / b_count))) AS sec_end, - n, - (min_2s +((n -1)*(p98_s / b_count))) AS sec_start - FROM (SELECT ROW_NUMBER() OVER () n FROM pg_class LIMIT 39), - bucket_count, - pct - WHERE sec_end <= p98_s - UNION ALL - SELECT min_2s AS sec_end, - 0 AS n, - 0.00 AS sec_start - FROM pct - UNION ALL - SELECT (max_s +0.01) AS sec_end, - b_count AS n, - p98_s AS sec_start - FROM pct, - bucket_count -) + ( + SELECT q.query_id + , q.elapsed_time / 1000000.00 as total_elapsed_s + FROM sys_query_history q + WHERE q.user_id > 1 + AND q.start_time >= {{START_TIME}} + AND q.start_time <= {{END_TIME}} + AND q.query_text LIKE '%replay_start%' + AND status != 'failed' + ) + , + pct AS + ( + SELECT ROUND(PERCENTILE_CONT(0.98) WITHIN GROUP (ORDER BY q1.total_elapsed_s), 2) AS p98_s, + COUNT(*) AS query_count, + MAX(q1.total_elapsed_s) max_s, + MIN(q1.total_elapsed_s) min_s, + MIN(CASE WHEN q1.total_elapsed_s = 0.00 THEN NULL ELSE q1.total_elapsed_s END) min_2s + FROM queries q1 + ), + bucket_count AS + ( + SELECT CASE + WHEN query_count > 100 THEN 40 + ELSE 5 + END AS b_count + FROM pct + ), + buckets AS + ( + SELECT (min_2s + ((n) * (p98_s / b_count))) AS sec_end, + n, + (min_2s + ((n - 1) * (p98_s / b_count))) AS sec_start + FROM (SELECT ROW_NUMBER() OVER () n FROM pg_class LIMIT 39), + bucket_count, + pct + WHERE sec_end <= p98_s + UNION ALL + SELECT min_2s AS sec_end, + 0 AS n, + 0.00 AS sec_start + FROM pct + UNION ALL + SELECT (max_s + 0.01) AS sec_end, + b_count AS n, + p98_s AS sec_start + FROM pct, + bucket_count + ) SELECT sec_end, n, sec_start, COUNT(query_id) FROM buckets - LEFT JOIN queries - ON total_elapsed_s >= sec_start - AND total_elapsed_s < sec_end + LEFT JOIN queries + ON total_elapsed_s >= sec_start + AND total_elapsed_s < sec_end GROUP BY 1, 2, 3 diff --git a/core/sql/query_distribution.sql b/core/sql/query_distribution.sql index 073f9909a..c587681e0 100644 --- a/core/sql/query_distribution.sql +++ b/core/sql/query_distribution.sql @@ -1,109 +1,108 @@ +/*QueryDistribution*/ WITH queries AS -( - select - q.user_id as "userid", - TRIM(u.usename) AS usename - ,case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" - ,date_trunc('hour', q.start_time) as "period" - ,q.transaction_id as "xid" - ,q.query_id as "query" - ,q.query_text::char(50) as "querytxt" - ,q.queue_time / 1000000.00 as "queue_s" - ,q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric - ,case when q.status = 'failed' then 1 else 0 end "aborted" - ,q.elapsed_time / 1000000.00 as "total_elapsed_s" --again includes compile time - ,user_query_count - ,DENSE_RANK() OVER (ORDER BY user_query_count) AS rnk - FROM sys_query_history q - LEFT JOIN pg_user u ON u.usesysid = q.user_id - inner join (select user_id, count(*) user_query_count - from sys_query_history - where user_id>1 - AND start_time >={{START_TIME}} - AND start_time <={{END_TIME}} - AND query_text LIKE '%replay_start%' - AND status != 'failed' group by user_id) uc on uc.user_id = q.user_id - WHERE q.user_id > 1 - AND q.start_time >={{START_TIME}} - AND q.start_time <={{END_TIME}} - AND q.query_text LIKE '%replay_start%' - AND q.status != 'failed' -), -elapsed_time AS -( - SELECT CASE - WHEN rnk <= 100 THEN usename - ELSE 'Other Users' - END AS usename, - queue, - aborted, - 'Query Latency' AS measure_type, - COUNT(*) AS query_count, - ROUND(PERCENTILE_CONT (0.25) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p25_s, - ROUND(PERCENTILE_CONT (0.50) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p50_s, - ROUND(PERCENTILE_CONT (0.75) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p75_s, - ROUND(PERCENTILE_CONT (0.90) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p90_s, - ROUND(PERCENTILE_CONT (0.95) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p95_s, - ROUND(PERCENTILE_CONT (0.99) WITHIN GROUP(ORDER BY total_elapsed_s),2) AS p99_s, - MAX(total_elapsed_s) AS max_s, - AVG(total_elapsed_s) AS avg_s, - stddev(total_elapsed_s) AS std_s - FROM queries - GROUP BY 1, - 2, - 3, - 4 -), -exec_time AS -( - SELECT CASE - WHEN rnk <= 100 THEN usename - ELSE 'Other Users' - END AS usename, - queue, - aborted, - 'Execution Time' AS measure_type, - COUNT(*) AS query_count, - ROUND(PERCENTILE_CONT (0.25) WITHIN GROUP(ORDER BY exec_time_s),2) AS p25_s, - ROUND(PERCENTILE_CONT (0.50) WITHIN GROUP(ORDER BY exec_time_s),2) AS p50_s, - ROUND(PERCENTILE_CONT (0.75) WITHIN GROUP(ORDER BY exec_time_s),2) AS p75_s, - ROUND(PERCENTILE_CONT (0.90) WITHIN GROUP(ORDER BY exec_time_s),2) AS p90_s, - ROUND(PERCENTILE_CONT (0.95) WITHIN GROUP(ORDER BY exec_time_s),2) AS p95_s, - ROUND(PERCENTILE_CONT (0.99) WITHIN GROUP(ORDER BY exec_time_s),2) AS p99_s, - MAX(exec_time_s) AS max_s, - AVG(exec_time_s) AS avg_s, - stddev(exec_time_s) AS std_s - FROM queries - GROUP BY 1, - 2, - 3, - 4 -), -queue_time AS -( - SELECT CASE - WHEN rnk <= 100 THEN usename - ELSE 'Other Users' - END AS usename, - queue, - aborted, - 'Queue Time' AS measure_type, - COUNT(*) AS query_count, - ROUND(PERCENTILE_CONT (0.25) WITHIN GROUP(ORDER BY queue_s),2) AS p25_s, - ROUND(PERCENTILE_CONT (0.50) WITHIN GROUP(ORDER BY queue_s),2) AS p50_s, - ROUND(PERCENTILE_CONT (0.75) WITHIN GROUP(ORDER BY queue_s),2) AS p75_s, - ROUND(PERCENTILE_CONT (0.90) WITHIN GROUP(ORDER BY queue_s),2) AS p90_s, - ROUND(PERCENTILE_CONT (0.95) WITHIN GROUP(ORDER BY queue_s),2) AS p95_s, - ROUND(PERCENTILE_CONT (0.99) WITHIN GROUP(ORDER BY queue_s),2) AS p99_s, - MAX(queue_s) AS max_s, - AVG(queue_s) AS avg_s, - stddev(queue_s) AS std_s - FROM queries - GROUP BY 1, - 2, - 3, - 4 -) + ( + select q.user_id as "userid" + , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" + , date_trunc('hour', q.start_time) as "period" + , q.transaction_id as "xid" + , q.query_id as "query" + , q.query_text::char(50) as "querytxt" + , q.queue_time / 1000000.00 as "queue_s" + , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric + , case when q.status = 'failed' then 1 else 0 end "aborted" + , q.elapsed_time / 1000000.00 as "total_elapsed_s" --again includes compile time + , user_query_count + , DENSE_RANK() OVER (ORDER BY user_query_count) AS rnk + FROM sys_query_history q + inner join (select user_id, count(*) user_query_count + from sys_query_history + where user_id > 1 + AND start_time >={{START_TIME}} + AND start_time <={{END_TIME}} + AND query_text LIKE '%replay_start%' + AND status != 'failed' + group by user_id) uc on uc.user_id = q.user_id + WHERE q.user_id > 1 + AND q.start_time >={{START_TIME}} + AND q.start_time <={{END_TIME}} + AND q.query_text LIKE '%replay_start%' + AND q.status != 'failed' + ), + elapsed_time AS + ( + SELECT CASE + WHEN rnk <= 100 THEN cast(userid as char(6)) + ELSE 'Other Users' + END AS usename, + queue, + aborted, + 'Query Latency' AS measure_type, + COUNT(*) AS query_count, + ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p25_s, + ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p50_s, + ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p75_s, + ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p90_s, + ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p95_s, + ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p99_s, + MAX(total_elapsed_s) AS max_s, + AVG(total_elapsed_s) AS avg_s, + stddev(total_elapsed_s) AS std_s + FROM queries + GROUP BY 1, + 2, + 3, + 4 + ), + exec_time AS + ( + SELECT CASE + WHEN rnk <= 100 THEN cast(userid as char(6)) + ELSE 'Other Users' + END AS usename, + queue, + aborted, + 'Execution Time' AS measure_type, + COUNT(*) AS query_count, + ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p25_s, + ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p50_s, + ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p75_s, + ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p90_s, + ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p95_s, + ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p99_s, + MAX(exec_time_s) AS max_s, + AVG(exec_time_s) AS avg_s, + stddev(exec_time_s) AS std_s + FROM queries + GROUP BY 1, + 2, + 3, + 4 + ), + queue_time AS + ( + SELECT CASE + WHEN rnk <= 100 THEN cast(userid as char(6)) + ELSE 'Other Users' + END AS usename, + queue, + aborted, + 'Queue Time' AS measure_type, + COUNT(*) AS query_count, + ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY queue_s), 2) AS p25_s, + ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY queue_s), 2) AS p50_s, + ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY queue_s), 2) AS p75_s, + ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY queue_s), 2) AS p90_s, + ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY queue_s), 2) AS p95_s, + ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY queue_s), 2) AS p99_s, + MAX(queue_s) AS max_s, + AVG(queue_s) AS avg_s, + stddev(queue_s) AS std_s + FROM queries + GROUP BY 1, + 2, + 3, + 4 + ) SELECT measure_type, usename, queue, diff --git a/core/sql/query_metrics.sql b/core/sql/query_metrics.sql index a87f76cff..5c9530077 100644 --- a/core/sql/query_metrics.sql +++ b/core/sql/query_metrics.sql @@ -1,19 +1,17 @@ -select -q.user_id as "userid" -,TRIM(u.usename) AS usename -,case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" -,date_trunc('hour', q.start_time) as "period" -,q.transaction_id as "xid" -,q.query_id as "query" -,q.query_text::char(50) as "querytxt" -,q.queue_time / 1000000.00 as "queue_s" -,q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric -,case when q.status = 'failed' then 1 else 0 end "aborted" -,q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric +/*QueryMetrics*/ +select q.user_id as "userid" + , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" + , date_trunc('hour', q.start_time) as "period" + , q.transaction_id as "xid" + , q.query_id as "query" + , q.query_text::char(50) as "querytxt" + , q.queue_time / 1000000.00 as "queue_s" + , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric + , case when q.status = 'failed' then 1 else 0 end "aborted" + , q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric FROM sys_query_history q -LEFT JOIN pg_user u ON u.usesysid = q.user_id WHERE q.user_id > 1 - AND q.start_time >={{START_TIME}} - AND q.start_time <={{END_TIME}} - AND q.query_text LIKE '%replay_start%' - AND q.status != 'failed'; + AND q.start_time >={{START_TIME}} + AND q.start_time <={{END_TIME}} + AND q.query_text LIKE '%replay_start%' + AND q.status != 'failed'; diff --git a/core/sql/statement_types.sql b/core/sql/statement_types.sql index be6259f4b..8e9ec49dd 100644 --- a/core/sql/statement_types.sql +++ b/core/sql/statement_types.sql @@ -1,46 +1,45 @@ -SELECT - CASE - WHEN REGEXP_INSTR ("query_text",'(padb_|pg_internal)') - THEN 'SYSTEM' - WHEN query_type = 'DELETE' - THEN 'DELETE' - WHEN query_type = 'COPY' - THEN 'COPY' - WHEN query_type = 'UPDATE' - THEN 'UPDATE' - WHEN query_type = 'INSERT' - THEN 'INSERT' - WHEN query_type = 'SELECT' - THEN 'SELECT' - WHEN query_type = 'UNLOAD' - THEN 'UNLOAD' - WHEN query_type = 'DDL' - THEN 'DDL' - WHEN query_type = 'UTILITY' - THEN CASE - WHEN REGEXP_INSTR ("query_text",'[vV][aA][cC][uU][uU][mM][ :]') - THEN 'VACUUM' - WHEN REGEXP_INSTR ("query_text",'[rR][oO][lL][lL][bB][aA][cC][kK] ') - THEN 'ROLLBACK' - WHEN REGEXP_INSTR ("query_text",'[fF][eE][tT][cC][hH] ') - THEN 'FETCH' - WHEN REGEXP_INSTR ("query_text",'[cC][uU][rR][sS][oO][rR] ') - THEN 'CURSOR' - ELSE 'UTILITY' - END - ELSE 'OTHER' - END statement_type - , COUNT(CASE - WHEN status = 'failed' - THEN 1 - END) AS aborted - , COUNT(*) AS total_count -FROM - sys_query_history -WHERE user_id > 1 - AND query_text LIKE '%replay_start%' - AND start_time >= {{START_TIME}} - AND start_time <= {{END_TIME}} +/*StatementTypes*/ +SELECT CASE + WHEN REGEXP_INSTR("query_text", '(padb_|pg_internal)') + THEN 'SYSTEM' + WHEN query_type = 'DELETE' + THEN 'DELETE' + WHEN query_type = 'COPY' + THEN 'COPY' + WHEN query_type = 'UPDATE' + THEN 'UPDATE' + WHEN query_type = 'INSERT' + THEN 'INSERT' + WHEN query_type = 'SELECT' + THEN 'SELECT' + WHEN query_type = 'UNLOAD' + THEN 'UNLOAD' + WHEN query_type = 'DDL' + THEN 'DDL' + WHEN query_type = 'UTILITY' + THEN CASE + WHEN REGEXP_INSTR("query_text", '[vV][aA][cC][uU][uU][mM][ :]') + THEN 'VACUUM' + WHEN REGEXP_INSTR("query_text", '[rR][oO][lL][lL][bB][aA][cC][kK] ') + THEN 'ROLLBACK' + WHEN REGEXP_INSTR("query_text", '[fF][eE][tT][cC][hH] ') + THEN 'FETCH' + WHEN REGEXP_INSTR("query_text", '[cC][uU][rR][sS][oO][rR] ') + THEN 'CURSOR' + ELSE 'UTILITY' + END + ELSE 'OTHER' + END statement_type + , COUNT(CASE + WHEN status = 'failed' + THEN 1 + END) AS aborted + , COUNT(*) AS total_count +FROM sys_query_history +WHERE user_id > 1 + AND query_text LIKE '%replay_start%' + AND start_time >= {{START_TIME}} + AND start_time <= {{END_TIME}} GROUP BY 1 ORDER BY diff --git a/core/sql/sys_external_query_data.sql b/core/sql/sys_external_query_data.sql index d12b8660c..6b1ff6f95 100644 --- a/core/sql/sys_external_query_data.sql +++ b/core/sql/sys_external_query_data.sql @@ -1,3 +1,4 @@ +/*SysExternalQueryData*/ SELECT user_id, query_id, child_query_sequence, @@ -14,8 +15,8 @@ SELECT user_id, returned_bytes, file_format, file_location, - external_query_text from SYS_EXTERNAL_QUERY_DETAIL - WHERE user_id > 1 - AND start_time >= {{START_TIME}} - AND start_time <= {{END_TIME}}; - ; \ No newline at end of file + external_query_text +from SYS_EXTERNAL_QUERY_DETAIL +WHERE user_id > 1 + AND start_time >= {{START_TIME}} + AND start_time <= {{END_TIME}}; \ No newline at end of file diff --git a/core/sql/sys_load_history.sql b/core/sql/sys_load_history.sql index cec1ade2e..032ddcbc6 100644 --- a/core/sql/sys_load_history.sql +++ b/core/sql/sys_load_history.sql @@ -1,19 +1,21 @@ +/*SysLoadHistory*/ SELECT user_id, -query_id, -status, -session_id, -transaction_id, -database_name, -table_name, -start_time, -end_time, -duration, -data_source, -loaded_rows, -loaded_bytes, -source_file_count, -source_file_bytes, -error_count from SYS_LOAD_HISTORY + query_id, + status, + session_id, + transaction_id, + database_name, + table_name, + start_time, + end_time, + duration, + data_source, + loaded_rows, + loaded_bytes, + source_file_count, + source_file_bytes, + error_count +from SYS_LOAD_HISTORY WHERE user_id > 1 - AND start_time >= {{START_TIME}} - AND start_time <= {{END_TIME}}; + AND start_time >= {{START_TIME}} + AND start_time <= {{END_TIME}}; diff --git a/core/sql/sys_query_history.sql b/core/sql/sys_query_history.sql index 0a5b4d9fd..e73189c0c 100644 --- a/core/sql/sys_query_history.sql +++ b/core/sql/sys_query_history.sql @@ -1,5 +1,5 @@ +/*SysQueryHistory*/ SELECT h.user_id, - u.usename as user_name, query_id, transaction_id, session_id, @@ -17,9 +17,8 @@ SELECT h.user_id, error_message, returned_rows, returned_bytes, - redshift_version from sys_query_history h - LEFT JOIN pg_user u on h.user_id=u.usesysid + redshift_version +from sys_query_history h WHERE user_id > 1 - AND start_time >= {{START_TIME}} - AND start_time <= {{END_TIME}}; -; \ No newline at end of file + AND start_time >= {{START_TIME}} + AND start_time <= {{END_TIME}}; \ No newline at end of file From 8258feb7f9f97923b601c453f9921e6ef415c600 Mon Sep 17 00:00:00 2001 From: Devanathan Sabapathy Date: Wed, 1 May 2024 16:04:44 +0000 Subject: [PATCH 2/4] modifying sql to do a join with the pg table after creating a temp table --- core/replay/report_gen.py | 49 +++-- core/sql/aborted_queries.sql | 50 +++-- core/sql/query_distribution.sql | 331 +++++++++++++++++--------------- core/sql/query_metrics.sql | 48 +++-- core/sql/sys_query_history.sql | 2 + 5 files changed, 278 insertions(+), 202 deletions(-) diff --git a/core/replay/report_gen.py b/core/replay/report_gen.py index c22f86414..a9cbd1641 100644 --- a/core/replay/report_gen.py +++ b/core/replay/report_gen.py @@ -55,7 +55,7 @@ def pdf_gen(report, summary=None): @param summary: list, replay summary """ - with open("core/replay/report_content.yaml", "r") as stream: + with open("/home/devsaba/code/redshift-test-drive/core/replay/report_content.yaml", "r") as stream: docs = yaml.safe_load(stream) style = g_stylesheet.get("styles") @@ -347,7 +347,7 @@ def unload(unload_location, iam_role, cluster, user, replay): logger = logging.getLogger("WorkloadReplicatorLogger") - directory = r"core/sql" + directory = r"/home/devsaba/code/redshift-test-drive/core/sql" queries = [] # used to return query names with initiate_connection(username=user, cluster=cluster) as conn: # initiate connection @@ -369,19 +369,40 @@ def unload(unload_location, iam_role, cluster, user, replay): query = re.sub(r"{{END_TIME}}", f"'{cluster.get('end_time')}'", query) parsed_location = f"s3://{unload_location.get('bucket_name')}/{unload_location.get('prefix')}" - # format unload query with actual query from sql/ - unload_query = ( - f"unload ($${query}$$) to '{parsed_location}analysis/{replay}/raw_data/" - f"{query_name}' iam_role '{iam_role}' CSV header allowoverwrite parallel off;" - ) - try: - cursor.execute(unload_query) # execute unload - except Exception as e: - logger.error( - f"Could not unload {query_name} results. Confirm IAM permissions include UNLOAD " - f"access for Redshift. {e}" + # create temp tables for queries joining with pg_user + if "CREATE TEMP TABLE" in query: + query_split = query.split('\n\n') + try: + cursor.execute(query_split[0]) + + unload_query = ( + f"unload ($${query_split[1]}$$) to '{parsed_location}analysis/{replay}/raw_data/" + f"{query_name}' iam_role '{iam_role}' CSV header allowoverwrite parallel off;" + ) + + cursor.execute(unload_query) + + except Exception as e: + logger.error( + f"Could not execute query {query_name}" + f"Error: {e}" + ) + exit(-1) + + else: + # format unload query with actual query from sql/ + unload_query = ( + f"unload ($${query}$$) to '{parsed_location}analysis/{replay}/raw_data/" + f"{query_name}' iam_role '{iam_role}' CSV header allowoverwrite parallel off;" ) - exit(-1) + try: + cursor.execute(unload_query) # execute unload + except Exception as e: + logger.error( + f"Could not unload {query_name} results. Confirm IAM permissions include UNLOAD " + f"access for Redshift. {e}" + ) + exit(-1) logger.info(f"Query results available in {unload_location.get('url')}") return queries diff --git a/core/sql/aborted_queries.sql b/core/sql/aborted_queries.sql index c45787cf1..7993cbee2 100644 --- a/core/sql/aborted_queries.sql +++ b/core/sql/aborted_queries.sql @@ -1,18 +1,34 @@ /*AbortedQueries*/ -SELECT q.user_id as "userid" - , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" - , date_trunc('hour', q.start_time) as "period" - , q.transaction_id as "xid" - , q.query_id as "query" - , q.query_text::char(50) as "querytxt" - , q.queue_time / 1000000.00 as "queue_s" - , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric - , case when q.status = 'failed' then 1 else 0 end "aborted" - , q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric -FROM sys_query_history q -WHERE q.user_id > 1 - AND q.start_time >={{START_TIME}} - AND q.start_time <={{END_TIME}} - AND q.query_text LIKE '%replay_start%' - AND q.status = 'failed' -ORDER BY total_elapsed_s DESC; +CREATE TEMP TABLE aborted_queries AS ( + SELECT q.user_id as "userid" + , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" + , date_trunc('hour', q.start_time) as "period" + , q.transaction_id as "xid" + , q.query_id as "query" + , q.query_text::char(50) as "querytxt" + , q.queue_time / 1000000.00 as "queue_s" + , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric + , case when q.status = 'failed' then 1 else 0 end "aborted" + , q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric + FROM sys_query_history q + WHERE q.user_id > 1 + AND q.start_time >={{START_TIME}} + AND q.start_time <={{END_TIME}} + AND q.query_text LIKE '%replay_start%' + AND q.status = 'failed' +); + +SELECT a.userid, + b.usename, + a.queue, + a.period, + a.xid, + a.query, + a.querytxt, + a.queue_s, + a.exec_time_s, + a.aborted, + a.total_elapsed_s +FROM aborted_queries a + LEFT JOIN pg_user b ON a.userid = b.usesysid +ORDER BY a.total_elapsed_s DESC; diff --git a/core/sql/query_distribution.sql b/core/sql/query_distribution.sql index c587681e0..c23330b5a 100644 --- a/core/sql/query_distribution.sql +++ b/core/sql/query_distribution.sql @@ -1,156 +1,177 @@ /*QueryDistribution*/ -WITH queries AS - ( - select q.user_id as "userid" - , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" - , date_trunc('hour', q.start_time) as "period" - , q.transaction_id as "xid" - , q.query_id as "query" - , q.query_text::char(50) as "querytxt" - , q.queue_time / 1000000.00 as "queue_s" - , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric - , case when q.status = 'failed' then 1 else 0 end "aborted" - , q.elapsed_time / 1000000.00 as "total_elapsed_s" --again includes compile time - , user_query_count - , DENSE_RANK() OVER (ORDER BY user_query_count) AS rnk - FROM sys_query_history q - inner join (select user_id, count(*) user_query_count - from sys_query_history - where user_id > 1 - AND start_time >={{START_TIME}} - AND start_time <={{END_TIME}} - AND query_text LIKE '%replay_start%' - AND status != 'failed' - group by user_id) uc on uc.user_id = q.user_id - WHERE q.user_id > 1 - AND q.start_time >={{START_TIME}} - AND q.start_time <={{END_TIME}} - AND q.query_text LIKE '%replay_start%' - AND q.status != 'failed' - ), - elapsed_time AS - ( - SELECT CASE - WHEN rnk <= 100 THEN cast(userid as char(6)) - ELSE 'Other Users' - END AS usename, - queue, - aborted, - 'Query Latency' AS measure_type, - COUNT(*) AS query_count, - ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p25_s, - ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p50_s, - ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p75_s, - ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p90_s, - ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p95_s, - ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p99_s, - MAX(total_elapsed_s) AS max_s, - AVG(total_elapsed_s) AS avg_s, - stddev(total_elapsed_s) AS std_s - FROM queries - GROUP BY 1, - 2, - 3, - 4 - ), - exec_time AS - ( - SELECT CASE - WHEN rnk <= 100 THEN cast(userid as char(6)) - ELSE 'Other Users' - END AS usename, - queue, - aborted, - 'Execution Time' AS measure_type, - COUNT(*) AS query_count, - ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p25_s, - ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p50_s, - ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p75_s, - ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p90_s, - ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p95_s, - ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p99_s, - MAX(exec_time_s) AS max_s, - AVG(exec_time_s) AS avg_s, - stddev(exec_time_s) AS std_s - FROM queries - GROUP BY 1, - 2, - 3, - 4 - ), - queue_time AS - ( - SELECT CASE - WHEN rnk <= 100 THEN cast(userid as char(6)) - ELSE 'Other Users' - END AS usename, - queue, - aborted, - 'Queue Time' AS measure_type, - COUNT(*) AS query_count, - ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY queue_s), 2) AS p25_s, - ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY queue_s), 2) AS p50_s, - ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY queue_s), 2) AS p75_s, - ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY queue_s), 2) AS p90_s, - ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY queue_s), 2) AS p95_s, - ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY queue_s), 2) AS p99_s, - MAX(queue_s) AS max_s, - AVG(queue_s) AS avg_s, - stddev(queue_s) AS std_s - FROM queries - GROUP BY 1, - 2, - 3, - 4 - ) -SELECT measure_type, - usename, - queue, - aborted, - query_count, - p25_s, - p50_s, - p75_s, - p90_s, - p95_s, - p99_s, - max_s, - avg_s, - std_s -FROM exec_time -UNION ALL -SELECT measure_type, - usename, - queue, - aborted, - query_count, - p25_s, - p50_s, - p75_s, - p90_s, - p95_s, - p99_s, - max_s, - avg_s, - std_s -FROM queue_time -UNION ALL -SELECT measure_type, - usename, - queue, - aborted, - query_count, - p25_s, - p50_s, - p75_s, - p90_s, - p95_s, - p99_s, - max_s, - avg_s, - std_s -FROM elapsed_time -ORDER BY 1, - 2, - 3, - 4; \ No newline at end of file +CREATE TEMP TABLE query_distribution AS ( + WITH queries AS + ( + select q.user_id as "userid" + , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" + , date_trunc('hour', q.start_time) as "period" + , q.transaction_id as "xid" + , q.query_id as "query" + , q.query_text::char(50) as "querytxt" + , q.queue_time / 1000000.00 as "queue_s" + , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric + , case when q.status = 'failed' then 1 else 0 end "aborted" + , q.elapsed_time / 1000000.00 as "total_elapsed_s" --again includes compile time + , user_query_count + , DENSE_RANK() OVER (ORDER BY user_query_count) AS rnk + FROM sys_query_history q + inner join (select user_id, count(*) user_query_count + from sys_query_history + where user_id > 1 + AND start_time >={{START_TIME}} + AND start_time <={{END_TIME}} + AND query_text LIKE '%replay_start%' + AND status != 'failed' + group by user_id) uc on uc.user_id = q.user_id + WHERE q.user_id > 1 + AND q.start_time >={{START_TIME}} + AND q.start_time <={{END_TIME}} + AND q.query_text LIKE '%replay_start%' + AND q.status != 'failed' + ), + elapsed_time AS + ( + SELECT userid, + rnk, + queue, + aborted, + 'Query Latency' AS measure_type, + COUNT(*) AS query_count, + ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p25_s, + ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p50_s, + ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p75_s, + ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p90_s, + ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p95_s, + ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY total_elapsed_s), 2) AS p99_s, + MAX(total_elapsed_s) AS max_s, + AVG(total_elapsed_s) AS avg_s, + stddev(total_elapsed_s) AS std_s + FROM queries + GROUP BY 1, + 2, + 3, + 4, + 5 + ), + exec_time AS + ( + SELECT userid, + rnk, + queue, + aborted, + 'Execution Time' AS measure_type, + COUNT(*) AS query_count, + ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p25_s, + ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p50_s, + ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p75_s, + ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p90_s, + ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p95_s, + ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY exec_time_s), 2) AS p99_s, + MAX(exec_time_s) AS max_s, + AVG(exec_time_s) AS avg_s, + stddev(exec_time_s) AS std_s + FROM queries + GROUP BY 1, + 2, + 3, + 4, + 5 + ), + queue_time AS + ( + SELECT userid, + rnk, + queue, + aborted, + 'Queue Time' AS measure_type, + COUNT(*) AS query_count, + ROUND(PERCENTILE_CONT(0.25) WITHIN GROUP (ORDER BY queue_s), 2) AS p25_s, + ROUND(PERCENTILE_CONT(0.50) WITHIN GROUP (ORDER BY queue_s), 2) AS p50_s, + ROUND(PERCENTILE_CONT(0.75) WITHIN GROUP (ORDER BY queue_s), 2) AS p75_s, + ROUND(PERCENTILE_CONT(0.90) WITHIN GROUP (ORDER BY queue_s), 2) AS p90_s, + ROUND(PERCENTILE_CONT(0.95) WITHIN GROUP (ORDER BY queue_s), 2) AS p95_s, + ROUND(PERCENTILE_CONT(0.99) WITHIN GROUP (ORDER BY queue_s), 2) AS p99_s, + MAX(queue_s) AS max_s, + AVG(queue_s) AS avg_s, + stddev(queue_s) AS std_s + FROM queries + GROUP BY 1, + 2, + 3, + 4, + 5 + ) + SELECT measure_type, + userid, + rnk, + queue, + aborted, + query_count, + p25_s, + p50_s, + p75_s, + p90_s, + p95_s, + p99_s, + max_s, + avg_s, + std_s + FROM exec_time + UNION ALL + SELECT measure_type, + userid, + rnk, + queue, + aborted, + query_count, + p25_s, + p50_s, + p75_s, + p90_s, + p95_s, + p99_s, + max_s, + avg_s, + std_s + FROM queue_time + UNION ALL + SELECT measure_type, + userid, + rnk, + queue, + aborted, + query_count, + p25_s, + p50_s, + p75_s, + p90_s, + p95_s, + p99_s, + max_s, + avg_s, + std_s + FROM elapsed_time +); + +SELECT a.measure_type, + CASE + WHEN rnk <= 100 THEN u.usename + ELSE 'Other Users' END as usename, + a.queue, + a.aborted, + a.query_count, + a.p25_s, + a.p50_s, + a.p75_s, + a.p90_s, + a.p95_s, + a.p99_s, + a.max_s, + a.avg_s, + a.std_s +FROM query_distribution a + LEFT JOIN pg_user u on a.userid = u.usesysid + ORDER BY 1, + 2, + 3, + 4; diff --git a/core/sql/query_metrics.sql b/core/sql/query_metrics.sql index 5c9530077..2e4ccb83f 100644 --- a/core/sql/query_metrics.sql +++ b/core/sql/query_metrics.sql @@ -1,17 +1,33 @@ /*QueryMetrics*/ -select q.user_id as "userid" - , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" - , date_trunc('hour', q.start_time) as "period" - , q.transaction_id as "xid" - , q.query_id as "query" - , q.query_text::char(50) as "querytxt" - , q.queue_time / 1000000.00 as "queue_s" - , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric - , case when q.status = 'failed' then 1 else 0 end "aborted" - , q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric -FROM sys_query_history q -WHERE q.user_id > 1 - AND q.start_time >={{START_TIME}} - AND q.start_time <={{END_TIME}} - AND q.query_text LIKE '%replay_start%' - AND q.status != 'failed'; +CREATE TEMP TABLE query_metrics AS ( + select q.user_id as "userid" + , case when q.result_cache_hit = 't' then 'Result Cache' else 'Default queue' end as "queue" + , date_trunc('hour', q.start_time) as "period" + , q.transaction_id as "xid" + , q.query_id as "query" + , q.query_text::char(50) as "querytxt" + , q.queue_time / 1000000.00 as "queue_s" + , q.execution_time / 1000000.00 as "exec_time_s" -- This includes compile time. Differs in behavior from provisioned metric + , case when q.status = 'failed' then 1 else 0 end "aborted" + , q.elapsed_time / 1000000.00 as "total_elapsed_s" -- This includes compile time. Differs in behavior from provisioned metric + FROM sys_query_history q + WHERE q.user_id > 1 + AND q.start_time >={{START_TIME}} + AND q.start_time <={{END_TIME}} + AND q.query_text LIKE '%replay_start%' + AND q.status != 'failed' +); + +SELECT a.userid, + u.usename, + a.queue, + a.period, + a.xid, + a.query, + a.querytxt, + a.queue_s, + a.exec_time_s, + a.aborted, + a.total_elapsed_s +FROM query_metrics a + LEFT JOIN pg_user u on a.userid = u.usesysid; diff --git a/core/sql/sys_query_history.sql b/core/sql/sys_query_history.sql index e73189c0c..4311a62c9 100644 --- a/core/sql/sys_query_history.sql +++ b/core/sql/sys_query_history.sql @@ -1,5 +1,6 @@ /*SysQueryHistory*/ SELECT h.user_id, + u.usename, query_id, transaction_id, session_id, @@ -19,6 +20,7 @@ SELECT h.user_id, returned_bytes, redshift_version from sys_query_history h +LEFT JOIN pg_user u on h.user_id = u.usesysid WHERE user_id > 1 AND start_time >= {{START_TIME}} AND start_time <= {{END_TIME}}; \ No newline at end of file From d02091de11be15326d17efdb7f0d26a70424d28f Mon Sep 17 00:00:00 2001 From: Devanathan Sabapathy Date: Thu, 2 May 2024 20:04:34 +0000 Subject: [PATCH 3/4] changing the directory paths in two places --- core/replay/report_gen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/replay/report_gen.py b/core/replay/report_gen.py index a9cbd1641..602cdc2f9 100644 --- a/core/replay/report_gen.py +++ b/core/replay/report_gen.py @@ -55,7 +55,7 @@ def pdf_gen(report, summary=None): @param summary: list, replay summary """ - with open("/home/devsaba/code/redshift-test-drive/core/replay/report_content.yaml", "r") as stream: + with open("core/replay/report_content.yaml", "r") as stream: docs = yaml.safe_load(stream) style = g_stylesheet.get("styles") @@ -347,7 +347,7 @@ def unload(unload_location, iam_role, cluster, user, replay): logger = logging.getLogger("WorkloadReplicatorLogger") - directory = r"/home/devsaba/code/redshift-test-drive/core/sql" + directory = r"core/sql" queries = [] # used to return query names with initiate_connection(username=user, cluster=cluster) as conn: # initiate connection From 383ff668231dffad08e5e9954f62b56530d3a839 Mon Sep 17 00:00:00 2001 From: Devanathan Sabapathy Date: Thu, 2 May 2024 20:08:50 +0000 Subject: [PATCH 4/4] modifying a column name in a query --- core/sql/sys_query_history.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/sql/sys_query_history.sql b/core/sql/sys_query_history.sql index 4311a62c9..ac01acb3e 100644 --- a/core/sql/sys_query_history.sql +++ b/core/sql/sys_query_history.sql @@ -1,6 +1,6 @@ /*SysQueryHistory*/ SELECT h.user_id, - u.usename, + u.usename as user_name, query_id, transaction_id, session_id,