From 455146e643465f0ae62493796aa9abbcf95d6d05 Mon Sep 17 00:00:00 2001 From: Alexey Lesovsky Date: Wed, 11 Aug 2021 14:13:01 +0500 Subject: [PATCH] Add checksum and session metrics from pg_stat_database added in Postgres 12 and 14. --- internal/collector/postgres_common.go | 1 + internal/collector/postgres_database.go | 251 +++++++++++++------ internal/collector/postgres_database_test.go | 61 ++++- 3 files changed, 224 insertions(+), 89 deletions(-) diff --git a/internal/collector/postgres_common.go b/internal/collector/postgres_common.go index 15a060c..a786afe 100644 --- a/internal/collector/postgres_common.go +++ b/internal/collector/postgres_common.go @@ -16,6 +16,7 @@ const ( PostgresV10 = 100000 PostgresV12 = 120000 PostgresV13 = 130000 + PostgresV14 = 140000 // Minimal required version is 9.5. PostgresVMinNum = PostgresV95 diff --git a/internal/collector/postgres_database.go b/internal/collector/postgres_database.go index bf868e0..531a68c 100644 --- a/internal/collector/postgres_database.go +++ b/internal/collector/postgres_database.go @@ -9,7 +9,7 @@ import ( ) const ( - databaseQuery = "SELECT " + + databasesQuery11 = "SELECT " + "coalesce(datname, 'global') AS database, " + "xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, " + "conflicts, temp_files, temp_bytes, deadlocks, blk_read_time, blk_write_time, pg_database_size(datname) as size_bytes, " + @@ -17,25 +17,51 @@ const ( "FROM pg_stat_database WHERE datname IN (SELECT datname FROM pg_database WHERE datallowconn AND NOT datistemplate) " + "OR datname IS NULL" + databasesQuery12 = "SELECT " + + "coalesce(datname, 'global') AS database, " + + "xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, " + + "conflicts, temp_files, temp_bytes, deadlocks, checksum_failures, coalesce(extract(epoch from checksum_last_failure), 0) AS last_checksum_failure_unixtime, " + + "blk_read_time, 
blk_write_time, pg_database_size(datname) as size_bytes, " + + "coalesce(extract('epoch' from age(now(), stats_reset)), 0) as stats_age_seconds " + + "FROM pg_stat_database WHERE datname IN (SELECT datname FROM pg_database WHERE datallowconn AND NOT datistemplate) " + + "OR datname IS NULL" + + databasesQueryLatest = "SELECT " + + "coalesce(datname, 'global') AS database, " + + "xact_commit, xact_rollback, blks_read, blks_hit, tup_returned, tup_fetched, tup_inserted, tup_updated, tup_deleted, " + + "conflicts, temp_files, temp_bytes, deadlocks, checksum_failures, coalesce(extract(epoch from checksum_last_failure), 0) AS last_checksum_failure_unixtime, " + + "blk_read_time, blk_write_time, " + + "session_time, active_time, idle_in_transaction_time, sessions, sessions_abandoned, sessions_fatal, sessions_killed, " + + "pg_database_size(datname) as size_bytes, " + + "coalesce(extract('epoch' from age(now(), stats_reset)), 0) as stats_age_seconds " + + "FROM pg_stat_database WHERE datname IN (SELECT datname FROM pg_database WHERE datallowconn AND NOT datistemplate) " + + "OR datname IS NULL" + xidLimitQuery = "SELECT 'database' AS src, 2147483647 - greatest(max(age(datfrozenxid)), max(age(coalesce(nullif(datminmxid, 1), datfrozenxid)))) AS to_limit FROM pg_database " + "UNION SELECT 'prepared_xacts' AS src, 2147483647 - coalesce(max(age(transaction)), 0) AS to_limit FROM pg_prepared_xacts " + "UNION SELECT 'replication_slots' AS src, 2147483647 - greatest(coalesce(min(age(xmin)), 0), coalesce(min(age(catalog_xmin)), 0)) AS to_limit FROM pg_replication_slots" ) type postgresDatabasesCollector struct { - commits typedDesc - rollbacks typedDesc - conflicts typedDesc - deadlocks typedDesc - blocks typedDesc - tuples typedDesc - tempbytes typedDesc - tempfiles typedDesc - blockstime typedDesc - sizes typedDesc - statsage typedDesc - xidlimit typedDesc - labelNames []string + commits typedDesc + rollbacks typedDesc + blocks typedDesc + tuples typedDesc + tempbytes typedDesc + 
tempfiles typedDesc + conflicts typedDesc + deadlocks typedDesc + csumfails typedDesc + csumlastfailunixts typedDesc + blockstime typedDesc + sessionalltime typedDesc + sessiontime typedDesc + sessionsall typedDesc + sessions typedDesc + sizes typedDesc + statsage typedDesc + xidlimit typedDesc + labelNames []string } // NewPostgresDatabasesCollector returns a new Collector exposing postgres databases stats. @@ -57,18 +83,6 @@ func NewPostgresDatabasesCollector(constLabels labels, settings model.CollectorS labels, constLabels, settings.Filters, ), - conflicts: newBuiltinTypedDesc( - descOpts{"postgres", "database", "conflicts_total", "Total number of recovery conflicts occurred.", 0}, - prometheus.CounterValue, - labels, constLabels, - settings.Filters, - ), - deadlocks: newBuiltinTypedDesc( - descOpts{"postgres", "database", "deadlocks_total", "Total number of deadlocks occurred.", 0}, - prometheus.CounterValue, - labels, constLabels, - settings.Filters, - ), blocks: newBuiltinTypedDesc( descOpts{"postgres", "database", "blocks_total", "Total number of disk blocks had been accessed by each type of access.", 0}, prometheus.CounterValue, @@ -93,12 +107,60 @@ func NewPostgresDatabasesCollector(constLabels labels, settings model.CollectorS labels, constLabels, settings.Filters, ), + conflicts: newBuiltinTypedDesc( + descOpts{"postgres", "database", "conflicts_total", "Total number of recovery conflicts occurred.", 0}, + prometheus.CounterValue, + labels, constLabels, + settings.Filters, + ), + deadlocks: newBuiltinTypedDesc( + descOpts{"postgres", "database", "deadlocks_total", "Total number of deadlocks occurred.", 0}, + prometheus.CounterValue, + labels, constLabels, + settings.Filters, + ), + csumfails: newBuiltinTypedDesc( + descOpts{"postgres", "database", "checksum_failures_total", "Total number of checksum failures occurred.", 0}, + prometheus.CounterValue, + labels, constLabels, + settings.Filters, + ), + csumlastfailunixts: newBuiltinTypedDesc( + 
descOpts{"postgres", "database", "last_checksum_failure_seconds", "Time of the last checksum failure occurred, in unixtime.", 0}, + prometheus.GaugeValue, + labels, constLabels, + settings.Filters, + ), blockstime: newBuiltinTypedDesc( descOpts{"postgres", "database", "blk_time_seconds_total", "Total time spent accessing data blocks by backends in this database in each access type, in seconds.", .001}, prometheus.CounterValue, []string{"database", "type"}, constLabels, settings.Filters, ), + sessionalltime: newBuiltinTypedDesc( + descOpts{"postgres", "database", "session_time_all_seconds_total", "Total time spent by database sessions in this database in all states, in seconds", .001}, + prometheus.CounterValue, + labels, constLabels, + settings.Filters, + ), + sessiontime: newBuiltinTypedDesc( + descOpts{"postgres", "database", "session_time_seconds_total", "Total time spent by database sessions in this database in each state, in seconds", .001}, + prometheus.CounterValue, + []string{"database", "state"}, constLabels, + settings.Filters, + ), + sessionsall: newBuiltinTypedDesc( + descOpts{"postgres", "database", "sessions_all_total", "Total number of sessions established to this database.", 0}, + prometheus.CounterValue, + labels, constLabels, + settings.Filters, + ), + sessions: newBuiltinTypedDesc( + descOpts{"postgres", "database", "sessions_total", "Total number of sessions established to this database and closed by each reason.", 0}, + prometheus.CounterValue, + []string{"database", "reason"}, constLabels, + settings.Filters, + ), sizes: newBuiltinTypedDesc( descOpts{"postgres", "database", "size_bytes", "Total size of the database, in bytes.", 0}, prometheus.GaugeValue, @@ -128,7 +190,7 @@ func (c *postgresDatabasesCollector) Update(config Config, ch chan<- prometheus. 
} defer conn.Close() - res, err := conn.Query(databaseQuery) + res, err := conn.Query(selectDatabasesQuery(config.serverVersionNum)) if err != nil { return err } @@ -145,8 +207,6 @@ func (c *postgresDatabasesCollector) Update(config Config, ch chan<- prometheus. for _, stat := range stats { ch <- c.commits.newConstMetric(stat.xactcommit, stat.database) ch <- c.rollbacks.newConstMetric(stat.xactrollback, stat.database) - ch <- c.conflicts.newConstMetric(stat.conflicts, stat.database) - ch <- c.deadlocks.newConstMetric(stat.deadlocks, stat.database) ch <- c.blocks.newConstMetric(stat.blksread, stat.database, "read") ch <- c.blocks.newConstMetric(stat.blkshit, stat.database, "hit") ch <- c.tuples.newConstMetric(stat.tupreturned, stat.database, "returned") @@ -157,10 +217,30 @@ func (c *postgresDatabasesCollector) Update(config Config, ch chan<- prometheus. ch <- c.tempbytes.newConstMetric(stat.tempbytes, stat.database) ch <- c.tempfiles.newConstMetric(stat.tempfiles, stat.database) + ch <- c.conflicts.newConstMetric(stat.conflicts, stat.database) + ch <- c.deadlocks.newConstMetric(stat.deadlocks, stat.database) + ch <- c.blockstime.newConstMetric(stat.blkreadtime, stat.database, "read") ch <- c.blockstime.newConstMetric(stat.blkwritetime, stat.database, "write") ch <- c.sizes.newConstMetric(stat.sizebytes, stat.database) ch <- c.statsage.newConstMetric(stat.statsage, stat.database) + + if config.serverVersionNum >= PostgresV12 { + ch <- c.csumfails.newConstMetric(stat.csumfails, stat.database) + ch <- c.csumlastfailunixts.newConstMetric(stat.csumlastfailunixts, stat.database) + } + + if config.serverVersionNum >= PostgresV14 { + ch <- c.sessionalltime.newConstMetric(stat.sessiontime, stat.database) + ch <- c.sessiontime.newConstMetric(stat.activetime, stat.database, "active") + ch <- c.sessiontime.newConstMetric(stat.idletxtime, stat.database, "idle_in_transaction") + ch <- c.sessiontime.newConstMetric(stat.sessiontime-(stat.activetime+stat.idletxtime), stat.database, 
"idle") + ch <- c.sessionsall.newConstMetric(stat.sessions, stat.database) + ch <- c.sessions.newConstMetric(stat.sessabandoned, stat.database, "abandoned") + ch <- c.sessions.newConstMetric(stat.sessfatal, stat.database, "fatal") + ch <- c.sessions.newConstMetric(stat.sesskilled, stat.database, "killed") + ch <- c.sessions.newConstMetric(stat.sessions-(stat.sessabandoned+stat.sessfatal+stat.sesskilled), stat.database, "normal") + } } ch <- c.xidlimit.newConstMetric(xidStats.database, "pg_database") @@ -172,24 +252,33 @@ func (c *postgresDatabasesCollector) Update(config Config, ch chan<- prometheus. // postgresDatabaseStat represents per-database stats based on pg_stat_database. type postgresDatabaseStat struct { - database string - xactcommit float64 - xactrollback float64 - blksread float64 - blkshit float64 - tupreturned float64 - tupfetched float64 - tupinserted float64 - tupupdated float64 - tupdeleted float64 - conflicts float64 - tempfiles float64 - tempbytes float64 - deadlocks float64 - blkreadtime float64 - blkwritetime float64 - sizebytes float64 - statsage float64 + database string + xactcommit float64 + xactrollback float64 + blksread float64 + blkshit float64 + tupreturned float64 + tupfetched float64 + tupinserted float64 + tupupdated float64 + tupdeleted float64 + conflicts float64 + tempfiles float64 + tempbytes float64 + deadlocks float64 + csumfails float64 + csumlastfailunixts float64 + blkreadtime float64 + blkwritetime float64 + sessiontime float64 + activetime float64 + idletxtime float64 + sessions float64 + sessabandoned float64 + sessfatal float64 + sesskilled float64 + sizebytes float64 + statsage float64 } // parsePostgresDatabasesStats parses PGResult, extract data and return struct with stats values. 
@@ -235,79 +324,67 @@ func parsePostgresDatabasesStats(r *model.PGResult, labelNames []string) map[str continue } + s := stats[databaseFQName] // Run column-specific logic switch string(colname.Name) { case "xact_commit": - s := stats[databaseFQName] s.xactcommit = v - stats[databaseFQName] = s case "xact_rollback": - s := stats[databaseFQName] s.xactrollback = v - stats[databaseFQName] = s case "blks_read": - s := stats[databaseFQName] s.blksread = v - stats[databaseFQName] = s case "blks_hit": - s := stats[databaseFQName] s.blkshit = v - stats[databaseFQName] = s case "tup_returned": - s := stats[databaseFQName] s.tupreturned = v - stats[databaseFQName] = s case "tup_fetched": - s := stats[databaseFQName] s.tupfetched = v - stats[databaseFQName] = s case "tup_inserted": - s := stats[databaseFQName] s.tupinserted = v - stats[databaseFQName] = s case "tup_updated": - s := stats[databaseFQName] s.tupupdated = v - stats[databaseFQName] = s case "tup_deleted": - s := stats[databaseFQName] s.tupdeleted = v - stats[databaseFQName] = s case "conflicts": - s := stats[databaseFQName] s.conflicts = v - stats[databaseFQName] = s case "temp_files": - s := stats[databaseFQName] s.tempfiles = v - stats[databaseFQName] = s case "temp_bytes": - s := stats[databaseFQName] s.tempbytes = v - stats[databaseFQName] = s case "deadlocks": - s := stats[databaseFQName] s.deadlocks = v - stats[databaseFQName] = s + case "checksum_failures": + s.csumfails = v + case "last_checksum_failure_unixtime": + s.csumlastfailunixts = v case "blk_read_time": - s := stats[databaseFQName] s.blkreadtime = v - stats[databaseFQName] = s case "blk_write_time": - s := stats[databaseFQName] s.blkwritetime = v - stats[databaseFQName] = s + case "session_time": + s.sessiontime = v + case "active_time": + s.activetime = v + case "idle_in_transaction_time": + s.idletxtime = v + case "sessions": + s.sessions = v + case "sessions_abandoned": + s.sessabandoned = v + case "sessions_fatal": + s.sessfatal = v + case 
"sessions_killed": + s.sesskilled = v case "size_bytes": - s := stats[databaseFQName] s.sizebytes = v - stats[databaseFQName] = s case "stats_age_seconds": - s := stats[databaseFQName] s.statsage = v - stats[databaseFQName] = s default: continue } + + // Store updated stats into local store. + stats[databaseFQName] = s } } @@ -348,3 +425,15 @@ func parsePostgresXidLimitStats(r *model.PGResult) xidLimitStats { return stats } + +// selectDatabasesQuery returns suitable databases query depending on passed version. +func selectDatabasesQuery(version int) string { + switch { + case version < PostgresV12: + return databasesQuery11 + case version < PostgresV14: + return databasesQuery12 + default: + return databasesQueryLatest + } +} diff --git a/internal/collector/postgres_database_test.go b/internal/collector/postgres_database_test.go index 546e7bd..f3dfecb 100644 --- a/internal/collector/postgres_database_test.go +++ b/internal/collector/postgres_database_test.go @@ -13,17 +13,27 @@ func TestPostgresDatabasesCollector_Update(t *testing.T) { required: []string{ "postgres_database_xact_commits_total", "postgres_database_xact_rollbacks_total", - "postgres_database_conflicts_total", - "postgres_database_deadlocks_total", "postgres_database_blocks_total", "postgres_database_tuples_total", "postgres_database_temp_bytes_total", "postgres_database_temp_files_total", + "postgres_database_conflicts_total", + "postgres_database_deadlocks_total", + "postgres_database_checksum_failures_total", + "postgres_database_last_checksum_failure_seconds", "postgres_database_blk_time_seconds_total", "postgres_database_size_bytes", "postgres_database_stats_age_seconds_total", "postgres_xacts_left_before_wraparound", }, + // TODO: wait until Postgres 14 has been released, update Postgres version on pgscv-testing docker image + // and move these metrics to 'required' slice. 
+ optional: []string{ + "postgres_database_session_time_all_seconds_total", + "postgres_database_session_time_seconds_total", + "postgres_database_sessions_all_total", + "postgres_database_sessions_total", + }, collector: NewPostgresDatabasesCollector, service: model.ServiceTypePostgresql, } @@ -41,13 +51,17 @@ func Test_parsePostgresDatabasesStats(t *testing.T) { name: "normal output", res: &model.PGResult{ Nrows: 2, - Ncols: 18, + Ncols: 27, Colnames: []pgproto3.FieldDescription{ {Name: []byte("database")}, {Name: []byte("xact_commit")}, {Name: []byte("xact_rollback")}, {Name: []byte("blks_read")}, {Name: []byte("blks_hit")}, {Name: []byte("tup_returned")}, {Name: []byte("tup_fetched")}, {Name: []byte("tup_inserted")}, {Name: []byte("tup_updated")}, {Name: []byte("tup_deleted")}, {Name: []byte("conflicts")}, {Name: []byte("temp_files")}, {Name: []byte("temp_bytes")}, {Name: []byte("deadlocks")}, - {Name: []byte("blk_read_time")}, {Name: []byte("blk_write_time")}, {Name: []byte("size_bytes")}, {Name: []byte("stats_age_seconds")}, + {Name: []byte("checksum_failures")}, {Name: []byte("last_checksum_failure_unixtime")}, + {Name: []byte("blk_read_time")}, {Name: []byte("blk_write_time")}, + {Name: []byte("session_time")}, {Name: []byte("active_time")}, {Name: []byte("idle_in_transaction_time")}, + {Name: []byte("sessions")}, {Name: []byte("sessions_abandoned")}, {Name: []byte("sessions_fatal")}, {Name: []byte("sessions_killed")}, + {Name: []byte("size_bytes")}, {Name: []byte("stats_age_seconds")}, }, Rows: [][]sql.NullString{ { @@ -55,14 +69,22 @@ func Test_parsePostgresDatabasesStats(t *testing.T) { {String: "100", Valid: true}, {String: "5", Valid: true}, {String: "10000", Valid: true}, {String: "845785", Valid: true}, {String: "758", Valid: true}, {String: "542", Valid: true}, {String: "452", Valid: true}, {String: "174", Valid: true}, {String: "125", Valid: true}, {String: "33", Valid: true}, {String: "41", Valid: true}, {String: "85642585", Valid: true}, {String: 
"25", Valid: true}, - {String: "542542", Valid: true}, {String: "150150", Valid: true}, {String: "485254752", Valid: true}, {String: "4589", Valid: true}, + {String: "13", Valid: true}, {String: "1628668483", Valid: true}, + {String: "542542", Valid: true}, {String: "150150", Valid: true}, + {String: "12345678", Valid: true}, {String: "5425682", Valid: true}, {String: "125478", Valid: true}, + {String: "54872", Valid: true}, {String: "458", Valid: true}, {String: "8942", Valid: true}, {String: "69", Valid: true}, + {String: "485254752", Valid: true}, {String: "4589", Valid: true}, }, { {String: "testdb2", Valid: true}, {String: "254", Valid: true}, {String: "41", Valid: true}, {String: "4853", Valid: true}, {String: "48752", Valid: true}, {String: "7856", Valid: true}, {String: "4254", Valid: true}, {String: "894", Valid: true}, {String: "175", Valid: true}, {String: "245", Valid: true}, {String: "26", Valid: true}, {String: "84", Valid: true}, {String: "125784686", Valid: true}, {String: "11", Valid: true}, - {String: "458751", Valid: true}, {String: "235578", Valid: true}, {String: "856964774", Valid: true}, {String: "6896", Valid: true}, + {String: "1", Valid: true}, {String: "54324565", Valid: true}, + {String: "458751", Valid: true}, {String: "235578", Valid: true}, + {String: "78541256", Valid: true}, {String: "8542214", Valid: true}, {String: "85475", Valid: true}, + {String: "854124", Valid: true}, {String: "8874", Valid: true}, {String: "4114", Valid: true}, {String: "5477", Valid: true}, + {String: "856964774", Valid: true}, {String: "6896", Valid: true}, }, }, }, @@ -71,13 +93,21 @@ func Test_parsePostgresDatabasesStats(t *testing.T) { database: "testdb1", xactcommit: 100, xactrollback: 5, blksread: 10000, blkshit: 845785, tupreturned: 758, tupfetched: 542, tupinserted: 452, tupupdated: 174, tupdeleted: 125, conflicts: 33, tempfiles: 41, tempbytes: 85642585, deadlocks: 25, - blkreadtime: 542542, blkwritetime: 150150, sizebytes: 485254752, statsage: 4589, 
+ csumfails: 13, csumlastfailunixts: 1628668483, + blkreadtime: 542542, blkwritetime: 150150, + sessiontime: 12345678, activetime: 5425682, idletxtime: 125478, + sessions: 54872, sessabandoned: 458, sessfatal: 8942, sesskilled: 69, + sizebytes: 485254752, statsage: 4589, }, "testdb2": { database: "testdb2", xactcommit: 254, xactrollback: 41, blksread: 4853, blkshit: 48752, tupreturned: 7856, tupfetched: 4254, tupinserted: 894, tupupdated: 175, tupdeleted: 245, conflicts: 26, tempfiles: 84, tempbytes: 125784686, deadlocks: 11, - blkreadtime: 458751, blkwritetime: 235578, sizebytes: 856964774, statsage: 6896, + csumfails: 1, csumlastfailunixts: 54324565, + blkreadtime: 458751, blkwritetime: 235578, + sessiontime: 78541256, activetime: 8542214, idletxtime: 85475, + sessions: 854124, sessabandoned: 8874, sessfatal: 4114, sesskilled: 5477, + sizebytes: 856964774, statsage: 6896, }, }, }, @@ -121,3 +151,18 @@ func Test_parsePostgresXidLimitStats(t *testing.T) { }) } } + +func Test_selectDatabasesQuery(t *testing.T) { + testcases := []struct { + version int + want string + }{ + {version: PostgresV10, want: databasesQuery11}, + {version: PostgresV12, want: databasesQuery12}, + {version: PostgresV14, want: databasesQueryLatest}, + } + + for _, tc := range testcases { + assert.Equal(t, tc.want, selectDatabasesQuery(tc.version)) + } +}