Skip to content

Commit

Permalink
Fix index errors for large values
Browse files Browse the repository at this point in the history
A Postgres unique constraint fails if the spec or message is too large. The
solution was to split out the spec to a separate table and include a
hash of the message that the unique constraint is on.

Relates:
https://issues.redhat.com/browse/ACM-10794

Signed-off-by: mprahl <[email protected]>
  • Loading branch information
mprahl committed Mar 29, 2024
1 parent 17a1c9e commit 4fe9a24
Show file tree
Hide file tree
Showing 5 changed files with 285 additions and 49 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
BEGIN;

DROP EXTENSION IF EXISTS pgcrypto;

-- Drop message_hash
ALTER TABLE compliance_events DROP CONSTRAINT compliance_events_cluster_id_policy_id_parent_policy_id_com_key;
DROP INDEX compliance_events_null1;
ALTER TABLE compliance_events DROP COLUMN message_hash;
ALTER TABLE compliance_events ADD CONSTRAINT compliance_events_cluster_id_policy_id_parent_policy_id_com_key UNIQUE (cluster_id, policy_id, parent_policy_id, compliance, message, timestamp);
CREATE UNIQUE INDEX compliance_events_null1 ON compliance_events (cluster_id, policy_id, compliance, message, timestamp) WHERE parent_policy_id IS NULL;
DROP INDEX compliance_events_messages;


-- Add back the spec column directly on the policies table
ALTER TABLE policies ADD spec JSONB;

DO
$DO$
DECLARE temprow RECORD;
BEGIN FOR temprow IN
SELECT "id", "spec_id" FROM policies
LOOP
UPDATE policies SET spec = (SELECT spec FROM specs WHERE id=temprow.spec_id) WHERE id = temprow.id;
END LOOP;
END;
$DO$;

ALTER TABLE policies DROP CONSTRAINT policies_kind_api_group_name_namespace_spec_id_severity_key;
DROP INDEX policies_null1;
DROP INDEX policies_null2;
DROP INDEX policies_null3;

ALTER TABLE policies DROP COLUMN spec_id;

DROP TABLE specs;

ALTER TABLE policies ADD CONSTRAINT policies_kind_api_group_name_namespace_spec_severity_key UNIQUE (kind, api_group, name, namespace, spec, severity);
CREATE UNIQUE INDEX policies_null1 ON policies (kind, api_group, name, spec, severity) WHERE namespace IS NULL;
CREATE UNIQUE INDEX policies_null2 ON policies (kind, api_group, name, namespace, spec) WHERE severity IS NULL;
CREATE UNIQUE INDEX policies_null3 ON policies (kind, api_group, name, spec) WHERE namespace IS NULL AND severity IS NULL;
CREATE INDEX idx_policies_spec ON policies (spec);

COMMIT;
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
BEGIN;

CREATE EXTENSION IF NOT EXISTS pgcrypto;

-- If compliance messages are too long, the unique index gets too large and fails. This is a workaround for a unique
-- constraint while still allowing for long messages.

-- Drop the soon to be invalid unique constraints.
ALTER TABLE compliance_events DROP CONSTRAINT compliance_events_cluster_id_policy_id_parent_policy_id_com_key;
DROP INDEX compliance_events_null1;

-- SHA1 hex of the message for the uniqueness constraint.
ALTER TABLE compliance_events ADD message_hash VARCHAR(40);

-- Populate the SHA1 hex
DO
$DO$
DECLARE temprow RECORD;
BEGIN FOR temprow IN
SELECT "id", "message" FROM compliance_events
LOOP
UPDATE compliance_events SET message_hash = encode(digest(temprow.message, 'sha1'), 'hex') WHERE id = temprow.id;
END LOOP;
END;
$DO$;

ALTER TABLE compliance_events ALTER COLUMN message_hash SET NOT NULL;

-- Set the unique constraints
ALTER TABLE compliance_events ADD CONSTRAINT compliance_events_cluster_id_policy_id_parent_policy_id_com_key UNIQUE (cluster_id, policy_id, parent_policy_id, compliance, message_hash, timestamp);

CREATE UNIQUE INDEX compliance_events_null1 ON compliance_events (cluster_id, policy_id, compliance, message_hash, timestamp) WHERE parent_policy_id IS NULL;

-- Provide an index for equality comparisons on the message.
CREATE INDEX compliance_events_messages ON compliance_events USING HASH (message);

-- If the spec is too long, the unique index gets too large and fails. This is a workaround for a unique
-- constraint while still allowing for spec uniqueness.

CREATE TABLE specs(
id serial PRIMARY KEY,
spec JSONB NOT NULL,
EXCLUDE USING HASH (spec with =)
);

-- Drop the soon to be invalid unique constraints.
ALTER TABLE policies DROP CONSTRAINT policies_kind_api_group_name_namespace_spec_severity_key;
DROP INDEX policies_null1;
DROP INDEX policies_null2;
DROP INDEX policies_null3;
DROP INDEX idx_policies_spec;

ALTER TABLE policies ADD spec_id INT;
ALTER TABLE policies ADD FOREIGN KEY (spec_id) REFERENCES specs(id);

-- Populate the specs table
DO
$DO$
DECLARE temprow RECORD;
BEGIN FOR temprow IN
SELECT "id", "spec" FROM policies
LOOP
INSERT INTO specs (spec) VALUES (temprow.spec) ON CONFLICT DO NOTHING;
UPDATE policies SET spec_id = (SELECT id FROM specs WHERE spec=temprow.spec) WHERE id = temprow.id;
END LOOP;
END;
$DO$;

ALTER TABLE policies ALTER COLUMN spec_id SET NOT NULL;

ALTER TABLE policies DROP spec;

ALTER TABLE policies ADD CONSTRAINT policies_kind_api_group_name_namespace_spec_id_severity_key UNIQUE (kind, api_group, name, namespace, spec_id, severity);
CREATE UNIQUE INDEX policies_null1 ON policies (kind, api_group, name, spec_id, severity) WHERE namespace IS NULL;
CREATE UNIQUE INDEX policies_null2 ON policies (kind, api_group, name, namespace, spec_id) WHERE severity IS NULL;
CREATE UNIQUE INDEX policies_null3 ON policies (kind, api_group, name, spec_id) WHERE namespace IS NULL AND severity IS NULL;

COMMIT;
15 changes: 10 additions & 5 deletions controllers/complianceeventsapi/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -569,14 +569,18 @@ func setAuthorizedClusters(ctx context.Context, db *sql.DB, parsed *queryOptions
// generateGetComplianceEventsQuery will return a SELECT query with results ready to be parsed by
// scanIntoComplianceEvent. The caller is responsible for adding filters to the query.
func generateGetComplianceEventsQuery(includeSpec bool) string {
return fmt.Sprintf(`SELECT %s
query := `SELECT %s
FROM
compliance_events
LEFT JOIN clusters ON compliance_events.cluster_id = clusters.id
LEFT JOIN parent_policies ON compliance_events.parent_policy_id = parent_policies.id
LEFT JOIN policies ON compliance_events.policy_id = policies.id`,
strings.Join(generateSelectedArgs(includeSpec), ", "),
)
LEFT JOIN policies ON compliance_events.policy_id = policies.id`

if includeSpec {
query += "\n LEFT JOIN specs ON policies.spec_id=specs.id"
}

return fmt.Sprintf(query, strings.Join(generateSelectedArgs(includeSpec), ", "))
}

func generateSelectedArgs(includeSpec bool) []string {
Expand Down Expand Up @@ -604,7 +608,7 @@ func generateSelectedArgs(includeSpec bool) []string {
}

if includeSpec {
selectArgs = append(selectArgs, "policies.spec")
selectArgs = append(selectArgs, "specs.spec")
}

return selectArgs
Expand Down Expand Up @@ -1145,6 +1149,7 @@ func getComplianceEventsQuery(whereClause string, queryArgs *queryOptions) strin
// LEFT JOIN clusters ON compliance_events.cluster_id = clusters.id
// LEFT JOIN parent_policies ON compliance_events.parent_policy_id = parent_policies.id
// LEFT JOIN policies ON compliance_events.policy_id = policies.id
// LEFT JOIN specs ON policies.spec_id=specs.id
// WHERE (policies.name=$1 OR policies.name=$2) AND (policies.kind=$3)
// ORDER BY compliance_events.timestamp desc
// LIMIT 20
Expand Down
81 changes: 73 additions & 8 deletions controllers/complianceeventsapi/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package complianceeventsapi

import (
"context"
"crypto/sha1"
"database/sql"
"database/sql/driver"
"encoding/hex"
"encoding/json"
"errors"
"fmt"
Expand Down Expand Up @@ -280,11 +282,24 @@ func (e EventDetails) Validate() error {
}

func (e *EventDetails) InsertQuery() (string, []any) {
hasher := sha1.New()
hasher.Write([]byte(e.Message))
messageHash := hex.EncodeToString(hasher.Sum(nil))

sql := `INSERT INTO compliance_events` +
`(cluster_id, compliance, message, metadata, parent_policy_id, policy_id, reported_by, timestamp) ` +
`VALUES($1, $2, $3, $4, $5, $6, $7, $8)`
`(cluster_id, compliance, message, message_hash, metadata, parent_policy_id, policy_id, reported_by, ` +
`timestamp) ` +
`VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9)`
values := []any{
e.ClusterID, e.Compliance, e.Message, e.Metadata, e.ParentPolicyID, e.PolicyID, e.ReportedBy, e.Timestamp,
e.ClusterID,
e.Compliance,
e.Message,
messageHash,
e.Metadata,
e.ParentPolicyID,
e.PolicyID,
e.ReportedBy,
e.Timestamp,
}

return sql, values
Expand Down Expand Up @@ -408,6 +423,37 @@ func ParentPolicyFromPolicyObj(plc *policiesv1.Policy) ParentPolicy {
}
}

type Spec struct {
KeyID int32 `db:"id" json:"id"`
Spec JSONMap `json:"spec,omitempty"`
}

func (s *Spec) InsertQuery() (string, []any) {
sql := `INSERT INTO specs(spec) VALUES($1)`
values := []any{s.Spec}

return sql, values
}

func (s *Spec) SelectQuery(returnedColumns ...string) (string, []any) {
if len(returnedColumns) == 0 {
returnedColumns = []string{"*"}
}

sql := fmt.Sprintf(
`SELECT %s FROM specs WHERE spec=$1`,
strings.Join(returnedColumns, ", "),
)

values := []any{s.Spec}

return sql, values
}

func (s *Spec) GetOrCreate(ctx context.Context, db *sql.DB) error {
return getOrCreate(ctx, db, s)
}

func PolicyFromUnstructured(obj unstructured.Unstructured) *Policy {
policy := &Policy{}

Expand Down Expand Up @@ -447,7 +493,8 @@ type Policy struct {
APIGroup string `db:"api_group" json:"apiGroup"`
Name string `db:"name" json:"name"`
Namespace *string `db:"namespace" json:"namespace"`
Spec JSONMap `db:"spec" json:"spec,omitempty"`
Spec JSONMap `json:"spec,omitempty"`
SpecID int32 `db:"spec_id" json:"-"`
Severity *string `db:"severity" json:"severity"`
}

Expand Down Expand Up @@ -475,21 +522,30 @@ func (p *Policy) Validate() error {

func (p *Policy) InsertQuery() (string, []any) {
sql := `INSERT INTO policies` +
`(api_group, kind, name, namespace, severity, spec)` +
`(api_group, kind, name, namespace, severity, spec_id)` +
`VALUES($1, $2, $3, $4, $5, $6)`
values := []any{p.APIGroup, p.Kind, p.Name, p.Namespace, p.Severity, p.Spec}
values := []any{p.APIGroup, p.Kind, p.Name, p.Namespace, p.Severity, p.SpecID}

return sql, values
}

func (p *Policy) SelectQuery(returnedColumns ...string) (string, []any) {
if len(returnedColumns) == 0 {
returnedColumns = []string{"*"}
returnedColumns = []string{
"policies.id",
"policies.kind",
"policies.api_group",
"policies.name",
"policies.namespace",
"policies.severity",
"specs.spec",
}
}

sql := fmt.Sprintf(
`SELECT %s FROM policies `+
`WHERE api_group=$1 AND kind=$2 AND name=$3 AND spec=$4`,
`LEFT JOIN specs ON policies.spec_id=specs.id`+
` WHERE api_group=$1 AND kind=$2 AND name=$3 AND spec=$4`,
strings.Join(returnedColumns, ", "),
)

Expand Down Expand Up @@ -517,6 +573,15 @@ func (p *Policy) SelectQuery(returnedColumns ...string) (string, []any) {
}

func (p *Policy) GetOrCreate(ctx context.Context, db *sql.DB) error {
if p.SpecID == 0 {
spec := &Spec{Spec: p.Spec}
if err := getOrCreate(ctx, db, spec); err != nil {
return err
}

p.SpecID = spec.KeyID
}

return getOrCreate(ctx, db, p)
}

Expand Down
Loading

0 comments on commit 4fe9a24

Please sign in to comment.