From e325dab44ede2bd0981140bd5a35c3a0691f9fdc Mon Sep 17 00:00:00 2001 From: psrok1 Date: Fri, 12 Jul 2024 11:55:01 +0200 Subject: [PATCH 1/2] Set collation of some string fields to 'C' --- mwdb/model/blob.py | 4 +- mwdb/model/config.py | 4 +- mwdb/model/file.py | 18 ++--- ...95_set_collation_of_fields_that_may_be_.py | 70 +++++++++++++++++++ mwdb/model/tag.py | 2 +- 5 files changed, 85 insertions(+), 13 deletions(-) create mode 100644 mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py diff --git a/mwdb/model/blob.py b/mwdb/model/blob.py index cf094e63b..f8adb75b0 100644 --- a/mwdb/model/blob.py +++ b/mwdb/model/blob.py @@ -11,9 +11,9 @@ class TextBlob(Object): - blob_name = db.Column(db.String, index=True) + blob_name = db.Column(db.String(collation="C"), index=True) blob_size = db.Column(db.Integer, index=True) - blob_type = db.Column(db.String(32), index=True) + blob_type = db.Column(db.String(32, collation="C"), index=True) _content = db.Column("content", db.String()) last_seen = db.Column(db.DateTime, index=True) diff --git a/mwdb/model/config.py b/mwdb/model/config.py index 3070a5f17..162959691 100644 --- a/mwdb/model/config.py +++ b/mwdb/model/config.py @@ -11,8 +11,8 @@ class Config(Object): - family = db.Column(db.String(32), index=True) - config_type = db.Column(db.String(32), index=True) + family = db.Column(db.String(32, collation="C"), index=True) + config_type = db.Column(db.String(32, collation="C"), index=True) _cfg = db.Column("cfg", JSONB) __mapper_args__ = { diff --git a/mwdb/model/file.py b/mwdb/model/file.py index c1a2357b5..f57d9f8bb 100644 --- a/mwdb/model/file.py +++ b/mwdb/model/file.py @@ -33,17 +33,19 @@ class EmptyFileError(ValueError): class File(Object): - file_name = db.Column(db.String, index=True) + file_name = db.Column(db.String(collation="C"), index=True) file_size = db.Column(db.Integer, index=True) file_type = db.Column(db.Text, index=True) - md5 = db.Column(db.String(32), index=True) - crc32 = db.Column(db.String(8), index=True) - sha1 = db.Column(db.String(40), index=True) - sha256 = db.Column(db.String(64), index=True, unique=True) - sha512 = db.Column(db.String(128), index=True) - ssdeep = db.Column(db.String(255), index=True) + md5 = db.Column(db.String(32, collation="C"), index=True) + crc32 = db.Column(db.String(8, collation="C"), index=True) + sha1 = db.Column(db.String(40, collation="C"), index=True) + sha256 = db.Column(db.String(64, collation="C"), index=True, unique=True) + sha512 = db.Column(db.String(128, collation="C"), index=True) + ssdeep = db.Column(db.String(255, collation="C"), index=True) alt_names = db.Column( - MutableList.as_mutable(ARRAY(db.String)), nullable=False, server_default="{}" + MutableList.as_mutable(ARRAY(db.String(collation="C"))), + nullable=False, + server_default="{}", ) __mapper_args__ = { diff --git a/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py b/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py new file mode 100644 index 000000000..52cbe80cc --- /dev/null +++ b/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py @@ -0,0 +1,70 @@ +"""Set collation of fields that may be searched by prefix + +Revision ID: 6fc42e070495 +Revises: 1a46a79d9108 +Create Date: 2024-07-12 09:35:20.591920 + +""" +import logging + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = "6fc42e070495" +down_revision = "1a46a79d9108" +branch_labels = None +depends_on = None + +current_change = 1 +total_changes = 13 + +logger = logging.getLogger("alembic") + + +def alter_column(table, column, type_): + global current_change + logger.info( + f"[{current_change}/{total_changes}] Setting collation of {table}.{column}" + ) + op.alter_column(table, column, type_=type_) + current_change += 1 + + +def upgrade(): + logger.info(f"Changing column collation, this may take a while...") + alter_column("object", "blob_name", type_=sa.String(collation="C")) + alter_column("object", "blob_type", type_=sa.String(32, collation="C")) + alter_column("object", "family", type_=sa.String(32, collation="C")) + alter_column("object", "config_type", type_=sa.String(32, collation="C")) + alter_column("object", "file_name", type_=sa.String(collation="C")) + alter_column("object", "md5", type_=sa.String(32, collation="C")) + alter_column("object", "crc32", type_=sa.String(8, collation="C")) + alter_column("object", "sha1", type_=sa.String(40, collation="C")) + alter_column("object", "sha256", type_=sa.String(64, collation="C")) + alter_column("object", "sha512", type_=sa.String(128, collation="C")) + alter_column("object", "ssdeep", type_=sa.String(255, collation="C")) + alter_column( + "object", "alt_names", type_=postgresql.ARRAY(sa.String(collation="C")) + ) + alter_column("tag", "tag", type_=sa.String(collation="C")) + op.execute("ANALYZE") + + +def downgrade(): + logger.info(f"Changing column collation, this may take a while...") + alter_column("object", "blob_name", type_=sa.String()) + alter_column("object", "blob_type", type_=sa.String(32)) + alter_column("object", "family", type_=sa.String(32)) + alter_column("object", "config_type", type_=sa.String(32)) + alter_column("object", "file_name", type_=sa.String()) + alter_column("object", "md5", type_=sa.String(32)) + alter_column("object", "crc32", type_=sa.String(8)) + alter_column("object", "sha1", type_=sa.String(40)) + alter_column("object", "sha256", type_=sa.String(64)) + alter_column("object", "sha512", type_=sa.String(128)) + alter_column("object", "ssdeep", type_=sa.String(255)) + alter_column("object", "alt_names", type_=postgresql.ARRAY(sa.String())) + alter_column("tag", "tag", type_=sa.String()) + op.execute("ANALYZE") diff --git a/mwdb/model/tag.py b/mwdb/model/tag.py index c24aa19b5..bef7a4dc7 100644 --- a/mwdb/model/tag.py +++ b/mwdb/model/tag.py @@ -8,7 +8,7 @@ class Tag(db.Model): ) id = db.Column(db.Integer, primary_key=True) - tag = db.Column(db.String, nullable=False, index=True) + tag = db.Column(db.String(collation="C"), nullable=False, index=True) object_id = db.Column( db.Integer, db.ForeignKey("object.id", ondelete="CASCADE"), From 717f46b2cc5ab0739a67c84bf37a9910f6d54b68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Srokosz?= Date: Fri, 12 Jul 2024 12:11:39 +0200 Subject: [PATCH 2/2] Apply suggestions from code review Co-authored-by: msm-cert <156842376+msm-cert@users.noreply.github.com> --- .../6fc42e070495_set_collation_of_fields_that_may_be_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py b/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py index 52cbe80cc..cf8590126 100644 --- a/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py +++ b/mwdb/model/migrations/versions/6fc42e070495_set_collation_of_fields_that_may_be_.py @@ -33,7 +33,7 @@ def alter_column(table, column, type_): def upgrade(): - logger.info(f"Changing column collation, this may take a while...") + logger.info("Changing column collation, this may take a while...") alter_column("object", "blob_name", type_=sa.String(collation="C")) alter_column("object", "blob_type", type_=sa.String(32, collation="C")) alter_column("object", "family", type_=sa.String(32, collation="C")) @@ -53,7 +53,7 @@ def upgrade(): def downgrade(): - logger.info(f"Changing column collation, this may take a while...") + logger.info("Changing column collation, this may take a while...") alter_column("object", "blob_name", type_=sa.String()) alter_column("object", "blob_type", type_=sa.String(32)) alter_column("object", "family", type_=sa.String(32))