This repository has been archived by the owner on Apr 26, 2024. It is now read-only.

Add a test which ensures that new tables get PRIMARY KEYs. #16648

Open · wants to merge 2 commits into develop
1 change: 1 addition & 0 deletions changelog.d/16648.misc
@@ -0,0 +1 @@
Add a test which ensures that new tables get PRIMARY KEYs.
135 changes: 134 additions & 1 deletion tests/storage/test_database.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable, List, Tuple
from typing import Callable, List, Set, Tuple
from unittest.mock import Mock, call

from twisted.internet import defer
@@ -361,3 +361,136 @@ def _list_tables_with_missing_replica_identities_txn(
                0,
                f"The following tables in the {pool.name()!r} database are missing REPLICA IDENTITIES: {missing!r}.",
            )


class PostgresPrimaryKeyTestCase(unittest.HomeserverTestCase):
    if not USE_POSTGRES_FOR_TESTS:
        skip = "Requires Postgres"

    # Some tables are exempt, mostly for historical reasons
    EXEMPT_TABLES = {
        "appservice_room_list",
        "batch_events",
        "blocked_rooms",
        "cache_invalidation_stream_by_instance",
        "current_state_delta_stream",
        "deleted_pushers",
        "device_auth_providers",
        "device_federation_inbox",
        "device_federation_outbox",
        "device_inbox",
        "device_lists_changes_in_room",
        "device_lists_outbound_last_success",
        "device_lists_outbound_pokes",
        "device_lists_remote_cache",
        "device_lists_remote_extremeties",
        "device_lists_remote_resync",
        "device_lists_stream",
        "e2e_cross_signing_keys",
        "e2e_cross_signing_signatures",
        "e2e_room_keys",
        "e2e_room_keys_versions",
        "erased_users",
        "event_auth",
        "event_auth_chain_links",
        "event_push_actions_staging",
        "event_push_summary",
        "event_relations",
        "event_search",
        "federation_inbound_events_staging",
        "federation_stream_position",
        "ignored_users",
        "insertion_event_edges",
        "insertion_event_extremities",
        "insertion_events",
        "local_media_repository_thumbnails",
        "local_media_repository_url_cache",
        "monthly_active_users",
        "presence_stream",
        "push_rules_stream",
        "ratelimit_override",
        "remote_media_cache_thumbnails",
        "room_alias_servers",
        "room_stats_earliest_token",
        "room_stats_state",
        "state_group_edges",
        "state_groups_state",
        "stream_ordering_to_exterm",
        "stream_positions",
        "threepid_guest_access_tokens",
        "timeline_gaps",
        "user_daily_visits",
        "user_directory",
        "user_directory_search",
        "user_filters",
        "user_ips",
        "user_signature_stream",
        "user_threepid_id_server",
        "users_in_public_rooms",
        "users_pending_deactivation",
        "users_who_share_private_rooms",
        "worker_locks",
    }

    def prepare(
        self, reactor: MemoryReactor, clock: Clock, homeserver: HomeServer
    ) -> None:
        self.db_pools = homeserver.get_datastores().databases

    def test_all_nonexempt_tables_have_primary_key(self) -> None:
        """
        Tests that all non-exempt tables have a PRIMARY KEY.

        Whilst there are occasionally good reasons not to have one,
        we should default to having PRIMARY KEYs on new tables.

        Historically we have not always created PRIMARY KEYs where we should have
        (sometimes even creating a UNIQUE index on the same columns instead).

        It is preferable to use PRIMARY KEYs instead of UNIQUE indices where possible
        because this:

        - provides a default REPLICA IDENTITY, ensuring logical replication works
          without extra complication;
        - is sometimes more helpful for understanding the schema; and
        - is more useful to external tools which expect PRIMARY KEYs (e.g. some database
          migration tools on some cloud providers do not work with tables without PRIMARY
          KEYs).
        """

        sql = """
            SELECT tbl.table_name
            FROM information_schema.tables tbl
            WHERE table_type = 'BASE TABLE'
                AND table_schema not in ('pg_catalog', 'information_schema')
                AND NOT EXISTS (
                    SELECT 1
                    FROM information_schema.key_column_usage kcu
                    WHERE kcu.table_name = tbl.table_name
                        AND kcu.table_schema = tbl.table_schema
                )
        """

        def _list_tables_with_missing_primary_keys_txn(
            txn: LoggingTransaction,
        ) -> Set[str]:
            txn.execute(sql)
            return {table_name for table_name, in txn}

        for pool in self.db_pools:
            missing = sorted(
                self.get_success(
                    pool.runInteraction(
                        "test_list_missing_primary_keys",
                        _list_tables_with_missing_primary_keys_txn,
                    )
                )
                - self.EXEMPT_TABLES
            )
            self.assertEqual(
                len(missing),
                0,
                f"The following tables in the {pool.name()!r} database are missing PRIMARY KEYs: {missing!r}.\n"
                f"Please consider adding a primary key if it makes sense to do so — this gives the table a replica identity for free and helps humans and tools to understand the table better.\n"
                f"If this lack of a primary key is intentional, please add an exemption.",
            )
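
Background sketch for the docstring above, not part of the diff: the two patterns it contrasts can be written out in plain SQL. The table and column names here are hypothetical, chosen only for illustration.

-- Preferred pattern: declare the PRIMARY KEY up front. Postgres then uses it
-- as the default REPLICA IDENTITY, so logical replication of UPDATEs and
-- DELETEs works without further configuration. (Hypothetical table.)
CREATE TABLE example_mapping (
    room_id TEXT NOT NULL,
    user_id TEXT NOT NULL,
    PRIMARY KEY (room_id, user_id)
);

-- Historical pattern the test discourages: a bare UNIQUE index leaves the
-- table without a default replica identity, so an extra ALTER TABLE step is
-- needed before logical replication can handle UPDATEs and DELETEs.
CREATE TABLE example_mapping_legacy (
    room_id TEXT NOT NULL,
    user_id TEXT NOT NULL
);
CREATE UNIQUE INDEX example_mapping_legacy_uidx
    ON example_mapping_legacy (room_id, user_id);
ALTER TABLE example_mapping_legacy
    REPLICA IDENTITY USING INDEX example_mapping_legacy_uidx;

With the first shape, the PRIMARY KEY constraint doubles as the replica identity; with the second, replication only works after the extra ALTER TABLE, which is the sort of complication the new test is intended to catch early via its exemption list rather than by silent omission.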