From c8602c028033602b80bffd2ca58b849cac607c39 Mon Sep 17 00:00:00 2001 From: Pavel Kvach Date: Sat, 27 Apr 2024 18:54:14 +0300 Subject: [PATCH] db: Make 'text' field in 'comments' table NOT NULL and handling data migration This update introduces a schema migration to version 4 for the database, focusing on enhancing the 'comments' table. This ensures that the 'text' field in the 'comments' table will always have a value, which improves data consistency and integrity. See: - https://github.com/isso-comments/isso/issues/979 - https://github.com/isso-comments/isso/pull/994 --- CHANGES.rst | 2 + isso/db/__init__.py | 116 +++++++++++++++++++++++----- isso/db/comments.py | 36 ++++++--- isso/tests/test_db.py | 172 +++++++++++++++++++++++++++++++++++++++++- 4 files changed, 294 insertions(+), 32 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 394b866d4..e303396dc 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -45,6 +45,7 @@ Bugfixes & Improvements - Fix newline character handling in data-isso-* i18n strings (`#992`_, pkvach) - Add link logging for management of new comments in Stdout (`#1016`_, pkvach) - Change logging to include datetime and loglevel (`#1023`_, ix5) +- Make 'text' field in 'comments' table NOT NULL and handling data migration (`#1019`_, pkvach) .. _#951: https://github.com/posativ/isso/pull/951 .. _#967: https://github.com/posativ/isso/pull/967 @@ -56,6 +57,7 @@ Bugfixes & Improvements .. _#992: https://github.com/isso-comments/isso/pull/992 .. _#1016: https://github.com/isso-comments/isso/pull/1016 .. _#1023: https://github.com/isso-comments/isso/pull/1023 +.. _#1019: https://github.com/isso-comments/isso/pull/1019 0.13.1.dev0 (2023-02-05) ------------------------ diff --git a/isso/db/__init__.py b/isso/db/__init__.py index d38318678..e7209feab 100644 --- a/isso/db/__init__.py +++ b/isso/db/__init__.py @@ -22,7 +22,7 @@ class SQLite3: a trigger for automated orphan removal. """ - MAX_VERSION = 3 + MAX_VERSION = 5 def __init__(self, path, conf): @@ -68,7 +68,7 @@ def migrate(self, to): if self.version >= to: return - logger.info("migrate database from version %i to %i", self.version, to) + logger.info("Migrating database from version %i to %i", self.version, to) # re-initialize voters blob due a bug in the bloomfilter signature # which added older commenter's ip addresses to the current voters blob @@ -78,20 +78,36 @@ def migrate(self, to): bf = memoryview(Bloomfilter(iterable=["127.0.0.0"]).array) with sqlite3.connect(self.path) as con: - con.execute('UPDATE comments SET voters=?', (bf, )) - con.execute('PRAGMA user_version = 1') - logger.info("%i rows changed", con.total_changes) + con.execute("BEGIN TRANSACTION") + try: + con.execute('UPDATE comments SET voters=?', (bf, )) + con.execute('PRAGMA user_version = 1') + con.execute("COMMIT") + logger.info("Migrating DB version 0 to 1 by re-initializing voters blob, %i rows changed", + con.total_changes) + except sqlite3.Error as e: + con.execute("ROLLBACK") + logger.error("Migrating DB version 0 to 1 failed: %s", e) + raise RuntimeError("Migrating DB version 0 to 1 failed: %s" % e) # move [general] session-key to database if self.version == 1: with sqlite3.connect(self.path) as con: - if self.conf.has_option("general", "session-key"): - con.execute('UPDATE preferences SET value=? WHERE key=?', ( - self.conf.get("general", "session-key"), "session-key")) - - con.execute('PRAGMA user_version = 2') - logger.info("%i rows changed", con.total_changes) + con.execute("BEGIN TRANSACTION") + try: + if self.conf.has_option("general", "session-key"): + con.execute('UPDATE preferences SET value=? WHERE key=?', ( + self.conf.get("general", "session-key"), "session-key")) + + con.execute('PRAGMA user_version = 2') + con.execute("COMMIT") + logger.info("Migrating DB version 1 to 2 by moving session-key to database, %i rows changed", + con.total_changes) + except sqlite3.Error as e: + con.execute("ROLLBACK") + logger.error("Migrating DB version 1 to 2 failed: %s", e) + raise RuntimeError("Migrating DB version 1 to 2 failed: %s" % e) # limit max. nesting level to 1 if self.version == 2: @@ -114,10 +130,76 @@ def first(rv): ids.extend(rv) flattened[id].update(set(rv)) - for id in flattened.keys(): - for n in flattened[id]: - con.execute( - "UPDATE comments SET parent=? WHERE id=?", (id, n)) + con.execute("BEGIN TRANSACTION") + try: + for id in flattened.keys(): + for n in flattened[id]: + con.execute( + "UPDATE comments SET parent=? WHERE id=?", (id, n)) + + con.execute('PRAGMA user_version = 3') + con.execute("COMMIT") + logger.info("Migrating DB version 2 to 3 by limiting nesting level to 1, %i rows changed", + con.total_changes) + except sqlite3.Error as e: + con.execute("ROLLBACK") + logger.error("Migrating DB version 2 to 3 failed: %s", e) + raise RuntimeError("Migrating DB version 2 to 3 failed: %s" % e) + + # add notification field to comments (moved from Comments class to migration) + if self.version == 3: + with sqlite3.connect(self.path) as con: + self.migrate_to_version_4(con) + + # "text" field in "comments" became NOT NULL + if self.version == 4: + with sqlite3.connect(self.path) as con: + con.execute("BEGIN TRANSACTION") + con.execute("UPDATE comments SET text = '' WHERE text IS NULL") + + # create new table with NOT NULL constraint for "text" field + con.execute(Comments.create_table_query("comments_new")) + + try: + # copy data from old table to new table + con.execute(""" + INSERT INTO comments_new ( + tid, id, parent, created, modified, mode, remote_addr, text, author, email, website, likes, dislikes, voters, notification + ) + SELECT + tid, id, parent, created, modified, mode, remote_addr, text, author, email, website, likes, dislikes, voters, notification + FROM comments + """) + + # swap tables and drop old table + con.execute("ALTER TABLE comments RENAME TO comments_backup_v4") + con.execute("ALTER TABLE comments_new RENAME TO comments") + con.execute("DROP TABLE comments_backup_v4") + + con.execute('PRAGMA user_version = 5') + con.execute("COMMIT") + logger.info("Migrating DB version 4 to 5 by setting empty comments.text to '', %i rows changed", + con.total_changes) + except sqlite3.Error as e: + con.execute("ROLLBACK") + logger.error("Migrating DB version 4 to 5 failed: %s", e) + raise RuntimeError("Migrating DB version 4 to 5 failed: %s" % e) + + def migrate_to_version_4(self, con): + # check if "notification" column exists in "comments" table + rv = con.execute("PRAGMA table_info(comments)").fetchall() + if any([row[1] == 'notification' for row in rv]): + logger.info("Migrating DB version 3 to 4 skipped, 'notification' field already exists in comments") + con.execute('PRAGMA user_version = 4') + return - con.execute('PRAGMA user_version = 3') - logger.info("%i rows changed", con.total_changes) + con.execute("BEGIN TRANSACTION") + try: + con.execute('ALTER TABLE comments ADD COLUMN notification INTEGER DEFAULT 0;') + con.execute('PRAGMA user_version = 4') + con.execute("COMMIT") + logger.info("Migrating DB version 3 to 4 by adding 'notification' field to comments") + except sqlite3.Error as e: + con.execute("ROLLBACK") + logger.error("Migrating DB version 3 to 4 failed: %s", e) + raise RuntimeError("Migrating DB version 3 to 4 failed: %s" % e) diff --git a/isso/db/comments.py b/isso/db/comments.py index f2aa4fffb..c41c77fe7 100644 --- a/isso/db/comments.py +++ b/isso/db/comments.py @@ -31,20 +31,34 @@ class Comments: 'remote_addr', 'text', 'author', 'email', 'website', 'likes', 'dislikes', 'voters', 'notification'] + # This method is used in the migration script from version 4 to 5. + # You need to write a new migration if you change the database schema! + @staticmethod + def create_table_query(table_name): + return f''' + CREATE TABLE IF NOT EXISTS {table_name} ( + tid REFERENCES threads(id), + id INTEGER PRIMARY KEY, + parent INTEGER, + created FLOAT NOT NULL, + modified FLOAT, + mode INTEGER, + remote_addr VARCHAR, + text VARCHAR NOT NULL, + author VARCHAR, + email VARCHAR, + website VARCHAR, + likes INTEGER DEFAULT 0, + dislikes INTEGER DEFAULT 0, + voters BLOB NOT NULL, + notification INTEGER DEFAULT 0 + ); + ''' + def __init__(self, db): self.db = db - self.db.execute([ - 'CREATE TABLE IF NOT EXISTS comments (', - ' tid REFERENCES threads(id), id INTEGER PRIMARY KEY, parent INTEGER,', - ' created FLOAT NOT NULL, modified FLOAT, mode INTEGER, remote_addr VARCHAR,', - ' text VARCHAR NOT NULL, author VARCHAR, email VARCHAR, website VARCHAR,', - ' likes INTEGER DEFAULT 0, dislikes INTEGER DEFAULT 0, voters BLOB NOT NULL,', - ' notification INTEGER DEFAULT 0);']) - try: - self.db.execute(['ALTER TABLE comments ADD COLUMN notification INTEGER DEFAULT 0;']) - except Exception: - pass + self.db.execute(Comments.create_table_query("comments")) def add(self, uri, c): """ diff --git a/isso/tests/test_db.py b/isso/tests/test_db.py index 09860f179..f753032f1 100644 --- a/isso/tests/test_db.py +++ b/isso/tests/test_db.py @@ -82,19 +82,19 @@ def test_limit_nested_comments(self): " id INTEGER PRIMARY KEY," " parent INTEGER," " created FLOAT NOT NULL, modified FLOAT," - " text VARCHAR, email VARCHAR, website VARCHAR," + " text VARCHAR, author VARCHAR, email VARCHAR, website VARCHAR," " mode INTEGER," " remote_addr VARCHAR," " likes INTEGER DEFAULT 0," " dislikes INTEGER DEFAULT 0," - " voters BLOB)") + " voters BLOB NOT NULL)") con.execute( "INSERT INTO threads (uri, title) VALUES (?, ?)", ("/", "Test")) for (id, parent) in tree.items(): con.execute("INSERT INTO comments (" - " id, parent, created)" - "VALUEs (?, ?, ?)", (id, parent, id)) + " id, parent, created, voters)" + "VALUEs (?, ?, ?, ?)", (id, parent, id, sqlite3.Binary(b""))) conf = config.new({ "general": { @@ -118,3 +118,167 @@ def test_limit_nested_comments(self): rv = con.execute( "SELECT id, parent FROM comments ORDER BY created").fetchall() self.assertEqual(flattened, rv) + + def test_comment_add_notification_column_migration(self): + with sqlite3.connect(self.path) as con: + con.execute("PRAGMA user_version = 3") + + con.execute("CREATE TABLE comments (" + " tid REFERENCES threads(id)," + " id INTEGER PRIMARY KEY," + " parent INTEGER," + " created FLOAT NOT NULL, modified FLOAT," + " text VARCHAR, author VARCHAR, email VARCHAR, website VARCHAR," + " mode INTEGER," + " remote_addr VARCHAR," + " likes INTEGER DEFAULT 0," + " dislikes INTEGER DEFAULT 0," + " voters BLOB NOT NULL)") + + conf = config.new({ + "general": { + "dbpath": "/dev/null", + "max-age": "1h" + } + }) + + db = SQLite3(self.path, conf) + + self.assertEqual(db.version, SQLite3.MAX_VERSION) + + with sqlite3.connect(self.path) as con: + # assert that the "notification" column has been added + rv = con.execute("SELECT name FROM pragma_table_info('comments') WHERE name='notification'").fetchone() + self.assertEqual(rv, ("notification",)) + + def test_comment_add_notification_column_migration_with_existing_column(self): + with sqlite3.connect(self.path) as con: + con.execute("PRAGMA user_version = 3") + + con.execute("CREATE TABLE comments (" + " tid REFERENCES threads(id)," + " id INTEGER PRIMARY KEY," + " parent INTEGER," + " created FLOAT NOT NULL, modified FLOAT," + " text VARCHAR, author VARCHAR, email VARCHAR, website VARCHAR," + " mode INTEGER," + " remote_addr VARCHAR," + " likes INTEGER DEFAULT 0," + " dislikes INTEGER DEFAULT 0," + " voters BLOB NOT NULL," + " notification INTEGER DEFAULT 0)") + + conf = config.new({ + "general": { + "dbpath": "/dev/null", + "max-age": "1h" + } + }) + + db = SQLite3(self.path, conf) + + self.assertEqual(db.version, SQLite3.MAX_VERSION) + + def test_comment_text_not_null_migration(self): + with sqlite3.connect(self.path) as con: + con.execute("PRAGMA user_version = 4") + + con.execute("CREATE TABLE threads (" + " id INTEGER PRIMARY KEY," + " uri VARCHAR UNIQUE," + " title VARCHAR)") + con.execute("CREATE TABLE comments (" + " tid REFERENCES threads(id)," + " id INTEGER PRIMARY KEY," + " parent INTEGER," + " created FLOAT NOT NULL, modified FLOAT," + " text VARCHAR, author VARCHAR, email VARCHAR, website VARCHAR," + " mode INTEGER," + " remote_addr VARCHAR," + " likes INTEGER DEFAULT 0," + " dislikes INTEGER DEFAULT 0," + " voters BLOB NOT NULL," + " notification INTEGER DEFAULT 0)") + + con.execute( + "INSERT INTO threads (uri, title) VALUES (?, ?)", ("/", "Test")) + + con.execute("INSERT INTO comments (id, parent, created, text, voters) VALUES (?, ?, ?, ?, ?)", + (1, None, 1, None, sqlite3.Binary(b""))) + + con.execute("INSERT INTO comments (id, parent, created, text, voters) VALUES (?, ?, ?, ?, ?)", + (2, 1, 2, "foo", sqlite3.Binary(b""))) + + conf = config.new({ + "general": { + "dbpath": "/dev/null", + "max-age": "1h" + } + }) + + db = SQLite3(self.path, conf) + + self.assertEqual(db.version, SQLite3.MAX_VERSION) + + with sqlite3.connect(self.path) as con: + # assert that the "text" field has "NOT NULL" constraint + rv = con.execute("SELECT \"notnull\" FROM pragma_table_info('comments') WHERE name='text'").fetchone() + self.assertEqual(rv, (1,)) + + rv = con.execute("SELECT text FROM comments WHERE id IN (1, 2)").fetchall() + self.assertEqual(rv, [("",), ("foo",)]) + + def test_comment_text_not_null_migration_with_rollback_after_error(self): + with sqlite3.connect(self.path) as con: + con.execute("PRAGMA user_version = 4") + + con.execute("CREATE TABLE threads (" + " id INTEGER PRIMARY KEY," + " uri VARCHAR UNIQUE," + " title VARCHAR)") + + # emulate a migration error by creating a source table without the "notification" field + con.execute("CREATE TABLE comments (" + " tid REFERENCES threads(id)," + " id INTEGER PRIMARY KEY," + " parent INTEGER," + " created FLOAT NOT NULL, modified FLOAT," + " text VARCHAR, author VARCHAR, email VARCHAR, website VARCHAR," + " mode INTEGER," + " remote_addr VARCHAR," + " likes INTEGER DEFAULT 0," + " dislikes INTEGER DEFAULT 0," + " voters BLOB NOT NULL)") + + con.execute( + "INSERT INTO threads (uri, title) VALUES (?, ?)", ("/", "Test")) + + con.execute("INSERT INTO comments (id, parent, created, text, voters) VALUES (?, ?, ?, ?, ?)", + (1, None, 1, None, sqlite3.Binary(b""))) + + con.execute("INSERT INTO comments (id, parent, created, text, voters) VALUES (?, ?, ?, ?, ?)", + (2, 1, 2, "foo", sqlite3.Binary(b""))) + + conf = config.new({ + "general": { + "dbpath": "/dev/null", + "max-age": "1h" + } + }) + + try: + SQLite3(self.path, conf) + except RuntimeError as e: + self.assertEqual(str(e), "Migrating DB version 4 to 5 failed: no such column: notification") + + with sqlite3.connect(self.path) as con: + # assert that DB version has not been updated + rv = con.execute("PRAGMA user_version").fetchone() + self.assertEqual(rv, (4,)) + + # assert that the "text" field has no "NOT NULL" constraint + rv = con.execute("SELECT \"notnull\" FROM pragma_table_info('comments') WHERE name='text'").fetchone() + self.assertEqual(rv, (0,)) + + rv = con.execute("SELECT text FROM comments WHERE id IN (1, 2)").fetchall() + self.assertEqual(rv, [(None,), ("foo",)])