From 20fce9232c2bcecfb0ca50995f9e2e3aa9462962 Mon Sep 17 00:00:00 2001 From: KernelMaker Date: Mon, 25 Nov 2024 12:07:54 +0100 Subject: [PATCH 1/8] Support TTL purging 1. add ttl_only_expired flag to only retrieve expired rows 2. ttl_purge_demo.cpp is an example of how ttl purging is going to work in the restapi server 3. bugfix, set op_type from ZINSERT_TTL to ZUPDATE to make sure the rondb internal triggers can be done correctly. TODO: 1. integrate purge program into restapi 2. handle ttl_only_expired on non-TTL table 3. double check the ZINSERT_TTL bugfix, add test cases --- mysql-test/ttl_test.py | 3 + .../ndb/include/kernel/signaldata/LqhKey.hpp | 15 +- .../include/kernel/signaldata/ScanFrag.hpp | 21 +- .../ndb/include/kernel/signaldata/ScanTab.hpp | 19 +- .../include/kernel/signaldata/TcKeyReq.hpp | 17 + storage/ndb/include/ndbapi/NdbDictionary.hpp | 2 +- storage/ndb/include/ndbapi/NdbOperation.hpp | 6 +- .../ndb/include/ndbapi/NdbScanOperation.hpp | 6 +- storage/ndb/ndbapi-examples/CMakeLists.txt | 2 + .../ndbapi_ttl_purge/ttl_purge_demo.cpp | 1692 +++++++++++++++++ .../src/common/debugger/signaldata/LqhKey.cpp | 3 +- .../common/debugger/signaldata/TcKeyReq.cpp | 2 + storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp | 5 +- .../ndb/src/kernel/blocks/dblqh/DblqhMain.cpp | 12 +- storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp | 1 + .../ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 5 + storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp | 4 +- .../kernel/blocks/dbtup/DbtupExecQuery.cpp | 94 +- storage/ndb/src/ndbapi/NdbDictionary.cpp | 2 +- storage/ndb/src/ndbapi/NdbOperationDefine.cpp | 4 + storage/ndb/src/ndbapi/NdbOperationExec.cpp | 2 + storage/ndb/src/ndbapi/NdbScanOperation.cpp | 13 +- 22 files changed, 1881 insertions(+), 49 deletions(-) create mode 100644 storage/ndb/ndbapi-examples/ndbapi_ttl_purge/ttl_purge_demo.cpp diff --git a/mysql-test/ttl_test.py b/mysql-test/ttl_test.py index ff90851e0aca..9a75a7c2a8f4 100644 --- a/mysql-test/ttl_test.py +++ 
b/mysql-test/ttl_test.py @@ -3195,6 +3195,9 @@ def case(num): "col_c INT, " "PRIMARY KEY(col_a)) " "ENGINE = NDB, " + "COMMENT=\"NDB_TABLE=TTL=66@col_b\"") + time.sleep(5) + cur.execute("ALTER TABLE test.sz " "COMMENT=\"NDB_TABLE=TTL=10@col_b\"") except Exception as e: print(f"Create DB/TABLE failed: {e}") diff --git a/storage/ndb/include/kernel/signaldata/LqhKey.hpp b/storage/ndb/include/kernel/signaldata/LqhKey.hpp index f44cee28589b..69c0ee6014f2 100644 --- a/storage/ndb/include/kernel/signaldata/LqhKey.hpp +++ b/storage/ndb/include/kernel/signaldata/LqhKey.hpp @@ -228,6 +228,8 @@ class LqhKeyReq { */ static void setTTLIgnoreFlag(UintR &requestInfo, UintR val); static UintR getTTLIgnoreFlag(const UintR &requestInfo); + static void setTTLOnlyExpiredFlag(UintR &requestInfo, UintR val); + static UintR getTTLOnlyExpiredFlag(const UintR &requestInfo); enum RequestInfo { RI_KEYLEN_SHIFT = 0, @@ -244,8 +246,8 @@ class LqhKeyReq { */ RI_TTL_IGNORE_SHIFT = 6, RI_INTERPRETED_INSERT_SHIFT = 7, + RI_TTL_ONLY_EXPIRED_SHIFT = 8, /* Currently unused */ - RI_CLEAR_SHIFT8 = 8, RI_CLEAR_SHIFT9 = 9, RI_LAST_REPL_SHIFT = 10, @@ -646,7 +648,7 @@ inline UintR LqhKeyReq::getDisableFkConstraints(const UintR &requestInfo) { } inline UintR LqhKeyReq::getLongClearBits(const UintR &requestInfo) { - const Uint32 mask = (1 << RI_CLEAR_SHIFT8) | (1 << RI_CLEAR_SHIFT9); + const Uint32 mask = (1 << RI_CLEAR_SHIFT9); return (requestInfo & mask); } @@ -696,6 +698,15 @@ inline UintR LqhKeyReq::getTTLIgnoreFlag(const UintR & requestInfo){ return (requestInfo >> RI_TTL_IGNORE_SHIFT) & 1; } +inline void LqhKeyReq::setTTLOnlyExpiredFlag(UintR &requestInfo, UintR val){ + ASSERT_BOOL(val, "LqhKeyReq::setTTLOnlyExpiredFlag"); + requestInfo |= (val << RI_TTL_ONLY_EXPIRED_SHIFT); +} + +inline UintR LqhKeyReq::getTTLOnlyExpiredFlag(const UintR & requestInfo){ + return (requestInfo >> RI_TTL_ONLY_EXPIRED_SHIFT) & 1; +} + inline Uint32 table_version_major_lqhkeyreq(Uint32 x) { // LQHKEYREQ only contains 
16-bit schema version... return x & 0xFFFF; diff --git a/storage/ndb/include/kernel/signaldata/ScanFrag.hpp b/storage/ndb/include/kernel/signaldata/ScanFrag.hpp index 6e13a98f43fd..81ad5ef01d66 100644 --- a/storage/ndb/include/kernel/signaldata/ScanFrag.hpp +++ b/storage/ndb/include/kernel/signaldata/ScanFrag.hpp @@ -144,6 +144,9 @@ class ScanFragReq { static void setTTLIgnoreFragFlag(Uint32 & requestInfo, Uint32 val); static Uint32 getTTLIgnoreFragFlag(const Uint32 & requestInfo); + + static void setTTLOnlyExpiredFragFlag(Uint32 & requestInfo, Uint32 val); + static Uint32 getTTLOnlyExpiredFragFlag(const Uint32 & requestInfo); }; /* @@ -348,11 +351,12 @@ class ScanFragNextReq { * q = Query thread flag - 1 Bit 22 * g = Aggregation flag - 1 Bit 23 * I = TTL ignore flag - 1 Bit 24 + * e = TTL only expired flag - 1 Bit 25 * * 1111111111222222222233 * 01234567890123456789012345678901 * rrcdlxhkrztppppaaaaaaaaaaaaaaaa Short variant ( < 6.4.0) - * rrcdlxhkrztppppCsaim gI Long variant (6.4.0 +) + * rrcdlxhkrztppppCsaim gIe Long variant (6.4.0 +) */ #define SF_LOCK_MODE_SHIFT (5) #define SF_LOCK_MODE_MASK (1) @@ -385,6 +389,7 @@ class ScanFragNextReq { #define SF_QUERY_THREAD_SHIFT (22) #define SF_AGGREGATION_SHIFT (23) #define SF_TTL_IGNORE_SHIFT (24) +#define SF_TTL_ONLY_EXPIRED_SHIFT (25) inline Uint32 ScanFragReq::getLockMode(const Uint32 &requestInfo) { return (requestInfo >> SF_LOCK_MODE_SHIFT) & SF_LOCK_MODE_MASK; @@ -604,6 +609,20 @@ ScanFragReq::getTTLIgnoreFragFlag(const Uint32 & requestInfo) { return (requestInfo >> SF_TTL_IGNORE_SHIFT) & 1; } +inline +void +ScanFragReq::setTTLOnlyExpiredFragFlag(Uint32 & requestInfo, UintR val) { + ASSERT_BOOL(val, "ScanFragReq::setTTLOnlyExpiredFlag"); + requestInfo= (requestInfo & ~(1 << SF_TTL_ONLY_EXPIRED_SHIFT)) | + (val << SF_TTL_ONLY_EXPIRED_SHIFT); +} + +inline +Uint32 +ScanFragReq::getTTLOnlyExpiredFragFlag(const Uint32 & requestInfo) { + return (requestInfo >> SF_TTL_ONLY_EXPIRED_SHIFT) & 1; +} + /** * Request Info 
(SCAN_NEXTREQ) * diff --git a/storage/ndb/include/kernel/signaldata/ScanTab.hpp b/storage/ndb/include/kernel/signaldata/ScanTab.hpp index fd09897e14af..9b9c474b0605 100644 --- a/storage/ndb/include/kernel/signaldata/ScanTab.hpp +++ b/storage/ndb/include/kernel/signaldata/ScanTab.hpp @@ -123,6 +123,7 @@ class ScanTabReq { static Uint8 getReadCommittedBaseFlag(const UintR &requestInfo); static Uint32 getMultiFragFlag(const Uint32 &requestInfo); static Uint32 getTTLIgnoreFlag(const Uint32 &requestInfo); + static Uint32 getTTLOnlyExpiredFlag(const Uint32 &requestInfo); /** * Set:ers for requestInfo @@ -147,6 +148,7 @@ class ScanTabReq { static void setReadCommittedBaseFlag(Uint32 &requestInfo, Uint32 val); static void setMultiFragFlag(Uint32 &requestInfo, Uint32 val); static void setTTLIgnoreFlag(Uint32 &requestInfo, Uint32 val); + static void setTTLOnlyExpiredFlag(Uint32 &requestInfo, Uint32 val); }; /** @@ -177,11 +179,12 @@ class ScanTabReq { f = 4 word conf - 1 Bit 29 R = Read Committed base - 1 Bit 30 I = IgnoreTTL - 1 Bit 3 + e = TTL only expired - 1 Bit 4 1111111111222222222233 01234567890123456789012345678901 pppppppplnhcktzxbbbbbbbbbbdjafR - I g + Ie g */ #define PARALLEL_SHIFT (0) @@ -230,6 +233,7 @@ class ScanTabReq { #define SCAN_MULTI_FRAG_SHIFT (31) #define SCAN_TTL_IGNORE_SHIFT (3) +#define SCAN_TTL_ONLY_EXPIRED_SHIFT (4) inline Uint8 ScanTabReq::getReadCommittedBaseFlag(const UintR &requestInfo) { return (Uint8)((requestInfo >> SCAN_READ_COMMITTED_BASE_SHIFT) & 1); @@ -436,6 +440,19 @@ ScanTabReq::setTTLIgnoreFlag(UintR & requestInfo, Uint32 flag) { requestInfo= (requestInfo & ~(1 << SCAN_TTL_IGNORE_SHIFT)) | (flag << SCAN_TTL_IGNORE_SHIFT); } +inline +UintR +ScanTabReq::getTTLOnlyExpiredFlag(const UintR & requestInfo) { + return (requestInfo >> SCAN_TTL_ONLY_EXPIRED_SHIFT) & 1; +} + +inline +void +ScanTabReq::setTTLOnlyExpiredFlag(UintR & requestInfo, Uint32 flag) { + ASSERT_BOOL(flag, "TcKeyReq::setTTLOnlyExpiredFlag"); + requestInfo= (requestInfo & 
~(1 << SCAN_TTL_ONLY_EXPIRED_SHIFT)) | + (flag << SCAN_TTL_ONLY_EXPIRED_SHIFT); +} /** * * SENDER: Dbtc diff --git a/storage/ndb/include/kernel/signaldata/TcKeyReq.hpp b/storage/ndb/include/kernel/signaldata/TcKeyReq.hpp index 554c25b1451f..b3b3637df165 100644 --- a/storage/ndb/include/kernel/signaldata/TcKeyReq.hpp +++ b/storage/ndb/include/kernel/signaldata/TcKeyReq.hpp @@ -259,6 +259,8 @@ class TcKeyReq { */ static void setTTLIgnoreFlag(UintR &requestInfo, UintR val); static UintR getTTLIgnoreFlag(const UintR &requestInfo); + static void setTTLOnlyExpiredFlag(UintR &requestInfo, UintR val); + static UintR getTTLOnlyExpiredFlag(const UintR &requestInfo); }; /** @@ -439,6 +441,7 @@ class TcKeyReq { */ #define TC_TTL_IGNORE_SHIFT (26) #define INTERPRETED_INSERT_SHIFT (27) +#define TC_TTL_ONLY_EXPIRED_SHIFT (28) /** * Scan Info @@ -815,6 +818,20 @@ TcKeyReq::getTTLIgnoreFlag(const UintR & requestInfo) return (requestInfo >> TC_TTL_IGNORE_SHIFT) & 1; } +inline +void +TcKeyReq::setTTLOnlyExpiredFlag(UintR & requestInfo, UintR flag){ + ASSERT_BOOL(flag, "TcKeyReq::setTTLOnlyExpiredFlag"); + requestInfo |= (flag << TC_TTL_ONLY_EXPIRED_SHIFT); +} + +inline +UintR +TcKeyReq::getTTLOnlyExpiredFlag(const UintR & requestInfo) +{ + return (requestInfo >> TC_TTL_ONLY_EXPIRED_SHIFT) & 1; +} + #undef JAM_FILE_ID #endif diff --git a/storage/ndb/include/ndbapi/NdbDictionary.hpp b/storage/ndb/include/ndbapi/NdbDictionary.hpp index dd19eee877b3..004aed0da401 100644 --- a/storage/ndb/include/ndbapi/NdbDictionary.hpp +++ b/storage/ndb/include/ndbapi/NdbDictionary.hpp @@ -1326,7 +1326,7 @@ class NdbDictionary { /* * Is TTL enabled */ - bool isTTLEnabled(); + bool isTTLEnabled() const; private: #ifndef DOXYGEN_SHOULD_SKIP_INTERNAL diff --git a/storage/ndb/include/ndbapi/NdbOperation.hpp b/storage/ndb/include/ndbapi/NdbOperation.hpp index 040685a06403..f0b3f556d765 100644 --- a/storage/ndb/include/ndbapi/NdbOperation.hpp +++ b/storage/ndb/include/ndbapi/NdbOperation.hpp @@ -1131,7 
+1131,8 @@ class NdbOperation { */ OO_TTL_IGNORE = 0x8000, OO_INTERPRETED_INSERT = 0x10000, - OO_DIRTY_FLAG = 0x20000 + OO_DIRTY_FLAG = 0x20000, + OO_TTL_ONLY_EXPIRED = 0x40000 }; /* An operation-specific abort option. @@ -1572,7 +1573,8 @@ class NdbOperation { OF_NOWAIT = 0x20, OF_BLOB_PART_READ = 0x40, OF_REPLICA_APPLIER = 0x80, - OF_TTL_IGNORE = 0x100 + OF_TTL_IGNORE = 0x100, + OF_TTL_ONLY_EXPIRED = 0x200 }; /* * Zart diff --git a/storage/ndb/include/ndbapi/NdbScanOperation.hpp b/storage/ndb/include/ndbapi/NdbScanOperation.hpp index 6cc37a17c4d7..e34326109ce3 100644 --- a/storage/ndb/include/ndbapi/NdbScanOperation.hpp +++ b/storage/ndb/include/ndbapi/NdbScanOperation.hpp @@ -123,7 +123,8 @@ class NdbScanOperation : public NdbOperation { It is enabled by default for scans using LM_Exclusive, but must be explicitly specified to enable the taking-over of LM_Read locks. */ - SF_KeyInfo = 1 + SF_KeyInfo = 1, + SF_OnlyExpiredScan = (4 << 16) }; /* @@ -163,7 +164,8 @@ class NdbScanOperation : public NdbOperation { SO_INTERPRETED = 0x20, SO_CUSTOMDATA = 0x40, SO_PART_INFO = 0x80, - SO_TTL_IGNORE = 0x100 + SO_TTL_IGNORE = 0x100, + SO_TTL_ONLY_EXPIRED = 0x200 }; /* Flags controlling scan behaviour diff --git a/storage/ndb/ndbapi-examples/CMakeLists.txt b/storage/ndb/ndbapi-examples/CMakeLists.txt index 460fa60592ba..a49e9c02e453 100644 --- a/storage/ndb/ndbapi-examples/CMakeLists.txt +++ b/storage/ndb/ndbapi-examples/CMakeLists.txt @@ -59,6 +59,7 @@ ADD_EXECUTABLE(ndb_ndbapi_simple_dual ndbapi_simple_dual/main.cpp) ADD_EXECUTABLE(ndb_ndbapi_simple_index ndbapi_simple_index/main.cpp) ADD_EXECUTABLE(ndb_ndbapi_agg ndbapi_agg/ndbapi_agg.cpp) ADD_EXECUTABLE(ndb_ndbapi_agg_update ndbapi_agg/update.cpp) +ADD_EXECUTABLE(ndb_ndbapi_ttl_purge ndbapi_ttl_purge/ttl_purge_demo.cpp) # MgmApi examples ADD_EXECUTABLE(ndb_mgmapi_logevent mgmapi_logevent/main.cpp) @@ -86,6 +87,7 @@ SET(EXAMPLES ndb_ndbapi_simple_index ndb_ndbapi_agg ndb_ndbapi_agg_update + ndb_ndbapi_ttl_purge 
ndb_mgmapi_logevent ndb_mgmapi_logevent2 diff --git a/storage/ndb/ndbapi-examples/ndbapi_ttl_purge/ttl_purge_demo.cpp b/storage/ndb/ndbapi-examples/ndbapi_ttl_purge/ttl_purge_demo.cpp new file mode 100644 index 000000000000..4a4434547d48 --- /dev/null +++ b/storage/ndb/ndbapi-examples/ndbapi_ttl_purge/ttl_purge_demo.cpp @@ -0,0 +1,1692 @@ +/* + Copyright (c) 2005, 2024, Oracle and/or its affiliates. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License, version 2.0, + as published by the Free Software Foundation. + + This program is designed to work with certain software (including + but not limited to OpenSSL) that is licensed under separate terms, + as designated in a particular file or component or in included license + documentation. The authors of MySQL hereby grant you an additional + permission to link the program and your derivative works with the + separately licensed software that they have either included with + the program or referenced in the documentation. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License, version 2.0, for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/** + * ndbapi_event.cpp: Using API level events in NDB API + * + * Classes and methods used in this example: + * + * Ndb_cluster_connection + * connect() + * wait_until_ready() + * + * Ndb + * init() + * getDictionary() + * createEventOperation() + * dropEventOperation() + * pollEvents() + * nextEvent() + * + * NdbDictionary + * createEvent() + * dropEvent() + * + * NdbDictionary::Event + * setTable() + * addTableEvent() + * addEventColumn() + * + * NdbEventOperation + * getValue() + * getPreValue() + * execute() + * getEventType() + * + */ + +#include + +#include +#include +// Used for cout +#include +#include +#ifdef VM_TRACE +#include +#endif +#ifndef assert +#include +#endif + +#include +#include +#include "storage/ndb/plugin/ndb_schema_dist.h" +#include "mysql_time.h" +#include "my_systime.h" +#include "myisampack.h" +#define APIERROR(error) \ + { \ + std::cout << "Error in " << __FILE__ << ", line:" << __LINE__ \ + << ", code:" << error.code << ", msg: " << error.message << "." 
\ + << std::endl; \ + exit(-1); \ + } + +constexpr int NDB_INVALID_SCHEMA_OBJECT = 241; + +char* GetEventName(NdbDictionary::Event::TableEvent event_type, + char* name_buf) { + switch (event_type) { + case NdbDictionary::Event::TE_INSERT: + strcpy(name_buf, "TE_INSERT"); + break; + case NdbDictionary::Event::TE_DELETE: + strcpy(name_buf, "TE_DELETE"); + break; + case NdbDictionary::Event::TE_UPDATE: + strcpy(name_buf, "TE_UPDATE"); + break; + case NdbDictionary::Event::TE_SCAN: + strcpy(name_buf, "TE_SCAN"); + break; + case NdbDictionary::Event::TE_DROP: + strcpy(name_buf, "TE_DROP"); + break; + case NdbDictionary::Event::TE_ALTER: + strcpy(name_buf, "TE_ALTER"); + break; + case NdbDictionary::Event::TE_CREATE: + strcpy(name_buf, "TE_CREATE"); + break; + case NdbDictionary::Event::TE_GCP_COMPLETE: + strcpy(name_buf, "TE_GCP_COMPLETE"); + break; + case NdbDictionary::Event::TE_CLUSTER_FAILURE: + strcpy(name_buf, "TE_CLUSTER_FAILURE"); + break; + case NdbDictionary::Event::TE_STOP: + strcpy(name_buf, "TE_STOP"); + break; + case NdbDictionary::Event::TE_NODE_FAILURE: + strcpy(name_buf, "TE_NODE_FAILURE"); + break; + case NdbDictionary::Event::TE_SUBSCRIBE: + strcpy(name_buf, "TE_SUBSCRIBE"); + break; + case NdbDictionary::Event::TE_UNSUBSCRIBE: + strcpy(name_buf, "TE_UNSUBSCRIBE"); + break; + case NdbDictionary::Event::TE_EMPTY: + strcpy(name_buf, "TE_EMPTY"); + break; + case NdbDictionary::Event::TE_INCONSISTENT: + strcpy(name_buf, "TE_INCONSISTENT"); + break; + case NdbDictionary::Event::TE_OUT_OF_MEMORY: + strcpy(name_buf, "TE_OUT_OF_MEMEORY"); + break; + case NdbDictionary::Event::TE_ALL: + strcpy(name_buf, "TE_ALL"); + break; + default: + strcpy(name_buf, "UNKNOWN"); + break; + } + return name_buf; +} + +bool IsExit() { + return false; +} + +typedef struct { + int32_t table_id; + uint32_t ttl_sec; + uint32_t col_no; + char last_purged[8] = {0}; // Only valid in local ttl cache +} TTLInfo; + +std::mutex g_mutex; +std::atomic g_cache_updated; +std::map g_ttl_cache; 
+bool UpdateLocalCache(const std::string& db, + const std::string& table, + const NdbDictionary::Table* tab) { + bool updated = false; + auto iter = g_ttl_cache.find(db + "/" + table); + if (tab != nullptr) { + if (iter != g_ttl_cache.end()) { + if (tab->isTTLEnabled()) { + assert(iter->second.table_id == tab->getTableId()); + std::cerr << "Update TTL of table " << db + "/" + table + << " in cache: [" << iter->second.table_id + << ", " << iter->second.ttl_sec + << ", " << iter->second.col_no + << "] -> [" << tab->getTableId() + << ", " << tab->getTTLSec() + << ", " << tab->getTTLColumnNo() + << "]" << std::endl; + iter->second.ttl_sec = tab->getTTLSec(); + iter->second.col_no = tab->getTTLColumnNo(); + } else { + std::cerr << "Remove[1] TTL of table " << db + "/" + table + << " in cache: [" << iter->second.table_id + << ", " << iter->second.ttl_sec + << ", " << iter->second.col_no + << "]" << std::endl; + g_ttl_cache.erase(iter); + } + updated = true; + } else { + if (tab->isTTLEnabled()) { + std::cerr << "Insert TTL of table " << db + "/" + table + << " in cache: [" << tab->getTableId() + << ", " << tab->getTTLSec() + << ", " << tab->getTTLColumnNo() + << "]" << std::endl; + g_ttl_cache.insert({db + "/" + table, {tab->getTableId(), + tab->getTTLSec(), tab->getTTLColumnNo()}}); + updated = true; + } else { + // check mysql.ttl_purge_nodes + // TODO(zhao): handle ttl_purge_tables as well + if (db == "mysql" && table == "ttl_purge_nodes") { + updated = true; + } + } + } + } else { + if (iter != g_ttl_cache.end()) { + std::cerr << "Remove[2] TTL of table " << db + "/" + table + << " in cache: [" << iter->second.table_id + << ", " << iter->second.ttl_sec + << ", " << iter->second.col_no + << "]" << std::endl; + g_ttl_cache.erase(iter); + updated = true; + } else { + // check mysql.ttl_purge_nodes + // TODO(zhao): handle ttl_purge_tables as well + if (db == "mysql" && table == "ttl_purge_nodes") { + updated = true; + } + } + } + return updated; +} + +bool 
UpdateLocalCache(const std::string& db, + const std::string& table, + const std::string& new_table, + const NdbDictionary::Table* tab) { + // 1. Remove old table + bool ret = UpdateLocalCache(db, table, nullptr); + assert(ret); + // 2. Insert new table + ret = UpdateLocalCache(db, new_table, tab); + assert(ret); + return ret; +} + +bool DropDBLocalCache(std::string& db_str) { + bool updated = false; + for (auto iter = g_ttl_cache.begin(); iter != g_ttl_cache.end();) { + auto pos = iter->first.find('/'); + if (pos != std::string::npos) { + std::string db = iter->first.substr(0, pos); + if (db == db_str) { + std::cerr << "Remove[3] TTL of table " << iter->first + << " in cache: [" << iter->second.table_id + << ", " << iter->second.ttl_sec + << ", " << iter->second.col_no + << "]" << std::endl; + iter = g_ttl_cache.erase(iter); + updated = true; + continue; + } + } + iter++; + } + return updated; +} + +longlong Datetime2ll(const MYSQL_TIME &my_time) { + const longlong ymd = ((my_time.year * 13 + my_time.month) << 5) | my_time.day; + const longlong hms = + (my_time.hour << 12) | (my_time.minute << 6) | my_time.second; + assert(std::abs(static_cast(my_time.second_part)) <= 0xffffffLL); + const longlong tmp = (static_cast((ymd << 17) | hms) << 24) + + my_time.second_part; + return my_time.neg ? 
-tmp : tmp; +} + +void LL2datetime(MYSQL_TIME *ltime, int64_t tmp) { + int64_t ymd; + int64_t hms; + int64_t ymdhms; + int64_t ym; + + if ((ltime->neg = (tmp < 0))) tmp = -tmp; + + ltime->second_part = (tmp % (1LL << 24)); + ymdhms = (tmp >> 24); + + ymd = ymdhms >> 17; + ym = ymd >> 5; + hms = ymdhms % (1 << 17); + + ltime->day = ymd % (1 << 5); + ltime->month = ym % 13; + ltime->year = static_cast(ym / 13); + + ltime->second = hms % (1 << 6); + ltime->minute = (hms >> 6) % (1 << 6); + ltime->hour = static_cast(hms >> 12); + + ltime->time_type = MYSQL_TIMESTAMP_DATETIME; + ltime->time_zone_displacement = 0; +} + +uint32_t g_batch_size = 5; +std::atomic g_purge_thread_exit = false; +void SetPurgeThreadExit(bool exit) { + g_purge_thread_exit = exit; +} +bool IsPurgeThreadExit() { + return (IsExit() || g_purge_thread_exit); +} + +std::atomic g_purge_thread_asks_for_retry = false; + +bool IsNodeAlive(unsigned char* last_active); +bool GetShard(Ndb* myNdb, int32_t* shard, int32_t* n_purge_nodes, + bool update_objects) { + /* + * shard: + * -2: The current node is not a purging node + * -1: No configure table found, purge by default + * [0 - X]: shard no + */ + *shard = -1; + *n_purge_nodes = 0; + if (myNdb->setDatabaseName("mysql") != 0) { + std::cerr << "Failed to select database: " << "mysql" + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." 
+ << std::endl; + return false; + } + NdbDictionary::Dictionary* myDict = myNdb->getDictionary(); + if (update_objects) { + myDict->removeCachedTable("ttl_purge_nodes"); + } + const NdbDictionary::Table* tab = myDict->getTable("ttl_purge_nodes"); + if (tab == nullptr) { + if (myDict->getNdbError().code == 723) { + // std::cerr << "ttl_purge_nodes table is not found, no sharding" + // << std::endl; + return true; + } else { + std::cerr << "Failed to get table: " << "ttl_purge_nodes" + << ", error: " << myDict->getNdbError().code << "(" + << myDict->getNdbError().message << "), retry..." + << std::endl; + return false; + } + } + NdbRecAttr *myRecAttr[3]; + NdbTransaction* trans = nullptr; + NdbScanOperation *scan_op = nullptr; + int32_t n_nodes = 0;; + std::vector purge_nodes; + size_t pos = 0; + bool check = 0; + + trans = myNdb->startTransaction(); + if (trans == nullptr) { + std::cerr << "Failed to start transaction, error: " + << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + scan_op = trans->getNdbScanOperation(tab); + if (scan_op == nullptr) { + std::cerr << "Failed to get scan operation, " << "ttl_purge_nodes" + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + if (scan_op->readTuples(NdbOperation::LM_CommittedRead) != 0) { + std::cerr << "Failed to readTuples, " << "ttl_purge_nodes" + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + myRecAttr[0] = scan_op->getValue("node_id"); + if (myRecAttr[0] == nullptr) { + std::cerr << "Failed to getValue, " << "ttl_purge_nodes" + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." 
+ << std::endl; + goto err; + } + myRecAttr[1] = scan_op->getValue("last_active"); + if (myRecAttr[1] == nullptr) { + std::cerr << "Failed to getValue, " << "ttl_purge_nodes" + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + if (trans->execute(NdbTransaction::NoCommit) != 0) { + std::cerr << "Failed to execute, " << "ttl_purge_nodes" + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + n_nodes = 0; + purge_nodes.clear(); + pos = 0; + while ((check = scan_op->nextResult(true)) == 0) { + do { + if (myRecAttr[0]->int32_value() != myNdb->getNodeId() && + (myRecAttr[1]->isNULL() || + !IsNodeAlive(reinterpret_cast( + myRecAttr[1]->aRef())))) { + std::cerr << "Detected inactive purging node " + << myRecAttr[0]->int32_value() << ", skip it" + << std::endl; + continue; + } + n_nodes++; + purge_nodes.push_back(myRecAttr[0]->int32_value()); + } while ((check = scan_op->nextResult(false)) == 0); + } + + std::sort(purge_nodes.begin(), purge_nodes.end()); + if (!purge_nodes.empty()) { + fprintf(stderr, "Alive configured purge nodes: "); + for (auto iter : purge_nodes) { + fprintf(stderr, "%u ", iter); + if (myNdb->getNodeId() == iter) { + *shard = pos; + } + pos++; + } + fprintf(stderr, "\n"); + } + if (!purge_nodes.empty() && *shard == -1) { + // if the current node id is not in the purge nodes list, + // set shard to -2 to tell the purge thread sleep + *shard = -2; + } + *n_purge_nodes = n_nodes; + myNdb->closeTransaction(trans); + return true; + +err: + if (trans != nullptr) { + myNdb->closeTransaction(trans); + } + return false; +} + +#define DATETIMEF_INT_OFS 0x8000000000LL +int64_t GetNow(unsigned char* now_char) { + assert(now_char != nullptr); + int64_t now = 0; + memset(now_char, 0, 8); + MYSQL_TIME curr_dt; + struct tm tmp_tm; + time_t t_now = (time_t)my_micro_time() / 1000000; /* second */ + 
gmtime_r(&t_now, &tmp_tm); + curr_dt.neg = false; + curr_dt.second_part = 0; + curr_dt.year = ((tmp_tm.tm_year + 1900) % 10000); + curr_dt.month = tmp_tm.tm_mon + 1; + curr_dt.day = tmp_tm.tm_mday; + curr_dt.hour = tmp_tm.tm_hour; + curr_dt.minute = tmp_tm.tm_min; + curr_dt.second = tmp_tm.tm_sec; + curr_dt.time_zone_displacement = 0; + curr_dt.time_type = MYSQL_TIMESTAMP_DATETIME; + if (curr_dt.second == 60 || curr_dt.second == 61) { + curr_dt.second = 59; + } + now = Datetime2ll(curr_dt); + mi_int5store(now_char, (now >> 24) + DATETIMEF_INT_OFS); + return now; +} + +long calc_daynr(uint year, uint month, uint day) { + long delsum; + int temp; + int y = year; /* may be < 0 temporarily */ + + if (y == 0 && month == 0) return 0; /* Skip errors */ + /* Cast to int to be able to handle month == 0 */ + delsum = static_cast(365 * y + 31 * (static_cast(month) - 1) + + static_cast(day)); + if (month <= 2) + y--; + else + delsum -= static_cast(static_cast(month) * 4 + 23) / 10; + temp = ((y / 100 + 1) * 3) / 4; + assert(delsum + static_cast(y) / 4 - temp >= 0); + return (delsum + static_cast(y) / 4 - temp); +} /* calc_daynr */ + +uint calc_days_in_year(uint year) { + return ((year & 3) == 0 && (year % 100 || (year % 400 == 0 && year)) ? 
366 + : 365); +} + +const uchar days_in_month[] = {31, 28, 31, 30, 31, 30, 31, + 31, 30, 31, 30, 31, 0}; +void get_date_from_daynr(int64_t daynr, uint *ret_year, uint *ret_month, + uint *ret_day) { + uint year; + uint temp; + uint leap_day; + uint day_of_year; + uint days_in_year; + const uchar *month_pos; + + if (daynr <= 365L || daynr >= 3652500) { /* Fix if wrong daynr */ + *ret_year = *ret_month = *ret_day = 0; + } else { + year = static_cast(daynr * 100 / 36525L); + temp = (((year - 1) / 100 + 1) * 3) / 4; + day_of_year = static_cast(daynr - static_cast(year) * 365L) - + (year - 1) / 4 + temp; + while (day_of_year > (days_in_year = calc_days_in_year(year))) { + day_of_year -= days_in_year; + (year)++; + } + leap_day = 0; + if (days_in_year == 366) { + if (day_of_year > 31 + 28) { + day_of_year--; + if (day_of_year == 31 + 28) + leap_day = 1; /* Handle leap year's leap day */ + } + } + *ret_month = 1; + for (month_pos = days_in_month; day_of_year > static_cast(*month_pos); + day_of_year -= *(month_pos++), (*ret_month)++) + ; + *ret_year = year; + *ret_day = day_of_year + leap_day; + } +} + +ulonglong TIME_to_ulonglong_datetime(const MYSQL_TIME &my_time) { + return (static_cast(my_time.year * 10000UL + + my_time.month * 100UL + my_time.day) * + 1000000ULL + + static_cast(my_time.hour * 10000UL + + my_time.minute * 100UL + my_time.second)); +} + +#define LEASE_SECONDS 10 +bool IsNodeAlive(unsigned char* last_active) { + assert(last_active != nullptr); + const int64_t intpart = mi_uint5korr(last_active) - DATETIMEF_INT_OFS; + int64_t last_active_ll = (static_cast(intpart) << 24); + MYSQL_TIME ltime; + LL2datetime(<ime, last_active_ll); + + longlong sec, days, daynr; + sec = + ((ltime.day - 1) * 3600LL * 24LL + ltime.hour * 3600LL + + ltime.minute * 60LL + ltime.second + + LEASE_SECONDS); + days = sec / (3600 * 24LL); + sec -= days * 3600 * 24LL; + if (sec < 0) { + days--; + sec += 3600 * 24LL; + } + ltime.second_part = 0; + ltime.second = static_cast(sec % 60); + 
ltime.minute = static_cast(sec / 60 % 60); + ltime.hour = static_cast(sec / 3600); + daynr = calc_daynr(ltime.year, ltime.month, 1) + days; + get_date_from_daynr(daynr, <ime.year, <ime.month, <ime.day); + + MYSQL_TIME curr_dt; + struct tm tmp_tm; + time_t t_now = (time_t)my_micro_time() / 1000000; /* second */ + gmtime_r(&t_now, &tmp_tm); + curr_dt.neg = false; + curr_dt.second_part = 0; + curr_dt.year = ((tmp_tm.tm_year + 1900) % 10000); + curr_dt.month = tmp_tm.tm_mon + 1; + curr_dt.day = tmp_tm.tm_mday; + curr_dt.hour = tmp_tm.tm_hour; + curr_dt.minute = tmp_tm.tm_min; + curr_dt.second = tmp_tm.tm_sec; + curr_dt.time_zone_displacement = 0; + curr_dt.time_type = MYSQL_TIMESTAMP_DATETIME; + if (curr_dt.second == 60 || curr_dt.second == 61) { + curr_dt.second = 59; + } + uint64_t last_active_add_lease_llu = TIME_to_ulonglong_datetime(ltime); + uint64_t now_llu = TIME_to_ulonglong_datetime(curr_dt); + // fprintf(stderr, "lease_expires_at: %lu, now_llu: %lu\n", + // last_active_add_lease_llu, now_llu); + if (last_active_add_lease_llu >= now_llu) { + return true; + } else { + return false; + } +} + +bool UpdateLease(Ndb* myNdb, unsigned char* now_char) { + if (myNdb->setDatabaseName("mysql") != 0) { + std::cerr << "Failed to select database: " << "mysql" + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + return false; + } + NdbDictionary::Dictionary* myDict = myNdb->getDictionary(); + const NdbDictionary::Table* tab = myDict->getTable("ttl_purge_nodes"); + if (tab == nullptr) { + if (myDict->getNdbError().code == 723) { + std::cerr << "ttl_purge_nodes table is not found, no lease" + << std::endl; + return true; + } else { + std::cerr << "Failed to get table: " << "ttl_purge_nodes" + << ", error: " << myDict->getNdbError().code << "(" + << myDict->getNdbError().message << "), retry..." 
+ << std::endl; + return false; + } + } + NdbTransaction* trans = nullptr; + NdbOperation *op = nullptr; + + trans = myNdb->startTransaction(); + if (trans == nullptr) { + std::cerr << "Failed to start transaction, error: " + << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + op = trans->getNdbOperation(tab); + if (op == nullptr) { + std::cerr << "Failed to get operation, " << "ttl_purge_nodes" + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + op->updateTuple(); + op->equal("node_id", myNdb->getNodeId()); + op->setValue("last_active", reinterpret_cast(now_char)); + + if (trans->execute(NdbTransaction::Commit) != 0) { + if (trans->getNdbError().code != 626 /*not found*/) { + std::cerr << "Failed to commit, " << "ttl_purge_nodes" + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + } + myNdb->closeTransaction(trans); + return true; +err: + if (trans != nullptr) { + myNdb->closeTransaction(trans); + } + return false; +} + +void PurgeTTL(Ndb_cluster_connection* cluster_connection) { + SetPurgeThreadExit(false); + Ndb* myNdb = new Ndb(cluster_connection); + myNdb->setNdbObjectName("TTL record-cleaner"); + NdbDictionary::Dictionary* myDict = nullptr; + std::map local_ttl_cache; + const NdbDictionary::Table* ttl_tab = nullptr; + const NdbDictionary::Index *ttl_index = nullptr; + NdbTransaction* trans = nullptr; + NdbScanOperation *scan_op = nullptr; + NdbRecAttr *myRecAttr[3]; + myDict = myNdb->getDictionary(); + g_cache_updated = true; + int32_t shard = -1; + int32_t n_purge_nodes = 0; + size_t pos = 0; + std::string db_str; + std::string table_str; + uint32_t ttl_col_no = 0; + int check = 0; + NdbError err; + uint32_t deletedRows = 0; + int64_t now = 0; + unsigned char now_char[8]; + unsigned char buf[8]; + int trx_failure_times = 
0; + bool batch_done = false; + bool update_objects = false; + std::map::iterator iter; + bool purge_trx_started = false; + + if (myNdb->init() != 0) { + std::cerr << "Failed to init Ndb object, error: " + << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + + do { + purge_trx_started = false; + + update_objects = false; + if (g_cache_updated) { + std::cerr << "found global ttl cache updated" << std::endl; + const std::lock_guard lock(g_mutex); + local_ttl_cache = g_ttl_cache; + g_cache_updated = false; + std::cerr << "update local ttl cache, total ttl table nums: " + << local_ttl_cache.size() + << std::endl; + update_objects = true; + } + + shard = -1; + n_purge_nodes = 0; + if (GetShard(myNdb, &shard, &n_purge_nodes, update_objects) == false) { + std::cerr << "Failed to select shard" + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } else { + fprintf(stderr, "Get shard: %d of %d\n", shard, n_purge_nodes); + } + if (shard == -2) { + std::cerr << "I'm not responsible for purging..." 
<< std::endl; + sleep(2); + continue; + } + + { + GetNow(now_char); + // update lease if shard is activated + if (shard >= 0 && !UpdateLease(myNdb, now_char)) { + std::cerr << "Failed to update the lease" << std::endl; + goto err; + } + } + if (local_ttl_cache.empty()) { + std::cerr << "No ttl table is found, no need to purge" << std::endl; + sleep(1); + continue; + } + for (iter = local_ttl_cache.begin(); iter != local_ttl_cache.end(); + iter++) { + purge_trx_started = false; + { + GetNow(now_char); + if (shard >= 0 && !UpdateLease(myNdb, now_char)) { + std::cerr << "Failed to update the lease[2]" << std::endl; + goto err; + } + } + if (g_cache_updated) { + break; + } + + std::cerr << "Processing " << iter->first << ": " << std::endl; + + pos = iter->first.find('/'); + assert(pos != std::string::npos); + db_str = iter->first.substr(0, pos); + assert(pos + 1 < iter->first.length()); + table_str = iter->first.substr(pos + 1); + ttl_col_no = iter->second.col_no; + check = 0; + deletedRows = 0; + now = 0; + trx_failure_times = 0; + batch_done = false; + + if (myNdb->setDatabaseName(db_str.c_str()) != 0) { + std::cerr << "Failed to select database: " << db_str + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + if (update_objects) { + /* + * Notice: + * Based on the comment below, + * here we need to call invalidateIndex() for ttl_index, the reason is + * removeCachedTable() just decrease the reference count of the table + * object in the global list, it won't remove the object even the counter + * becomes to 0. But invalidateIndex() will set the object to DROP and + * remove it if the counter is 0. 
Since we don't call invalidateIndex + * in main thread(it's a major different with other normal table objects), + * so here we need to call invalidateIndex() + */ + myDict->invalidateIndex("ttl_index", table_str.c_str()); + /* + * Notice: + * Purge thread can only call removeCachedXXX to remove its + * thread local cached table object and decrease the reference + * count of the global cached table object. + * If we call invalidateTable() and following by getTable() here, + * Purge thread will invalidate the global cached table object + * and generate a new version of table object, which will make + * the main thread's following invalidateTable() + getTable() gets + * this table object, stops the chance to get the latest one from + * data nodes. + */ + myDict->removeCachedTable(table_str.c_str()); + } + ttl_tab= myDict->getTable(table_str.c_str()); + if (ttl_tab == nullptr) { + std::cerr << "Failed to get table: " << table_str + << ", error: " << myDict->getNdbError().code << "(" + << myDict->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + if (shard >= 0 && n_purge_nodes > 0 && + std::hash{}( + (std::to_string(ttl_tab->getTableId()) + table_str)) % + n_purge_nodes != static_cast(shard)) { + std::cerr << " Skip" << std::endl; + continue; + } + trx_failure_times = 0; +retry_trx: + trans = myNdb->startTransaction(); + if (trans == nullptr) { + std::cerr << "Failed to start transaction, " << table_str + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." 
+ << std::endl; + goto err; + } + purge_trx_started = true; + + ttl_index = myDict->getIndex("ttl_index", table_str.c_str()); + + check = 0; + deletedRows = 0; + now = 0; + if (ttl_index != nullptr) { + // Found index on ttl column, use it + std::cerr << " Using index scan, "; + const NdbDictionary::Column* ttl_col_index = ttl_index->getColumn(0); + assert(ttl_col_index != nullptr && ttl_col_index->getType() == + NdbDictionary::Column::Datetime2); + const NdbDictionary::Column* ttl_col_table = + ttl_tab->getColumn(ttl_col_index->getName()); + assert(ttl_col_table != nullptr && ttl_col_table->getType() == + NdbDictionary::Column::Datetime2 && + ttl_col_table->getColumnNo() == static_cast(ttl_col_no)); + + NdbIndexScanOperation *index_scan_op = + trans->getNdbIndexScanOperation(ttl_index); + /* Index Scan */ + Uint32 scanFlags= NdbScanOperation::SF_OrderBy | + /*NdbScanOperation::SF_MultiRange |*/ + NdbScanOperation::SF_KeyInfo | + NdbScanOperation::SF_OnlyExpiredScan; + + if (index_scan_op->readTuples(NdbOperation::LM_Exclusive, + scanFlags + /*(Uint32) 0 // batch */ + /*(Uint32) 0 // parallel */ + ) != 0) { + std::cerr << "Failed to readTuples, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + { + now = GetNow(now_char); + std::cerr << "bound ["; + for (Uint32 i = 0; i < 8; i++) { + std::cerr << std::hex + << static_cast(iter->second.last_purged[i]) + << " "; + } + std::cerr << " --- "; + for (Uint32 i = 0; i < 8; i++) { + std::cerr << std::hex + << static_cast(now_char[i]) + << " "; + } + std::cerr << "), " << std::endl; + } + if (index_scan_op->setBound(ttl_col_index->getName(), + NdbIndexScanOperation::BoundLE, + iter->second.last_purged)) { + std::cerr << "Failed to setBound, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." 
+ << std::endl; + goto err; + } + if (index_scan_op->setBound(ttl_col_index->getName(), + NdbIndexScanOperation::BoundGT, now_char)) { + std::cerr << "Failed to setBound, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + myRecAttr[0] = index_scan_op->getValue(ttl_col_no); + if (myRecAttr[0] == nullptr) { + std::cerr << "Failed to getValue, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + if (trans->execute(NdbTransaction::NoCommit) != 0) { + std::cerr << "Failed to execute, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + memset(buf, 0, 8); + batch_done = false; + while ((check = index_scan_op->nextResult(true)) == 0) { + do { + memset(buf, 0, 8); + memcpy(buf, myRecAttr[0]->aRef(), + myRecAttr[0]->get_size_in_bytes()); + // std::cerr << "Get a expired row: timestamp = [" + // << myRecAttr[0]->get_size_in_bytes() << "]"; + // for (Uint32 i = 0; i < myRecAttr[0]->get_size_in_bytes(); i++) { + // std::cerr << std::hex + // << static_cast(myRecAttr[0]->aRef()[i]) + // << " "; + // } + // std::cerr << std::endl; + if (index_scan_op->deleteCurrentTuple() != 0) { + std::cerr << "Failed to delete, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + deletedRows++; + if (deletedRows >= g_batch_size) { + batch_done = true; + break; + } + } while ((check = index_scan_op->nextResult(false)) == 0); + + if (check != -1) { + check = trans->execute(NdbTransaction::NoCommit); + } + + if (check == -1) { + std::cerr << "Failed to execute[2], " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." 
+ << std::endl; + goto err; + } + if (deletedRows >= g_batch_size) { + break; + } + } + /** + * Commit all prepared operations + */ + if (trans->execute(NdbTransaction::Commit) == -1) { + std::cerr << "Failed to commit, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } else if (*(uint64*)(buf) != 0) { + memcpy(iter->second.last_purged, + buf, 8); + } + } else if (myDict->getNdbError().code == 4243) { + // Can't find the index on ttl column, use table instead + std::cerr << " Using table scan" << std::endl; + scan_op = trans->getNdbScanOperation(ttl_tab); + if (scan_op == nullptr) { + std::cerr << "Failed to get scan operation, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + Uint32 scanFlags= NdbScanOperation::SF_OnlyExpiredScan; + if (scan_op->readTuples(NdbOperation::LM_Exclusive, scanFlags) != 0) { + std::cerr << "Failed to readTuples, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + myRecAttr[0] = scan_op->getValue(ttl_col_no); + if (myRecAttr[0] == nullptr) { + std::cerr << "Failed to getValue, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + if (trans->execute(NdbTransaction::NoCommit) != 0) { + std::cerr << "Failed to execute, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." 
+ << std::endl; + goto err; + } + batch_done = false; + while ((check = scan_op->nextResult(true)) == 0) { + do { + // std::cerr << "Get a expired row: timestamp = [" + // << myRecAttr[0]->get_size_in_bytes() << "]"; + // for (Uint32 i = 0; i < myRecAttr[0]->get_size_in_bytes(); i++) { + // std::cerr << std::hex + // << static_cast(myRecAttr[0]->aRef()[i]) + // << " "; + // } + // std::cerr << std::endl; + if (scan_op->deleteCurrentTuple() != 0) { + std::cerr << "Failed to delete, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + deletedRows++; + if (deletedRows >= g_batch_size) { + batch_done = true; + break; + } + + } while ((check = scan_op->nextResult(false)) == 0); + + if (check != -1) { + check = trans->execute(NdbTransaction::NoCommit); + } + + if (check == -1) { + std::cerr << "Failed to execute[2], " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + + if (batch_done) { + break; + } + } + /** + * Commit all prepared operations + */ + if (trans->execute(NdbTransaction::Commit) == -1) { + std::cerr << "Failed to commit, " << table_str + << ", error: " << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + } else { + std::cerr << "Failed to get Table/Index object, error: " + << myDict->getNdbError().code << "(" + << myDict->getNdbError().message << ")" + << std::endl; + goto err; + } + myNdb->closeTransaction(trans); + trans = nullptr; + fprintf(stderr, " Purged %u rows\n", deletedRows); + // Finish 1 batch + // keep the ttl_tab in local table cache ? 
+ continue; +err: + if (trans != nullptr) { + myNdb->closeTransaction(trans); + } + trx_failure_times++; + sleep(1); + if (trx_failure_times > 10) { + std::cerr << "PurgeTTL has retried for 10 times...It's time to ask for " + "retring from connecting" << std::endl; + g_purge_thread_asks_for_retry = true; + SetPurgeThreadExit(true); + break; + } else if (purge_trx_started) { + goto retry_trx; + } else { + // retry from begining + break; // jump out from for-loop + } + } + // Finish 1 round + sleep(2); + } while (!IsPurgeThreadExit()); + + delete myNdb; + std::cerr << "PurgeTTL thread exit" << std::endl; + return; +} + +int main(int argc, char **argv) { + if (argc != 2) { + std::cout << "Arguments are \n"; + exit(-1); + } + const char *connectstring = argv[1]; + + const char* eventName = "REPL$mysql/ndb_schema"; + const char* schema_db_name = "mysql"; + const char* schema_tab_name = "ndb_schema"; + const char* schema_res_tab_name = "ndb_schema_result"; + NdbDictionary::Dictionary* myDict = nullptr; + const NdbDictionary::Table* schema_tab = nullptr; + const NdbDictionary::Table* schema_res_tab = nullptr; + const int noEventColumnName = 10; + const char *eventColumnName[noEventColumnName] = { + "db", + "name", + "slock", + "query", + "node_id", + "epoch", + "id", + "version", + "type", + "schema_op_id" + }; + typedef union { + NdbRecAttr* ra; + NdbBlob* bh; + } RA_BH; + char slock_buf_pre[32]; + char slock_buf[32]; + bool initialized_cache = false; + const char* message_buf = "API_OK"; + char event_name_buf[128]; + uint32_t event_nums = 0; + bool init_succ = false; + NdbEventOperation *op = nullptr; + std::thread purge_thread; + + ndb_init(); + +retry_from_connecting: + Ndb_cluster_connection *cluster_connection = new Ndb_cluster_connection( + connectstring); + + int r = cluster_connection->connect(-1 /* retries */, + 3 /* delay between retries */, + 1 /* verbose */); + if (r > 0) { + std::cout + << "Cluster connect failed, possibly resolved with more retries.\n"; + 
exit(-1); + } else if (r < 0) { + std::cout << "Cluster connect failed.\n"; + exit(-1); + } + + if (cluster_connection->wait_until_ready(30, 30)) { + std::cout << "Cluster was not ready within 30 secs." << std::endl; + exit(-1); + } + + Ndb *myNdb = new Ndb(cluster_connection); + myNdb->setNdbObjectName("TTL schema-watcher"); + + if (myNdb->init() != 0) { + std::cerr << "Failed to init Ndb object, error: " + << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + + myDict = nullptr; + schema_tab = nullptr; + schema_res_tab = nullptr; + initialized_cache = false; + init_succ = false; + g_ttl_cache.clear(); + op = nullptr; + + do { + + if (myNdb->setDatabaseName(schema_db_name) != 0) { + std::cerr << "Failed to select system database: " << schema_db_name + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + myDict = myNdb->getDictionary(); + schema_tab= myDict->getTable(schema_tab_name); + if (schema_tab == nullptr) { + std::cerr << "Failed to get system table: " << schema_tab_name + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + schema_res_tab= myDict->getTable(schema_res_tab_name); + if (schema_res_tab == nullptr) { + std::cerr << "Failed to get system table: " << schema_res_tab_name + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." 
+ << std::endl; + goto err; + } + + NdbDictionary::Event my_event(eventName); + my_event.setTable(*schema_tab); + my_event.addTableEvent(NdbDictionary::Event::TE_ALL); + my_event.mergeEvents(true); + my_event.setReportOptions(NdbDictionary::Event::ER_ALL | NdbDictionary::Event::ER_SUBSCRIBE | + NdbDictionary::Event::ER_DDL); + const int n_cols = schema_tab->getNoOfColumns(); + for (int i = 0; i < n_cols; i++) { + // const NdbDictionary::Column* col = schema_tab->getColumn(i); + // std::cerr << "Column: " << col->getName() + // << ", type: " << col->getType() + // << ", length: " << col->getLength() + // << ", size_in_bytes: " << col->getSizeInBytes() << std::endl; + my_event.addEventColumn(i); + } + + NdbDictionary::Dictionary *dict = myNdb->getDictionary(); + if (dict->createEvent(my_event)) { + if (dict->getNdbError().classification != NdbError::SchemaObjectExists) { + std::cerr << "Failed to create event, error: " + << dict->getNdbError().code << "(" + << dict->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + } + NdbDictionary::Event_ptr ev(dict->getEvent(eventName)); + if (ev) { + // The event already exists in NDB + init_succ = true; + } else { + if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT && + dict->dropEvent(my_event.getName(), 1)) { + std::cerr << "Failed to drop the old event, error: " + << dict->getNdbError().code << "(" + << dict->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + std::cerr << "Failed to get the event, error: " + << dict->getNdbError().code << "(" + << dict->getNdbError().message << "), " + << "drop the old one and retry..." + << std::endl; + } + } while (!init_succ); + + if ((op = myNdb->createEventOperation(eventName)) == nullptr) { + std::cerr << "Failed to create event operation, error: " + << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry... 
" + << std::endl; + goto err; + } + op->mergeEvents(true); + + RA_BH recAttr[noEventColumnName]; + RA_BH recAttrPre[noEventColumnName]; + for (int i = 0; i < noEventColumnName; i++) { + if (i != 3) { + recAttr[i].ra = op->getValue(eventColumnName[i]); + recAttrPre[i].ra = op->getPreValue(eventColumnName[i]); + } else { + recAttr[i].bh = op->getBlobHandle(eventColumnName[i]); + recAttrPre[i].bh = op->getPreBlobHandle(eventColumnName[i]); + } + } + + if (op->execute()) { + std::cerr << "Failed to execute event operation, error: " + << op->getNdbError().code << "(" + << op->getNdbError().message << "), retry... " + << std::endl; + goto err; + } + + if (initialized_cache == false) { + g_ttl_cache.clear(); + NdbDictionary::Dictionary::List list; + if (myDict->listObjects(list, NdbDictionary::Object::UserTable) != 0) { + std::cerr << "Failed to list objects, error: " + << myDict->getNdbError().code << "(" + << myDict->getNdbError().message << "), retry... " + << std::endl; + goto err; + } + + for (uint i = 0; i < list.count; i++) { + NdbDictionary::Dictionary::List::Element &elmt = list.elements[i]; + + const char* db_str = elmt.database; + assert(elmt.schema == std::string("def")); // always "/def/" + const char *table_str = elmt.name; + // std::cerr << "Hi " << db_str << "/" << table_str << std::endl; + if (strcmp(db_str, "mysql") == 0) { + continue; + } + + if (myNdb->setDatabaseName(db_str) != 0) { + std::cerr << "Failed to select database: " << db_str + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + const NdbDictionary::Table* tab= myDict->getTable( + table_str); + if (tab == nullptr) { + std::cerr << "Failed to get table: " << table_str + << ", error: " << myDict->getNdbError().code << "(" + << myDict->getNdbError().message << "), retry..." 
+ << std::endl; + goto err; + } + UpdateLocalCache(db_str, table_str, tab); + } + initialized_cache = true; + } + + purge_thread = std::thread(PurgeTTL, cluster_connection); + + while (!IsExit()) { + int res = myNdb->pollEvents(1000); // wait for event or 1000 ms + if (res > 0) { + while ((op = myNdb->nextEvent())) { + if (op->hasError()) { + std::cerr << "Get an event error, " << op->getNdbError().code + << "(" << op->getNdbError().message + << ") on handling ndb_schema event, retry..." + << std::endl; + goto err; + } + event_nums++; + std::cerr << "EVENT [" << event_nums << "]: " + << GetEventName(op->getEventType(), event_name_buf) + << ", GCI = " << op->getGCI() << std::endl; + char* ptr = nullptr; + char* ptr_pre = nullptr; + std::string db_str_pre; + std::string db_str; + std::string table_str_pre; + std::string table_str; + std::string query_str_pre; + std::string query_str; + uint32_t node_id = 0; + uint32_t type = 0; + uint32_t id = 0; + uint32_t schema_op_id = 0; + NdbTransaction* trans = nullptr; + NdbOperation* top = nullptr; + bool clear_slock = false; + bool trx_succ = false; + uint32_t trx_failure_times = 0; + bool cache_updated = false; + std::cerr << "----------------------------" << std::endl; + switch (op->getEventType()) { + case NdbDictionary::Event::TE_CLUSTER_FAILURE: + case NdbDictionary::Event::TE_CREATE: + case NdbDictionary::Event::TE_ALTER: + case NdbDictionary::Event::TE_DROP: + case NdbDictionary::Event::TE_STOP: + case NdbDictionary::Event::TE_INCONSISTENT: + case NdbDictionary::Event::TE_OUT_OF_MEMORY: + // Retry from beginning + goto err; + case NdbDictionary::Event::TE_INSERT: + case NdbDictionary::Event::TE_UPDATE: + case NdbDictionary::Event::TE_DELETE: + for (int l = 0; l < noEventColumnName; l++) { + ptr_pre = recAttrPre[l].ra->aRef(); + ptr = recAttr[l].ra->aRef(); + switch(l) { + case 0: + db_str_pre = std::string(ptr_pre + 1, + recAttrPre[l].ra->u_8_value()); + db_str = std::string(ptr + 1, + recAttr[l].ra->u_8_value()); + 
std::cerr << " db: " + << "[" << static_cast( + recAttrPre[l].ra->u_8_value()) + << "]" + << db_str_pre << " -> " + << "[" << static_cast( + recAttr[l].ra->u_8_value()) + << "]" + << db_str << std::endl; + break; + case 1: + table_str_pre = std::string(ptr_pre + 1, + recAttrPre[l].ra->u_8_value()); + table_str = std::string(ptr + 1, + recAttr[l].ra->u_8_value()); + std::cerr << " table: " + << "[" << static_cast( + recAttrPre[l].ra->u_8_value()) + << "]" + << table_str_pre << " -> " + << "[" << static_cast( + recAttr[l].ra->u_8_value()) + << "]" + << table_str << std::endl; + break; + case 2: + memset(slock_buf_pre, 0, 32); + memcpy(slock_buf_pre, recAttrPre[l].ra->aRef(), 32); + std::cerr << " slock: "; + for (int i = 0; i < 32; i++) { + std::cerr << static_cast(slock_buf_pre[i]) << " "; + } + std::cerr << std::endl; + std::cerr << " ->"; + memset(slock_buf, 0, 32); + memcpy(slock_buf, recAttr[l].ra->aRef(), 32); + for (int i = 0; i < 32; i++) { + std::cerr << static_cast(slock_buf[i]) << " "; + } + std::cerr << std::endl; + break; + case 3: + { + int blob_is_null = 0; + Uint64 blob_len = 0; + recAttrPre[l].bh->getNull(blob_is_null); + recAttrPre[l].bh->getLength(blob_len); + std::cerr << " query: "; + if (blob_is_null == 0 && blob_len != 0) { + std::cerr << "[" << blob_len <<"] "; + Uint32 read_len = static_cast(blob_len); + query_str_pre.resize(read_len, '\0'); + recAttrPre[l].bh->readData(query_str_pre.data(), read_len); + std::cerr << query_str_pre; + } else { + std::cerr << "[0] "; + } + std::cerr << std::endl; + std::cerr << " ->"; + blob_is_null = 0; + blob_len = 0; + recAttr[l].bh->getNull(blob_is_null); + recAttr[l].bh->getLength(blob_len); + if (blob_is_null == 0 && blob_len != 0) { + std::cerr << "[" << blob_len <<"] "; + Uint32 read_len = static_cast(blob_len); + query_str.resize(read_len, '\0'); + recAttr[l].bh->readData(query_str.data(), read_len); + std::cerr << query_str; + } else { + std::cerr << "[0]"; + } + std::cerr << std::endl; + break; + } + 
case 4: + node_id = recAttr[l].ra->u_32_value(); + std::cerr << " node_id: " + << recAttrPre[l].ra->u_32_value() << " -> " + << node_id << std::endl; + break; + case 5: + std::cerr << " epoch: " + << recAttrPre[l].ra->u_32_value() << " -> " + << recAttr[l].ra->u_32_value() << std::endl; + break; + case 6: + id = recAttr[l].ra->u_32_value(); + std::cerr << " id: " + << recAttrPre[l].ra->u_32_value() << " -> " + << recAttr[l].ra->u_32_value() << std::endl; + break; + case 7: + std::cerr << " version: " + << recAttrPre[l].ra->u_32_value() << " -> " + << recAttr[l].ra->u_32_value() << std::endl; + break; + case 8: + // SCHEMA_OP_TYPE + type = recAttr[l].ra->u_32_value(); + std::cerr << " type: " + << recAttrPre[l].ra->u_32_value() << " -> " + << type << std::endl; + break; + case 9: + schema_op_id = recAttr[l].ra->u_32_value(); + std::cerr << " schema_op_id: " + << recAttrPre[l].ra->u_32_value() << " -> " + << schema_op_id << std::endl; + break; + default: + break; + } + } + std::cerr << "----------------------------" << std::endl; + + clear_slock = false; + cache_updated = false; + switch(type) { + case SCHEMA_OP_TYPE::SOT_RENAME_TABLE: + { + std::string new_table_str; + auto pos = query_str_pre.find(db_str); + if (pos != std::string::npos) { + pos += db_str.length(); + assert(query_str_pre.at(pos) == '/'); + pos += 1; + new_table_str = query_str_pre.substr(pos); + } + if (myNdb->setDatabaseName(db_str.c_str()) != 0) { + std::cerr << "Failed to select database: " << db_str + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + myDict->invalidateTable(table_str.c_str()); + const NdbDictionary::Table* tab= myDict->getTable( + new_table_str.c_str()); + if (tab == nullptr) { + std::cerr << "Failed to get table: " << new_table_str + << ", error: " << myDict->getNdbError().code << "(" + << myDict->getNdbError().message << "), retry..." 
+ << std::endl; + goto err; + } + const std::lock_guard lock(g_mutex); + cache_updated = UpdateLocalCache(db_str, table_str, + new_table_str, tab); + break; + } + case SCHEMA_OP_TYPE::SOT_DROP_TABLE: + { + const std::lock_guard lock(g_mutex); + cache_updated = UpdateLocalCache(db_str, table_str, nullptr); + break; + } + case SCHEMA_OP_TYPE::SOT_DROP_DB: + { + const std::lock_guard lock(g_mutex); + cache_updated = DropDBLocalCache(db_str); + break; + } + case SCHEMA_OP_TYPE::SOT_CREATE_TABLE: + case SCHEMA_OP_TYPE::SOT_ALTER_TABLE_COMMIT: + case SCHEMA_OP_TYPE::SOT_ONLINE_ALTER_TABLE_COMMIT: + { + if (myNdb->setDatabaseName(db_str.c_str()) != 0) { + std::cerr << "Failed to select database: " << db_str + << ", error: " << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + myDict->invalidateTable(table_str.c_str()); + const NdbDictionary::Table* tab= myDict->getTable( + table_str.c_str()); + if (tab == nullptr) { + std::cerr << "Failed to get table: " << table_str + << ", error: " << myDict->getNdbError().code << "(" + << myDict->getNdbError().message << "), retry..." + << std::endl; + goto err; + } + const std::lock_guard lock(g_mutex); + cache_updated = UpdateLocalCache(db_str, table_str, tab); + break; + } + case SCHEMA_OP_TYPE::SOT_CLEAR_SLOCK: + clear_slock = true; + break; + default: + break; + } + + // Only cleaner can set g_cache_updated to false; + if (cache_updated) { + g_cache_updated = true; + } + + if (clear_slock) { + continue; + } + + trx_succ = false; + trx_failure_times = 0; + do { + trans = myNdb->startTransaction(); + if (trans == nullptr) { + std::cerr << "Failed to start transaction, error: " + << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." 
+ << std::endl; + goto trx_err; + } + top = trans->getNdbOperation(schema_res_tab); + if (top == nullptr) { + std::cerr << "Failed to get Ndb operation, error: " + << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto trx_err; + } + if (top->insertTuple() != 0 || + /*Ndb_schema_result_table::COL_NODEID*/ + top->equal("nodeid", node_id) != 0 || + /*Ndb_schema_result_table::COL_SCHEMA_OP_ID*/ + top->equal("schema_op_id", schema_op_id) != 0 || + /*Ndb_schema_result_table::COL_PARTICIPANT_NODEID*/ + top->equal("participant_nodeid", myNdb->getNodeId()) != 0 || + /*Ndb_schema_result_table::COL_RESULT*/ + top->setValue("result", 0) != 0 || + /*Ndb_schema_result_table::COL_MESSAGE*/ + top->setValue("message", message_buf) != 0) { + std::cerr << "Failed to insert tuple, error: " + << top->getNdbError().code << "(" + << top->getNdbError().message << "), retry..." + << std::endl; + goto trx_err; + } + if (trans->execute(NdbTransaction::Commit, + NdbOperation::DefaultAbortOption, + 1 /*force send*/) != 0) { + APIERROR(trans->getNdbError()); + std::cerr << "Failed to execute transaction, error: " + << trans->getNdbError().code << "(" + << trans->getNdbError().message << "), retry..." + << std::endl; + goto trx_err; + } else { + trx_succ = true; + } +trx_err: + if (trans != nullptr) { + myNdb->closeTransaction(trans); + } + if (!trx_succ) { + trx_failure_times++; + if (trx_failure_times > 10) { + goto err; + } else { + sleep(1); + } + } + } while (!trx_succ); + break; + default: + break; + } + } + } else if (g_purge_thread_asks_for_retry) { + std::cerr << "Purge thread asks for retry" << std::endl; + goto err; + } else if (res < 0) { + std::cerr << "Failed to poll event, error: " + << myNdb->getNdbError().code << "(" + << myNdb->getNdbError().message << "), retry..." 
+ << std::endl; + goto err; + } + } +err: + if (op != nullptr) { + myNdb->dropEventOperation(op); + } + op = nullptr; + myDict->dropEvent(eventName); + if (purge_thread.joinable()) { + SetPurgeThreadExit(true); + purge_thread.join(); + } + delete myNdb; + delete cluster_connection; + if (!IsExit()) { + goto retry_from_connecting; + } + ndb_end(0); + return 0; +} diff --git a/storage/ndb/src/common/debugger/signaldata/LqhKey.cpp b/storage/ndb/src/common/debugger/signaldata/LqhKey.cpp index ed0074c3ce4a..5c1937cd7f98 100644 --- a/storage/ndb/src/common/debugger/signaldata/LqhKey.cpp +++ b/storage/ndb/src/common/debugger/signaldata/LqhKey.cpp @@ -86,7 +86,8 @@ bool printLQHKEYREQ(FILE *output, const Uint32 *theData, Uint32 len, if (LqhKeyReq::getNoTriggersFlag(reqInfo)) fprintf(output, "NoTriggers "); if (LqhKeyReq::getUtilFlag(reqInfo)) fprintf(output, "UtilFlag "); if (LqhKeyReq::getNoWaitFlag(reqInfo)) fprintf(output, "NoWait "); - if(LqhKeyReq::getTTLIgnoreFlag(reqInfo)) fprintf(output, "ttl_ignore "); + if (LqhKeyReq::getTTLIgnoreFlag(reqInfo)) fprintf(output, "ttl_ignore "); + if (LqhKeyReq::getTTLOnlyExpiredFlag(reqInfo)) fprintf(output, "ttl_only_expire "); fprintf(output, "ScanInfo/noFiredTriggers: H\'%x\n", sig->scanInfo); diff --git a/storage/ndb/src/common/debugger/signaldata/TcKeyReq.cpp b/storage/ndb/src/common/debugger/signaldata/TcKeyReq.cpp index 8cb8aefd58e0..a1f903d8e9f2 100644 --- a/storage/ndb/src/common/debugger/signaldata/TcKeyReq.cpp +++ b/storage/ndb/src/common/debugger/signaldata/TcKeyReq.cpp @@ -109,6 +109,8 @@ bool printTCKEYREQ(FILE *output, const Uint32 *theData, Uint32 len, if (sig->getTTLIgnoreFlag(sig->requestInfo)) fprintf(output, " ttl_ignore"); + if (sig->getTTLOnlyExpiredFlag(sig->requestInfo)) + fprintf(output, " ttl_only_expired"); fprintf(output, "\n"); } diff --git a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp index 1eba3decc4df..7aeb4159c2e8 100644 --- 
a/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp +++ b/storage/ndb/src/kernel/blocks/dblqh/Dblqh.hpp @@ -758,6 +758,7 @@ class Dblqh : public SimulatedBlock { // TTL Uint8 m_ttl_ignore; // ignore set by API Uint8 m_ttl_ignore_for_ral; // ignore set by Read after lock + Uint8 m_ttl_only_expired; // Only be insterested in expired rows }; static constexpr Uint32 DBLQH_SCAN_RECORD_TRANSIENT_POOL_INDEX = 1; typedef Ptr ScanRecordPtr; @@ -2810,7 +2811,8 @@ class Dblqh : public SimulatedBlock { //scanKeyInfoPos only used when m_flags has OP_SCANKEYINFOPOSSAVED set //m_nr_delete only used in Copy fragment, set before used original_operation(0xFF), - ttl_ignore(0) + ttl_ignore(0), + ttl_only_expired(0) { m_dealloc_data.m_unused = RNIL; #ifdef DEBUG_USAGE_COUNT @@ -3000,6 +3002,7 @@ class Dblqh : public SimulatedBlock { Uint32 accOpPtr; /* for scan lock take over */ Uint8 original_operation; /* Zart, original operation */ Uint8 ttl_ignore; /* Zart, ttl ignore */ + Uint8 ttl_only_expired; }; /* p2c: size = 308 bytes */ static constexpr Uint32 DBLQH_OPERATION_RECORD_TRANSIENT_POOL_INDEX = 0; diff --git a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp index be6e51d6a348..7cff7239cde3 100644 --- a/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp +++ b/storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp @@ -6848,6 +6848,7 @@ void Dblqh::seizeTcrec(TcConnectionrecPtr& tcConnectptr, */ locTcConnectptr.p->original_operation = 0xFF; locTcConnectptr.p->ttl_ignore = 0; + locTcConnectptr.p->ttl_only_expired = 0; tcConnectptr = locTcConnectptr; ndbrequire(Magic::check_ptr(locTcConnectptr.p->tupConnectPtrP)); @@ -9074,10 +9075,12 @@ void Dblqh::execLQHKEYREQ(Signal *signal) { * TTL */ regTcPtr->ttl_ignore = LqhKeyReq::getTTLIgnoreFlag(Treqinfo); + regTcPtr->ttl_only_expired = LqhKeyReq::getTTLOnlyExpiredFlag(Treqinfo); #ifdef TTL_DEBUG if (NEED_PRINT(tabptr.i)) { - g_eventLogger->info("Zart, Dblqh::execLQHKEYREQ(), ttl_ignore: %u", - 
regTcPtr->ttl_ignore); + g_eventLogger->info("Zart, Dblqh::execLQHKEYREQ(), ttl_ignore: %u, only_expired: %u", + regTcPtr->ttl_ignore, + regTcPtr->ttl_only_expired); } #endif // TTL_DEBUG @@ -10167,7 +10170,7 @@ void Dblqh::exec_acckeyreq(Signal *signal, TcConnectionrecPtr regTcPtr) { if (NEED_PRINT(regTcPtr.p->tableref)) { g_eventLogger->info("Zart, Dblqh::execACCKEYCONF[1], final ignore_ttl: %u, " "table id: %u", - signal->theData[5], + regTcPtr.p->ttl_ignore, regTcPtr.p->tableref); } #endif // TTL_DEBUG @@ -14125,6 +14128,7 @@ void Dblqh::releaseTcrec(Signal *signal, TcConnectionrecPtr locTcConnectptr) { } locTcConnectptr.p->original_operation = 0xFF; locTcConnectptr.p->ttl_ignore = 0; + locTcConnectptr.p->ttl_only_expired = 0; Dblqh *lqh = m_curr_lqh; if (likely(locTcConnectptr.i < lqh->ctcConnectReserved)) { @@ -19424,6 +19428,7 @@ Uint32 Dblqh::initScanrec(const ScanFragReq *scanFragReq, Uint32 aiLen, const Uint32 firstMatch = ScanFragReq::getFirstMatchFlag(reqinfo); const Uint32 aggregation = ScanFragReq::getAggregationFlag(reqinfo); const Uint32 ttl_ignore = ScanFragReq::getTTLIgnoreFragFlag(reqinfo); + const Uint32 ttl_only_expired = ScanFragReq::getTTLOnlyExpiredFragFlag(reqinfo); scanPtr->scanLockMode = scanLockMode; scanPtr->readCommitted = readCommitted; @@ -19433,6 +19438,7 @@ Uint32 Dblqh::initScanrec(const ScanFragReq *scanFragReq, Uint32 aiLen, scanPtr->m_aggregation = aggregation; scanPtr->m_ttl_ignore = ttl_ignore; scanPtr->m_ttl_ignore_for_ral = false; + scanPtr->m_ttl_only_expired = ttl_only_expired; const Uint32 descending = ScanFragReq::getDescendingFlag(reqinfo); Uint32 tupScan = ScanFragReq::getTupScanFlag(reqinfo); diff --git a/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp b/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp index 271fef4e7c7b..6848d7cfab7d 100644 --- a/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp +++ b/storage/ndb/src/kernel/blocks/dbtc/Dbtc.hpp @@ -1577,6 +1577,7 @@ class Dbtc : public SimulatedBlock { * TTL */ Uint8 m_ttl_ignore; 
+ Uint8 m_ttl_only_expired; /* End of TCKEYREQ/TCINDXREQ only fields */ }; diff --git a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index 9cec501165a1..89f9a4325c78 100644 --- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -4168,6 +4168,7 @@ void Dbtc::execTCKEYREQ(Signal *signal) { regCachePtr->m_noWait = TcKeyReq::getNoWaitFlag(Treqinfo); regCachePtr->m_ttl_ignore = TcKeyReq::getTTLIgnoreFlag(Treqinfo); + regCachePtr->m_ttl_only_expired = TcKeyReq::getTTLOnlyExpiredFlag(Treqinfo); } else { TkeyLength = TcKeyReq::getKeyLength(Treqinfo); TattrLen = TcKeyReq::getAttrinfoLen(tcKeyReq->attrLen); @@ -4180,6 +4181,7 @@ void Dbtc::execTCKEYREQ(Signal *signal) { * unable to ignore TTL in ShortTcKeyReq? */ regCachePtr->m_ttl_ignore = 0; + regCachePtr->m_ttl_only_expired = 0; } bool util_flag = ZFALSE; if (unlikely(refToMain(sendersBlockRef) == DBUTIL)) @@ -5443,6 +5445,7 @@ void Dbtc::sendlqhkeyreq(Signal *signal, BlockReference TBRef, LqhKeyReq::setReplicaApplierFlag(Tdata10, (replica_applier == ApiConnectRecord::TF_REPLICA_APPLIER)); LqhKeyReq::setTTLIgnoreFlag(Tdata10, regCachePtr->m_ttl_ignore); + LqhKeyReq::setTTLOnlyExpiredFlag(Tdata10, regCachePtr->m_ttl_only_expired); /* ----------------------------------------------------------------------- * If we are sending a short LQHKEYREQ, then there will be some AttrInfo @@ -15937,6 +15940,8 @@ Uint32 Dbtc::initScanrec(ScanRecordPtr scanptr, const ScanTabReq *scanTabReq, ScanFragReq::setMultiFragFlag(tmp, ScanTabReq::getMultiFragFlag(ri)); ScanFragReq::setAggregationFlag(tmp, ScanTabReq::getAggregation(ri)); ScanFragReq::setTTLIgnoreFragFlag(tmp, ScanTabReq::getTTLIgnoreFlag(ri)); + ScanFragReq::setTTLOnlyExpiredFragFlag(tmp, + ScanTabReq::getTTLOnlyExpiredFlag(ri)); if (unlikely(ScanTabReq::getViaSPJFlag(ri))) { jam(); diff --git a/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp 
b/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp index 32d86f54b0a2..efa6b5c13abf 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp +++ b/storage/ndb/src/kernel/blocks/dbtup/Dbtup.hpp @@ -986,7 +986,8 @@ struct Operationrec { op_type(ZREAD), trans_state(Uint32(TRANS_DISCONNECTED)), original_op_type(ZREAD), - ttl_ignore(0) + ttl_ignore(0), + ttl_only_expired(0) { op_struct.bit_field.in_active_list = false; op_struct.bit_field.tupVersion = ZNIL; @@ -1154,6 +1155,7 @@ struct Operationrec { */ Uint32 original_op_type; Uint8 ttl_ignore; + Uint8 ttl_only_expired; }; Uint32 m_base_header_bits; diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp index 5b0a7e89a0e8..78815850f902 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp @@ -1339,12 +1339,15 @@ bool Dbtup::execTUPKEYREQ(Signal* signal, } else { regOperPtr->ttl_ignore = 0; } + regOperPtr->ttl_only_expired = lqhScanPtrP->m_ttl_only_expired; #ifdef TTL_DEBUG if (NEED_PRINT(prepare_fragptr.p->fragTableId)) { - g_eventLogger->info("Zart, Dbtup::execTUPKEYREQ(), Ignore TTL[%u, %u]: %u", + g_eventLogger->info("Zart, Dbtup::execTUPKEYREQ(), Ignore TTL[%u, %u]: %u, " + "only expired: %u", lqhScanPtrP->m_ttl_ignore, lqhScanPtrP->m_ttl_ignore_for_ral, - regOperPtr->ttl_ignore); + regOperPtr->ttl_ignore, + regOperPtr->ttl_only_expired); } #endif // TTL_DEBUG /* @@ -1394,6 +1397,7 @@ bool Dbtup::execTUPKEYREQ(Signal* signal, req_struct.m_row_id.m_page_idx = row_id_page_idx; req_struct.scan_rec = nullptr; regOperPtr->ttl_ignore = lqhOpPtrP->ttl_ignore; + regOperPtr->ttl_only_expired = lqhOpPtrP->ttl_only_expired; #ifdef TTL_DEBUG if (NEED_PRINT(prepare_fragptr.p->fragTableId) && regOperPtr->ttl_ignore) { @@ -1443,11 +1447,13 @@ bool Dbtup::execTUPKEYREQ(Signal* signal, g_eventLogger->info("Zart, [TableId: %u]" "Set Dbtup::Operationrec::original_op_type: %u, " "current 
Dbtup::Operationrec::op_type: %u, " - "ignore TTL ?(%u)", + "ignore TTL ?(%u), " + "only expired?(%u)", prepare_fragptr.p->fragTableId, regOperPtr->original_op_type, regOperPtr->op_type, - regOperPtr->ttl_ignore); + regOperPtr->ttl_ignore, + regOperPtr->ttl_only_expired); } #endif // TTL_DEBUG } @@ -1836,6 +1842,13 @@ bool Dbtup::execTUPKEYREQ(Signal* signal, &req_struct, disk_page != RNIL) == -1)) { return false; } + /* + * Here we set op_type from ZINSERT_TTL to ZUPDATE to make + * the following trigger stuffs regard this operation as a + * normal update operation. + */ + // TODO(Zhao): double check this solution + regOperPtr->op_type = ZUPDATE; /** * The lock on the TUP fragment is required to update header info on * the base row, thus we use the variable m_base_header_bits in @@ -2317,46 +2330,63 @@ int Dbtup::handleReadReq( #endif // TTL_DEBUG cmp_ret = checkTTL(regTabPtr, req_struct, &has_error, &err_no); if (!has_error) { - if (cmp_ret <= 0) { - // Expired - bool ttl_ignore_for_ral = false; - if (req_struct->scan_rec != nullptr) { - Dblqh::ScanRecord* scan_rec_ptr = - reinterpret_cast(req_struct->scan_rec); - if (!scan_rec_ptr->scanLockMode /* X */ && - !scan_rec_ptr->scanLockHold /* S */) { - ndbrequire(scan_rec_ptr->readCommitted); - if (scan_rec_ptr->scanBlock == this) { - PrepareAccLockReq4RAL(req_struct->scan_rec, signal); + if (_regOperPtr->ttl_only_expired == 0) { + if (cmp_ret <= 0) { + // Expired + bool ttl_ignore_for_ral = false; + if (req_struct->scan_rec != nullptr) { + Dblqh::ScanRecord* scan_rec_ptr = + reinterpret_cast(req_struct->scan_rec); + if (!scan_rec_ptr->scanLockMode /* X */ && + !scan_rec_ptr->scanLockHold /* S */) { + ndbrequire(scan_rec_ptr->readCommitted); + if (scan_rec_ptr->scanBlock == this) { + PrepareAccLockReq4RAL(req_struct->scan_rec, signal); + } else { + ndbrequire(reinterpret_cast(c_lqh->get_c_tux()) == + reinterpret_cast( + scan_rec_ptr->scanBlock)); + reinterpret_cast(scan_rec_ptr->scanBlock)-> + PrepareAccLockReq4RAL( 
+ req_struct->scan_rec, + signal); + } + ttl_ignore_for_ral = c_acc->WhetherSkipTTL(signal); +#ifdef TTL_DEBUG + g_eventLogger->info("Zart, Dbtup::handleReadReq() check whether needs " + "to ignore TTL: %d", ttl_ignore_for_ral); +#endif // TTL_DEBUG } else { - ndbrequire(reinterpret_cast(c_lqh->get_c_tux()) == - reinterpret_cast( - scan_rec_ptr->scanBlock)); - reinterpret_cast(scan_rec_ptr->scanBlock)-> - PrepareAccLockReq4RAL( - req_struct->scan_rec, - signal); - } - ttl_ignore_for_ral = c_acc->WhetherSkipTTL(signal); #ifdef TTL_DEBUG - g_eventLogger->info("Zart, Dbtup::handleReadReq() check whether needs " - "to ignore TTL: %d", ttl_ignore_for_ral); + g_eventLogger->info("Zart, Dbtup::handleReadReq() skip TTL " + "checking for locking-scan on TTL " + "table"); #endif // TTL_DEBUG - } else { + } + } + if (!ttl_ignore_for_ral) { #ifdef TTL_DEBUG - g_eventLogger->info("Zart, Dbtup::handleReadReq() skip TTL " - "checking for locking-scan on TTL " - "table"); + g_eventLogger->info("Zart, (READ) TTL expired"); #endif // TTL_DEBUG + terrorCode = 626; + tupkeyErrorLab(req_struct); + return -1; } } - if (!ttl_ignore_for_ral) { + } else { + if (cmp_ret > 0) { #ifdef TTL_DEBUG - g_eventLogger->info("Zart, (READ) TTL expired"); + g_eventLogger->info("Zart, (READ) TTL skip non-expired row " + "since only_expired flag is set"); #endif // TTL_DEBUG terrorCode = 626; tupkeyErrorLab(req_struct); return -1; + } else { +#ifdef TTL_DEBUG + g_eventLogger->info("Zart, (READ) TTL return expired row " + "since only_expired flag is set"); +#endif // TTL_DEBUG } } } else { diff --git a/storage/ndb/src/ndbapi/NdbDictionary.cpp b/storage/ndb/src/ndbapi/NdbDictionary.cpp index 70c422414097..75dfaff3056e 100644 --- a/storage/ndb/src/ndbapi/NdbDictionary.cpp +++ b/storage/ndb/src/ndbapi/NdbDictionary.cpp @@ -838,7 +838,7 @@ NdbDictionary::Table::getTTLColumnNo() const { } bool -NdbDictionary::Table::isTTLEnabled() { +NdbDictionary::Table::isTTLEnabled() const { return (m_impl.m_ttl_sec != RNIL 
&& m_impl.m_ttl_col_no != RNIL); } diff --git a/storage/ndb/src/ndbapi/NdbOperationDefine.cpp b/storage/ndb/src/ndbapi/NdbOperationDefine.cpp index aa2836e2d660..4009f3ea5729 100644 --- a/storage/ndb/src/ndbapi/NdbOperationDefine.cpp +++ b/storage/ndb/src/ndbapi/NdbOperationDefine.cpp @@ -1559,5 +1559,9 @@ int NdbOperation::handleOperationOptions(const OperationType type, op->theDirtyIndicator = 1; op->theSimpleIndicator = 1; } + if (opts->optionsPresent & OperationOptions::OO_TTL_ONLY_EXPIRED) + { + op->m_flags |= OF_TTL_ONLY_EXPIRED; + } return 0; } diff --git a/storage/ndb/src/ndbapi/NdbOperationExec.cpp b/storage/ndb/src/ndbapi/NdbOperationExec.cpp index d54136f791f8..e875c4e33243 100644 --- a/storage/ndb/src/ndbapi/NdbOperationExec.cpp +++ b/storage/ndb/src/ndbapi/NdbOperationExec.cpp @@ -160,6 +160,8 @@ void NdbOperation::setRequestInfoTCKEYREQ(bool lastFlag, bool longSignal) { */ TcKeyReq::setTTLIgnoreFlag(requestInfo, (m_flags & OF_TTL_IGNORE) != 0); + TcKeyReq::setTTLOnlyExpiredFlag(requestInfo, + (m_flags & OF_TTL_ONLY_EXPIRED) != 0); req->requestInfo = requestInfo; } diff --git a/storage/ndb/src/ndbapi/NdbScanOperation.cpp b/storage/ndb/src/ndbapi/NdbScanOperation.cpp index c64630e28658..7074218af816 100644 --- a/storage/ndb/src/ndbapi/NdbScanOperation.cpp +++ b/storage/ndb/src/ndbapi/NdbScanOperation.cpp @@ -484,7 +484,9 @@ inline int NdbScanOperation::scanImpl( options->optionsPresent & ScanOptions::SO_TTL_IGNORE) { m_flags |= OF_TTL_IGNORE; } - + if (options->optionsPresent & ScanOptions::SO_TTL_ONLY_EXPIRED) { + m_flags |= OF_TTL_ONLY_EXPIRED; + } /* Add interpreted code words to ATTRINFO signal * chain as necessary @@ -2044,6 +2046,14 @@ int NdbScanOperation::finaliseScanOldApi() { ScanOptions::SO_PARALLEL | ScanOptions::SO_BATCH); options.scan_flags = m_savedScanFlagsOldApi; + /* + * Zart + * Here is where we set SO_TTL_ONLY_EXPIRED + * from OldApi(SF_OnlyExpiredScan) + */ + if (options.scan_flags & SF_OnlyExpiredScan) { + options.optionsPresent |= 
ScanOptions::SO_TTL_ONLY_EXPIRED; + } options.parallel = m_savedParallelOldApi; options.batch = m_savedBatchOldApi; @@ -2205,6 +2215,7 @@ int NdbScanOperation::prepareSendScan(Uint32 /*aTC_ConnectPtr*/, * TTL */ ScanTabReq::setTTLIgnoreFlag(reqInfo, (m_flags & OF_TTL_IGNORE) != 0); + ScanTabReq::setTTLOnlyExpiredFlag(reqInfo, (m_flags & OF_TTL_ONLY_EXPIRED) != 0); req->requestInfo = reqInfo; req->distributionKey = theDistributionKey; From 5d28eaeb9723605f1e75ebf5e2430792657efb06 Mon Sep 17 00:00:00 2001 From: KernelMaker Date: Wed, 27 Nov 2024 12:10:24 +0100 Subject: [PATCH 2/8] TTL-purging-demo: 1. fixed potential deadlock; 2. Destroy NdbEventOperation in a correct way --- .../ndbapi_ttl_purge/ttl_purge_demo.cpp | 81 +++++++++---------- .../ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 3 +- storage/ndb/src/ndbapi/NdbScanOperation.cpp | 13 +-- 3 files changed, 46 insertions(+), 51 deletions(-) diff --git a/storage/ndb/ndbapi-examples/ndbapi_ttl_purge/ttl_purge_demo.cpp b/storage/ndb/ndbapi-examples/ndbapi_ttl_purge/ttl_purge_demo.cpp index 4a4434547d48..b6d5ae418186 100644 --- a/storage/ndb/ndbapi-examples/ndbapi_ttl_purge/ttl_purge_demo.cpp +++ b/storage/ndb/ndbapi-examples/ndbapi_ttl_purge/ttl_purge_demo.cpp @@ -156,6 +156,7 @@ typedef struct { int32_t table_id; uint32_t ttl_sec; uint32_t col_no; + uint32_t part_id = {0}; // Only valid in local ttl cache char last_purged[8] = {0}; // Only valid in local ttl cache } TTLInfo; @@ -300,7 +301,7 @@ void LL2datetime(MYSQL_TIME *ltime, int64_t tmp) { ltime->time_zone_displacement = 0; } -uint32_t g_batch_size = 5; +uint32_t g_batch_size = 10; std::atomic g_purge_thread_exit = false; void SetPurgeThreadExit(bool exit) { g_purge_thread_exit = exit; @@ -689,7 +690,6 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { unsigned char now_char[8]; unsigned char buf[8]; int trx_failure_times = 0; - bool batch_done = false; bool update_objects = false; std::map::iterator iter; bool purge_trx_started = false; @@ -761,7 
+761,7 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { break; } - std::cerr << "Processing " << iter->first << ": " << std::endl; + std::cerr << "Processing " << iter->first << ":" << std::endl; pos = iter->first.find('/'); assert(pos != std::string::npos); @@ -773,7 +773,6 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { deletedRows = 0; now = 0; trx_failure_times = 0; - batch_done = false; if (myNdb->setDatabaseName(db_str.c_str()) != 0) { std::cerr << "Failed to select database: " << db_str @@ -824,6 +823,10 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { std::cerr << " Skip" << std::endl; continue; } + std::cerr << " [P" << iter->second.part_id + << "/" << ttl_tab->getPartitionCount() << "]" << std::endl; + assert(iter->second.part_id < ttl_tab->getPartitionCount()); + trx_failure_times = 0; retry_trx: trans = myNdb->startTransaction(); @@ -855,16 +858,19 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { NdbIndexScanOperation *index_scan_op = trans->getNdbIndexScanOperation(ttl_index); + index_scan_op->setPartitionId(iter->second.part_id); /* Index Scan */ - Uint32 scanFlags= NdbScanOperation::SF_OrderBy | - /*NdbScanOperation::SF_MultiRange |*/ + Uint32 scanFlags= + /*NdbScanOperation::SF_OrderBy | + *NdbScanOperation::SF_MultiRange | + */ NdbScanOperation::SF_KeyInfo | NdbScanOperation::SF_OnlyExpiredScan; if (index_scan_op->readTuples(NdbOperation::LM_Exclusive, - scanFlags - /*(Uint32) 0 // batch */ - /*(Uint32) 0 // parallel */ + scanFlags, + 1, // parallel + g_batch_size // batch ) != 0) { std::cerr << "Failed to readTuples, " << table_str << ", error: " << trans->getNdbError().code << "(" @@ -921,7 +927,6 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { goto err; } memset(buf, 0, 8); - batch_done = false; while ((check = index_scan_op->nextResult(true)) == 0) { do { memset(buf, 0, 8); @@ -943,16 +948,8 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { goto err; } 
deletedRows++; - if (deletedRows >= g_batch_size) { - batch_done = true; - break; - } } while ((check = index_scan_op->nextResult(false)) == 0); - if (check != -1) { - check = trans->execute(NdbTransaction::NoCommit); - } - if (check == -1) { std::cerr << "Failed to execute[2], " << table_str << ", error: " << trans->getNdbError().code << "(" @@ -960,9 +957,7 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { << std::endl; goto err; } - if (deletedRows >= g_batch_size) { - break; - } + break; } /** * Commit all prepared operations @@ -988,8 +983,10 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { << std::endl; goto err; } + scan_op->setPartitionId(iter->second.part_id); Uint32 scanFlags= NdbScanOperation::SF_OnlyExpiredScan; - if (scan_op->readTuples(NdbOperation::LM_Exclusive, scanFlags) != 0) { + if (scan_op->readTuples(NdbOperation::LM_Exclusive, scanFlags, + 1, g_batch_size) != 0) { std::cerr << "Failed to readTuples, " << table_str << ", error: " << trans->getNdbError().code << "(" << trans->getNdbError().message << "), retry..." 
@@ -1011,7 +1008,6 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { << std::endl; goto err; } - batch_done = false; while ((check = scan_op->nextResult(true)) == 0) { do { // std::cerr << "Get a expired row: timestamp = [" @@ -1030,17 +1026,9 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { goto err; } deletedRows++; - if (deletedRows >= g_batch_size) { - batch_done = true; - break; - } } while ((check = scan_op->nextResult(false)) == 0); - if (check != -1) { - check = trans->execute(NdbTransaction::NoCommit); - } - if (check == -1) { std::cerr << "Failed to execute[2], " << table_str << ", error: " << trans->getNdbError().code << "(" @@ -1049,9 +1037,7 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { goto err; } - if (batch_done) { - break; - } + break; } /** * Commit all prepared operations @@ -1073,6 +1059,8 @@ void PurgeTTL(Ndb_cluster_connection* cluster_connection) { myNdb->closeTransaction(trans); trans = nullptr; fprintf(stderr, " Purged %u rows\n", deletedRows); + iter->second.part_id = + ((iter->second.part_id + 1) % ttl_tab->getPartitionCount()); // Finish 1 batch // keep the ttl_tab in local table cache ? continue; @@ -1142,6 +1130,7 @@ int main(int argc, char **argv) { char event_name_buf[128]; uint32_t event_nums = 0; bool init_succ = false; + NdbEventOperation *ev_op = nullptr; NdbEventOperation *op = nullptr; std::thread purge_thread; @@ -1185,6 +1174,7 @@ int main(int argc, char **argv) { initialized_cache = false; init_succ = false; g_ttl_cache.clear(); + ev_op = nullptr; op = nullptr; do { @@ -1261,31 +1251,31 @@ int main(int argc, char **argv) { } } while (!init_succ); - if ((op = myNdb->createEventOperation(eventName)) == nullptr) { + if ((ev_op = myNdb->createEventOperation(eventName)) == nullptr) { std::cerr << "Failed to create event operation, error: " << myNdb->getNdbError().code << "(" << myNdb->getNdbError().message << "), retry... 
" << std::endl; goto err; } - op->mergeEvents(true); + ev_op->mergeEvents(true); RA_BH recAttr[noEventColumnName]; RA_BH recAttrPre[noEventColumnName]; for (int i = 0; i < noEventColumnName; i++) { if (i != 3) { - recAttr[i].ra = op->getValue(eventColumnName[i]); - recAttrPre[i].ra = op->getPreValue(eventColumnName[i]); + recAttr[i].ra = ev_op->getValue(eventColumnName[i]); + recAttrPre[i].ra = ev_op->getPreValue(eventColumnName[i]); } else { - recAttr[i].bh = op->getBlobHandle(eventColumnName[i]); - recAttrPre[i].bh = op->getPreBlobHandle(eventColumnName[i]); + recAttr[i].bh = ev_op->getBlobHandle(eventColumnName[i]); + recAttrPre[i].bh = ev_op->getPreBlobHandle(eventColumnName[i]); } } - if (op->execute()) { + if (ev_op->execute()) { std::cerr << "Failed to execute event operation, error: " - << op->getNdbError().code << "(" - << op->getNdbError().message << "), retry... " + << ev_op->getNdbError().code << "(" + << ev_op->getNdbError().message << "), retry... " << std::endl; goto err; } @@ -1673,9 +1663,10 @@ int main(int argc, char **argv) { } } err: - if (op != nullptr) { - myNdb->dropEventOperation(op); + if (ev_op != nullptr) { + myNdb->dropEventOperation(ev_op); } + ev_op = nullptr; op = nullptr; myDict->dropEvent(eventName); if (purge_thread.joinable()) { diff --git a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index 89f9a4325c78..8f0a18bacd1a 100644 --- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -16210,7 +16210,8 @@ void Dbtc::execDIH_SCAN_TAB_CONF(Signal *signal, ScanRecordPtr scanptr, { jamDebug(); ndbrequire(DictTabInfo::isOrderedIndex(tabPtr.p->tableType) || - tabPtr.p->get_user_defined_partitioning()); + tabPtr.p->get_user_defined_partitioning() || + is_ttl_table(tabPtr.p)); /** * Prepare for sendDihGetNodeReq to request DBDIH info for diff --git a/storage/ndb/src/ndbapi/NdbScanOperation.cpp 
b/storage/ndb/src/ndbapi/NdbScanOperation.cpp index 7074218af816..54630711e8a3 100644 --- a/storage/ndb/src/ndbapi/NdbScanOperation.cpp +++ b/storage/ndb/src/ndbapi/NdbScanOperation.cpp @@ -303,9 +303,11 @@ int NdbScanOperation::handleScanOptions(const ScanOptions *options) { */ if (unlikely(!(m_attribute_record->flags & NdbRecord::RecHasUserDefinedPartitioning))) { - /* Explicit partitioning info not allowed for table and operation*/ - setErrorCodeAbort(4546); - return -1; + if (!(options->scan_flags & SF_OnlyExpiredScan)) { + /* Explicit partitioning info not allowed for table and operation*/ + setErrorCodeAbort(4546); + return -1; + } } m_pruneState = SPS_FIXED; @@ -314,8 +316,9 @@ int NdbScanOperation::handleScanOptions(const ScanOptions *options) { /* And set the vars in the operation now too */ theDistributionKey = options->partitionId; theDistrKeyIndicator_ = 1; - assert((m_attribute_record->flags & - NdbRecord::RecHasUserDefinedPartitioning) != 0); + assert(((m_attribute_record->flags & + NdbRecord::RecHasUserDefinedPartitioning) != 0) || + (options->scan_flags & SF_OnlyExpiredScan)); DBUG_PRINT("info", ("NdbScanOperation::handleScanOptions(dist key): %u", theDistributionKey)); } From 0bcb6b310a9f8ad49350b07136fd97f99fcc5e5c Mon Sep 17 00:00:00 2001 From: KernelMaker Date: Thu, 28 Nov 2024 18:11:49 +0100 Subject: [PATCH 3/8] Develop TTLPurger in rest-server2 [PART-1] --- .../rest-server2/server/src/CMakeLists.txt | 1 + storage/ndb/rest-server2/server/src/main.cc | 7 + .../server/src/rdrs_rondb_connection_pool.cpp | 4 +- .../server/src/rdrs_rondb_connection_pool.hpp | 19 + .../ndb/rest-server2/server/src/ttl_purge.cpp | 808 ++++++++++++++++++ .../ndb/rest-server2/server/src/ttl_purge.hpp | 90 ++ 6 files changed, 927 insertions(+), 2 deletions(-) create mode 100644 storage/ndb/rest-server2/server/src/ttl_purge.cpp create mode 100644 storage/ndb/rest-server2/server/src/ttl_purge.hpp diff --git a/storage/ndb/rest-server2/server/src/CMakeLists.txt 
b/storage/ndb/rest-server2/server/src/CMakeLists.txt index 92ffa61bbd08..182c0cef4dca 100644 --- a/storage/ndb/rest-server2/server/src/CMakeLists.txt +++ b/storage/ndb/rest-server2/server/src/CMakeLists.txt @@ -45,6 +45,7 @@ file(GLOB_RECURSE SRC db_operations/pk/pkr_request.cpp db_operations/pk/pkr_response.cpp db_operations/ronsql/ronsql_operation.cpp + ttl_purge.cpp ) NDB_ADD_EXECUTABLE(${PROJECT_NAME} ${SRC} STATIC_NDBCLIENT) add_dependencies(${PROJECT_NAME} DROGON_IS_BUILD SIMDJSON_IS_BUILD) diff --git a/storage/ndb/rest-server2/server/src/main.cc b/storage/ndb/rest-server2/server/src/main.cc index 1a0c0ad22b36..6cefa4b7afe8 100644 --- a/storage/ndb/rest-server2/server/src/main.cc +++ b/storage/ndb/rest-server2/server/src/main.cc @@ -176,6 +176,7 @@ constexpr const char* const configHelp = #include "src/api_key.hpp" #include "src/fs_cache.hpp" #include "tls_util.hpp" +#include "src/ttl_purge.hpp" #include #include @@ -193,6 +194,7 @@ static const char* g_pidfile = nullptr; static RonDBConnection* g_rondbConnection = nullptr; static bool g_drogon_running = false; static int g_deferred_exit_code = 0; +static TTLPurger* g_ttl_purger = nullptr; NdbMutex *globalConfigsMutex = nullptr; static void do_exit(int exit_code) { @@ -225,9 +227,14 @@ static void do_exit(int exit_code) { } static void before_drogon_run() { g_drogon_running = true; + g_ttl_purger = TTLPurger::CreateTTLPurger(); + g_ttl_purger->Run(); } static void after_drogon_run() { g_drogon_running = false; + if (g_ttl_purger != nullptr) { + delete g_ttl_purger; + } if (g_deferred_exit_code != 0) { do_exit(g_deferred_exit_code); } diff --git a/storage/ndb/rest-server2/server/src/rdrs_rondb_connection_pool.cpp b/storage/ndb/rest-server2/server/src/rdrs_rondb_connection_pool.cpp index 59d5bfb51204..b6fd0ccaee5c 100644 --- a/storage/ndb/rest-server2/server/src/rdrs_rondb_connection_pool.cpp +++ b/storage/ndb/rest-server2/server/src/rdrs_rondb_connection_pool.cpp @@ -120,7 +120,7 @@ void 
RDRSRonDBConnectionPool::shutdown() { RS_Status RDRSRonDBConnectionPool::Init(Uint32 numThreads, Uint32 numClusterConnections) { - m_num_threads = numThreads; + m_num_threads = numThreads + kNoTTLPurgeThreads; m_num_data_connections = numClusterConnections; m_thread_context = (ThreadContext**) @@ -135,7 +135,7 @@ RS_Status RDRSRonDBConnectionPool::Init(Uint32 numThreads, 0, sizeof(RDRSRonDBConnection**) * m_num_data_connections); - for (Uint32 i = 0; i < numThreads; i++) { + for (Uint32 i = 0; i < m_num_threads; i++) { m_thread_context[i] = new ThreadContext(); check_startup(m_thread_context[i] != nullptr); } diff --git a/storage/ndb/rest-server2/server/src/rdrs_rondb_connection_pool.hpp b/storage/ndb/rest-server2/server/src/rdrs_rondb_connection_pool.hpp index abf0553386f2..ae3e1096f1af 100644 --- a/storage/ndb/rest-server2/server/src/rdrs_rondb_connection_pool.hpp +++ b/storage/ndb/rest-server2/server/src/rdrs_rondb_connection_pool.hpp @@ -40,6 +40,7 @@ class RDRSRonDBConnectionPool { RDRSRonDBConnection **dataConnections; RDRSRonDBConnection *metadataConnection; ThreadContext **m_thread_context; + static const Uint32 kNoTTLPurgeThreads = 2; Uint32 m_num_threads; Uint32 m_num_data_connections; bool is_shutdown = true; @@ -127,6 +128,15 @@ class RDRSRonDBConnectionPool { RS_Status GetNdbObject(Ndb **ndb_object, Uint32 threadIndex); + // Get the specific NdbObject to the TTL schema watcher + RS_Status GetTTLSchemaWatcherNdbObject(Ndb **ndb_object) { + return GetNdbObject(ndb_object, m_num_threads - 2); + } + + // Get the specific NdbObject to the TTL purge worker + RS_Status GetTTLPurgeWorkerNdbObject(Ndb **ndb_object) { + return GetNdbObject(ndb_object, m_num_threads - 1); + } /** * @brief Return NDB Object back to the pool * @@ -139,6 +149,15 @@ class RDRSRonDBConnectionPool { RS_Status *status, Uint32 threadIndex); + RS_Status ReturnTTLSchemaWatcherNdbObject(Ndb *ndb_object, + RS_Status *status) { + return ReturnNdbObject(ndb_object, status, m_num_threads - 2); 
+ } + + RS_Status ReturnTTLPurgeWorkerNdbObject(Ndb *ndb_object, + RS_Status *status) { + return ReturnNdbObject(ndb_object, status, m_num_threads - 1); + } /** * @brief Get ndb object for metadata operation * diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.cpp b/storage/ndb/rest-server2/server/src/ttl_purge.cpp new file mode 100644 index 000000000000..ec6200692d57 --- /dev/null +++ b/storage/ndb/rest-server2/server/src/ttl_purge.cpp @@ -0,0 +1,808 @@ +/* + * Copyright (C) 2024 Hopsworks AB + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + * USA. + */ +#include "src/rdrs_rondb_connection_pool.hpp" +#include "src/ttl_purge.hpp" +#include "src/status.hpp" +#include "storage/ndb/plugin/ndb_schema_dist.h" + +#include +extern EventLogger *g_eventLogger; +#ifdef DEBUG_EVENT +#define DEB_EVENT(...) do { g_eventLogger->info(__VA_ARGS__); } while (0) +#else +#define DEB_EVENT(...) 
do { } while (0) +#endif + +TTLPurger::TTLPurger() : + ndb_(nullptr), exit_(false), cache_updated_(false), + purge_worker_asks_for_retry_(false), + schema_watcher_running_(false), schema_watcher_(), + purge_worker_running_(false), purge_worker_() { +} + +extern RDRSRonDBConnectionPool *rdrsRonDBConnectionPool; +TTLPurger::~TTLPurger() { + exit_ = true; + if (schema_watcher_running_ && schema_watcher_->joinable()) { + schema_watcher_->join(); + schema_watcher_ = nullptr; + schema_watcher_running_ = false; + if (ndb_) { + RS_Status status; + rdrsRonDBConnectionPool->ReturnTTLSchemaWatcherNdbObject(ndb_, &status); + } + } +} + +bool TTLPurger::Init() { + RS_Status status = rdrsRonDBConnectionPool-> + GetTTLSchemaWatcherNdbObject(&ndb_); + if (status.http_code != SUCCESS) { + ndb_ = nullptr; + return false; + } + return true; +} + +TTLPurger* TTLPurger::CreateTTLPurger() { + TTLPurger* ttl_purger = new TTLPurger(); + if (!ttl_purger->Init()) { + delete ttl_purger; + ttl_purger = nullptr; + } + return ttl_purger; +} + +static constexpr int NDB_INVALID_SCHEMA_OBJECT = 241; +void TTLPurger::SchemaWatcherJob() { + bool init_event_succ = false; + NdbDictionary::Dictionary* dict = nullptr; + const NdbDictionary::Table* schema_tab = nullptr; + const NdbDictionary::Table* schema_res_tab = nullptr; + NdbEventOperation* ev_op = nullptr; + NdbEventOperation* op = nullptr; + NdbDictionary::Dictionary::List list; + const char* message_buf = "API_OK"; + Uint32 event_nums = 0; + [[maybe_unused]] char event_name_buf[128]; + char slock_buf_pre[32]; + char slock_buf[32]; + +retry: + init_event_succ = false; + dict = nullptr; + schema_tab = nullptr; + schema_res_tab = nullptr; + ev_op = nullptr; + op = nullptr; + // Init event + do { + if (ndb_ == nullptr) { + RS_Status status = rdrsRonDBConnectionPool-> + GetTTLSchemaWatcherNdbObject(&ndb_); + if (status.http_code != SUCCESS) { + g_eventLogger->warning("[TTL SWatcher] Failed to get NdbObject. 
Retry"); + goto err; + } + } + + if (ndb_->setDatabaseName(kSystemDBName) != 0) { + g_eventLogger->warning("[TTL SWatcher] Failed to select system database: " + "%s, error: %d(%s). Retry...", + kSystemDBName, + ndb_->getNdbError().code, + ndb_->getNdbError().message); + goto err; + } + + dict = ndb_->getDictionary(); + schema_tab = dict->getTable(kSchemaTableName); + if (schema_tab == nullptr) { + g_eventLogger->warning("[TTL SWatcher] Failed to get system table: %s" + ", error: %d(%s). Retry...", + kSchemaTableName, + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + schema_res_tab = dict->getTable(kSchemaResTabName); + if (schema_res_tab == nullptr) { + g_eventLogger->warning("[TTL SWatcher] Failed to get system table: %s" + ", error: %d(%s). Retry...", + kSchemaResTabName, + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + + NdbDictionary::Event my_event(kSchemaEventName); + my_event.setTable(*schema_tab); + my_event.addTableEvent(NdbDictionary::Event::TE_ALL); + my_event.mergeEvents(true); + my_event.setReportOptions(NdbDictionary::Event::ER_ALL | + NdbDictionary::Event::ER_SUBSCRIBE | + NdbDictionary::Event::ER_DDL); + const int n_cols = schema_tab->getNoOfColumns(); + for (int i = 0; i < n_cols; i++) { + my_event.addEventColumn(i); + } + + if (dict->createEvent(my_event)) { + if (dict->getNdbError().classification != NdbError::SchemaObjectExists) { + g_eventLogger->warning("[TTL SWatcher] Failed to create event" + ", error: %d(%s). Retry...", + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + } + NdbDictionary::Event_ptr ev(dict->getEvent(kSchemaEventName)); + if (ev) { + init_event_succ = true; + } else { + if (dict->getNdbError().code == NDB_INVALID_SCHEMA_OBJECT && + dict->dropEvent(my_event.getName(), 1)) { + g_eventLogger->warning("[TTL SWatcher] Failed to drop the old event" + ", error: %d(%s). 
Retry...", + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + g_eventLogger->warning("[TTL SWatcher] Failed to get the event" + ", error: %d(%s). " + "Dropped the old one and retry...", + dict->getNdbError().code, + dict->getNdbError().message); + } + } while (!exit_ && !init_event_succ); + + // Create event operation + if ((ev_op = ndb_->createEventOperation(kSchemaEventName)) == nullptr) { + g_eventLogger->warning("[TTL SWatcher] Failed to create event operation" + ", error: %d(%s). Retry...", + ndb_->getNdbError().code, + ndb_->getNdbError().message); + goto err; + } + ev_op->mergeEvents(true); + typedef union { + NdbRecAttr* ra; + NdbBlob* bh; + } RA_BH; + RA_BH rec_attr_pre[kNoEventCol]; + RA_BH rec_attr[kNoEventCol]; + for (int i = 0; i < kNoEventCol; i++) { + if (i != 3) { + rec_attr_pre[i].ra = ev_op->getPreValue(kEventColNames[i]); + rec_attr[i].ra = ev_op->getValue(kEventColNames[i]); + } else { + rec_attr_pre[i].bh = ev_op->getPreBlobHandle(kEventColNames[i]); + rec_attr[i].bh = ev_op->getBlobHandle(kEventColNames[i]); + } + } + if (ev_op->execute()) { + g_eventLogger->warning("[TTL SWatcher] Failed to execute event operation" + ", error: %d(%s). Retry...", + ev_op->getNdbError().code, + ev_op->getNdbError().message); + goto err; + } + + // Fetch tables + ttl_cache_.clear(); + if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) { + g_eventLogger->warning("[TTL SWatcher] Failed to list objects" + ", error: %d(%s). 
Retry...", + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + for (uint i = 0; i < list.count; i++) { + NdbDictionary::Dictionary::List::Element& elmt = list.elements[i]; + + const char* db_str = elmt.database; + assert(elmt.schema == std::string("def")); // always "/def/" + const char *table_str = elmt.name; + if (strcmp(db_str, "mysql") == 0) { + continue; + } + if (ndb_->setDatabaseName(db_str) != 0) { + g_eventLogger->warning("[TTL SWatcher] Failed to select database: %s" + ", error: %d(%s). Retry...", + db_str, + ndb_->getNdbError().code, + ndb_->getNdbError().message); + goto err; + } + const NdbDictionary::Table* tab = dict->getTable( + table_str); + if (tab == nullptr) { + g_eventLogger->warning("[TTL SWatcher] Failed to get table: %s" + ", error: %d(%s). Retry...", + table_str, + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + UpdateLocalCache(db_str, table_str, tab); + } + + // TODO(Zhao): start purge worker + + // Main schema_watcher_ task + while (!exit_) { + int res = ndb_->pollEvents(1000); // wait for event or 1000 ms + if (res > 0) { + while ((op = ndb_->nextEvent())) { + if (op->hasError()) { + std::cerr << "Get an event error, " << op->getNdbError().code + << "(" << op->getNdbError().message + << ") on handling ndb_schema event, retry..." + << std::endl; + g_eventLogger->warning("[TTL SWatcher] Get an event error on " + "handling event" + ", error: %d(%s). 
Retry...", + op->getNdbError().code, + op->getNdbError().message); + goto err; + } + event_nums++; + DEB_EVENT("EVENT [%u]: %s, GCI = %llu", + event_nums, + GetEventName(op->getEventType(), event_name_buf), + op->getGCI()); + char* ptr_pre = nullptr; + char* ptr = nullptr; + std::string db_str_pre; + std::string db_str; + std::string table_str_pre; + std::string table_str; + std::string query_str_pre; + std::string query_str; + Uint32 node_id = 0; + Uint32 type = 0; + [[maybe_unused]] Uint32 id = 0; + Uint32 schema_op_id = 0; + NdbTransaction* trans = nullptr; + NdbOperation* top = nullptr; + bool clear_slock = false; + bool trx_succ = false; + Uint32 trx_failure_times = 0; + bool cache_updated = false; + DEB_EVENT("----------------------------"); + switch (op->getEventType()) { + case NdbDictionary::Event::TE_CLUSTER_FAILURE: + case NdbDictionary::Event::TE_CREATE: + case NdbDictionary::Event::TE_ALTER: + case NdbDictionary::Event::TE_DROP: + case NdbDictionary::Event::TE_STOP: + case NdbDictionary::Event::TE_INCONSISTENT: + case NdbDictionary::Event::TE_OUT_OF_MEMORY: + // Retry from beginning + goto err; + case NdbDictionary::Event::TE_INSERT: + case NdbDictionary::Event::TE_UPDATE: + case NdbDictionary::Event::TE_DELETE: + for (int l = 0; l < kNoEventCol; l++) { + ptr_pre = rec_attr_pre[l].ra->aRef(); + ptr = rec_attr[l].ra->aRef(); + switch (l) { + case 0: + db_str_pre = std::string(ptr_pre + 1, + rec_attr_pre[l].ra->u_8_value()); + db_str = std::string(ptr + 1, + rec_attr[l].ra->u_8_value()); + DEB_EVENT(" db: %s[%u] -> %s[%u]", + db_str_pre.c_str(), + rec_attr_pre[l].ra->u_8_value(), + db_str.c_str(), + rec_attr[l].ra->u_8_value()); + break; + case 1: + table_str_pre = std::string(ptr_pre + 1, + rec_attr_pre[l].ra->u_8_value()); + table_str = std::string(ptr + 1, + rec_attr[l].ra->u_8_value()); + DEB_EVENT(" table: %s[%u] -> %s[%u]", + db_str_pre.c_str(), + rec_attr_pre[l].ra->u_8_value(), + db_str.c_str(), + rec_attr[l].ra->u_8_value()); + break; + case 2: 
+ { + std::string info_buf; + memset(slock_buf_pre, 0, 32); + memcpy(slock_buf_pre, rec_attr_pre[l].ra->aRef(), 32); + info_buf = " slock: "; + for (int i = 0; i < 32; i++) { + info_buf += std::to_string( + static_cast(slock_buf_pre[i])); + info_buf += " "; + } + DEB_EVENT("%s", info_buf.c_str()); + info_buf = " ->"; + memset(slock_buf, 0, 32); + memcpy(slock_buf, rec_attr[l].ra->aRef(), 32); + for (int i = 0; i < 32; i++) { + info_buf += std::to_string( + static_cast(slock_buf[i])); + info_buf += " "; + } + DEB_EVENT("%s", info_buf.c_str()); + } + break; + case 3: + { + int blob_is_null = 0; + Uint64 blob_len = 0; + rec_attr_pre[l].bh->getNull(blob_is_null); + rec_attr_pre[l].bh->getLength(blob_len); + if (blob_is_null == 0 && blob_len != 0) { + Uint32 read_len = static_cast(blob_len); + query_str_pre.resize(read_len, '\0'); + rec_attr_pre[l].bh->readData(query_str_pre.data(), + read_len); + DEB_EVENT(" query: [%llu]%s", + blob_len, + query_str_pre.c_str()); + } else { + DEB_EVENT(" query: [0]"); + } + DEB_EVENT(" ->"); + blob_is_null = 0; + blob_len = 0; + rec_attr[l].bh->getNull(blob_is_null); + rec_attr[l].bh->getLength(blob_len); + if (blob_is_null == 0 && blob_len != 0) { + Uint32 read_len = static_cast(blob_len); + query_str.resize(read_len, '\0'); + rec_attr[l].bh->readData(query_str.data(), read_len); + DEB_EVENT(" [%llu]%s", + blob_len, + query_str.c_str()); + } else { + DEB_EVENT(" [0]"); + } + break; + } + case 4: + node_id = rec_attr[l].ra->u_32_value(); + DEB_EVENT(" node_id: %u -> %u", + rec_attr_pre[l].ra->u_32_value(), + node_id); + break; + case 5: + DEB_EVENT(" epoch: %u -> %u", + rec_attr_pre[l].ra->u_32_value(), + rec_attr[l].ra->u_32_value()); + break; + case 6: + id = rec_attr[l].ra->u_32_value(); + DEB_EVENT(" id: %u -> %u", + rec_attr_pre[l].ra->u_32_value(), + id); + break; + case 7: + DEB_EVENT(" version: %u -> %u", + rec_attr_pre[l].ra->u_32_value(), + rec_attr[l].ra->u_32_value()); + break; + case 8: + // SCHEMA_OP_TYPE + type = 
rec_attr[l].ra->u_32_value(); + DEB_EVENT(" type: %u -> %u", + rec_attr_pre[l].ra->u_32_value(), + type); + break; + case 9: + schema_op_id = rec_attr[l].ra->u_32_value(); + DEB_EVENT(" schema_op_id: %u -> %u", + rec_attr_pre[l].ra->u_32_value(), + schema_op_id); + break; + default: + break; + } + } + DEB_EVENT("----------------------------"); + + // Check event and update local cache in nessary + clear_slock = false; + cache_updated = false; + switch (type) { + case SCHEMA_OP_TYPE::SOT_RENAME_TABLE: + { + std::string new_table_str; + auto pos = query_str_pre.find(db_str); + if (pos != std::string::npos) { + pos += db_str.length(); + assert(query_str_pre.at(pos) == '/'); + pos += 1; + new_table_str = query_str_pre.substr(pos); + } + if (ndb_->setDatabaseName(db_str.c_str()) != 0) { + g_eventLogger->warning("[TTL SWatcher] Failed to select " + "database: %s" + ", error: %d(%s). Retry...", + db_str.c_str(), + ndb_->getNdbError().code, + ndb_->getNdbError().message); + goto err; + } + dict->invalidateTable(table_str.c_str()); + const NdbDictionary::Table* tab = dict->getTable( + new_table_str.c_str()); + if (tab == nullptr) { + g_eventLogger->warning("[TTL SWatcher] Failed to get table:" + " %s, error: %d(%s). 
Retry...", + new_table_str.c_str(), + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + const std::lock_guard lock(mutex_); + cache_updated = UpdateLocalCache(db_str, table_str, + new_table_str, tab); + break; + } + case SCHEMA_OP_TYPE::SOT_DROP_TABLE: + { + const std::lock_guard lock(mutex_); + cache_updated = UpdateLocalCache(db_str, table_str, nullptr); + break; + } + case SCHEMA_OP_TYPE::SOT_DROP_DB: + { + const std::lock_guard lock(mutex_); + cache_updated = DropDBLocalCache(db_str); + break; + } + case SCHEMA_OP_TYPE::SOT_CREATE_TABLE: + case SCHEMA_OP_TYPE::SOT_ALTER_TABLE_COMMIT: + case SCHEMA_OP_TYPE::SOT_ONLINE_ALTER_TABLE_COMMIT: + { + if (ndb_->setDatabaseName(db_str.c_str()) != 0) { + g_eventLogger->warning("[TTL SWatcher] Failed to select " + "database: %s" + ", error: %d(%s). Retry...", + db_str.c_str(), + ndb_->getNdbError().code, + ndb_->getNdbError().message); + goto err; + } + dict->invalidateTable(table_str.c_str()); + const NdbDictionary::Table* tab = dict->getTable( + table_str.c_str()); + if (tab == nullptr) { + g_eventLogger->warning("[TTL SWatcher] Failed to get table:" + " %s, error: %d(%s). Retry...", + table_str.c_str(), + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + const std::lock_guard lock(mutex_); + cache_updated = UpdateLocalCache(db_str, table_str, tab); + break; + } + case SCHEMA_OP_TYPE::SOT_CLEAR_SLOCK: + clear_slock = true; + break; + default: + break; + } + + // Only purge worker can set cache_updated_ to false; + if (cache_updated) { + cache_updated_ = true; + } + + if (clear_slock) { + continue; + } + + trx_succ = false; + trx_failure_times = 0; + do { + trans = ndb_->startTransaction(); + if (trans == nullptr) { + g_eventLogger->warning("[TTL SWatcher] Failed to start " + "transaction" + ", error: %d(%s). 
Retry...", + ndb_->getNdbError().code, + ndb_->getNdbError().message); + goto trx_err; + } + top = trans->getNdbOperation(schema_res_tab); + if (top == nullptr) { + g_eventLogger->warning("[TTL SWatcher] Failed to get the Ndb " + "operation" + ", error: %d(%s). Retry...", + trans->getNdbError().code, + trans->getNdbError().message); + goto trx_err; + } + if (top->insertTuple() != 0 || + /*Ndb_schema_result_table::COL_NODEID*/ + top->equal("nodeid", node_id) != 0 || + /*Ndb_schema_result_table::COL_SCHEMA_OP_ID*/ + top->equal("schema_op_id", schema_op_id) != 0 || + /*Ndb_schema_result_table::COL_PARTICIPANT_NODEID*/ + top->equal("participant_nodeid", ndb_->getNodeId()) != 0 || + /*Ndb_schema_result_table::COL_RESULT*/ + top->setValue("result", 0) != 0 || + /*Ndb_schema_result_table::COL_MESSAGE*/ + top->setValue("message", message_buf) != 0) { + std::cerr << "Failed to insert tuple, error: " + << top->getNdbError().code << "(" + << top->getNdbError().message << "), retry..." + << std::endl; + g_eventLogger->warning("[TTL SWatcher] Failed to insert tuple " + ", error: %d(%s). Retry...", + top->getNdbError().code, + top->getNdbError().message); + goto trx_err; + } + if (trans->execute(NdbTransaction::Commit, + NdbOperation::DefaultAbortOption, + 1 /*force send*/) != 0) { + g_eventLogger->warning("[TTL SWatcher] Failed to the execute " + "transaction" + ", error: %d(%s). 
Retry...", + trans->getNdbError().code, + trans->getNdbError().message); + goto trx_err; + } else { + trx_succ = true; + } +trx_err: + if (trans != nullptr) { + ndb_->closeTransaction(trans); + } + if (!trx_succ) { + trx_failure_times++; + if (trx_failure_times > 10) { + goto err; + } else { + sleep(1); + } + } + } while (!trx_succ); + break; + default: + break; + } + } + } else if (purge_worker_asks_for_retry_) { + g_eventLogger->warning("[TTL SWatcher] Purge worker asks for retry"); + purge_worker_asks_for_retry_ = false; + goto err; + } else if (res < 0) { + std::cerr << "Failed to poll event, error: " + << ndb_->getNdbError().code << "(" + << ndb_->getNdbError().message << "), retry..." + << std::endl; + g_eventLogger->warning("[TTL SWatcher] Failed to poll event " + ", error: %d(%s). Retry...", + ndb_->getNdbError().code, + ndb_->getNdbError().message); + goto err; + } + } +err: + if (ev_op != nullptr) { + ndb_->dropEventOperation(ev_op); + } + ev_op = nullptr; + op = nullptr; + if (dict != nullptr) { + dict->dropEvent(kSchemaEventName); + } + // TODO(Zhao): stop purge worker + RS_Status status; + rdrsRonDBConnectionPool->ReturnTTLSchemaWatcherNdbObject(ndb_, &status); + ndb_ = nullptr; + if (!exit_) { + sleep(2); + goto retry; + } + return; +} + +bool TTLPurger::UpdateLocalCache(const std::string& db, + const std::string& table, + const NdbDictionary::Table* tab) { + bool updated = false; + auto iter = ttl_cache_.find(db + "/" + table); + if (tab != nullptr) { + if (iter != ttl_cache_.end()) { + if (tab->isTTLEnabled()) { + assert(iter->second.table_id == tab->getTableId()); + std::cerr << "Update TTL of table " << db + "/" + table + << " in cache: [" << iter->second.table_id + << ", " << iter->second.ttl_sec + << ", " << iter->second.col_no + << "] -> [" << tab->getTableId() + << ", " << tab->getTTLSec() + << ", " << tab->getTTLColumnNo() + << "]" << std::endl; + g_eventLogger->info("[TTL SWatcher] Update TTL of table %s.%s " + "in cache: [%u, %u@%u] -> [%u, 
%u@%u]", + db.c_str(), table.c_str(), + iter->second.table_id, iter->second.ttl_sec, + iter->second.col_no, + tab->getTableId(), tab->getTTLSec(), + tab->getTTLColumnNo()); + iter->second.ttl_sec = tab->getTTLSec(); + iter->second.col_no = tab->getTTLColumnNo(); + } else { + g_eventLogger->info("[TTL SWatcher] Remove[1] TTL of table %s.%s " + "in cache: [%u, %u@%u]", + db.c_str(), table.c_str(), iter->second.table_id, + iter->second.ttl_sec, iter->second.col_no); + ttl_cache_.erase(iter); + } + updated = true; + } else { + if (tab->isTTLEnabled()) { + g_eventLogger->info("[TTL SWatcher] Insert TTL of table %s.%s " + "in cache: [%u, %u@%u]", + db.c_str(), table.c_str(), tab->getTableId(), + tab->getTTLSec(), tab->getTTLColumnNo()); + ttl_cache_.insert({db + "/" + table, {tab->getTableId(), + tab->getTTLSec(), tab->getTTLColumnNo()}}); + updated = true; + } else { + // check mysql.ttl_purge_nodes + // TODO(zhao): handle ttl_purge_tables as well + if (db == kSystemDBName && table == kTTLPurgeNodesTabName) { + updated = true; + } + } + } + } else { + if (iter != ttl_cache_.end()) { + std::cerr << "Remove[2] TTL of table " << db + "/" + table + << " in cache: [" << iter->second.table_id + << ", " << iter->second.ttl_sec + << ", " << iter->second.col_no + << "]" << std::endl; + g_eventLogger->info("[TTL SWatcher] Remove[2] TTL of table %s.%s " + "in cache: [%u, %u@%u]", + db.c_str(), table.c_str(), iter->second.table_id, + iter->second.ttl_sec, iter->second.col_no); + ttl_cache_.erase(iter); + updated = true; + } else { + // check mysql.ttl_purge_nodes + // TODO(zhao): handle ttl_purge_tables as well + if (db == kSystemDBName && table == kTTLPurgeNodesTabName) { + updated = true; + } + } + } + return updated; +} + +bool TTLPurger::UpdateLocalCache(const std::string& db, + const std::string& table, + const std::string& new_table, + const NdbDictionary::Table* tab) { + // 1. Remove old table + bool ret = UpdateLocalCache(db, table, nullptr); + assert(ret); + // 2. 
Insert new table + ret = UpdateLocalCache(db, new_table, tab); + assert(ret); + return ret; +} + +char* TTLPurger::GetEventName(NdbDictionary::Event::TableEvent event_type, + char* name_buf) { + switch (event_type) { + case NdbDictionary::Event::TE_INSERT: + strcpy(name_buf, "TE_INSERT"); + break; + case NdbDictionary::Event::TE_DELETE: + strcpy(name_buf, "TE_DELETE"); + break; + case NdbDictionary::Event::TE_UPDATE: + strcpy(name_buf, "TE_UPDATE"); + break; + case NdbDictionary::Event::TE_SCAN: + strcpy(name_buf, "TE_SCAN"); + break; + case NdbDictionary::Event::TE_DROP: + strcpy(name_buf, "TE_DROP"); + break; + case NdbDictionary::Event::TE_ALTER: + strcpy(name_buf, "TE_ALTER"); + break; + case NdbDictionary::Event::TE_CREATE: + strcpy(name_buf, "TE_CREATE"); + break; + case NdbDictionary::Event::TE_GCP_COMPLETE: + strcpy(name_buf, "TE_GCP_COMPLETE"); + break; + case NdbDictionary::Event::TE_CLUSTER_FAILURE: + strcpy(name_buf, "TE_CLUSTER_FAILURE"); + break; + case NdbDictionary::Event::TE_STOP: + strcpy(name_buf, "TE_STOP"); + break; + case NdbDictionary::Event::TE_NODE_FAILURE: + strcpy(name_buf, "TE_NODE_FAILURE"); + break; + case NdbDictionary::Event::TE_SUBSCRIBE: + strcpy(name_buf, "TE_SUBSCRIBE"); + break; + case NdbDictionary::Event::TE_UNSUBSCRIBE: + strcpy(name_buf, "TE_UNSUBSCRIBE"); + break; + case NdbDictionary::Event::TE_EMPTY: + strcpy(name_buf, "TE_EMPTY"); + break; + case NdbDictionary::Event::TE_INCONSISTENT: + strcpy(name_buf, "TE_INCONSISTENT"); + break; + case NdbDictionary::Event::TE_OUT_OF_MEMORY: + strcpy(name_buf, "TE_OUT_OF_MEMEORY"); + break; + case NdbDictionary::Event::TE_ALL: + strcpy(name_buf, "TE_ALL"); + break; + default: + strcpy(name_buf, "UNKNOWN"); + break; + } + return name_buf; +} + +bool TTLPurger::DropDBLocalCache(const std::string& db_str) { + bool updated = false; + for (auto iter = ttl_cache_.begin(); iter != ttl_cache_.end();) { + auto pos = iter->first.find('/'); + if (pos != std::string::npos) { + std::string db = 
iter->first.substr(0, pos); + if (db == db_str) { + g_eventLogger->info("[TTL SWatcher] Remove[3] TTL of table %s " + "in cache: [%u, %u@%u]", + iter->first.c_str(), iter->second.table_id, + iter->second.ttl_sec, iter->second.col_no); + iter = ttl_cache_.erase(iter); + updated = true; + continue; + } + } + iter++; + } + return updated; +} + +bool TTLPurger::Run() { + if (!schema_watcher_running_) { + assert(schema_watcher_ == nullptr); + assert(!purge_worker_running_); + schema_watcher_ = new std::thread( + std::bind(&TTLPurger::SchemaWatcherJob, this)); + schema_watcher_running_ = true; + } + return true; +} diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.hpp b/storage/ndb/rest-server2/server/src/ttl_purge.hpp new file mode 100644 index 000000000000..7bae095567fa --- /dev/null +++ b/storage/ndb/rest-server2/server/src/ttl_purge.hpp @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2024 Hopsworks AB + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, + * USA. 
+ */ + +#ifndef STORAGE_NDB_REST_SERVER2_SERVER_SRC_TTL_PURGE_HPP_ +#define STORAGE_NDB_REST_SERVER2_SERVER_SRC_TTL_PURGE_HPP_ + +#include +#include +#include +#include + +#include + +class TTLPurger { + public: + static constexpr const char* kSchemaEventName = "REPL$mysql/ndb_schema"; + static constexpr const char* kSystemDBName = "mysql"; + static constexpr const char* kSchemaTableName = "ndb_schema"; + static constexpr const char* kSchemaResTabName = "ndb_schema_result"; + static constexpr const char* kTTLPurgeNodesTabName = "ttl_purge_nodes"; + static constexpr int kNoEventCol = 10; + static constexpr const char* kEventColNames[kNoEventCol] = { + "db", + "name", + "slock", + "query", + "node_id", + "epoch", + "id", + "version", + "type", + "schema_op_id" + }; + bool Init(); + static TTLPurger* CreateTTLPurger(); + bool Run(); + ~TTLPurger(); + + private: + TTLPurger(); + void SchemaWatcherJob(); + Ndb* ndb_; + std::atomic exit_; + + typedef struct { + int32_t table_id; + uint32_t ttl_sec; + uint32_t col_no; + uint32_t part_id = {0}; // Only valid in local ttl cache + char last_purged[8] = {0}; // Only valid in local ttl cache + } TTLInfo; + std::map ttl_cache_; + std::mutex mutex_; + std::atomic cache_updated_; + bool UpdateLocalCache(const std::string& db, + const std::string& table, + const NdbDictionary::Table* tab); + bool UpdateLocalCache(const std::string& db, + const std::string& table, + const std::string& new_table, + const NdbDictionary::Table* tab); + static char* GetEventName( + NdbDictionary::Event::TableEvent event_type, + char* name_buf); + bool DropDBLocalCache(const std::string& db_str); + + std::atomic purge_worker_asks_for_retry_; + bool schema_watcher_running_; + std::thread* schema_watcher_; + bool purge_worker_running_; + std::thread* purge_worker_; +}; + +#endif // STORAGE_NDB_REST_SERVER2_SERVER_SRC_TTL_PURGE_HPP_ From eaada182aff4774a6ed59ee4ea935d0ac1815f7c Mon Sep 17 00:00:00 2001 From: KernelMaker Date: Fri, 29 Nov 2024 21:04:42 
+0100 Subject: [PATCH 4/8] Develop TTLPurger in rest-server2 [PART-2] --- .../ndb/rest-server2/server/src/ttl_purge.cpp | 952 ++++++++++++++++-- .../ndb/rest-server2/server/src/ttl_purge.hpp | 17 +- 2 files changed, 898 insertions(+), 71 deletions(-) diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.cpp b/storage/ndb/rest-server2/server/src/ttl_purge.cpp index ec6200692d57..47b9a106f1af 100644 --- a/storage/ndb/rest-server2/server/src/ttl_purge.cpp +++ b/storage/ndb/rest-server2/server/src/ttl_purge.cpp @@ -16,10 +16,15 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, * USA. */ +#include +#include + #include "src/rdrs_rondb_connection_pool.hpp" #include "src/ttl_purge.hpp" #include "src/status.hpp" #include "storage/ndb/plugin/ndb_schema_dist.h" +#include "include/my_systime.h" +#include "include/my_time.h" #include extern EventLogger *g_eventLogger; @@ -30,31 +35,44 @@ extern EventLogger *g_eventLogger; #endif TTLPurger::TTLPurger() : - ndb_(nullptr), exit_(false), cache_updated_(false), + watcher_ndb_(nullptr), worker_ndb_(nullptr), + exit_(false), cache_updated_(false), purge_worker_asks_for_retry_(false), - schema_watcher_running_(false), schema_watcher_(), - purge_worker_running_(false), purge_worker_() { + schema_watcher_running_(false), schema_watcher_(nullptr), + purge_worker_running_(false), purge_worker_(nullptr), + purge_worker_exit_(false) { } extern RDRSRonDBConnectionPool *rdrsRonDBConnectionPool; TTLPurger::~TTLPurger() { exit_ = true; - if (schema_watcher_running_ && schema_watcher_->joinable()) { - schema_watcher_->join(); + if (schema_watcher_running_) { + assert(schema_watcher_ != nullptr); + if (schema_watcher_->joinable()) { + schema_watcher_->join(); + } schema_watcher_ = nullptr; schema_watcher_running_ = false; - if (ndb_) { - RS_Status status; - rdrsRonDBConnectionPool->ReturnTTLSchemaWatcherNdbObject(ndb_, &status); - } } + assert(purge_worker_exit_ == true && purge_worker_ == nullptr && + 
purge_worker_running_ == false); } bool TTLPurger::Init() { RS_Status status = rdrsRonDBConnectionPool-> - GetTTLSchemaWatcherNdbObject(&ndb_); + GetTTLSchemaWatcherNdbObject(&watcher_ndb_); + if (status.http_code != SUCCESS) { + watcher_ndb_ = nullptr; + return false; + } + + status = rdrsRonDBConnectionPool-> + GetTTLPurgeWorkerNdbObject(&worker_ndb_); if (status.http_code != SUCCESS) { - ndb_ = nullptr; + worker_ndb_ = nullptr; + rdrsRonDBConnectionPool->ReturnTTLSchemaWatcherNdbObject( + watcher_ndb_, &status); + watcher_ndb_ = nullptr; return false; } return true; @@ -84,7 +102,9 @@ void TTLPurger::SchemaWatcherJob() { char slock_buf_pre[32]; char slock_buf[32]; + g_eventLogger->info("[TTL SWatcher] Started"); retry: + g_eventLogger->info("[TTL SWatcher] retry from here"); init_event_succ = false; dict = nullptr; schema_tab = nullptr; @@ -93,25 +113,37 @@ void TTLPurger::SchemaWatcherJob() { op = nullptr; // Init event do { - if (ndb_ == nullptr) { + if (watcher_ndb_ == nullptr) { RS_Status status = rdrsRonDBConnectionPool-> - GetTTLSchemaWatcherNdbObject(&ndb_); + GetTTLSchemaWatcherNdbObject(&watcher_ndb_); if (status.http_code != SUCCESS) { - g_eventLogger->warning("[TTL SWatcher] Failed to get NdbObject. Retry"); + g_eventLogger->warning("[TTL SWatcher] Failed to get schema " + "watcher's NdbObject. Retry..."); + watcher_ndb_ = nullptr; + goto err; + } + } + if (worker_ndb_ == nullptr) { + RS_Status status = rdrsRonDBConnectionPool-> + GetTTLPurgeWorkerNdbObject(&worker_ndb_); + if (status.http_code != SUCCESS) { + g_eventLogger->warning("[TTL SWatcher] Failed to get purge " + "worker's NdbObject. Retry..."); + worker_ndb_ = nullptr; goto err; } } - if (ndb_->setDatabaseName(kSystemDBName) != 0) { + if (watcher_ndb_->setDatabaseName(kSystemDBName) != 0) { g_eventLogger->warning("[TTL SWatcher] Failed to select system database: " "%s, error: %d(%s). 
Retry...", kSystemDBName, - ndb_->getNdbError().code, - ndb_->getNdbError().message); + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); goto err; } - dict = ndb_->getDictionary(); + dict = watcher_ndb_->getDictionary(); schema_tab = dict->getTable(kSchemaTableName); if (schema_tab == nullptr) { g_eventLogger->warning("[TTL SWatcher] Failed to get system table: %s" @@ -173,11 +205,12 @@ void TTLPurger::SchemaWatcherJob() { } while (!exit_ && !init_event_succ); // Create event operation - if ((ev_op = ndb_->createEventOperation(kSchemaEventName)) == nullptr) { + if ((ev_op = watcher_ndb_->createEventOperation(kSchemaEventName)) + == nullptr) { g_eventLogger->warning("[TTL SWatcher] Failed to create event operation" ", error: %d(%s). Retry...", - ndb_->getNdbError().code, - ndb_->getNdbError().message); + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); goto err; } ev_op->mergeEvents(true); @@ -206,6 +239,7 @@ void TTLPurger::SchemaWatcherJob() { // Fetch tables ttl_cache_.clear(); + list.clear(); if (dict->listObjects(list, NdbDictionary::Object::UserTable) != 0) { g_eventLogger->warning("[TTL SWatcher] Failed to list objects" ", error: %d(%s). Retry...", @@ -222,12 +256,12 @@ void TTLPurger::SchemaWatcherJob() { if (strcmp(db_str, "mysql") == 0) { continue; } - if (ndb_->setDatabaseName(db_str) != 0) { + if (watcher_ndb_->setDatabaseName(db_str) != 0) { g_eventLogger->warning("[TTL SWatcher] Failed to select database: %s" ", error: %d(%s). 
Retry...", db_str, - ndb_->getNdbError().code, - ndb_->getNdbError().message); + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); goto err; } const NdbDictionary::Table* tab = dict->getTable( @@ -243,18 +277,20 @@ void TTLPurger::SchemaWatcherJob() { UpdateLocalCache(db_str, table_str, tab); } - // TODO(Zhao): start purge worker + assert(!purge_worker_running_); + // Set it to true to make purge worker load cache + cache_updated_ = true; + purge_worker_exit_ = false; + purge_worker_ = new std::thread( + std::bind(&TTLPurger::PurgeWorkerJob, this)); + purge_worker_running_ = true; // Main schema_watcher_ task while (!exit_) { - int res = ndb_->pollEvents(1000); // wait for event or 1000 ms + int res = watcher_ndb_->pollEvents(1000); // wait for event or 1000 ms if (res > 0) { - while ((op = ndb_->nextEvent())) { + while ((op = watcher_ndb_->nextEvent())) { if (op->hasError()) { - std::cerr << "Get an event error, " << op->getNdbError().code - << "(" << op->getNdbError().message - << ") on handling ndb_schema event, retry..." - << std::endl; g_eventLogger->warning("[TTL SWatcher] Get an event error on " "handling event" ", error: %d(%s). Retry...", @@ -437,13 +473,13 @@ void TTLPurger::SchemaWatcherJob() { pos += 1; new_table_str = query_str_pre.substr(pos); } - if (ndb_->setDatabaseName(db_str.c_str()) != 0) { + if (watcher_ndb_->setDatabaseName(db_str.c_str()) != 0) { g_eventLogger->warning("[TTL SWatcher] Failed to select " "database: %s" ", error: %d(%s). 
Retry...", db_str.c_str(), - ndb_->getNdbError().code, - ndb_->getNdbError().message); + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); goto err; } dict->invalidateTable(table_str.c_str()); @@ -478,13 +514,13 @@ void TTLPurger::SchemaWatcherJob() { case SCHEMA_OP_TYPE::SOT_ALTER_TABLE_COMMIT: case SCHEMA_OP_TYPE::SOT_ONLINE_ALTER_TABLE_COMMIT: { - if (ndb_->setDatabaseName(db_str.c_str()) != 0) { + if (watcher_ndb_->setDatabaseName(db_str.c_str()) != 0) { g_eventLogger->warning("[TTL SWatcher] Failed to select " "database: %s" ", error: %d(%s). Retry...", db_str.c_str(), - ndb_->getNdbError().code, - ndb_->getNdbError().message); + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); goto err; } dict->invalidateTable(table_str.c_str()); @@ -511,6 +547,8 @@ void TTLPurger::SchemaWatcherJob() { // Only purge worker can set cache_updated_ to false; if (cache_updated) { + // TODO(Zhao) Is it better to put it after + // notify ndb_schema_result? cache_updated_ = true; } @@ -521,13 +559,13 @@ void TTLPurger::SchemaWatcherJob() { trx_succ = false; trx_failure_times = 0; do { - trans = ndb_->startTransaction(); + trans = watcher_ndb_->startTransaction(); if (trans == nullptr) { g_eventLogger->warning("[TTL SWatcher] Failed to start " "transaction" ", error: %d(%s). 
Retry...", - ndb_->getNdbError().code, - ndb_->getNdbError().message); + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); goto trx_err; } top = trans->getNdbOperation(schema_res_tab); @@ -545,15 +583,12 @@ void TTLPurger::SchemaWatcherJob() { /*Ndb_schema_result_table::COL_SCHEMA_OP_ID*/ top->equal("schema_op_id", schema_op_id) != 0 || /*Ndb_schema_result_table::COL_PARTICIPANT_NODEID*/ - top->equal("participant_nodeid", ndb_->getNodeId()) != 0 || + top->equal("participant_nodeid", + watcher_ndb_->getNodeId()) != 0 || /*Ndb_schema_result_table::COL_RESULT*/ top->setValue("result", 0) != 0 || /*Ndb_schema_result_table::COL_MESSAGE*/ top->setValue("message", message_buf) != 0) { - std::cerr << "Failed to insert tuple, error: " - << top->getNdbError().code << "(" - << top->getNdbError().message << "), retry..." - << std::endl; g_eventLogger->warning("[TTL SWatcher] Failed to insert tuple " ", error: %d(%s). Retry...", top->getNdbError().code, @@ -574,7 +609,7 @@ void TTLPurger::SchemaWatcherJob() { } trx_err: if (trans != nullptr) { - ndb_->closeTransaction(trans); + watcher_ndb_->closeTransaction(trans); } if (!trx_succ) { trx_failure_times++; @@ -595,34 +630,46 @@ void TTLPurger::SchemaWatcherJob() { purge_worker_asks_for_retry_ = false; goto err; } else if (res < 0) { - std::cerr << "Failed to poll event, error: " - << ndb_->getNdbError().code << "(" - << ndb_->getNdbError().message << "), retry..." - << std::endl; g_eventLogger->warning("[TTL SWatcher] Failed to poll event " ", error: %d(%s). 
Retry...", - ndb_->getNdbError().code, - ndb_->getNdbError().message); + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); goto err; } } err: if (ev_op != nullptr) { - ndb_->dropEventOperation(ev_op); + watcher_ndb_->dropEventOperation(ev_op); } ev_op = nullptr; op = nullptr; if (dict != nullptr) { dict->dropEvent(kSchemaEventName); } - // TODO(Zhao): stop purge worker + // Stop purge worker + purge_worker_exit_ = true; + if (purge_worker_running_) { + assert(purge_worker_ != nullptr); + if (purge_worker_->joinable()) { + purge_worker_->join(); + } + purge_worker_ = nullptr; + purge_worker_running_ = false; + } + // Return 2 NdbObjects RS_Status status; - rdrsRonDBConnectionPool->ReturnTTLSchemaWatcherNdbObject(ndb_, &status); - ndb_ = nullptr; + rdrsRonDBConnectionPool->ReturnTTLSchemaWatcherNdbObject( + watcher_ndb_, &status); + rdrsRonDBConnectionPool->ReturnTTLPurgeWorkerNdbObject( + worker_ndb_, &status); + watcher_ndb_ = nullptr; + worker_ndb_ = nullptr; + if (!exit_) { sleep(2); goto retry; } + g_eventLogger->info("[TTL SWatcher] Exit"); return; } @@ -635,14 +682,6 @@ bool TTLPurger::UpdateLocalCache(const std::string& db, if (iter != ttl_cache_.end()) { if (tab->isTTLEnabled()) { assert(iter->second.table_id == tab->getTableId()); - std::cerr << "Update TTL of table " << db + "/" + table - << " in cache: [" << iter->second.table_id - << ", " << iter->second.ttl_sec - << ", " << iter->second.col_no - << "] -> [" << tab->getTableId() - << ", " << tab->getTTLSec() - << ", " << tab->getTTLColumnNo() - << "]" << std::endl; g_eventLogger->info("[TTL SWatcher] Update TTL of table %s.%s " "in cache: [%u, %u@%u] -> [%u, %u@%u]", db.c_str(), table.c_str(), @@ -679,11 +718,6 @@ bool TTLPurger::UpdateLocalCache(const std::string& db, } } else { if (iter != ttl_cache_.end()) { - std::cerr << "Remove[2] TTL of table " << db + "/" + table - << " in cache: [" << iter->second.table_id - << ", " << iter->second.ttl_sec - << ", " << iter->second.col_no 
- << "]" << std::endl; g_eventLogger->info("[TTL SWatcher] Remove[2] TTL of table %s.%s " "in cache: [%u, %u@%u]", db.c_str(), table.c_str(), iter->second.table_id, @@ -796,6 +830,786 @@ bool TTLPurger::DropDBLocalCache(const std::string& db_str) { return updated; } +enum SpecialShardVal { + kShardNotPurger = -2, + kShardNosharding = -1, + kShardFirst = 0 +}; + +void TTLPurger::PurgeWorkerJob() { + bool purge_trx_started = false; + bool update_objects = false; + std::map local_ttl_cache; + Int32 shard = -1; + Int32 n_purge_nodes = 0; + unsigned char encoded_now[8]; + std::string log_buf; + size_t pos = 0; + std::string db_str; + std::string table_str; + uint32_t ttl_col_no = 0; + int check = 0; + uint32_t deletedRows = 0; + int trx_failure_times = 0; + std::map::iterator iter; + std::map>::iterator purge_tab_iter; + std::map::iterator purge_part_iter; + + NdbDictionary::Dictionary* dict = nullptr; + const NdbDictionary::Table* ttl_tab = nullptr; + const NdbDictionary::Index* ttl_index = nullptr; + NdbTransaction* trans = nullptr; + NdbScanOperation* scan_op = nullptr; + Int64 packed_last = 0; + unsigned char encoded_last[8]; + unsigned char encoded_curr_purge[8]; + MYSQL_TIME datetime; + Int64 packed_now = 0; + NdbRecAttr* rec_attr[3]; + + g_eventLogger->info("[TTL PWorker] Started"); + purged_pos_.clear(); + do { + purge_trx_started = false; + update_objects = false; + if (cache_updated_) { + local_ttl_cache.clear(); + const std::lock_guard lock(mutex_); + local_ttl_cache = ttl_cache_; + cache_updated_ = false; + update_objects = true; + g_eventLogger->info("[TTL PWorker] Detected cache updated, " + "reloaded %lu TTL tables", + local_ttl_cache.size()); + } + + shard = kShardNosharding; + n_purge_nodes = 0; + if (GetShard(&shard, &n_purge_nodes, update_objects) == false) { + g_eventLogger->info("[TTL PWorker] Failed to get shard, " + "error: %u(%s). 
Retry...", + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); + goto err; + } + if (shard == kShardNotPurger) { + g_eventLogger->info("Not the configured purging node, skip purging..."); + sleep(2); + continue; + } + + GetNow(encoded_now); + if (shard >= kShardFirst && !UpdateLease(encoded_now)) { + g_eventLogger->warning("[TTL PWorker] Failed to update the lease"); + goto err; + } + + if (local_ttl_cache.empty()) { + // No TTL table is found + sleep(2); + continue; + } + dict = worker_ndb_->getDictionary(); + for (iter = local_ttl_cache.begin(); iter != local_ttl_cache.end(); + iter++) { + purge_trx_started = false; + { + GetNow(encoded_now); + if (shard >= kShardFirst && !UpdateLease(encoded_now)) { + g_eventLogger->warning("[TTL PWorker] Failed to update the lease[2]"); + goto err; + } + } + if (cache_updated_) { + break; + } + + log_buf = "[TTL PWorker] Processing " + iter->first + ": "; + + pos = iter->first.find('/'); + assert(pos != std::string::npos); + db_str = iter->first.substr(0, pos); + assert(pos + 1 < iter->first.length()); + table_str = iter->first.substr(pos + 1); + ttl_col_no = iter->second.col_no; + check = 0; + deletedRows = 0; + trx_failure_times = 0; + + if (worker_ndb_->setDatabaseName(db_str.c_str()) != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to select " + "database: %s" + ", error: %d(%s). Retry...", + db_str.c_str(), + worker_ndb_->getNdbError().code, + worker_ndb_->getNdbError().message); + } + if (update_objects) { + /* + * Notice: + * Based on the comment below, + * here we need to call invalidateIndex() for ttl_index, the reason is + * removeCachedTable() just decrease the reference count of the table + * object in the global list, it won't remove the object even the counter + * becomes to 0. But invalidateIndex() will set the object to DROP and + * remove it if the counter is 0. 
Since we don't call invalidateIndex + * in main thread(it's a major different with other normal table objects), + * so here we need to call invalidateIndex() + */ + dict->invalidateIndex(kTTLPurgeIndexName, table_str.c_str()); + /* + * Notice: + * Purge thread can only call removeCachedXXX to remove its + * thread local cached table object and decrease the reference + * count of the global cached table object. + * If we call invalidateTable() and following by getTable() here, + * Purge thread will invalidate the global cached table object + * and generate a new version of table object, which will make + * the main thread's following invalidateTable() + getTable() gets + * this table object, stops the chance to get the latest one from + * data nodes. + */ + dict->removeCachedTable(table_str.c_str()); + + purged_pos_.clear(); + } + ttl_tab = dict->getTable(table_str.c_str()); + if (ttl_tab == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to get table: " + "%s, error: %d(%s). Retry...", + table_str.c_str(), + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + if (shard >= kShardFirst && n_purge_nodes > 0 && + std::hash{}( + (std::to_string(ttl_tab->getTableId()) + table_str)) % + n_purge_nodes != static_cast(shard)) { + continue; + } + log_buf += ("[P" + std::to_string(iter->second.part_id) + + "/" + + std::to_string(ttl_tab->getPartitionCount()) + "]"); + assert(iter->second.part_id < ttl_tab->getPartitionCount()); + + trx_failure_times = 0; +retry_trx: + trans = worker_ndb_->startTransaction(); + if (trans == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to start " + "transaction" + ", error: %d(%s). 
Retry...", + worker_ndb_->getNdbError().code, + worker_ndb_->getNdbError().message); + goto err; + } + purge_trx_started = true; + + ttl_index = dict->getIndex(kTTLPurgeIndexName, table_str.c_str()); + + check = 0; + deletedRows = 0; + if (ttl_index != nullptr) { + // Found index on ttl column, use it + log_buf += "[INDEX scan]"; + const NdbDictionary::Column* ttl_col_index = ttl_index->getColumn(0); + assert(ttl_col_index != nullptr && ttl_col_index->getType() == + NdbDictionary::Column::Datetime2); + const NdbDictionary::Column* ttl_col_table = + ttl_tab->getColumn(ttl_col_index->getName()); + assert(ttl_col_table != nullptr && ttl_col_table->getType() == + NdbDictionary::Column::Datetime2 && + ttl_col_table->getColumnNo() == static_cast(ttl_col_no)); + + NdbIndexScanOperation* index_scan_op = + trans->getNdbIndexScanOperation(ttl_index); + if (index_scan_op == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to start get index " + "scan operations on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + index_scan_op->setPartitionId(iter->second.part_id); + /* Index Scan */ + Uint32 scanFlags = + /*NdbScanOperation::SF_OrderBy | + *NdbScanOperation::SF_MultiRange | + */ + NdbScanOperation::SF_KeyInfo | + NdbScanOperation::SF_OnlyExpiredScan; + + if (index_scan_op->readTuples(NdbOperation::LM_Exclusive, + scanFlags, + 1, // parallel + kPurgeBatchSize) // batch + != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to readTuples " + "on table %s" + ", error: %d(%s). 
Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + + log_buf += "-["; + packed_last = 0; + purge_tab_iter = purged_pos_.find(iter->second.table_id); + if (purge_tab_iter != purged_pos_.end()) { + purge_part_iter = purge_tab_iter->second.find(iter->second.part_id); + if (purge_part_iter != purge_tab_iter->second.end()) { + packed_last = purge_part_iter->second; + } + } + if (packed_last != 0) { + my_datetime_packed_to_binary(packed_last, encoded_last, 0); + TIME_from_longlong_datetime_packed(&datetime, packed_last); + log_buf += std::to_string(TIME_to_ulonglong_datetime(datetime)); + } else { + memset(encoded_last, 0, 8); + log_buf += "INF"; + } + log_buf += " --- "; + packed_now = GetNow(encoded_now); + TIME_from_longlong_datetime_packed(&datetime, packed_now); + log_buf += std::to_string(TIME_to_ulonglong_datetime(datetime)); + log_buf += ")"; + + if (index_scan_op->setBound(ttl_col_index->getName(), + NdbIndexScanOperation::BoundLE, + encoded_last)) { + g_eventLogger->warning("[TTL PWorker] Failed to setBound " + "on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + if (index_scan_op->setBound(ttl_col_index->getName(), + NdbIndexScanOperation::BoundGT, encoded_now)) { + g_eventLogger->warning("[TTL PWorker] Failed to setBound " + "on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + rec_attr[0] = index_scan_op->getValue(ttl_col_no); + if (rec_attr[0] == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to getValue " + "on table %s" + ", error: %d(%s). 
Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + if (trans->execute(NdbTransaction::NoCommit) != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to execute transaction " + "on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + memset(encoded_curr_purge, 0, 8); + while ((check = index_scan_op->nextResult(true)) == 0) { + do { + memset(encoded_curr_purge, 0, 8); + memcpy(encoded_curr_purge, rec_attr[0]->aRef(), + rec_attr[0]->get_size_in_bytes()); + // std::cerr << "Get a expired row: timestamp = [" + // << rec_attr[0]->get_size_in_bytes() << "]"; + // for (Uint32 i = 0; i < rec_attr[0]->get_size_in_bytes(); i++) { + // std::cerr << std::hex + // << static_cast(rec_attr[0]->aRef()[i]) + // << " "; + // } + // std::cerr << std::endl; + if (index_scan_op->deleteCurrentTuple() != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to deleteTuple " + "on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + deletedRows++; + } while ((check = index_scan_op->nextResult(false)) == 0); + + if (check == -1) { + g_eventLogger->warning("[TTL PWorker] Failed to execute " + "transaction[2] on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + break; + } + /** + * Commit all prepared operations + */ + if (trans->execute(NdbTransaction::Commit) == -1) { + g_eventLogger->warning("[TTL PWorker] Failed to commit transaction " + "on table %s" + ", error: %d(%s). 
Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } else if (*reinterpret_cast(encoded_curr_purge) != 0) { + packed_last = my_datetime_packed_from_binary(encoded_curr_purge, 0); + if (purge_tab_iter != purged_pos_.end()) { + purge_tab_iter->second[iter->second.part_id] = packed_last; + } else { + purged_pos_[iter->second.table_id][iter->second.part_id] + = packed_last; + } + } + } else if (dict->getNdbError().code == 4243) { + // Can't find the index on ttl column, use table instead + log_buf += "[TABLE scan]"; + scan_op = trans->getNdbScanOperation(ttl_tab); + if (scan_op == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to start get scan " + "operations on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + scan_op->setPartitionId(iter->second.part_id); + Uint32 scanFlags = NdbScanOperation::SF_OnlyExpiredScan; + if (scan_op->readTuples(NdbOperation::LM_Exclusive, scanFlags, + 1, kPurgeBatchSize) != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to readTuples " + "on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + rec_attr[0] = scan_op->getValue(ttl_col_no); + if (rec_attr[0] == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to getValue " + "on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + if (trans->execute(NdbTransaction::NoCommit) != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to execute transaction " + "on table %s" + ", error: %d(%s). 
Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + while ((check = scan_op->nextResult(true)) == 0) { + do { + // std::cerr << "Get a expired row: timestamp = [" + // << rec_attr[0]->get_size_in_bytes() << "]"; + // for (Uint32 i = 0; i < rec_attr[0]->get_size_in_bytes(); i++) { + // std::cerr << std::hex + // << static_cast(rec_attr[0]->aRef()[i]) + // << " "; + // } + // std::cerr << std::endl; + if (scan_op->deleteCurrentTuple() != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to deleteTuple " + "on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + deletedRows++; + } while ((check = scan_op->nextResult(false)) == 0); + + if (check == -1) { + g_eventLogger->warning("[TTL PWorker] Failed to execute " + "transaction[2] on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + + break; + } + /** + * Commit all prepared operations + */ + if (trans->execute(NdbTransaction::Commit) == -1) { + g_eventLogger->warning("[TTL PWorker] Failed to commit transaction " + "on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + } else { + g_eventLogger->warning("[TTL PWorker] Failed to get Table/Index " + "object on table %s" + ", error: %d(%s). Retry...", + table_str.c_str(), + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + worker_ndb_->closeTransaction(trans); + trans = nullptr; + log_buf += " Purged " + std::to_string(deletedRows) + " rows"; + g_eventLogger->info("%s", log_buf.c_str()); + iter->second.part_id = + ((iter->second.part_id + 1) % ttl_tab->getPartitionCount()); + // Finish 1 batch + // keep the ttl_tab in local table cache ? 
+ continue; +err: + if (trans != nullptr) { + worker_ndb_->closeTransaction(trans); + } + trx_failure_times++; + sleep(1); + if (trx_failure_times > kMaxTrxRetryTimes) { + g_eventLogger->warning("[TTL PWorker] Has retried for %d times..." + "Quit and notify schema worker", + kMaxTrxRetryTimes); + purge_worker_asks_for_retry_ = true; + purge_worker_exit_ = true; + break; + } else if (purge_trx_started) { + goto retry_trx; + } else { + // retry from begining + break; // jump out from for-loop + } + } + // Finish 1 round + sleep(2); + } while (!purge_worker_exit_); + + // No need to return PurgeWorker NdbObject here, SchemaWatch will do that. + g_eventLogger->info("[TTL PWorker] Exit"); + return; +} + +bool TTLPurger::GetShard(int32_t* shard, int32_t* n_purge_nodes, + bool update_objects) { + *shard = kShardNosharding; + *n_purge_nodes = 0; + if (worker_ndb_->setDatabaseName(kSystemDBName) != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to select system database: " + "%s, error: %d(%s). Retry...", + kSystemDBName, + worker_ndb_->getNdbError().code, + worker_ndb_->getNdbError().message); + return false; + } + NdbDictionary::Dictionary* dict = worker_ndb_->getDictionary(); + if (update_objects) { + dict->removeCachedTable(kTTLPurgeNodesTabName); + } + const NdbDictionary::Table* tab = dict->getTable(kTTLPurgeNodesTabName); + if (tab == nullptr) { + if (dict->getNdbError().code == 723) { + // Purging nodes configuration table is not found, no sharding + return true; + } else { + g_eventLogger->warning("[TTL PWorker] Failed to get table: " + "%s, error: %d(%s). 
Retry...", + kTTLPurgeNodesTabName, + dict->getNdbError().code, + dict->getNdbError().message); + return false; + } + } + NdbRecAttr* rec_attr[3]; + NdbTransaction* trans = nullptr; + NdbScanOperation* scan_op = nullptr; + int32_t n_nodes = 0;; + std::vector purge_nodes; + size_t pos = 0; + bool check = 0; + std::string log_buf = "[TTL PWorker] "; + std::string active_nodes = "["; + std::string inactive_nodes = "["; + + trans = worker_ndb_->startTransaction(); + if (trans == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to start " + "transaction" + ", error: %d(%s). Retry...", + worker_ndb_->getNdbError().code, + worker_ndb_->getNdbError().message); + goto err; + } + scan_op = trans->getNdbScanOperation(tab); + if (scan_op == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to start get scan " + "operations on table %s" + ", error: %d(%s). Retry...", + tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + if (scan_op->readTuples(NdbOperation::LM_CommittedRead) != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to readTuples " + "on table %s" + ", error: %d(%s). Retry...", + tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + + rec_attr[0] = scan_op->getValue("node_id"); + if (rec_attr[0] == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to getValue " + "on table %s" + ", error: %d(%s). Retry...", + tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + rec_attr[1] = scan_op->getValue("last_active"); + if (rec_attr[1] == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to getValue " + "on table %s" + ", error: %d(%s). Retry...", + tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + if (trans->execute(NdbTransaction::NoCommit) != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to execute transaction " + "on table %s" + ", error: %d(%s). 
Retry...", + tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + n_nodes = 0; + purge_nodes.clear(); + pos = 0; + while ((check = scan_op->nextResult(true)) == 0) { + do { + if (rec_attr[0]->int32_value() != worker_ndb_->getNodeId() && + (rec_attr[1]->isNULL() || + !IsNodeAlive(reinterpret_cast( + rec_attr[1]->aRef())))) { + inactive_nodes += (std::to_string(rec_attr[0]->int32_value()) + " "); + continue; + } + n_nodes++; + purge_nodes.push_back(rec_attr[0]->int32_value()); + } while ((check = scan_op->nextResult(false)) == 0); + } + + std::sort(purge_nodes.begin(), purge_nodes.end()); + if (!purge_nodes.empty()) { + for (auto iter : purge_nodes) { + active_nodes += (std::to_string(iter) + " "); + if (worker_ndb_->getNodeId() == iter) { + *shard = pos; + } + pos++; + } + } + if (!purge_nodes.empty() && *shard == -1) { + // if the current node id is not in the purging nodes list, + // set shard to -2 to tell the purge thread sleep + *shard = kShardNotPurger; + } + *n_purge_nodes = n_nodes; + worker_ndb_->closeTransaction(trans); + if (active_nodes.length() > 1) { + active_nodes[active_nodes.length() - 1] = ']'; + } else { + active_nodes += "]"; + } + if (inactive_nodes.length() > 1) { + inactive_nodes[inactive_nodes.length() - 1] = ']'; + } else { + inactive_nodes += "]"; + } + log_buf += ("Shard: [" + std::to_string(*shard) + + "/" + std::to_string(n_nodes) + "]"); + log_buf += (", Active purging nodes: " + active_nodes); + log_buf += (", Inactive purging nodes: " + inactive_nodes); + g_eventLogger->info("%s", log_buf.c_str()); + return true; + +err: + if (trans != nullptr) { + worker_ndb_->closeTransaction(trans); + } + return false; +} + +Int64 TTLPurger::GetNow(unsigned char* encoded_now) { + assert(encoded_now != nullptr); + Int64 packed_now = 0; + memset(encoded_now, 0, 8); + MYSQL_TIME curr_dt; + struct tm tmp_tm; + time_t t_now = (time_t)my_micro_time() / 1000000; /* second */ + gmtime_r(&t_now, &tmp_tm); + 
curr_dt.neg = false; + curr_dt.second_part = 0; + curr_dt.year = ((tmp_tm.tm_year + 1900) % 10000); + curr_dt.month = tmp_tm.tm_mon + 1; + curr_dt.day = tmp_tm.tm_mday; + curr_dt.hour = tmp_tm.tm_hour; + curr_dt.minute = tmp_tm.tm_min; + curr_dt.second = tmp_tm.tm_sec; + curr_dt.time_zone_displacement = 0; + curr_dt.time_type = MYSQL_TIMESTAMP_DATETIME; + if (curr_dt.second == 60 || curr_dt.second == 61) { + curr_dt.second = 59; + } + packed_now = TIME_to_longlong_datetime_packed(curr_dt); + my_datetime_packed_to_binary(packed_now, encoded_now, 0); + + return packed_now; +} + +bool TTLPurger::UpdateLease(const unsigned char* encoded_now) { + NdbDictionary::Dictionary* dict = nullptr; + const NdbDictionary::Table* tab = nullptr; + NdbTransaction* trans = nullptr; + NdbOperation *op = nullptr; + if (worker_ndb_->setDatabaseName(kSystemDBName) != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to select system database: " + "%s, error: %d(%s). Retry...", + kSystemDBName, + worker_ndb_->getNdbError().code, + worker_ndb_->getNdbError().message); + goto err; + } + dict = worker_ndb_->getDictionary(); + tab = dict->getTable(kTTLPurgeNodesTabName); + if (tab == nullptr) { + if (dict->getNdbError().code == 723) { + /* + * Purging nodes configuration table is not found, + * no need to update lease + */ + return true; + } else { + g_eventLogger->warning("[TTL PWorker] Failed to get table: " + "%s, error: %d(%s). Retry...", + kTTLPurgeNodesTabName, + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } + } + + trans = worker_ndb_->startTransaction(); + if (trans == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to start " + "transaction" + ", error: %d(%s). Retry...", + worker_ndb_->getNdbError().code, + worker_ndb_->getNdbError().message); + goto err; + } + op = trans->getNdbOperation(tab); + if (op == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to get the Ndb " + "operation on table %s" + ", error: %d(%s). 
Retry...", + tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + op->updateTuple(); + op->equal("node_id", worker_ndb_->getNodeId()); + op->setValue("last_active", reinterpret_cast(encoded_now)); + + if (trans->execute(NdbTransaction::Commit) != 0) { + if (trans->getNdbError().code != 626 /*not found*/) { + g_eventLogger->warning("[TTL PWorker] Failed to commit transaction " + "on table %s" + ", error: %d(%s). Retry...", + tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + goto err; + } + } + worker_ndb_->closeTransaction(trans); + return true; +err: + if (trans != nullptr) { + worker_ndb_->closeTransaction(trans); + } + return false; +} + +bool TTLPurger::IsNodeAlive(const unsigned char* encoded_last_active) { + assert(encoded_last_active != nullptr); + Uint64 packed_last_active = + my_datetime_packed_from_binary(encoded_last_active, 0); + MYSQL_TIME last_active_dt; + TIME_from_longlong_datetime_packed(&last_active_dt, packed_last_active); + // Add lease seconds + Interval interval; + memset(&interval, 0, sizeof(interval)); + interval.second = kLeaseSeconds; + date_add_interval(&last_active_dt, INTERVAL_SECOND, interval, nullptr); + + MYSQL_TIME curr_dt; + struct tm tmp_tm; + time_t t_now = (time_t)my_micro_time() / 1000000; /* second */ + gmtime_r(&t_now, &tmp_tm); + curr_dt.neg = false; + curr_dt.second_part = 0; + curr_dt.year = ((tmp_tm.tm_year + 1900) % 10000); + curr_dt.month = tmp_tm.tm_mon + 1; + curr_dt.day = tmp_tm.tm_mday; + curr_dt.hour = tmp_tm.tm_hour; + curr_dt.minute = tmp_tm.tm_min; + curr_dt.second = tmp_tm.tm_sec; + curr_dt.time_zone_displacement = 0; + curr_dt.time_type = MYSQL_TIMESTAMP_DATETIME; + if (curr_dt.second == 60 || curr_dt.second == 61) { + curr_dt.second = 59; + } + + int res = my_time_compare(last_active_dt, curr_dt); + if (res >= 0) { + return true; + } else { + return false; + } +} + bool TTLPurger::Run() { if (!schema_watcher_running_) { 
assert(schema_watcher_ == nullptr); diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.hpp b/storage/ndb/rest-server2/server/src/ttl_purge.hpp index 7bae095567fa..d87bcd2c303c 100644 --- a/storage/ndb/rest-server2/server/src/ttl_purge.hpp +++ b/storage/ndb/rest-server2/server/src/ttl_purge.hpp @@ -34,7 +34,11 @@ class TTLPurger { static constexpr const char* kSchemaTableName = "ndb_schema"; static constexpr const char* kSchemaResTabName = "ndb_schema_result"; static constexpr const char* kTTLPurgeNodesTabName = "ttl_purge_nodes"; + static constexpr const char* kTTLPurgeIndexName = "ttl_index"; static constexpr int kNoEventCol = 10; + static constexpr int kLeaseSeconds = 20; + static constexpr Uint32 kPurgeBatchSize = 10; + static constexpr int kMaxTrxRetryTimes = 10; static constexpr const char* kEventColNames[kNoEventCol] = { "db", "name", @@ -55,7 +59,9 @@ class TTLPurger { private: TTLPurger(); void SchemaWatcherJob(); - Ndb* ndb_; + Ndb* watcher_ndb_; + void PurgeWorkerJob(); + Ndb* worker_ndb_; std::atomic exit_; typedef struct { @@ -63,7 +69,7 @@ class TTLPurger { uint32_t ttl_sec; uint32_t col_no; uint32_t part_id = {0}; // Only valid in local ttl cache - char last_purged[8] = {0}; // Only valid in local ttl cache + unsigned char last_purged[8] = {0}; // Only valid in local ttl cache } TTLInfo; std::map ttl_cache_; std::mutex mutex_; @@ -83,8 +89,15 @@ class TTLPurger { std::atomic purge_worker_asks_for_retry_; bool schema_watcher_running_; std::thread* schema_watcher_; + + bool GetShard(int32_t* shard, int32_t* n_purge_nodes, bool update_objects); + static Int64 GetNow(unsigned char* encoded_now); + bool UpdateLease(const unsigned char* encoded_now); + bool IsNodeAlive(const unsigned char* encoded_last_active); bool purge_worker_running_; std::thread* purge_worker_; + std::atomic purge_worker_exit_; + std::map> purged_pos_; }; #endif // STORAGE_NDB_REST_SERVER2_SERVER_SRC_TTL_PURGE_HPP_ From 2152409b20209c48cc0a86d565f447559cd0ae26 Mon Sep 17 
00:00:00 2001 From: KernelMaker Date: Mon, 2 Dec 2024 17:10:26 +0100 Subject: [PATCH 5/8] Develop TTLPurger in rest-server2 [PART-3] 1. use murmur3 hash function in GetShard() 2. define the behavior of TTL_EXPIRED_ONLY flag on non-TTL table: return not-found 3. add example of how to use TTL_EXPIRED_ONLY and TTL_IGNORE flags in ndbapi_recattr_vs_record example program 4. bugfix: options can be nullptr in NdbScanOperation::scanImpl() --- .../ndbapi_recattr_vs_record/main.cpp | 132 ++++++++++++++++-- .../ndb/rest-server2/server/src/ttl_purge.cpp | 17 ++- .../kernel/blocks/dbtup/DbtupExecQuery.cpp | 50 ++++++- storage/ndb/src/ndbapi/NdbScanOperation.cpp | 7 +- 4 files changed, 184 insertions(+), 22 deletions(-) diff --git a/storage/ndb/ndbapi-examples/ndbapi_recattr_vs_record/main.cpp b/storage/ndb/ndbapi-examples/ndbapi_recattr_vs_record/main.cpp index e2d3c7ff3256..143e07dfea03 100644 --- a/storage/ndb/ndbapi-examples/ndbapi_recattr_vs_record/main.cpp +++ b/storage/ndb/ndbapi-examples/ndbapi_recattr_vs_record/main.cpp @@ -50,6 +50,9 @@ // Used for cout #include +// Turn on it to try TTL related flags +// #define TRY_TTL_FLAGS + // Do we use old-style (NdbRecAttr?) or new style (NdbRecord?) 
enum ApiType { api_attr, api_record }; @@ -455,9 +458,18 @@ static void do_update(Ndb &myNdb, ApiType accessType) { */ unsigned char attrMask = (1 << attr2ColNum) | (1 << attr3ColNum); +#ifdef TRY_TTL_FLAGS + NdbOperation::OperationOptions options; + options.optionsPresent = NdbOperation::OperationOptions::OO_TTL_ONLY_EXPIRED; + // options.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_IGNORE; +#endif // TRY_TTL_FLAGS const NdbOperation *pop = myTransaction->updateTuple(pkeyColumnRecord, (char *)&row, - pallColsRecord, (char *)&row, &attrMask); + pallColsRecord, (char *)&row, &attrMask +#ifdef TRY_TTL_FLAGS + ,&options, sizeof(NdbOperation::OperationOptions) +#endif // TRY_TTL_FLAGS + ); if (pop == NULL) APIERROR(myTransaction->getNdbError()); break; @@ -505,8 +517,18 @@ static void do_delete(Ndb &myNdb, ApiType accessType) { RowData keyInfo; keyInfo.attr1 = 3; +#ifdef TRY_TTL_FLAGS + NdbOperation::OperationOptions options; + options.optionsPresent = NdbOperation::OperationOptions::OO_TTL_ONLY_EXPIRED; + // options.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_IGNORE; +#endif // TRY_TTL_FLAGS const NdbOperation *pop = myTransaction->deleteTuple( - pkeyColumnRecord, (char *)&keyInfo, pallColsRecord); + pkeyColumnRecord, (char *)&keyInfo, pallColsRecord +#ifdef TRY_TTL_FLAGS + ,nullptr, nullptr, + &options, sizeof(NdbOperation::OperationOptions) +#endif // TRY_TTL_FLAGS + ); if (pop == NULL) APIERROR(myTransaction->getNdbError()); break; @@ -556,6 +578,10 @@ static void do_mixed_update(Ndb &myNdb) { NdbOperation::OperationOptions opts; opts.optionsPresent = NdbOperation::OperationOptions::OO_SETVALUE; +#ifdef TRY_TTL_FLAGS + // opts.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_IGNORE; + opts.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS opts.extraSetValues = &setvalspecs[0]; opts.numExtraSetValues = 1; @@ -614,11 +640,22 @@ static void do_read(Ndb &myNdb, ApiType accessType) { break; } 
case api_record: { +#ifdef TRY_TTL_FLAGS + NdbOperation::OperationOptions options; + options.optionsPresent = NdbOperation::OperationOptions::OO_TTL_ONLY_EXPIRED; + // options.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_IGNORE; +#endif // TRY_TTL_FLAGS rowData.attr1 = i; const NdbOperation *pop = myTransaction->readTuple(pkeyColumnRecord, (char *)&rowData, pallColsRecord, // Read PK+ATTR2+ATTR3 - (char *)&rowData); + (char *)&rowData +#ifdef TRY_TTL_FLAGS + ,NdbOperation::LM_Read, + nullptr, + &options, sizeof(NdbOperation::OperationOptions) +#endif // TRY_TTL_FLAGS + ); if (pop == NULL) APIERROR(myTransaction->getNdbError()); break; @@ -695,6 +732,10 @@ static void do_mixed_read(Ndb &myNdb) { NdbOperation::OperationOptions opts; opts.optionsPresent = NdbOperation::OperationOptions::OO_GETVALUE; +#ifdef TRY_TTL_FLAGS + // opts.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_IGNORE; + opts.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS opts.extraGetValues = &extraCols[0]; opts.numExtraGetValues = 2; @@ -768,7 +809,11 @@ static void do_scan(Ndb &myNdb, ApiType accessType) { if (psop == NULL) APIERROR(myTransaction->getNdbError()); - if (psop->readTuples(NdbOperation::LM_Read) != 0) + if (psop->readTuples(NdbOperation::LM_Read +#ifdef TRY_TTL_FLAGS + , NdbScanOperation::SF_OnlyExpiredScan +#endif // TRY_TTL_FLAGS + ) != 0) APIERROR(myTransaction->getNdbError()); recAttrAttr1 = psop->getValue("ATTR1"); @@ -782,7 +827,17 @@ static void do_scan(Ndb &myNdb, ApiType accessType) { * The scan will fetch a batch and give the user a series of pointers * to rows in the batch in nextResult() below */ - psop = myTransaction->scanTable(pallColsRecord, NdbOperation::LM_Read); +#ifdef TRY_TTL_FLAGS + NdbScanOperation::ScanOptions options; + // options.optionsPresent = NdbScanOperation::ScanOptions::SO_TTL_IGNORE; + options.optionsPresent = NdbScanOperation::ScanOptions::SO_TTL_ONLY_EXPIRED; +#endif // 
TRY_TTL_FLAGS + psop = myTransaction->scanTable(pallColsRecord, NdbOperation::LM_Read +#ifdef TRY_TTL_FLAGS + , nullptr, + &options, sizeof(NdbScanOperation::ScanOptions) +#endif // TRY_TTL_FLAGS + ); if (psop == NULL) APIERROR(myTransaction->getNdbError()); @@ -867,6 +922,10 @@ static void do_mixed_scan(Ndb &myNdb) { NdbScanOperation::ScanOptions options; options.optionsPresent = NdbScanOperation::ScanOptions::SO_GETVALUE; +#ifdef TRY_TTL_FLAGS + // options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_IGNORE; + options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS options.extraGetValues = &extraGets[0]; options.numExtraGetValues = 1; @@ -939,7 +998,12 @@ static void do_indexScan(Ndb &myNdb, ApiType accessType) { */ Uint32 scanFlags = NdbScanOperation::SF_OrderBy | NdbScanOperation::SF_MultiRange | - NdbScanOperation::SF_ReadRangeNo; + NdbScanOperation::SF_ReadRangeNo +#ifdef TRY_TTL_FLAGS + | + NdbScanOperation::SF_OnlyExpiredScan +#endif // TRY_TTL_FLAGS + ; if (psop->readTuples(NdbOperation::LM_Read, scanFlags, (Uint32)0, // batch @@ -992,6 +1056,10 @@ static void do_indexScan(Ndb &myNdb, ApiType accessType) { NdbScanOperation::ScanOptions options; options.optionsPresent = NdbScanOperation::ScanOptions::SO_SCANFLAGS; +#ifdef TRY_TTL_FLAGS + // options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_IGNORE; + options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS options.scan_flags = scanFlags; psop = myTransaction->scanIndex( @@ -1132,6 +1200,10 @@ static void do_mixed_indexScan(Ndb &myNdb) { NdbScanOperation::ScanOptions options; options.optionsPresent = NdbScanOperation::ScanOptions::SO_SCANFLAGS | NdbScanOperation::ScanOptions::SO_GETVALUE; +#ifdef TRY_TTL_FLAGS + // options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_IGNORE; + options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS 
options.scan_flags = scanFlags; options.extraGetValues = &extraGets[0]; options.numExtraGetValues = 1; @@ -1239,6 +1311,10 @@ static void do_read_and_delete(Ndb &myNdb) { extraGets[1].recAttr = NULL; options.optionsPresent = NdbOperation::OperationOptions::OO_GETVALUE; +#ifdef TRY_TTL_FLAGS + // options.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_IGNORE; + options.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS options.extraGetValues = &extraGets[0]; options.numExtraGetValues = 2; @@ -1303,6 +1379,9 @@ static void do_scan_update(Ndb &myNdb, ApiType accessType) { * returned, with SF_KeyInfo */ if (psop->readTuples(NdbOperation::LM_Read, +#ifdef TRY_TTL_FLAGS + NdbScanOperation::SF_OnlyExpiredScan | +#endif // TRY_TTL_FLAGS NdbScanOperation::SF_KeyInfo) != 0) APIERROR(myTransaction->getNdbError()); @@ -1315,6 +1394,10 @@ static void do_scan_update(Ndb &myNdb, ApiType accessType) { case api_record: { NdbScanOperation::ScanOptions options; options.optionsPresent = NdbScanOperation::ScanOptions::SO_SCANFLAGS; +#ifdef TRY_TTL_FLAGS + // options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_IGNORE; + options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS options.scan_flags = NdbScanOperation::SF_KeyInfo; psop = myTransaction->scanTable(pallColsRecord, NdbOperation::LM_Read, @@ -1456,6 +1539,9 @@ static void do_scan_delete(Ndb &myNdb, ApiType accessType) { /* Need KeyInfo when performing scanning delete */ if (psop->readTuples(NdbOperation::LM_Read, +#ifdef TRY_TTL_FLAGS + NdbScanOperation::SF_OnlyExpiredScan | +#endif // TRY_TTL_FLAGS NdbScanOperation::SF_KeyInfo) != 0) APIERROR(myTransaction->getNdbError()); @@ -1466,6 +1552,10 @@ static void do_scan_delete(Ndb &myNdb, ApiType accessType) { case api_record: { NdbScanOperation::ScanOptions options; options.optionsPresent = NdbScanOperation::ScanOptions::SO_SCANFLAGS; +#ifdef TRY_TTL_FLAGS + // 
options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_IGNORE; + options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS /* Need KeyInfo when performing scanning delete */ options.scan_flags = NdbScanOperation::SF_KeyInfo; @@ -1649,6 +1739,9 @@ static void do_scan_lock_reread(Ndb &myNdb, ApiType accessType) { /* Need KeyInfo for lock takeover */ if (psop->readTuples(NdbOperation::LM_Read, +#ifdef TRY_TTL_FLAGS + NdbScanOperation::SF_OnlyExpiredScan | +#endif // TRY_TTL_FLAGS NdbScanOperation::SF_KeyInfo) != 0) APIERROR(myTransaction->getNdbError()); @@ -1659,6 +1752,10 @@ static void do_scan_lock_reread(Ndb &myNdb, ApiType accessType) { case api_record: { NdbScanOperation::ScanOptions options; options.optionsPresent = NdbScanOperation::ScanOptions::SO_SCANFLAGS; +#ifdef TRY_TTL_FLAGS + // options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_IGNORE; + options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS /* Need KeyInfo for lock takeover */ options.scan_flags = NdbScanOperation::SF_KeyInfo; @@ -1857,6 +1954,10 @@ static void do_all_extras_read(Ndb &myNdb) { NdbOperation::OperationOptions opts; opts.optionsPresent = NdbOperation::OperationOptions::OO_GETVALUE; +#ifdef TRY_TTL_FLAGS + // opts.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_IGNORE; + opts.optionsPresent |= NdbOperation::OperationOptions::OO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS opts.extraGetValues = &extraCols[0]; opts.numExtraGetValues = 4; @@ -1922,6 +2023,9 @@ static void do_secondary_indexScan(Ndb &myNdb, ApiType accessType) { Uint32 scanFlags = NdbScanOperation::SF_OrderBy | NdbScanOperation::SF_Descending | +#ifdef TRY_TTL_FLAGS + NdbScanOperation::SF_OnlyExpiredScan | +#endif // TRY_TTL_FLAGS NdbScanOperation::SF_MultiRange | NdbScanOperation::SF_ReadRangeNo; switch (accessType) { @@ -1957,6 +2061,10 @@ static void do_secondary_indexScan(Ndb &myNdb, ApiType 
accessType) { case api_record: { NdbScanOperation::ScanOptions options; options.optionsPresent = NdbScanOperation::ScanOptions::SO_SCANFLAGS; +#ifdef TRY_TTL_FLAGS + // options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_IGNORE; + options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS options.scan_flags = scanFlags; psop = myTransaction->scanIndex( @@ -2064,9 +2172,13 @@ static void do_secondary_indexScanEqual(Ndb &myNdb, ApiType accessType) { NdbRecAttr *recAttrAttr2 = nullptr; NdbRecAttr *recAttrAttr3 = nullptr; - Uint32 scanFlags = NdbScanOperation::SF_OrderBy; + Uint32 scanFlags = NdbScanOperation::SF_OrderBy +#ifdef TRY_TTL_FLAGS + | NdbScanOperation::SF_OnlyExpiredScan +#endif // TRY_TTL_FLAGS + ; - Uint32 attr3Eq = 44; + Uint32 attr3Eq = 4; switch (accessType) { case api_attr: { @@ -2092,6 +2204,10 @@ static void do_secondary_indexScanEqual(Ndb &myNdb, ApiType accessType) { case api_record: { NdbScanOperation::ScanOptions options; options.optionsPresent = NdbScanOperation::ScanOptions::SO_SCANFLAGS; +#ifdef TRY_TTL_FLAGS + // options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_IGNORE; + options.optionsPresent |= NdbScanOperation::ScanOptions::SO_TTL_ONLY_EXPIRED; +#endif // TRY_TTL_FLAGS options.scan_flags = scanFlags; psop = myTransaction->scanIndex( diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.cpp b/storage/ndb/rest-server2/server/src/ttl_purge.cpp index 47b9a106f1af..c5a45788b16e 100644 --- a/storage/ndb/rest-server2/server/src/ttl_purge.cpp +++ b/storage/ndb/rest-server2/server/src/ttl_purge.cpp @@ -23,6 +23,7 @@ #include "src/ttl_purge.hpp" #include "src/status.hpp" #include "storage/ndb/plugin/ndb_schema_dist.h" +#include "include/my_murmur3.h" #include "include/my_systime.h" #include "include/my_time.h" @@ -104,7 +105,6 @@ void TTLPurger::SchemaWatcherJob() { g_eventLogger->info("[TTL SWatcher] Started"); retry: - g_eventLogger->info("[TTL SWatcher] retry from 
here"); init_event_succ = false; dict = nullptr; schema_tab = nullptr; @@ -669,7 +669,7 @@ void TTLPurger::SchemaWatcherJob() { sleep(2); goto retry; } - g_eventLogger->info("[TTL SWatcher] Exit"); + g_eventLogger->info("[TTL SWatcher] Exited"); return; } @@ -849,6 +849,8 @@ void TTLPurger::PurgeWorkerJob() { std::string table_str; uint32_t ttl_col_no = 0; int check = 0; + int table_id = 0; + Uint32 hash_val = 0; uint32_t deletedRows = 0; int trx_failure_times = 0; std::map::iterator iter; @@ -982,10 +984,11 @@ void TTLPurger::PurgeWorkerJob() { dict->getNdbError().message); goto err; } - if (shard >= kShardFirst && n_purge_nodes > 0 && - std::hash{}( - (std::to_string(ttl_tab->getTableId()) + table_str)) % - n_purge_nodes != static_cast(shard)) { + table_id = ttl_tab->getTableId(); + hash_val = murmur3_32(reinterpret_cast(&table_id), + sizeof(int), 0); + if (shard >= kShardFirst && n_purge_nodes > 0 && + hash_val % n_purge_nodes != static_cast(shard)) { continue; } log_buf += ("[P" + std::to_string(iter->second.part_id) + @@ -1309,7 +1312,7 @@ void TTLPurger::PurgeWorkerJob() { } while (!purge_worker_exit_); // No need to return PurgeWorker NdbObject here, SchemaWatch will do that. 
- g_eventLogger->info("[TTL PWorker] Exit"); + g_eventLogger->info("[TTL PWorker] Exited"); return; } diff --git a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp index 78815850f902..15ef6cebca01 100644 --- a/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp +++ b/storage/ndb/src/kernel/blocks/dbtup/DbtupExecQuery.cpp @@ -2309,6 +2309,21 @@ int Dbtup::handleReadReq( } dst = &signal->theData[start_index]; dstLen = (MAX_READ / 4) - start_index; + + if (unlikely(_regOperPtr->ttl_ignore == 0 && + _regOperPtr->ttl_only_expired == 1)) { + ndbassert(req_struct->fragPtrP != nullptr); + if (!c_lqh->is_ttl_table(req_struct->fragPtrP->fragTableId)) { + g_eventLogger->warning("(Read) Received a read request with " + "ttl_only_expired on a non-TTL table: %d", + req_struct->fragPtrP->fragTableId); + // return Notfound + terrorCode = 626; + tupkeyErrorLab(req_struct); + return -1; + } + } + /* * Zart * Here we check whether the row is expired @@ -2460,6 +2475,22 @@ int Dbtup::handleUpdateReq(Signal* signal, KeyReqStruct* req_struct, bool disk) { + if (unlikely(operPtrP->ttl_ignore == 0 && + operPtrP->ttl_only_expired == 1 && + operPtrP->original_op_type != ZWRITE)) { + ndbassert(req_struct->fragPtrP != nullptr); + if (!c_lqh->is_ttl_table(req_struct->fragPtrP->fragTableId)) { + g_eventLogger->warning("(Update) Received an update request with " + "ttl_only_expired on a non-TTL table: %d", + req_struct->fragPtrP->fragTableId); + // return Notfound + terrorCode = 626; + tupkeyErrorLab(req_struct); + return -1; + } + } + +#ifdef TTL_DEBUG /* * Zart * Here we check whether the row is expired @@ -2470,17 +2501,14 @@ int Dbtup::handleUpdateReq(Signal* signal, */ if (operPtrP->original_op_type == ZWRITE && is_ttl_table(regTabPtr)) { -#ifdef TTL_DEBUG g_eventLogger->info("Zart, (UPDATE) Skip checking TTL since " "the original operation is ZWRITE."); -#endif // TTL_DEBUG } if (operPtrP->ttl_ignore == 1) { -#ifdef
TTL_DEBUG g_eventLogger->info("Zart, (Update) Skip checking TTL since " "ttl ignore is set"); -#endif // TTL_DEBUG } +#endif // TTL_DEBUG if (operPtrP->ttl_ignore == 0 && operPtrP->original_op_type != ZWRITE && is_ttl_table(regTabPtr)) { @@ -3681,6 +3709,20 @@ int Dbtup::handleDeleteReq(Signal* signal, KeyReqStruct *req_struct, bool disk) { + if (unlikely(regOperPtr->ttl_ignore == 0 + && regOperPtr->ttl_only_expired == 1)) { + ndbassert(req_struct->fragPtrP != nullptr); + if (!c_lqh->is_ttl_table(req_struct->fragPtrP->fragTableId)) { + g_eventLogger->warning("(Delete) Received a delete request with " + "ttl_only_expired on a non-TTL table: %d", + req_struct->fragPtrP->fragTableId); + // return Notfound + terrorCode = 626; + tupkeyErrorLab(req_struct); + return -1; + } + } + /* * Zart * Here we check whether the row is expired diff --git a/storage/ndb/src/ndbapi/NdbScanOperation.cpp b/storage/ndb/src/ndbapi/NdbScanOperation.cpp index 54630711e8a3..7fbe803c26bf 100644 --- a/storage/ndb/src/ndbapi/NdbScanOperation.cpp +++ b/storage/ndb/src/ndbapi/NdbScanOperation.cpp @@ -483,11 +483,12 @@ inline int NdbScanOperation::scanImpl( * Zart * TTL */ - if (options && - options->optionsPresent & ScanOptions::SO_TTL_IGNORE) { + if (options != nullptr && + (options->optionsPresent & ScanOptions::SO_TTL_IGNORE)) { m_flags |= OF_TTL_IGNORE; } - if (options->optionsPresent & ScanOptions::SO_TTL_ONLY_EXPIRED) { + if (options != nullptr && + (options->optionsPresent & ScanOptions::SO_TTL_ONLY_EXPIRED)) { m_flags |= OF_TTL_ONLY_EXPIRED; } From 487f06744eb079f5bfc777fba9fdb505cefce7b0 Mon Sep 17 00:00:00 2001 From: KernelMaker Date: Tue, 3 Dec 2024 18:24:52 +0100 Subject: [PATCH 6/8] Develop TTLPurger in rest-server2 [PART-4] 1. handle NdbObjects properly by removeCachedXXX and InvalidateCacheXXX 2. adaptive scan batch size on each TTL table 3. adaptive sleeping time between each purging round 4.
Handle the potential error 296 and 499 if TransactionInactiveTimeout is configured too small --- .../ndb/rest-server2/server/src/ttl_purge.cpp | 286 +++++++++++++++--- .../ndb/rest-server2/server/src/ttl_purge.hpp | 15 +- .../ndb/src/kernel/blocks/dbtc/DbtcMain.cpp | 6 + 3 files changed, 263 insertions(+), 44 deletions(-) diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.cpp b/storage/ndb/rest-server2/server/src/ttl_purge.cpp index c5a45788b16e..ac6a08fcb9dc 100644 --- a/storage/ndb/rest-server2/server/src/ttl_purge.cpp +++ b/storage/ndb/rest-server2/server/src/ttl_purge.cpp @@ -17,7 +17,9 @@ * USA. */ #include +#include #include +#include #include "src/rdrs_rondb_connection_pool.hpp" #include "src/ttl_purge.hpp" @@ -88,6 +90,18 @@ TTLPurger* TTLPurger::CreateTTLPurger() { return ttl_purger; } +static void RandomSleep(int lower_bound, int upper_bound) { + if (lower_bound > upper_bound) { + std::swap(lower_bound, upper_bound); + } + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dist(lower_bound, upper_bound); + + int sleep_duration = dist(gen); + std::this_thread::sleep_for(std::chrono::milliseconds(sleep_duration)); +} + static constexpr int NDB_INVALID_SCHEMA_OBJECT = 241; void TTLPurger::SchemaWatcherJob() { bool init_event_succ = false; @@ -500,14 +514,33 @@ void TTLPurger::SchemaWatcherJob() { } case SCHEMA_OP_TYPE::SOT_DROP_TABLE: { + if (watcher_ndb_->setDatabaseName(db_str.c_str()) != 0) { + g_eventLogger->warning("[TTL SWatcher] Failed to select " + "database: %s" + ", error: %d(%s). 
Retry...", + db_str.c_str(), + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); + goto err; + } + dict->invalidateTable(table_str.c_str()); const std::lock_guard lock(mutex_); cache_updated = UpdateLocalCache(db_str, table_str, nullptr); break; } case SCHEMA_OP_TYPE::SOT_DROP_DB: { + if (watcher_ndb_->setDatabaseName(db_str.c_str()) != 0) { + g_eventLogger->warning("[TTL SWatcher] Failed to select " + "database: %s" + ", error: %d(%s). Retry...", + db_str.c_str(), + watcher_ndb_->getNdbError().code, + watcher_ndb_->getNdbError().message); + goto err; + } const std::lock_guard lock(mutex_); - cache_updated = DropDBLocalCache(db_str); + cache_updated = DropDBLocalCache(db_str, dict); break; } case SCHEMA_OP_TYPE::SOT_CREATE_TABLE: @@ -809,7 +842,9 @@ char* TTLPurger::GetEventName(NdbDictionary::Event::TableEvent event_type, return name_buf; } -bool TTLPurger::DropDBLocalCache(const std::string& db_str) { +bool TTLPurger::DropDBLocalCache(const std::string& db_str, + NdbDictionary::Dictionary* dict) { + assert(dict != nullptr); bool updated = false; for (auto iter = ttl_cache_.begin(); iter != ttl_cache_.end();) { auto pos = iter->first.find('/'); @@ -820,6 +855,10 @@ bool TTLPurger::DropDBLocalCache(const std::string& db_str) { "in cache: [%u, %u@%u]", iter->first.c_str(), iter->second.table_id, iter->second.ttl_sec, iter->second.col_no); + if (pos + 1 < iter->first.length()) { + std::string table = iter->first.substr(pos + 1); + dict->invalidateTable(table.c_str()); + } iter = ttl_cache_.erase(iter); updated = true; continue; @@ -860,6 +899,9 @@ void TTLPurger::PurgeWorkerJob() { NdbDictionary::Dictionary* dict = nullptr; const NdbDictionary::Table* ttl_tab = nullptr; const NdbDictionary::Index* ttl_index = nullptr; + Uint64 start_time = 0; + Uint64 end_time = 0; + bool sleep_between_each_round = true; NdbTransaction* trans = nullptr; NdbScanOperation* scan_op = nullptr; Int64 packed_last = 0; @@ -875,7 +917,50 @@ void 
TTLPurger::PurgeWorkerJob() { purge_trx_started = false; update_objects = false; if (cache_updated_) { + for (iter = local_ttl_cache.begin(); iter != local_ttl_cache.end(); + iter++) { + pos = iter->first.find('/'); + assert(pos != std::string::npos); + db_str = iter->first.substr(0, pos); + assert(pos + 1 < iter->first.length()); + table_str = iter->first.substr(pos + 1); + if (worker_ndb_->setDatabaseName(db_str.c_str()) != 0) { + g_eventLogger->warning("[TTL PWorker] Failed to select " + "database: %s" + ", error: %d(%s). Retry...", + db_str.c_str(), + worker_ndb_->getNdbError().code, + worker_ndb_->getNdbError().message); + goto err; + } + /* + * Notice: + * Based on the comment below, + * here we need to call invalidateIndex() for ttl_index, the reason is + * removeCachedTable() just decrease the reference count of the table + * object in the global list, it won't remove the object even the counter + * becomes to 0. But invalidateIndex() will set the object to DROP and + * remove it if the counter is 0. Since we don't call invalidateIndex + * in main thread(it's a major different with other normal table objects), + * so here we need to call invalidateIndex() + */ + dict->invalidateIndex(kTTLPurgeIndexName, table_str.c_str()); + /* + * Notice: + * Purge thread can only call removeCachedXXX to remove its + * thread local cached table object and decrease the reference + * count of the global cached table object. + * If we call invalidateTable() and following by getTable() here, + * Purge thread will invalidate the global cached table object + * and generate a new version of table object, which will make + * the main thread's following invalidateTable() + getTable() gets + * this table object, stops the chance to get the latest one from + * data nodes. 
+ */ + dict->removeCachedTable(table_str.c_str()); + } local_ttl_cache.clear(); + purged_pos_.clear(); const std::lock_guard lock(mutex_); local_ttl_cache = ttl_cache_; cache_updated_ = false; @@ -911,9 +996,14 @@ void TTLPurger::PurgeWorkerJob() { sleep(2); continue; } + + sleep_between_each_round = true; dict = worker_ndb_->getDictionary(); for (iter = local_ttl_cache.begin(); iter != local_ttl_cache.end(); iter++) { + if (purge_worker_exit_) { + break; + } purge_trx_started = false; { GetNow(encoded_now); @@ -926,6 +1016,8 @@ void TTLPurger::PurgeWorkerJob() { break; } + start_time = my_micro_time(); + log_buf = "[TTL PWorker] Processing " + iter->first + ": "; pos = iter->first.find('/'); @@ -940,40 +1032,12 @@ void TTLPurger::PurgeWorkerJob() { if (worker_ndb_->setDatabaseName(db_str.c_str()) != 0) { g_eventLogger->warning("[TTL PWorker] Failed to select " - "database: %s" - ", error: %d(%s). Retry...", - db_str.c_str(), - worker_ndb_->getNdbError().code, - worker_ndb_->getNdbError().message); - } - if (update_objects) { - /* - * Notice: - * Based on the comment below, - * here we need to call invalidateIndex() for ttl_index, the reason is - * removeCachedTable() just decrease the reference count of the table - * object in the global list, it won't remove the object even the counter - * becomes to 0. But invalidateIndex() will set the object to DROP and - * remove it if the counter is 0. Since we don't call invalidateIndex - * in main thread(it's a major different with other normal table objects), - * so here we need to call invalidateIndex() - */ - dict->invalidateIndex(kTTLPurgeIndexName, table_str.c_str()); - /* - * Notice: - * Purge thread can only call removeCachedXXX to remove its - * thread local cached table object and decrease the reference - * count of the global cached table object. 
- * If we call invalidateTable() and following by getTable() here, - * Purge thread will invalidate the global cached table object - * and generate a new version of table object, which will make - * the main thread's following invalidateTable() + getTable() gets - * this table object, stops the chance to get the latest one from - * data nodes. - */ - dict->removeCachedTable(table_str.c_str()); - - purged_pos_.clear(); + "database: %s" + ", error: %d(%s). Retry...", + db_str.c_str(), + worker_ndb_->getNdbError().code, + worker_ndb_->getNdbError().message); + goto err; } ttl_tab = dict->getTable(table_str.c_str()); if (ttl_tab == nullptr) { @@ -996,8 +1060,13 @@ void TTLPurger::PurgeWorkerJob() { std::to_string(ttl_tab->getPartitionCount()) + "]"); assert(iter->second.part_id < ttl_tab->getPartitionCount()); + log_buf += ("[BS: " + std::to_string(iter->second.batch_size) + "]"); + trx_failure_times = 0; retry_trx: + if (purge_worker_exit_) { + break; + } trans = worker_ndb_->startTransaction(); if (trans == nullptr) { g_eventLogger->warning("[TTL PWorker] Failed to start " @@ -1047,8 +1116,8 @@ void TTLPurger::PurgeWorkerJob() { if (index_scan_op->readTuples(NdbOperation::LM_Exclusive, scanFlags, - 1, // parallel - kPurgeBatchSize) // batch + 1, // parallel + iter->second.batch_size) // batch != 0) { g_eventLogger->warning("[TTL PWorker] Failed to readTuples " "on table %s" @@ -1123,6 +1192,13 @@ void TTLPurger::PurgeWorkerJob() { goto err; } memset(encoded_curr_purge, 0, 8); + /* + * Sleeping here can produce error + * 296(Time-out in NDB, probably caused by deadlock), + * which is handled below. + * + * sleep(XXX); + */ while ((check = index_scan_op->nextResult(true)) == 0) { do { memset(encoded_curr_purge, 0, 8); @@ -1159,9 +1235,38 @@ void TTLPurger::PurgeWorkerJob() { } break; } + if (check == -1) { + g_eventLogger->warning("[TTL PWorker] Failed to nextResult(true) " + "on table %s" + ", error: %d(%s). 
Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + if (trans->getNdbError().code == 296) { + /* + * if the TransactionInactiveTimeout is set too small, + * error 296(Time-out in NDB, probably caused by deadlock) + * may happen, change the batch size to the minimum and retry + */ + iter->second.batch_size = kPurgeBatchSize; + g_eventLogger->warning("[TTL PWorker] Changed the purging batch " + "size of table %s to the minimum size %u, " + "Retry...", + ttl_tab->getName(), + iter->second.batch_size); + } + goto err; + } /** * Commit all prepared operations */ + /* + * Sleeping here can produce error + * 499(Scan take over error) + * which is handled below. + * + * sleep(XXX); + */ if (trans->execute(NdbTransaction::Commit) == -1) { g_eventLogger->warning("[TTL PWorker] Failed to commit transaction " "on table %s" @@ -1169,6 +1274,19 @@ void TTLPurger::PurgeWorkerJob() { ttl_tab->getName(), trans->getNdbError().code, trans->getNdbError().message); + if (trans->getNdbError().code == 499) { + /* + * if the TransactionInactiveTimeout is set too small, + * error 499(Scan take over error) may happen, + * change the batch size to the minimum and retry + */ + iter->second.batch_size = kPurgeBatchSize; + g_eventLogger->warning("[TTL PWorker] Changed the purging batch " + "size of table %s to the minimum size %u, " + "Retry...", + ttl_tab->getName(), + iter->second.batch_size); + } goto err; } else if (*reinterpret_cast(encoded_curr_purge) != 0) { packed_last = my_datetime_packed_from_binary(encoded_curr_purge, 0); @@ -1195,7 +1313,7 @@ void TTLPurger::PurgeWorkerJob() { scan_op->setPartitionId(iter->second.part_id); Uint32 scanFlags = NdbScanOperation::SF_OnlyExpiredScan; if (scan_op->readTuples(NdbOperation::LM_Exclusive, scanFlags, - 1, kPurgeBatchSize) != 0) { + 1, iter->second.batch_size) != 0) { g_eventLogger->warning("[TTL PWorker] Failed to readTuples " "on table %s" ", error: %d(%s). 
Retry...", @@ -1223,6 +1341,13 @@ trans->getNdbError().message); goto err; } + /* + * Sleeping here can produce error + * 296(Time-out in NDB, probably caused by deadlock), + * which is handled below. + * + * sleep(XXX); + */ while ((check = scan_op->nextResult(true)) == 0) { do { // std::cerr << "Get an expired row: timestamp = [" @@ -1257,9 +1382,39 @@ break; } + + if (check == -1) { + g_eventLogger->warning("[TTL PWorker] Failed to nextResult(true) " + "on table %s" + ", error: %d(%s). Retry...", + ttl_tab->getName(), + trans->getNdbError().code, + trans->getNdbError().message); + if (trans->getNdbError().code == 296) { + /* + * if the TransactionInactiveTimeout is set too small, + * error 296(Time-out in NDB, probably caused by deadlock) + * may happen, change the batch size to the minimum and retry + */ + iter->second.batch_size = kPurgeBatchSize; + g_eventLogger->warning("[TTL PWorker] Changed the purging batch " + "size of table %s to the minimum size %u, " + "Retry...", + ttl_tab->getName(), + iter->second.batch_size); + } + goto err; + } /** * Commit all prepared operations */ + /* + * Sleeping here can produce error + * 499(Scan take over error) + * which is handled below. 
+ * + * sleep(XXX); + */ if (trans->execute(NdbTransaction::Commit) == -1) { g_eventLogger->warning("[TTL PWorker] Failed to commit transaction " "on table %s" @@ -1267,6 +1422,19 @@ void TTLPurger::PurgeWorkerJob() { ttl_tab->getName(), trans->getNdbError().code, trans->getNdbError().message); + if (trans->getNdbError().code == 499) { + /* + * if the TransactionInactiveTimeout is set too small, + * error 499(Scan take over error) may happen, + * change the batch size to the minimum and retry + */ + iter->second.batch_size = kPurgeBatchSize; + g_eventLogger->warning("[TTL PWorker] Changed the purgine batch " + "size of table %s to the minimum size %u, " + "Retry...", + ttl_tab->getName(), + iter->second.batch_size); + } goto err; } } else { @@ -1282,6 +1450,18 @@ void TTLPurger::PurgeWorkerJob() { trans = nullptr; log_buf += " Purged " + std::to_string(deletedRows) + " rows"; g_eventLogger->info("%s", log_buf.c_str()); + end_time = my_micro_time(); + + iter->second.batch_size = AdjustBatchSize(iter->second.batch_size, + deletedRows, + end_time - start_time); + if (sleep_between_each_round && + iter->second.batch_size == kMaxPurgeBatchSize) { + // At least 1 table finished its batch purging in the max size, + // so don't sleep and start the next round as soon as possible + sleep_between_each_round = false; + } + iter->second.part_id = ((iter->second.part_id + 1) % ttl_tab->getPartitionCount()); // Finish 1 batch @@ -1290,6 +1470,7 @@ void TTLPurger::PurgeWorkerJob() { err: if (trans != nullptr) { worker_ndb_->closeTransaction(trans); + trans = nullptr; } trx_failure_times++; sleep(1); @@ -1308,7 +1489,10 @@ void TTLPurger::PurgeWorkerJob() { } } // Finish 1 round - sleep(2); + if (sleep_between_each_round) { + // Sleep for 1000 - 2000 ms after finishing each round + RandomSleep(1000, 2000); + } } while (!purge_worker_exit_); // No need to return PurgeWorker NdbObject here, SchemaWatch will do that. 
@@ -1623,3 +1807,25 @@ bool TTLPurger::Run() { } return true; } + +Uint32 TTLPurger::AdjustBatchSize(Uint32 curr_batch_size, + Uint32 deleted_rows, + Uint64 used_time) { + if (deleted_rows == curr_batch_size && used_time < kPurgeThresholdTime) { + if (curr_batch_size + kPurgeBatchSizePerIncr <= kMaxPurgeBatchSize) { + // Increase + return curr_batch_size + kPurgeBatchSizePerIncr; + } else { + // Keep as max + assert(curr_batch_size == kMaxPurgeBatchSize); + return curr_batch_size; + } + } else if (curr_batch_size - kPurgeBatchSizePerIncr >= kPurgeBatchSize) { + // Decrease + return curr_batch_size - kPurgeBatchSizePerIncr; + } else { + // Keep as min + assert(curr_batch_size == kPurgeBatchSize); + return curr_batch_size; + } +} diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.hpp b/storage/ndb/rest-server2/server/src/ttl_purge.hpp index d87bcd2c303c..4cbca4ebdffe 100644 --- a/storage/ndb/rest-server2/server/src/ttl_purge.hpp +++ b/storage/ndb/rest-server2/server/src/ttl_purge.hpp @@ -37,7 +37,10 @@ class TTLPurger { static constexpr const char* kTTLPurgeIndexName = "ttl_index"; static constexpr int kNoEventCol = 10; static constexpr int kLeaseSeconds = 20; - static constexpr Uint32 kPurgeBatchSize = 10; + static constexpr Uint32 kPurgeBatchSize = 5; + static constexpr Uint32 kPurgeBatchSizePerIncr = 5; + static constexpr Uint32 kMaxPurgeBatchSize = 50; + static constexpr Uint32 kPurgeThresholdTime = 1000000; // 1 second static constexpr int kMaxTrxRetryTimes = 10; static constexpr const char* kEventColNames[kNoEventCol] = { "db", @@ -68,8 +71,8 @@ class TTLPurger { int32_t table_id; uint32_t ttl_sec; uint32_t col_no; - uint32_t part_id = {0}; // Only valid in local ttl cache - unsigned char last_purged[8] = {0}; // Only valid in local ttl cache + uint32_t part_id = {0}; // Only valid in local ttl cache + uint32_t batch_size = {kPurgeBatchSize}; // Only valid in local ttl cache } TTLInfo; std::map ttl_cache_; std::mutex mutex_; @@ -84,7 +87,8 @@ class 
TTLPurger { static char* GetEventName( NdbDictionary::Event::TableEvent event_type, char* name_buf); - bool DropDBLocalCache(const std::string& db_str); + bool DropDBLocalCache(const std::string& db_str, + NdbDictionary::Dictionary* dict); std::atomic purge_worker_asks_for_retry_; bool schema_watcher_running_; @@ -94,6 +98,9 @@ class TTLPurger { static Int64 GetNow(unsigned char* encoded_now); bool UpdateLease(const unsigned char* encoded_now); bool IsNodeAlive(const unsigned char* encoded_last_active); + Uint32 AdjustBatchSize(Uint32 curr_batch_size, + Uint32 deleted_rows, + Uint64 used_time); bool purge_worker_running_; std::thread* purge_worker_; std::atomic purge_worker_exit_; diff --git a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp index 8f0a18bacd1a..9221501fd011 100644 --- a/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp +++ b/storage/ndb/src/kernel/blocks/dbtc/DbtcMain.cpp @@ -16209,9 +16209,15 @@ void Dbtc::execDIH_SCAN_TAB_CONF(Signal *signal, ScanRecordPtr scanptr, if (scanptr.p->m_scan_dist_key_flag) // Pruned scan { jamDebug(); + /* + * NOTICE: + * Have to comment this assert since + * is_ttl_table() can be false when a new TTL purging scan request + * just comes after an altering table(TTL=OFF) ndbrequire(DictTabInfo::isOrderedIndex(tabPtr.p->tableType) || tabPtr.p->get_user_defined_partitioning() || is_ttl_table(tabPtr.p)); + */ /** * Prepare for sendDihGetNodeReq to request DBDIH info for From 9fabcee1bf92ebf638e7f7ffe5bc02e8e252082b Mon Sep 17 00:00:00 2001 From: KernelMaker Date: Tue, 10 Dec 2024 08:10:45 +0100 Subject: [PATCH 7/8] Develop TTLPurger in rest-server2 [PART-5][bugfix] --- .../ndb/rest-server2/server/src/ttl_purge.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.cpp b/storage/ndb/rest-server2/server/src/ttl_purge.cpp index ac6a08fcb9dc..524f2b670248 100644 --- a/storage/ndb/rest-server2/server/src/ttl_purge.cpp 
+++ b/storage/ndb/rest-server2/server/src/ttl_purge.cpp @@ -1067,6 +1067,21 @@ void TTLPurger::PurgeWorkerJob() { if (purge_worker_exit_) { break; } + /* + * Transaction may be failed by the schema changing, + * here we getTable() to get the latest NdbObject(the + * previous has already been removed by removeCachedObject() + * in the 'err' handling + */ + ttl_tab = dict->getTable(table_str.c_str()); + if (ttl_tab == nullptr) { + g_eventLogger->warning("[TTL PWorker] Failed to get table: " + "%s, error: %d(%s). Retry...", + table_str.c_str(), + dict->getNdbError().code, + dict->getNdbError().message); + goto err; + } trans = worker_ndb_->startTransaction(); if (trans == nullptr) { g_eventLogger->warning("[TTL PWorker] Failed to start " @@ -1482,6 +1497,7 @@ void TTLPurger::PurgeWorkerJob() { purge_worker_exit_ = true; break; } else if (purge_trx_started) { + dict->removeCachedTable(table_str.c_str()); goto retry_trx; } else { // retry from begining From d3acd500f6f73b349b111e73bc62c1d4cffa3772 Mon Sep 17 00:00:00 2001 From: KernelMaker Date: Thu, 9 Jan 2025 10:59:06 +0100 Subject: [PATCH 8/8] Develop TTLPurger in rest-server2 [PART-6] Some modifications based on the review comments --- .../ndb/rest-server2/server/src/ttl_purge.cpp | 49 +++++++++++++------ .../ndb/rest-server2/server/src/ttl_purge.hpp | 23 +++++---- 2 files changed, 46 insertions(+), 26 deletions(-) diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.cpp b/storage/ndb/rest-server2/server/src/ttl_purge.cpp index 524f2b670248..79662944de54 100644 --- a/storage/ndb/rest-server2/server/src/ttl_purge.cpp +++ b/storage/ndb/rest-server2/server/src/ttl_purge.cpp @@ -28,6 +28,7 @@ #include "include/my_murmur3.h" #include "include/my_systime.h" #include "include/my_time.h" +#include "NdbSleep.h" #include extern EventLogger *g_eventLogger; @@ -51,9 +52,9 @@ TTLPurger::~TTLPurger() { exit_ = true; if (schema_watcher_running_) { assert(schema_watcher_ != nullptr); - if (schema_watcher_->joinable()) { - 
schema_watcher_->join(); - } + void* status; + NdbThread_WaitFor(schema_watcher_, &status); + NdbThread_Destroy(&schema_watcher_); schema_watcher_ = nullptr; schema_watcher_running_ = false; } @@ -99,7 +100,13 @@ static void RandomSleep(int lower_bound, int upper_bound) { std::uniform_int_distribution dist(lower_bound, upper_bound); int sleep_duration = dist(gen); - std::this_thread::sleep_for(std::chrono::milliseconds(sleep_duration)); + NdbSleep_MilliSleep(sleep_duration); +} + +void* TTLPurger::_PurgeWorkerJob(void* arg) { + TTLPurger* p_this = static_cast(arg); + p_this->PurgeWorkerJob(); + return nullptr; } static constexpr int NDB_INVALID_SCHEMA_OBJECT = 241; @@ -295,8 +302,10 @@ void TTLPurger::SchemaWatcherJob() { // Set it to true to make purge worker load cache cache_updated_ = true; purge_worker_exit_ = false; - purge_worker_ = new std::thread( - std::bind(&TTLPurger::PurgeWorkerJob, this)); + purge_worker_ = NdbThread_Create(TTLPurger::_PurgeWorkerJob, + (NDB_THREAD_ARG *)this, + 0, "PurgeWorker", + NDB_THREAD_PRIO_MEAN); purge_worker_running_ = true; // Main schema_watcher_ task @@ -683,9 +692,9 @@ void TTLPurger::SchemaWatcherJob() { purge_worker_exit_ = true; if (purge_worker_running_) { assert(purge_worker_ != nullptr); - if (purge_worker_->joinable()) { - purge_worker_->join(); - } + void* status; + NdbThread_WaitFor(purge_worker_, &status); + NdbThread_Destroy(&purge_worker_); purge_worker_ = nullptr; purge_worker_running_ = false; } @@ -886,11 +895,11 @@ void TTLPurger::PurgeWorkerJob() { size_t pos = 0; std::string db_str; std::string table_str; - uint32_t ttl_col_no = 0; + Uint32 ttl_col_no = 0; int check = 0; int table_id = 0; Uint32 hash_val = 0; - uint32_t deletedRows = 0; + Uint32 deletedRows = 0; int trx_failure_times = 0; std::map::iterator iter; std::map>::iterator purge_tab_iter; @@ -1516,7 +1525,7 @@ void TTLPurger::PurgeWorkerJob() { return; } -bool TTLPurger::GetShard(int32_t* shard, int32_t* n_purge_nodes, +bool 
TTLPurger::GetShard(Int32* shard, Int32* n_purge_nodes, bool update_objects) { *shard = kShardNosharding; *n_purge_nodes = 0; @@ -1549,8 +1558,8 @@ bool TTLPurger::GetShard(int32_t* shard, int32_t* n_purge_nodes, NdbRecAttr* rec_attr[3]; NdbTransaction* trans = nullptr; NdbScanOperation* scan_op = nullptr; - int32_t n_nodes = 0;; - std::vector purge_nodes; + Int32 n_nodes = 0;; + std::vector purge_nodes; size_t pos = 0; bool check = 0; std::string log_buf = "[TTL PWorker] "; @@ -1813,12 +1822,20 @@ bool TTLPurger::IsNodeAlive(const unsigned char* encoded_last_active) { } } +void* TTLPurger::_SchemaWatcherJob(void* arg) { + TTLPurger* p_this = static_cast(arg); + p_this->SchemaWatcherJob(); + return nullptr; +} + bool TTLPurger::Run() { if (!schema_watcher_running_) { assert(schema_watcher_ == nullptr); assert(!purge_worker_running_); - schema_watcher_ = new std::thread( - std::bind(&TTLPurger::SchemaWatcherJob, this)); + schema_watcher_ = NdbThread_Create(TTLPurger::_SchemaWatcherJob, + (NDB_THREAD_ARG *)this, + 0, "SchemaWatcher", + NDB_THREAD_PRIO_MEAN); schema_watcher_running_ = true; } return true; diff --git a/storage/ndb/rest-server2/server/src/ttl_purge.hpp b/storage/ndb/rest-server2/server/src/ttl_purge.hpp index 4cbca4ebdffe..d5b809b76b56 100644 --- a/storage/ndb/rest-server2/server/src/ttl_purge.hpp +++ b/storage/ndb/rest-server2/server/src/ttl_purge.hpp @@ -26,6 +26,7 @@ #include #include +#include "NdbThread.h" class TTLPurger { public: @@ -58,21 +59,23 @@ class TTLPurger { static TTLPurger* CreateTTLPurger(); bool Run(); ~TTLPurger(); + void SchemaWatcherJob(); + void PurgeWorkerJob(); private: TTLPurger(); - void SchemaWatcherJob(); + static void* _SchemaWatcherJob(void* arg); + static void* _PurgeWorkerJob(void* arg); Ndb* watcher_ndb_; - void PurgeWorkerJob(); Ndb* worker_ndb_; std::atomic exit_; typedef struct { - int32_t table_id; - uint32_t ttl_sec; - uint32_t col_no; - uint32_t part_id = {0}; // Only valid in local ttl cache - uint32_t 
batch_size = {kPurgeBatchSize}; // Only valid in local ttl cache + Int32 table_id; + Uint32 ttl_sec; + Uint32 col_no; + Uint32 part_id = {0}; // Only valid in local ttl cache + Uint32 batch_size = {kPurgeBatchSize}; // Only valid in local ttl cache } TTLInfo; std::map ttl_cache_; std::mutex mutex_; @@ -92,9 +95,9 @@ class TTLPurger { std::atomic purge_worker_asks_for_retry_; bool schema_watcher_running_; - std::thread* schema_watcher_; + NdbThread* schema_watcher_; - bool GetShard(int32_t* shard, int32_t* n_purge_nodes, bool update_objects); + bool GetShard(Int32* shard, Int32* n_purge_nodes, bool update_objects); static Int64 GetNow(unsigned char* encoded_now); bool UpdateLease(const unsigned char* encoded_now); bool IsNodeAlive(const unsigned char* encoded_last_active); @@ -102,7 +105,7 @@ class TTLPurger { Uint32 deleted_rows, Uint64 used_time); bool purge_worker_running_; - std::thread* purge_worker_; + NdbThread* purge_worker_; std::atomic purge_worker_exit_; std::map> purged_pos_; };