Skip to content

Commit

Permalink
[frontend] List named blobs SQL (#2955)
Browse files Browse the repository at this point in the history
Fix list-named-blobs SQL to use smaller joins. This improves query performance by 1000x.
  • Loading branch information
snalli authored Dec 4, 2024
1 parent a808933 commit e9dd1d1
Show file tree
Hide file tree
Showing 3 changed files with 160 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,18 @@ public class MySqlNamedBlobDbConfig {
public static final String QUERY_STALE_DATA_MAX_RESULTS = PREFIX + "query.stale.data.max.results";
public static final String STALE_DATA_RETENTION_DAYS = PREFIX + "stale.data.retention.days";
public static final String TRANSACTION_ISOLATION_LEVEL = PREFIX + "transaction.isolation.level";
// NOTE(review): unlike the neighbouring keys this one is not built from PREFIX; it is left
// unchanged here because renaming a config key would break existing deployments.
public static final String LIST_NAMED_BLOBS_SQL_OPTION = "list.named.blobs.sql.option";

// Default and inclusive bounds used when reading LIST_NAMED_BLOBS_SQL_OPTION.
public static final int DEFAULT_LIST_NAMED_BLOBS_SQL_OPTION = 2;
public static final int MIN_LIST_NAMED_BLOBS_SQL_OPTION = 1;
public static final int MAX_LIST_NAMED_BLOBS_SQL_OPTION = 2;

/**
 * Option to pick the SQL query to use for listing named blobs.
 * Valid values are {@link #MIN_LIST_NAMED_BLOBS_SQL_OPTION} through
 * {@link #MAX_LIST_NAMED_BLOBS_SQL_OPTION}; the default is {@link #DEFAULT_LIST_NAMED_BLOBS_SQL_OPTION}.
 * Check getListNamedBlobsSQL() for more details.
 */
@Config(LIST_NAMED_BLOBS_SQL_OPTION)
public final int listNamedBlobsSQLOption;


/**
* Serialized json array containing the information about all mysql end points.
Expand Down Expand Up @@ -93,6 +105,8 @@ public class MySqlNamedBlobDbConfig {
public final TransactionIsolationLevel transactionIsolationLevel;

public MySqlNamedBlobDbConfig(VerifiableProperties verifiableProperties) {
this.listNamedBlobsSQLOption = verifiableProperties.getIntInRange(LIST_NAMED_BLOBS_SQL_OPTION,
DEFAULT_LIST_NAMED_BLOBS_SQL_OPTION, MIN_LIST_NAMED_BLOBS_SQL_OPTION, MAX_LIST_NAMED_BLOBS_SQL_OPTION);
this.dbInfo = verifiableProperties.getString(DB_INFO);
this.localPoolSize = verifiableProperties.getIntInRange(LOCAL_POOL_SIZE, 5, 1, Integer.MAX_VALUE);
this.remotePoolSize = verifiableProperties.getIntInRange(REMOTE_POOL_SIZE, 1, 1, Integer.MAX_VALUE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
import java.util.Arrays;
import java.util.Calendar;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;
Expand Down Expand Up @@ -266,6 +267,79 @@ record = new NamedBlobRecord(account.getName(), container.getName(), blobName, b
namedBlobDb.get(account.getName(), container.getName(), blobName).get());
}

/**
 * Test that listing named blobs by prefix returns, for each blob name, only the latest READY and
 * non-deleted version, ignoring IN_PROGRESS versions, deleted blobs, and blobs that share the
 * same name but live in a different container or account.
 */
@Test
public void testListNamedBlobsWithStaleRecords() throws Exception {
  Iterator<Account> accountIter = accountService.getAllAccounts().iterator();
  Account a1 = accountIter.next();
  Iterator<Container> a1containerIter = a1.getAllContainers().iterator();
  Container a1c1 = a1containerIter.next();
  Container a1c2 = a1containerIter.next();
  Account a2 = accountIter.next();
  Iterator<Container> a2containerIter = a2.getAllContainers().iterator();
  Container a2c1 = a2containerIter.next();
  String blobName = "testListNamedBlobsWithStaleRecords";
  NamedBlobRecord v1, v1_other, v2, v2_other;
  Page<NamedBlobRecord> page;

  // put blob Ready and list should return the blob
  v1 = new NamedBlobRecord(a1.getName(), a1c1.getName(), blobName, getBlobId(a1, a1c1),
      Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis() + TimeUnit.HOURS.toMillis(1));
  v1_other = new NamedBlobRecord(a1.getName(), a1c1.getName(), blobName + "-other", getBlobId(a1, a1c1),
      Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis() + TimeUnit.HOURS.toMillis(1));
  // add some extra blobs with the same name in another container and another account; they must
  // never show up when listing (a1, a1c1)
  NamedBlobRecord a1c2Blob = new NamedBlobRecord(a1.getName(), a1c2.getName(), blobName, getBlobId(a1, a1c2),
      Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis() + TimeUnit.HOURS.toMillis(1));
  NamedBlobRecord a2c1Blob = new NamedBlobRecord(a2.getName(), a2c1.getName(), blobName, getBlobId(a2, a2c1),
      Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis() + TimeUnit.HOURS.toMillis(1));
  // the original test built these two records but never stored them, so account/container
  // isolation was not actually exercised
  namedBlobDb.put(a1c2Blob, NamedBlobState.READY, true).get();
  namedBlobDb.put(a2c1Blob, NamedBlobState.READY, true).get();
  namedBlobDb.put(v1, NamedBlobState.READY, true).get();
  NamedBlobRecord v1_get = namedBlobDb.get(a1.getName(), a1c1.getName(), blobName).get();
  assertEquals(v1, v1_get);
  namedBlobDb.put(v1_other, NamedBlobState.READY, true).get();
  NamedBlobRecord v1_other_get = namedBlobDb.get(a1.getName(), a1c1.getName(), blobName + "-other").get();
  assertEquals(v1_other, v1_other_get);
  page = namedBlobDb.list(a1.getName(), a1c1.getName(), blobName, null, null).get();
  assertEquals(2, page.getEntries().size());
  assertEquals(v1_get, page.getEntries().get(0));
  assertEquals(v1_other_get, page.getEntries().get(1));
  time.sleep(100);

  // put blob in-progress and list should return the Ready blob
  v2 = new NamedBlobRecord(a1.getName(), a1c1.getName(), blobName, getBlobId(a1, a1c1),
      Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis() + TimeUnit.HOURS.toMillis(1));
  namedBlobDb.put(v2, NamedBlobState.IN_PROGRESS, true).get();
  page = namedBlobDb.list(a1.getName(), a1c1.getName(), blobName, null, null).get();
  assertEquals(2, page.getEntries().size());
  assertEquals(v1_get, page.getEntries().get(0));
  assertEquals(v1_other_get, page.getEntries().get(1));
  time.sleep(100);

  // update blob and list should return the new blob
  v2 = new NamedBlobRecord(a1.getName(), a1c1.getName(), blobName, getBlobId(a1, a1c1),
      Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis() + TimeUnit.HOURS.toMillis(1));
  v2_other = new NamedBlobRecord(a1.getName(), a1c1.getName(), blobName + "-other", getBlobId(a1, a1c1),
      Calendar.getInstance(TimeZone.getTimeZone("UTC")).getTimeInMillis() + TimeUnit.HOURS.toMillis(1));
  namedBlobDb.put(v2, NamedBlobState.READY, true).get();
  namedBlobDb.put(v2_other, NamedBlobState.READY, true).get();
  page = namedBlobDb.list(a1.getName(), a1c1.getName(), blobName, null, null).get();
  assertEquals(2, page.getEntries().size());
  assertEquals(v2, page.getEntries().get(0));
  assertEquals(v2_other, page.getEntries().get(1));
  time.sleep(100);

  // delete one blob and list should return only the remaining blob
  namedBlobDb.delete(a1.getName(), a1c1.getName(), blobName).get();
  page = namedBlobDb.list(a1.getName(), a1c1.getName(), blobName, null, null).get();
  assertEquals(1, page.getEntries().size());
  assertEquals(v2_other, page.getEntries().get(0));
  time.sleep(100);

  // delete the remaining blob and list should return empty
  namedBlobDb.delete(a1.getName(), a1c1.getName(), blobName + "-other").get();
  page = namedBlobDb.list(a1.getName(), a1c1.getName(), blobName, null, null).get();
  assertEquals(0, page.getEntries().size());

  // clean up the extra blobs so they do not leak into other tests
  namedBlobDb.delete(a1.getName(), a1c2.getName(), blobName).get();
  namedBlobDb.delete(a2.getName(), a2c1.getName(), blobName).get();
}

/**
* Test behavior with list named blob
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,7 @@ class MySqlNamedBlobDb implements NamedBlobDb {
* with filter on the blob_name, and order by blob_name.
*/
// @formatter:off
// Placeholders, in order: account_id, container_id, blob_name LIKE pattern, blob_name lower bound, LIMIT.
// The current-time argument is substituted with %2$s (lower-case): the upper-case %S conversion
// would upper-case the substituted SQL fragment.
private static final String LIST_QUERY_V2 = String.format(""
    + "SELECT t1.blob_name, t1.blob_id, t1.version, t1.deleted_ts, t1.blob_size, t1.modified_ts "
    + "FROM named_blobs_v2 t1 "
    + "INNER JOIN "
    + "(SELECT account_id, container_id, blob_name, max(version) as version "
    + "FROM named_blobs_v2 "
    + "WHERE (account_id, container_id) = (?, ?) AND %1$s "
    + " AND (deleted_ts IS NULL OR deleted_ts>%2$s) "
    + " GROUP BY account_id, container_id, blob_name) t2 "
    + "ON (t1.account_id,t1.container_id,t1.blob_name,t1.version) = (t2.account_id,t2.container_id,t2.blob_name,t2.version) "
    + "WHERE t1.blob_name LIKE ? AND t1.blob_name >= ? ORDER BY t1.blob_name ASC LIMIT ?", STATE_MATCH, CURRENT_TIME);
// SQL used to list named blobs when a blob-name prefix is given. Per-instance (not static):
// it is assigned in the constructor from getListNamedBlobsSQL(config).
private final String LIST_NAMED_BLOBS_SQL;
// @formatter:on

/**
Expand Down Expand Up @@ -230,6 +220,7 @@ class MySqlNamedBlobDb implements NamedBlobDb {
String localDatacenter, MetricRegistry metricRegistry, Time time) {
this.accountService = accountService;
this.config = config;
this.LIST_NAMED_BLOBS_SQL = getListNamedBlobsSQL(config);
this.localDatacenter = localDatacenter;
this.retryExecutor = new RetryExecutor(null);
this.transactionExecutors = MySqlUtils.getDbEndpointsPerDC(config.dbInfo)
Expand All @@ -251,6 +242,63 @@ class MySqlNamedBlobDb implements NamedBlobDb {
this(accountService, config, dataSourceFactory, localDatacenter, metricRegistry, SystemTime.getInstance());
}

/**
 * Build the SQL statement used to list named blobs that match a blob-name prefix.
 * <p>
 * The statement is chosen by {@code config.listNamedBlobsSQLOption}:
 * <ul>
 *   <li>{@code 1}: the old query, which joins the whole table against the per-blob max version.
 *       It has 5 placeholders: account_id, container_id, LIKE pattern, blob_name lower bound, LIMIT.</li>
 *   <li>{@code 2}: a query built from two CTEs that are both restricted to the account, container
 *       and prefix before being joined. It has 9 placeholders: (account_id, container_id,
 *       LIKE pattern, blob_name lower bound) twice, followed by LIMIT.</li>
 * </ul>
 * NOTE(review): the two options take a different number of bind parameters, so the code that
 * binds the prepared statement must match the selected option - confirm against the caller.
 *
 * @param config the named-blob DB config that carries the selected option.
 * @return the SQL text with the blob-state and current-time fragments already substituted.
 * @throws IllegalArgumentException if the configured option is neither 1 nor 2.
 */
public String getListNamedBlobsSQL(MySqlNamedBlobDbConfig config) {
  switch (config.listNamedBlobsSQLOption) {
    case 1:
      // old query that joins the entire table with a few selected rows
      // (%2$s, not %2$S: the upper-case conversion would upper-case the substituted fragment)
      return String.format(""
          + "SELECT t1.blob_name, t1.blob_id, t1.version, t1.deleted_ts, t1.blob_size, t1.modified_ts "
          + "FROM named_blobs_v2 t1 "
          + "INNER JOIN "
          + "(SELECT account_id, container_id, blob_name, max(version) as version "
          + "FROM named_blobs_v2 "
          + "WHERE (account_id, container_id) = (?, ?) AND %1$s "
          + " AND (deleted_ts IS NULL OR deleted_ts>%2$s) "
          + " GROUP BY account_id, container_id, blob_name) t2 "
          + "ON (t1.account_id,t1.container_id,t1.blob_name,t1.version) = (t2.account_id,t2.container_id,t2.blob_name,t2.version) "
          + "WHERE t1.blob_name LIKE ? AND t1.blob_name >= ? ORDER BY t1.blob_name ASC LIMIT ?", STATE_MATCH, CURRENT_TIME);
    case 2:
      // List named-blobs query, given a prefix.
      // The first query selects all versions of blobs with a given prefix that are not deleted,
      // from a given account and container.
      // The second query selects the most recent version of blobs with a given prefix, from a
      // given account and container.
      // Finally, we join and select a version for each blob, that is ready to serve.
      // This can be the most recent version if it is not deleted, or nothing.
      return String.format(""
          + " WITH "
          + " BlobsAllVersion AS ( "
          + " SELECT blob_name, blob_id, version, deleted_ts, blob_size, modified_ts "
          + " FROM named_blobs_v2 "
          + " WHERE account_id = ? " // 1
          + " AND container_id = ? " // 2
          + " AND %1$s " // blob_state = x
          + " AND blob_name LIKE ? " // 3
          + " AND blob_name >= ? " // 4
          + " AND (deleted_ts IS NULL OR deleted_ts > %2$s) "
          + " ), "
          + " BlobsMaxVersion AS ( "
          + " SELECT blob_name, MAX(version) as version "
          + " FROM named_blobs_v2 "
          + " WHERE account_id = ? " // 5
          + " AND container_id = ? " // 6
          + " AND %1$s " // blob_state = x
          + " AND blob_name LIKE ? " // 7
          + " AND blob_name >= ? " // 8
          + " GROUP BY blob_name "
          + " ) "
          + " SELECT BlobsAllVersion.* "
          + " FROM BlobsAllVersion "
          + " INNER JOIN BlobsMaxVersion "
          + " ON (BlobsAllVersion.blob_name = BlobsMaxVersion.blob_name "
          + " AND BlobsAllVersion.version = BlobsMaxVersion.version) "
          + " ORDER BY BlobsAllVersion.blob_name "
          + " LIMIT ?", STATE_MATCH, CURRENT_TIME); // 9
    default:
      throw new IllegalArgumentException("Invalid listNamedBlobsSQLOption: " + config.listNamedBlobsSQLOption);
  }
}

@Override
public void close() throws IOException {
this.transactionExecutors.values().forEach(TransactionExecutor::close);
Expand Down Expand Up @@ -628,19 +676,28 @@ private NamedBlobRecord run_get_v2(String accountName, String containerName, Str
private Page<NamedBlobRecord> run_list_v2(String accountName, String containerName, String blobNamePrefix,
String pageToken, short accountId, short containerId, Connection connection, Integer maxKeys) throws Exception {
String query = "";
String queryStatement = blobNamePrefix == null ? LIST_ALL_QUERY_V2 : LIST_QUERY_V2;
String queryStatement = blobNamePrefix == null ? LIST_ALL_QUERY_V2 : LIST_NAMED_BLOBS_SQL;
int maxKeysValue = maxKeys == null ? config.listMaxResults : maxKeys;
try (PreparedStatement statement = connection.prepareStatement(queryStatement)) {
statement.setInt(1, accountId);
statement.setInt(2, containerId);
if (blobNamePrefix == null) {
// list-all no prefix
statement.setInt(1, accountId);
statement.setInt(2, containerId);
statement.setString(3, pageToken);
statement.setString(4, pageToken);
statement.setInt(5, maxKeysValue + 1);
} else {
// list with prefix
statement.setInt(1, accountId);
statement.setInt(2, containerId);
statement.setString(3, blobNamePrefix + "%");
statement.setString(4, pageToken != null ? pageToken : blobNamePrefix);
statement.setInt(5, accountId);
statement.setInt(6, containerId);
statement.setString(7, blobNamePrefix + "%");
statement.setString(8, pageToken != null ? pageToken : blobNamePrefix);
statement.setInt(9, maxKeysValue + 1);
}
statement.setInt(5, maxKeysValue + 1);
query = statement.toString();
logger.debug("Getting list of blobs matching prefix {} from MySql. Query {}", blobNamePrefix, query);
try (ResultSet resultSet = statement.executeQuery()) {
Expand Down

0 comments on commit e9dd1d1

Please sign in to comment.