diff --git a/rocketmq-tieredstore-s3/README.md b/rocketmq-tieredstore-s3/README.md new file mode 100644 index 000000000..1fc06c6f0 --- /dev/null +++ b/rocketmq-tieredstore-s3/README.md @@ -0,0 +1 @@ +# RocketMQ5.0 TieredStore, ObjectStorage File Provider diff --git a/rocketmq-tieredstore-s3/pom.xml b/rocketmq-tieredstore-s3/pom.xml new file mode 100644 index 000000000..ec193109e --- /dev/null +++ b/rocketmq-tieredstore-s3/pom.xml @@ -0,0 +1,177 @@ + + + + 4.0.0 + org.apache.rocketmq + rocketmq-tieredstore-s3 + 0.0.1-SNAPSHOT + rocketmq-tieredstore-s3 + jar + + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + + + + UTF-8 + UTF-8 + 2.20.127 + 5.3.0 + 4.13.2 + 3.22.0 + 3.10.0 + 4.11.0 + + 1.8 + 1.8 + + + + + + org.codehaus.mojo + versions-maven-plugin + 2.3 + + + org.codehaus.mojo + clirr-maven-plugin + 2.7 + + + org.codehaus.mojo + findbugs-maven-plugin + 3.0.4 + + + maven-compiler-plugin + 3.6.1 + + ${maven.compiler.source} + ${maven.compiler.target} + ${maven.compiler.source} + true + true + + + + maven-surefire-plugin + 2.19.1 + + -Xms512m -Xmx1024m + always + + **/*Test.java + + + + + maven-site-plugin + 3.6 + + en_US + UTF-8 + UTF-8 + + + + maven-source-plugin + 3.0.1 + + + attach-sources + + jar + + + + + + maven-resources-plugin + 3.0.2 + + ${project.build.sourceEncoding} + + + + + maven-checkstyle-plugin + 2.17 + + + verify + verify + + style/rmq_checkstyle.xml + UTF-8 + true + true + false + false + + + check + + + + + + + + + + org.apache.rocketmq + rocketmq-store + ${rocektmq.version} + + + org.apache.rocketmq + rocketmq-tiered-store + ${rocektmq.version} + + + + software.amazon.awssdk + s3 + ${s3.version} + + + software.amazon.awssdk + netty-nio-client + ${s3.version} + + + + junit + junit + ${junit.version} + test + + + org.assertj + assertj-core + ${assertj-core.version} + test + + + org.mockito + mockito-core + ${mockito-core.version} + test + + + org.mockito + mockito-junit-jupiter + ${mockito-junit-jupiter.version} + test + + + \ No newline at end of file diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/S3FileLock.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/S3FileLock.java new file mode 100644 index 000000000..a44003359 --- /dev/null +++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/S3FileLock.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.rocketmq.tieredstore.s3; + +import io.netty.buffer.ByteBuf; +import io.netty.util.ReferenceCountUtil; +import java.nio.charset.Charset; +import java.util.UUID; +import org.apache.rocketmq.tieredstore.s3.object.bytebuf.ByteBufAlloc; +import org.apache.rocketmq.tieredstore.util.MessageStoreUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class S3FileLock { + + private static final Logger log = LoggerFactory.getLogger(S3FileLock.class); + private final TieredS3Storage storage; + + private final String lockObjectPath; + + private final String lockKey; + + public S3FileLock(TieredS3Storage storage) { + this.storage = storage; + String clusterName = storage.getTieredMessageStoreConfig().getBrokerClusterName(); + String clusterBasePath = String.format("%s_%s", MessageStoreUtil.getHash(clusterName), clusterName); + this.lockObjectPath = clusterBasePath + "/" + storage.getTieredMessageStoreConfig().getBrokerName() + "/" + "lock"; + this.lockKey = UUID.randomUUID().toString(); + } + + public void lock() { + boolean exit = storage.isObjectExist(lockObjectPath); + if (exit) { + log.error("s3 storage is locked by other processes,please wait for it to release"); + throw new RuntimeException("s3 storage is locked by other processes,please wait for it to release"); + } + ByteBuf byteBuf = null; + try { + byte[] contentBytes = lockKey.getBytes(Charset.defaultCharset()); + byteBuf = ByteBufAlloc.byteBuffer(contentBytes.length); + byteBuf.writeBytes(contentBytes); + byteBuf.retain(); + storage.writeObject(lockObjectPath, byteBuf); + // double check + ByteBuf checkContent = storage.rangeRead(lockObjectPath, 0, lockKey.length()).join(); + String checkContentStr = new String(checkContent.array(), Charset.defaultCharset()); + if (!lockKey.equals(checkContentStr)) { + throw new RuntimeException("lock double check failed"); + } + } + catch (Throwable e) { + log.error("try lock s3 storages path {} failed", lockObjectPath, e); + throw new RuntimeException(e); + } + finally { + if (byteBuf != null) { + ReferenceCountUtil.safeRelease(byteBuf); + } + } + } + + public void release() { + try { + storage.deleteObject(lockObjectPath).join(); + log.info("release s3 file lock success {}", lockObjectPath); + } + catch (Throwable e) { + log.error("release lock s3 storages path {} failed", lockObjectPath, e); + } + } +} diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/S3FileSegment.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/S3FileSegment.java new file mode 100644 index 000000000..88206b0bb --- /dev/null +++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/S3FileSegment.java @@ -0,0 +1,568 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.rocketmq.tieredstore.s3; + +import com.alibaba.fastjson.JSON; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.CompositeByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.util.ReferenceCountUtil; +import io.opentelemetry.api.common.Attributes; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicBoolean; +import org.apache.rocketmq.tieredstore.MessageStoreConfig; +import org.apache.rocketmq.tieredstore.common.FileSegmentType; +import org.apache.rocketmq.tieredstore.exception.TieredStoreErrorCode; +import org.apache.rocketmq.tieredstore.exception.TieredStoreException; +import org.apache.rocketmq.tieredstore.metrics.TieredStoreMetricsManager; +import org.apache.rocketmq.tieredstore.provider.FileSegment; +import org.apache.rocketmq.tieredstore.s3.metadata.S3ChunkMetadata; +import org.apache.rocketmq.tieredstore.s3.metadata.S3FileSegmentMetadata; +import org.apache.rocketmq.tieredstore.s3.object.AbstractS3Storage; +import org.apache.rocketmq.tieredstore.s3.object.S3Storage; +import org.apache.rocketmq.tieredstore.s3.object.bytebuf.ByteBufAlloc; +import org.apache.rocketmq.tieredstore.s3.util.S3PathUtils; +import org.apache.rocketmq.tieredstore.stream.FileSegmentInputStream; +import org.apache.rocketmq.tieredstore.util.MessageStoreUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.rocketmq.tieredstore.metrics.TieredStoreMetricsConstant.LABEL_FILE_TYPE; +import static org.apache.rocketmq.tieredstore.metrics.TieredStoreMetricsConstant.LABEL_PATH; +import static org.apache.rocketmq.tieredstore.s3.constants.S3Constants.MIN_PART_SIZE; +import static org.apache.rocketmq.tieredstore.util.MessageFormatUtil.CONSUME_QUEUE_UNIT_SIZE; + +public class S3FileSegment extends FileSegment { + + private static final Logger log = LoggerFactory.getLogger(MessageStoreUtil.TIERED_STORE_LOGGER_NAME); + + private final String basePath; + + /** + * The path of the chunk file in S3. Format: + *
+     *     {@link #filePath}/chunk
+     * </pre>
+     */
+    private final String chunkPath;
+
+    /**
+     * The path of the segment file in S3. Format:
+     * <pre>
+     *     {@link #filePath}/segment
+     * </pre>
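+     * <p>
+     * For illustration only (hash prefix abbreviated, names made up): with cluster "DefaultCluster", broker
+     * "broker-a", topic "TopicTest", queue 0 and base offset 0, the base path expands to
+     * <pre>
+     *     ${hash}_DefaultCluster/broker-a/TopicTest/0/COMMIT_LOG/seg-0
+     * </pre>
+     * with chunk objects stored under {@code .../chunk} and the compacted object under {@code .../segment}.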
+ */ + private final String segmentPath; + + private final TieredS3Storage s3Storage; + + private final S3FileSegmentMetadata metadata; + + private final boolean isObjectMergeEnable = false; + + private final AtomicBoolean compactStatus = new AtomicBoolean(false); + + private final Attributes attributes = TieredStoreMetricsManager.newAttributesBuilder().put(LABEL_PATH, filePath) + .put(LABEL_FILE_TYPE, this.fileType.name().toLowerCase()).build(); + + public S3FileSegment(MessageStoreConfig storeConfig, + FileSegmentType fileType, String filePath, long baseOffset) { + super(storeConfig, fileType, filePath, baseOffset); + + // fullPath: clusterBasePath/broker/topic/queueId/fileType/seg-${baseOffset} + String clusterName = storeConfig.getBrokerClusterName(); + String clusterBasePath = String.format("%s_%s", MessageStoreUtil.getHash(clusterName), clusterName); + this.basePath = clusterBasePath + S3PathUtils.FILE_SEPARATOR + filePath + S3PathUtils.FILE_SEPARATOR + + fileType.toString() + S3PathUtils.FILE_SEPARATOR + "seg-" + baseOffset; + this.chunkPath = S3PathUtils.getBaseChunkPath(basePath); + this.segmentPath = S3PathUtils.getBaseSegmentPath(basePath); + this.s3Storage = TieredS3Storage.getInstance(storeConfig); + this.metadata = new S3FileSegmentMetadata(); + this.initialize(); + } + + private void initialize() { + + S3Storage.ObjectInfo segmentObjectHeader = s3Storage.readHeader(S3PathUtils.getSegmentPath(segmentPath)).join(); + if (segmentObjectHeader != null) { + this.metadata.setSegment( + new S3ChunkMetadata(0, (int) segmentObjectHeader.size(), true)); + return; + } + + S3Storage.ObjectInfo chunkZeroObjectHeader = s3Storage.readHeader(S3PathUtils.getChunkPathByPosition(chunkPath, 0)).join(); + // if chunk start position equal 0, means new file + if (chunkZeroObjectHeader == null) { + log.info("create new S3FileSegment {}", basePath); + return; + } + + CompletableFuture> listChunks = this.s3Storage.listChunks(this.chunkPath); + List chunks = listChunks.join(); + // add all chunks into metadata + checkAndLoadChunks(chunks); + if (log.isDebugEnabled()) { + log.debug("init file segment metadata successfully. path: {} meta: {}", basePath, JSON.toJSONString(this.metadata)); + } + + log.info("init file segment metadata successfully. 
path: {}", basePath); + + } + + private void checkAndLoadChunks(List chunks) { + if (chunks.size() == 0) { + return; + } + for (S3ChunkMetadata chunk : chunks) { + S3ChunkMetadata newChunk = + new S3ChunkMetadata(chunk.getStartPosition(), + chunk.getChunkSize(), false); + if (!this.metadata.addChunk(newChunk)) { + // the chunk is not valid + log.error("Check and load chunks failed, the path {} the chunk: {} is not valid, now chunks last end position: {}, please check it.", + basePath, newChunk, + this.metadata.getEndPosition()); + throw new RuntimeException( + "The chunk: " + chunk + " is not valid, now chunks last end position: " + this.metadata.getEndPosition() + ", please check it."); + } + } + } + + @Override + public String getPath() { + return this.filePath; + } + + @Override + public long getSize() { + return this.metadata.getSize(); + } + + @Override + public boolean exists() { + return this.metadata.getSize() > 0; + } + + @Override + public void createFile() { + + } + + @Override + public void destroyFile() { + this.s3Storage.deleteObjects(basePath).join(); + this.metadata.clear(); + } + + @Override + public CompletableFuture read0(long position, int length) { + CompletableFuture completableFuture = new CompletableFuture<>(); + List chunks; + try { + chunks = this.metadata.seek(position, length); + } + catch (IndexOutOfBoundsException e) { + log.error("Read position {} and length {} out of range, the file segment size is {}.", position, length, this.metadata.getSize()); + completableFuture.completeExceptionally(new TieredStoreException(TieredStoreErrorCode.DOWNLOAD_LENGTH_NOT_CORRECT, + "read data from segment error because of position or length not correct")); + return completableFuture; + } + long endPosition = position + length - 1; + List> subFutures = new ArrayList<>(chunks.size()); + byte[] bytes = new byte[length]; + for (S3ChunkMetadata chunk : chunks) { + long startPositionInChunk = position >= chunk.getStartPosition() ? position - chunk.getStartPosition() : 0; + long endPositionInChunk = endPosition <= chunk.getEndPosition() ? endPosition - chunk.getStartPosition() : chunk.getChunkSize() - 1; + + String objectPath = chunk.isSegment() ? 
S3PathUtils.getSegmentPath(segmentPath) : S3PathUtils.getChunkPathByPosition(chunkPath, chunk.getStartPosition()); + subFutures.add(this.s3Storage.rangeRead(objectPath, startPositionInChunk, + endPositionInChunk) + .thenAccept(buf -> { + buf.readBytes(bytes, (int) (chunk.getStartPosition() + startPositionInChunk - position), buf.readableBytes()); + buf.release(); + }).exceptionally(throwable -> { + log.error("Failed to read data from s3, chunk: {}, start position: {}, end position: {}", chunk, startPositionInChunk, + endPositionInChunk, throwable); + return null; + })); + } + CompletableFuture.allOf(subFutures.toArray(new CompletableFuture[0])).thenAccept(v -> { + completableFuture.complete(ByteBuffer.wrap(bytes)); + TieredStoreMetricsManager.downloadBytes.record(length, attributes); + }) + .exceptionally(throwable -> { + log.error("Failed to read data from s3, position: {}, length: {}", position, length, throwable); + completableFuture.completeExceptionally( + new TieredStoreException(TieredStoreErrorCode.IO_ERROR, "wait all sub download tasks complete error")); + return null; + }); + return completableFuture; + } + + @Override + public CompletableFuture commit0(FileSegmentInputStream inputStream, long position, int length, + boolean append) { + // TODO: Deal with the case that the param: append is false + CompletableFuture completableFuture = new CompletableFuture<>(); + // check if now the segment is sealed + if (this.metadata.isSealed()) { + log.error("The segment is sealed, the position: {}, the length: {}.", position, length); + TieredStoreException exception = new TieredStoreException(TieredStoreErrorCode.SEGMENT_SEALED, "the segment is sealed"); + exception.setPosition(this.metadata.getEndPosition() + 1); + completableFuture.completeExceptionally(exception); + return completableFuture; + } + // check if the position is valid + if (length < 0 || position != this.metadata.getEndPosition() + 1) { + log.error("The position is invalid, the position: {}, the length: {}, now segment end position: {}.", position, length, + this.metadata.getEndPosition()); + TieredStoreException exception = new TieredStoreException(TieredStoreErrorCode.ILLEGAL_OFFSET, "the position is invalid"); + exception.setPosition(this.metadata.getEndPosition() + 1); + completableFuture.completeExceptionally(exception); + return completableFuture; + } + // upload chunk + String chunkPath = S3PathUtils.getChunkPathByPosition(this.chunkPath, position); + inputStream.rewind(); + CompositeByteBuf compositeByteBuf = ByteBufAlloc.compositeByteBuffer(); + for (ByteBuffer byteBuffer : inputStream.getBufferList()) { + compositeByteBuf.addComponent(true, Unpooled.wrappedBuffer(byteBuffer)); + } + + if (compositeByteBuf.readableBytes() != length) { + log.error("byteBuffer length not equal compositeByteBuf.readableBytes() length {} readableBytes{} ", length, + compositeByteBuf.readableBytes()); + TieredStoreException exception = new TieredStoreException(TieredStoreErrorCode.ILLEGAL_OFFSET, "the position is invalid"); + completableFuture.completeExceptionally(exception); + return completableFuture; + } + compositeByteBuf.retain(); + this.s3Storage.writeObject(chunkPath, compositeByteBuf).thenAccept(result -> { + try { + S3ChunkMetadata chunk = new S3ChunkMetadata(position, length, false); + if (!this.metadata.addChunk(chunk)) { + // the chunk is not valid + log.error( + "Add chunk after uploading chunk to S3 failed, the chunk: {} is not valid, now chunks last end position: {}, please check it.", + chunk, 
this.metadata.getEndPosition()); + throw new RuntimeException( + "The chunk: " + chunk + " is not valid, now chunks last end position: " + this.metadata.getEndPosition() + + ", please check it."); + } + + completableFuture.complete(true); + + } + finally { + ReferenceCountUtil.safeRelease(compositeByteBuf); + } + }) + .exceptionally(throwable -> { + ReferenceCountUtil.safeRelease(compositeByteBuf); + log.error("Failed to write data to s3, position: {}, length: {}", position, length, throwable); + TieredStoreException exception = new TieredStoreException(TieredStoreErrorCode.IO_ERROR, "write data to s3 error"); + exception.setPosition(position); + completableFuture.completeExceptionally(exception); + return null; + }); + return completableFuture; + } + + public void compactFile() { + // check if the segment file exists + if (this.metadata.isSealed() && this.metadata.getChunkCount() == 0) { + return; + } + + if (isObjectMergeEnable) { + return; + } + boolean ret = compactStatus.compareAndSet(false, true); + if (!ret) { + log.error("compact operator is running,do nothing filePath {}", basePath); + } + + // merge all chunks into a segment file and delete all chunks + try { + compactFile0(); + } + finally { + boolean rt = compactStatus.compareAndSet(true, false); + if (!rt) { + log.error("compactFile compactStatus update error {}", basePath); + } + } + + } + + private void compactFile0() { + // merge all chunks + log.info("compact file segment filePath {} fileType {} baseOffset {}", basePath, fileType, + baseOffset); + String segmentName = S3PathUtils.getSegmentPath(segmentPath); + + boolean result = mergeAllChunksIntoSegment(this.metadata.getChunks(), segmentName, fileType); + if (!result) { + log.error("Merge chunks into segment failed, chunk path is {}, segment path is {}.", this.chunkPath, this.segmentPath); + throw new RuntimeException("merge chunks into segment failed"); + } + + List segmentObjects = this.s3Storage.listObjects(segmentPath).join(); + boolean isCorrectCount = segmentObjects.size() == 1; + boolean isCorrectSize = false; + if (isCorrectCount) { + isCorrectSize = segmentObjects.get(0).size() == metadata.getSize(); + } + if (isCorrectCount && isCorrectSize) { + this.metadata.setSegment(new S3ChunkMetadata(0, (int) this.metadata.getSize(), true)); + this.metadata.removeAllChunks(); + log.info("merge chunks into segment success, chunk path is {}, segment path is {}", this.chunkPath, this.segmentPath); + try { + this.s3Storage.deleteObjects(chunkPath).join(); + log.info("compactFile0 after merge success , delete old objects success path{}", chunkPath); + } + catch (Throwable e) { + log.error("compactFile0 after merge success , delete old objects failed path{}", chunkPath, e); + } + } + else { + if (!isCorrectCount) { + log.error("Merge chunks into segment failed, segment count {} wrong path is {}.", segmentObjects.size(), this.segmentPath); + } + if (!isCorrectSize) { + log.error("Merge chunks into segment failed, segment size {} wrong path is {}.", segmentObjects.get(0).size(), this.segmentPath); + } + } + } + + private boolean mergeAllChunksIntoSegment(List chunks, String segmentName, + FileSegmentType fileSegmentType) { + if (FileSegmentType.COMMIT_LOG.equals(fileSegmentType)) { + boolean checkObjectSize = checkChunkSizeForMultiUploadCopy(); + if (!checkObjectSize) { + // The commit log has a minimum limit of Object size less than merge, so it cannot be merged. 
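+                // S3 multipart uploads require every part except the last to be at least 5 MiB (MIN_PART_SIZE);
+                // completing an upload with a smaller non-final part fails with EntityTooSmall, so copy-based
+                // merging is skipped whenever any non-final chunk is undersized.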
+ log.warn("commit log file segment contain chunk size too small,can not merge {}", filePath); + return false; + } + return new AsyncS3ChunksMergeCopy(segmentName, chunks).run(); + } + else if (FileSegmentType.CONSUME_QUEUE.equals(fileSegmentType)) { + return new AsyncS3ChunksMerge(segmentName, chunks).run(); + } + else { + throw new RuntimeException("mergeAllChunksIntoSegment invalid fileSegmentType"); + } + } + + private boolean checkChunkSizeForMultiUploadCopy() { + for (int i = 0; i < metadata.getChunks().size(); i++) { + S3ChunkMetadata chunk = metadata.getChunks().get(i); + if (i != metadata.getChunks().size() - 1 && chunk.getChunkSize() < MIN_PART_SIZE) { + return false; + } + } + return true; + } + + abstract class AbstractChunkMergeTask { + + protected final String segmentKey; + protected String uploadId; + protected final List completedParts; + + protected final List chunks; + + AbstractChunkMergeTask(String segmentKey, List chunks) { + this.segmentKey = segmentKey; + this.uploadId = null; + this.completedParts = new ArrayList<>(); + this.chunks = chunks; + } + + abstract boolean run(); + + protected CompletableFuture initiateUpload() { + return s3Storage.getObjectStorage().createMultipartUpload(segmentKey) + .whenComplete((result, error) -> { + if (error != null) { + log.error("Error initiating multi part {} upload: ", segmentKey, error); + } + else { + uploadId = result; + } + }); + } + + protected CompletableFuture uploadPartCopy(int partNumber, + String chunkKey, long chunkSize) { + return s3Storage.getObjectStorage() + .uploadPartCopy(chunkKey, segmentKey, 0, chunkSize, uploadId, partNumber) + .whenComplete((result, error) -> { + if (error != null) { + log.error("Error uploading part copy, chunkKey: {}, partNumber: {}, uploadId: {}, error:", chunkKey, partNumber, uploadId, + error); + } + else { + completedParts.add(result); + } + }); + } + + protected CompletableFuture uploadPart(int partNumber, + ByteBuf data) { + return s3Storage.getObjectStorage() + .uploadPart(segmentKey, uploadId, partNumber, data) + .whenComplete((result, error) -> { + if (error != null) { + log.error("Error uploading part, segmentKey: {}, partNumber: {}, uploadId: {}, error:", segmentKey, partNumber, uploadId, + error); + } + else { + completedParts.add(result); + } + }); + } + + protected CompletableFuture completeUpload() { + Collections.sort(completedParts, Comparator.comparingInt(AbstractS3Storage.ObjectStorageCompletedPart::getPartNumber)); + return s3Storage.getObjectStorage().completeMultipartUpload(segmentKey, uploadId, completedParts) + .thenApply(resp -> true) + .whenComplete((result, error) -> { + if (error != null) { + log.error("Error completing multi part upload, uploadId: {}, ", uploadId, error); + } + }); + } + + protected CompletableFuture abortUpload() { + return s3Storage.getObjectStorage().abortMultipartUpload(segmentKey, uploadId) + .thenApply(resp -> true) + .whenComplete((result, error) -> { + if (error != null) { + log.error("Error abort multi part upload, uploadId: {}, ", uploadId, error); + } + }); + } + } + + class AsyncS3ChunksMergeCopy extends AbstractChunkMergeTask { + + public AsyncS3ChunksMergeCopy(String segmentKey, + List chunks) { + super(segmentKey, chunks); + } + + public boolean run() { + try { + initiateUpload().join(); + for (int i = 0; i < chunks.size(); i++) { + String chunkKey = S3PathUtils.getChunkPathByPosition(chunkPath, chunks.get(i).getStartPosition()); + int partNumber = i + 1; + uploadPartCopy(partNumber, chunkKey, 
chunks.get(i).getChunkSize()).join(); + } + completeUpload().join(); + return true; + } + catch (Throwable e) { + log.error("Merge all chunks into segment by copy failed, chunks: {}, segmentName: {}, region: {}, bucket: {}", chunks, segmentKey, + storeConfig.getObjectStoreEndpoint(), storeConfig.getObjectStoreBucket(), e); + abortUpload().join(); + return false; + } + } + } + + class AsyncS3ChunksMerge extends AbstractChunkMergeTask { + + public AsyncS3ChunksMerge(String segmentKey, + List chunks) { + super(segmentKey, chunks); + } + + @Override + boolean run() { + try { + initiateUpload().join(); + CompositeByteBuf compositeByteBuf = null; + try { + int partNumber = 0; + compositeByteBuf = ByteBufAlloc.compositeByteBuffer(); + S3ChunkMetadata pre = null; + for (S3ChunkMetadata chunk : chunks) { + + if (pre != null && pre.getEndPosition() + 1 != chunk.getStartPosition()) { + log.error("AsyncS3ChunksMerge chunk position check failed {}", segmentKey); + throw new RuntimeException("AsyncS3ChunksMerge chunk position check failed"); + } + + ByteBuf byteBuf = readByChunk(chunk).join(); + if (byteBuf.readableBytes() != chunk.getChunkSize() || byteBuf.readableBytes() % CONSUME_QUEUE_UNIT_SIZE != 0) { + log.error("AsyncS3ChunksMerge read chunk data failed chunk size invalid segmentKey {} chunkStartPosition {}", segmentKey, chunk.getStartPosition()); + throw new RuntimeException("AsyncS3ChunksMerge read chunk data failed chunk size invalid"); + } + compositeByteBuf.addComponents(true, byteBuf); + + if (compositeByteBuf.readableBytes() >= MIN_PART_SIZE) { + partNumber++; + try { + compositeByteBuf.retain(); + uploadPart(partNumber, compositeByteBuf).join(); + } + finally { + ReferenceCountUtil.safeRelease(compositeByteBuf); + } + compositeByteBuf = ByteBufAlloc.compositeByteBuffer(); + } + + pre = chunk; + } + + if (compositeByteBuf.readableBytes() > 0) { + partNumber++; + compositeByteBuf.retain(); + uploadPart(partNumber, compositeByteBuf).join(); + } + } + finally { + if (compositeByteBuf != null) { + ReferenceCountUtil.safeRelease(compositeByteBuf); + } + } + + completeUpload().join(); + return true; + } + catch (Throwable e) { + log.error("Merge all chunks into segment failed, chunks: {}, segmentName: {}, region: {}, bucket: {}", chunks, segmentKey, + storeConfig.getObjectStoreEndpoint(), storeConfig.getObjectStoreBucket(), e); + abortUpload().join(); + return false; + } + } + + private CompletableFuture readByChunk(S3ChunkMetadata chunk) { + String objectPath = S3PathUtils.getChunkPathByPosition(chunkPath, chunk.getStartPosition()); + return s3Storage.rangeRead(objectPath, 0, chunk.getChunkSize()); + } + } +} diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/TieredS3Storage.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/TieredS3Storage.java new file mode 100644 index 000000000..f62546b3d --- /dev/null +++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/TieredS3Storage.java @@ -0,0 +1,169 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.rocketmq.tieredstore.s3; + +import com.google.common.collect.Maps; +import io.netty.buffer.ByteBuf; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.stream.Collectors; +import org.apache.rocketmq.logging.org.slf4j.Logger; +import org.apache.rocketmq.logging.org.slf4j.LoggerFactory; +import org.apache.rocketmq.tieredstore.MessageStoreConfig; +import org.apache.rocketmq.tieredstore.s3.metadata.S3ChunkMetadata; +import org.apache.rocketmq.tieredstore.s3.object.AwsS3Storage; +import org.apache.rocketmq.tieredstore.s3.object.S3Storage; +import org.apache.rocketmq.tieredstore.s3.object.S3URI; +import org.apache.rocketmq.tieredstore.util.MessageStoreUtil; + +public class TieredS3Storage { + + private static final Logger log = LoggerFactory.getLogger(MessageStoreUtil.TIERED_STORE_LOGGER_NAME); + + private static final int MAX_OBJECT_STORAGE_CONCURRENCY = 1000; + + private volatile static TieredS3Storage instance; + + private final MessageStoreConfig tieredMessageStoreConfig; + + private final S3Storage objectStorage; + + private final S3FileLock fileLock; + + public static TieredS3Storage getInstance(MessageStoreConfig config) { + if (instance == null) { + synchronized (MessageStoreConfig.class) { + if (instance == null) { + instance = new TieredS3Storage(config); + } + } + } + return instance; + } + + private TieredS3Storage(MessageStoreConfig config) { + this.tieredMessageStoreConfig = config; + S3URI objectURI = S3URI.builder() + .bucket(tieredMessageStoreConfig.getObjectStoreBucket()) + .endpoint(tieredMessageStoreConfig.getObjectStoreEndpoint()) + .extension(Maps.newHashMap()) + .build(); + objectURI.addExtension(S3URI.ACCESS_KEY_KEY, tieredMessageStoreConfig.getObjectStoreAccessKey()); + objectURI.addExtension(S3URI.SECRET_KEY_KEY, tieredMessageStoreConfig.getObjectStoreSecretKey()); + objectStorage = AwsS3Storage.builder() + .bucket(objectURI) + .maxObjectStorageConcurrency(MAX_OBJECT_STORAGE_CONCURRENCY) + .readWriteIsolate(true) + .build(); + fileLock = new S3FileLock(this); + } + + public boolean readinessCheck() { + return objectStorage.readinessCheck(); + } + + public void start() { + + } + + public void lock() { + fileLock.lock(); + } + + public void releaseLock() { + fileLock.release(); + } + + public S3Storage getObjectStorage() { + return objectStorage; + } + + public MessageStoreConfig getTieredMessageStoreConfig() { + return tieredMessageStoreConfig; + } + + public CompletableFuture deleteObjects(String prefix) { + CompletableFuture future = new CompletableFuture<>(); + listObjects(prefix).thenCompose(objects -> { + List objectPaths = new ArrayList<>(); + objects.forEach(v -> objectPaths.add(v.key())); + return objectStorage.delete(objectPaths); + }) + .thenAccept(v -> future.complete(null)) + .exceptionally(throwable -> { + log.error("deleteObjects prefix {} exception", prefix); + future.completeExceptionally(throwable); + return null; + }); + return future; + } + + public CompletableFuture deleteObject(String path) { + return 
objectStorage.delete(Arrays.asList(path)); + } + + CompletableFuture> listObjects(String prefix) { + return objectStorage.list(prefix); + } + + CompletableFuture> listChunks(String prefix) { + CompletableFuture> future = new CompletableFuture<>(); + objectStorage.list(prefix) + .thenAccept(objectInfos -> { + future.complete(objectInfos.stream() + .map(objectInfo -> { + S3ChunkMetadata chunkMetadata = new S3ChunkMetadata(); + String key = objectInfo.key(); + chunkMetadata.setChunkSize((int) objectInfo.size()); + String[] paths = key.split("/"); + String chunkSubName = paths[paths.length - 1]; + Integer startPosition = Integer.valueOf(chunkSubName); + chunkMetadata.setStartPosition(startPosition); + return chunkMetadata; + }).sorted((o1, o2) -> (int) (o1.getStartPosition() - o2.getStartPosition())).collect(Collectors.toList())); + }) + .exceptionally(e -> { + future.completeExceptionally(e); + return null; + }); + return future; + } + + public CompletableFuture rangeRead(String objectPath, long start, long end) { + return objectStorage.rangeRead(objectPath, start, end); + } + + public CompletableFuture readHeader(String objectPath) { + return objectStorage.readHeader(objectPath); + } + + public boolean isObjectExist(String objectPath) { + return objectStorage.readHeader(objectPath).join() != null ? true : false; + } + + public CompletableFuture writeObject(String objectPath, ByteBuf buf) { + return objectStorage.write(objectPath, buf); + } + + public void close() { + objectStorage.close(); + } + +} \ No newline at end of file diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/constants/S3Constants.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/constants/S3Constants.java new file mode 100644 index 000000000..f4f28aa4f --- /dev/null +++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/constants/S3Constants.java @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.rocketmq.tieredstore.s3.constants; + +public class S3Constants { + public static final int MIN_PART_SIZE = 5 * 1024 * 1024; +} diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/metadata/S3ChunkMetadata.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/metadata/S3ChunkMetadata.java new file mode 100644 index 000000000..f74fdc3a3 --- /dev/null +++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/metadata/S3ChunkMetadata.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.rocketmq.tieredstore.s3.metadata; + +import org.apache.rocketmq.tieredstore.MessageStoreConfig; + +import java.util.Objects; + +import com.alibaba.fastjson.annotation.JSONField; +import org.apache.rocketmq.tieredstore.s3.S3FileSegment; + +/** + * Metadata of a chunk in S3. + * + *
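+ * A chunk corresponds to exactly one S3 object; {@link #startPosition} and {@link #chunkSize} locate its
+ * bytes within the logical file segment.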
+ * <p>
+ * There are two types of chunks in S3:
+ * <ul>
+ *     <li>Normal chunk: an ordinary chunk in S3, whose size is usually less than
+ *     {@link MessageStoreConfig#getTieredStoreGroupCommitSize()}</li>
+ *     <li>Segment chunk: the result of merging all normal chunks of one logical segment into a single
+ *     chunk, whose size usually equals {@link MessageStoreConfig#getTieredStoreCommitLogMaxSize()} or
+ *     {@link MessageStoreConfig#getTieredStoreConsumeQueueMaxSize()}</li>
+ * </ul>
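+ * <p>
+ * For example (sizes purely illustrative): a segment written through 4 MiB group commits first appears as a
+ * run of 4 MiB normal chunks; after compaction the same bytes are served by one segment chunk.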
+ * Once a segment chunk is created it is never changed again, and all normal chunks of that segment should be deleted.
+ */
+public class S3ChunkMetadata implements Comparable<S3ChunkMetadata> {
+
+    /**
+     * Name of the chunk in S3. Format:
+     * <p>
+     * Chunk:
+     * <pre>
+     *     {@link S3FileSegment#getPath()}/chunk/${startPosition}
+     * </pre>
+     * <p>
+     * Segment:
+     * <pre>
+     *     {@link S3FileSegment#getPath()}/segment/${startPosition}
+     * </pre>
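+     * <p>
+     * ${startPosition} is the chunk's byte offset within the logical file segment, so (sizes illustrative)
+     * three sequential 4 MiB chunk uploads yield keys {@code .../chunk/0}, {@code .../chunk/4194304} and
+     * {@code .../chunk/8388608}.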
+ */ + + @JSONField(ordinal = 1) + private long startPosition; + + @JSONField(ordinal = 2) + private int chunkSize; + + private boolean isSegment; + + public S3ChunkMetadata() { + + } + + public S3ChunkMetadata(long startPosition, int chunkSize, boolean isSegment) { + this.startPosition = startPosition; + this.chunkSize = chunkSize; + this.isSegment = isSegment; + } + + public int getChunkSize() { + return chunkSize; + } + + public long getStartPosition() { + return startPosition; + } + + public void setStartPosition(long startPosition) { + this.startPosition = startPosition; + } + + public void setChunkSize(int chunkSize) { + this.chunkSize = chunkSize; + } + + public long getEndPosition() { + return startPosition + chunkSize - 1; + } + + public boolean isSegment() { + return isSegment; + } + + @Override + public boolean equals(Object o) { + + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + S3ChunkMetadata that = (S3ChunkMetadata) o; + return startPosition == that.startPosition && chunkSize == that.chunkSize; + } + + @Override + public int hashCode() { + return Objects.hash(startPosition, chunkSize); + } + + @Override + public String toString() { + return "S3ChunkMetadata{" + + "startPosition=" + startPosition + + ", chunkSize=" + chunkSize + + '}'; + } + + @Override + public int compareTo(S3ChunkMetadata o) { + return this.startPosition > o.getStartPosition() ? 1 : -1; + } +} diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/metadata/S3FileSegmentMetadata.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/metadata/S3FileSegmentMetadata.java new file mode 100644 index 000000000..8ce5a47e4 --- /dev/null +++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/metadata/S3FileSegmentMetadata.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.rocketmq.tieredstore.s3.metadata; + +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +public class S3FileSegmentMetadata { + + private final LinkedList chunks = new LinkedList<>(); + + private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock(); + + private final ReentrantReadWriteLock.ReadLock readLock = lock.readLock(); + + private final ReentrantReadWriteLock.WriteLock writeLock = lock.writeLock(); + + private volatile boolean isSealed = false; + + private S3ChunkMetadata segment; + + public S3FileSegmentMetadata() { + } + + /** + * Seek the chunks that need to be read, which is the intersection of the chunks and the range of [position, + * position + length) + * + * @param position start position + * @param length data length + * @return the chunks that need to be read + * @throws IndexOutOfBoundsException if position or length is negative or position + */ + public List seek(long position, int length) throws IndexOutOfBoundsException { + readLock.lock(); + try { + long endPosition = position + length - 1; + if (position < 0 || length < 0 || position < getStartPosition() || endPosition > getEndPosition()) { + throw new IndexOutOfBoundsException( + "position: " + position + ", length: " + length + ", Metadata: start: " + getStartPosition() + ", end: " + getEndPosition()); + } + List needChunks = new LinkedList<>(); + if (length == 0) { + return needChunks; + } + if (segment != null) { + needChunks.add(segment); + return needChunks; + } + for (S3ChunkMetadata chunk : chunks) { + if (endPosition < chunk.getStartPosition()) { + break; + } + if (position > chunk.getEndPosition()) { + continue; + } + if (position <= chunk.getEndPosition() || endPosition >= chunk.getStartPosition()) { + needChunks.add(chunk); + } + } + return needChunks; + } + finally { + readLock.unlock(); + } + } + + public boolean addChunk(S3ChunkMetadata chunk) { + this.writeLock.lock(); + try { + if (chunks.size() == 0 && chunk.getStartPosition() != 0) { + return false; + } + if (chunks.size() > 0 && chunks.getLast().getEndPosition() + 1 != chunk.getStartPosition()) { + return false; + } + chunks.addLast(chunk); + return true; + } + finally { + this.writeLock.unlock(); + } + } + + public void setSegment(S3ChunkMetadata segment) { + this.writeLock.lock(); + try { + this.isSealed = true; + this.segment = segment; + } + finally { + this.writeLock.unlock(); + } + } + + public void removeAllChunks() { + this.writeLock.lock(); + try { + this.chunks.clear(); + } + finally { + this.writeLock.unlock(); + } + } + + public long getStartPosition() { + this.readLock.lock(); + try { + if (segment != null) { + return segment.getStartPosition(); + } + if (chunks.size() == 0) { + return -1; + } + return chunks.getFirst().getStartPosition(); + } + finally { + this.readLock.unlock(); + } + } + + public long getEndPosition() { + this.readLock.lock(); + try { + if (segment != null) { + return segment.getEndPosition(); + } + if (chunks.size() == 0) { + return -1; + } + return chunks.getLast().getEndPosition(); + } + finally { + this.readLock.unlock(); + } + } + + public long getSize() { + this.readLock.lock(); + try { + if (segment != null) { + return segment.getEndPosition() - segment.getStartPosition() + 1; + } + if (chunks.size() == 0) { + return 0; + } + + return chunks.getLast().getEndPosition() - chunks.getFirst().getStartPosition() + 1; + } + finally { + this.readLock.unlock(); + } + } + + public void 
clear() { + this.writeLock.lock(); + try { + chunks.clear(); + segment = null; + } + finally { + this.writeLock.unlock(); + } + } + + public long getChunkCount() { + this.readLock.lock(); + try { + return chunks.size(); + } + finally { + this.readLock.unlock(); + } + } + + public boolean isSealed() { + return isSealed || segment != null; + } + + public List getChunks() { + this.readLock.lock(); + try { + return new ArrayList<>(chunks); + } + finally { + this.readLock.unlock(); + } + } + + public void setSealed(boolean sealed) { + this.isSealed = sealed; + } + +} diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/AbstractS3Storage.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/AbstractS3Storage.java new file mode 100644 index 000000000..b2af45edb --- /dev/null +++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/AbstractS3Storage.java @@ -0,0 +1,374 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.rocketmq.tieredstore.s3.object; + +import com.google.common.base.Stopwatch; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.Unpooled; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import org.apache.rocketmq.common.ThreadFactoryImpl; +import org.apache.rocketmq.common.utils.ThreadUtils; +import org.apache.rocketmq.tieredstore.util.MessageStoreUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.http.HttpStatusCode; +import software.amazon.awssdk.services.s3.model.S3Exception; + +@SuppressWarnings("this-escape") +public abstract class AbstractS3Storage implements S3Storage { + static final Logger log = LoggerFactory.getLogger(MessageStoreUtil.TIERED_STORE_LOGGER_NAME); + private final Semaphore inflightReadLimiter; + private final Semaphore inflightWriteLimiter; + private final ExecutorService readCallbackExecutor; + private final ExecutorService writeCallbackExecutor; + protected final S3URI objectURI; + + protected AbstractS3Storage( + S3URI objectURI, + int maxObjectStorageConcurrency, + boolean readWriteIsolate) { + this.objectURI = objectURI; + this.inflightWriteLimiter = new Semaphore(maxObjectStorageConcurrency); + this.inflightReadLimiter = readWriteIsolate ? 
new Semaphore(maxObjectStorageConcurrency) : inflightWriteLimiter; + + readCallbackExecutor = ThreadUtils.newThreadPoolExecutor( + 1, + 1, + TimeUnit.MINUTES.toMillis(1), TimeUnit.MILLISECONDS, + new LinkedBlockingQueue<>(Integer.MAX_VALUE), + new ThreadFactoryImpl("s3-read-cb-executor")); + + writeCallbackExecutor = ThreadUtils.newThreadPoolExecutor( + 1, + 1, + TimeUnit.MINUTES.toMillis(1), TimeUnit.MILLISECONDS, + new LinkedBlockingQueue<>(Integer.MAX_VALUE), + new ThreadFactoryImpl("s3-write-cb-executor")); + } + + @Override + public CompletableFuture rangeRead(String objectPath, long start, long end) { + CompletableFuture cf = new CompletableFuture<>(); + CompletableFuture retCf = acquireReadPermit(cf); + if (retCf.isDone()) { + return retCf; + } + if (end != RANGE_READ_TO_END && start > end) { + IllegalArgumentException ex = new IllegalArgumentException(); + log.error("[UNEXPECTED] rangeRead [{}, {})", start, end, ex); + cf.completeExceptionally(ex); + return retCf; + } + else if (start == end) { + cf.complete(Unpooled.EMPTY_BUFFER); + return retCf; + } + + Stopwatch stopwatch = Stopwatch.createStarted(); + doRangeRead(objectPath, start, end).thenAccept(buf -> { + // the end may be RANGE_READ_TO_END (-1) for read all object + long dataSize = buf.readableBytes(); + if (log.isDebugEnabled()) { + log.debug("getObject from path: {}, {}-{}, size: {}, cost: {} ms", + objectPath, start, end, dataSize, stopwatch.elapsed(TimeUnit.MILLISECONDS)); + } + cf.complete(buf); + }) + .exceptionally(ex -> { + log.error("GetObject for object {} [{}, {}) fail", objectPath, start, end, ex); + cf.completeExceptionally(ex); + return null; + }); + return retCf; + } + + @Override + public CompletableFuture write(String objectPath, ByteBuf data) { + CompletableFuture cf = new CompletableFuture<>(); + CompletableFuture retCf = acquireWritePermit(cf).thenApply(nil -> null); + if (retCf.isDone()) { + data.release(); + return retCf; + } + + doWrite(objectPath, data).thenAccept(aVoid -> { + data.release(); + retCf.complete(null); + }) + .exceptionally(throwable -> { + retCf.completeExceptionally(throwable); + return null; + }); + return retCf; + } + + @Override + public CompletableFuture createMultipartUpload(String path) { + CompletableFuture cf = new CompletableFuture<>(); + CompletableFuture retCf = acquireWritePermit(cf); + if (retCf.isDone()) { + return retCf; + } + createMultipartUpload0(path, cf); + return retCf; + } + + private void createMultipartUpload0(String path, CompletableFuture cf) { + doCreateMultipartUpload(path).thenAccept(uploadId -> { + cf.complete(uploadId); + }) + .exceptionally(ex -> { + log.error("CreateMultipartUpload for object {} fail", path, ex); + cf.completeExceptionally(ex); + return null; + }); + } + + @Override + public CompletableFuture uploadPart(String path, String uploadId, + int partNumber, ByteBuf data) { + CompletableFuture cf = new CompletableFuture<>(); + CompletableFuture refCf = acquireWritePermit(cf); + if (refCf.isDone()) { + data.release(); + return refCf; + } + doUploadPart(path, uploadId, partNumber, data).thenAccept(part -> { + data.release(); + cf.complete(part); + }) + .exceptionally(ex -> { + log.error("UploadPart for object {}-{} fail", path, partNumber, ex); + data.release(); + cf.completeExceptionally(ex); + return null; + }); + return refCf; + } + + @Override + public CompletableFuture uploadPartCopy(String sourcePath, + String path, long start, long end, String uploadId, int partNumber) { + CompletableFuture cf = new CompletableFuture<>(); + 
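+        // The permit taken by acquireWritePermit() is released automatically when cf completes, and the
+        // caller's callbacks are shifted onto the write-callback executor; hence retCf, not cf, must be
+        // the value returned to the caller.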
CompletableFuture retCf = acquireWritePermit(cf); + if (retCf.isDone()) { + return retCf; + } + doUploadPartCopy(sourcePath, path, start, end, uploadId, partNumber).thenAccept(part -> { + cf.complete(part); + }) + .exceptionally(ex -> { + log.error("UploadPartCopy for object {}-{} [{}, {}] fail", path, partNumber, start, end, ex); + cf.completeExceptionally(ex); + return null; + }); + return retCf; + } + + @Override + public CompletableFuture completeMultipartUpload(String path, String uploadId, + List parts) { + CompletableFuture cf = new CompletableFuture<>(); + CompletableFuture retCf = acquireWritePermit(cf); + if (retCf.isDone()) { + return retCf; + } + + doCompleteMultipartUpload(path, uploadId, parts).thenAccept(nil -> cf.complete(null)) + .exceptionally(ex -> { + log.error("CompleteMultipartUpload for object {} fail", path, ex); + cf.completeExceptionally(ex); + return null; + }); + return retCf; + } + + @Override + public CompletableFuture abortMultipartUpload(String key, String uploadId) { + return doAbortMultipartUpload(key, uploadId); + } + + @Override + public CompletableFuture delete(List objectPaths) { + if (objectPaths.isEmpty()) { + return CompletableFuture.completedFuture(null); + } + CompletableFuture cf = new CompletableFuture<>(); + doDeleteObjects(objectPaths).thenAccept(aVoid -> cf.complete(null)).exceptionally(throwable -> { + log.error("delete objects failed {} fail", objectPaths, throwable); + cf.completeExceptionally(throwable); + return null; + }); + return cf; + } + + @Override + public CompletableFuture> list(String prefix) { + Stopwatch stopwatch = Stopwatch.createStarted(); + CompletableFuture> cf = doList(prefix); + cf.thenAccept(keyList -> + log.info("List objects finished, count: {}, cost: {}ms", keyList.size(), stopwatch.elapsed(TimeUnit.MILLISECONDS))) + .exceptionally(ex -> { + log.info("List objects failed, cost: {}, ex: {}", stopwatch.elapsed(TimeUnit.NANOSECONDS), ex.getMessage()); + return null; + }); + return cf; + } + + @Override + public CompletableFuture readHeader(String objectPath) { + Stopwatch stopwatch = Stopwatch.createStarted(); + CompletableFuture cf = new CompletableFuture<>(); + doReadHeader(objectPath).thenAccept(header -> { + cf.complete(header); + log.info("read object header finished, count: {}, cost: {}ms", header.size(), stopwatch.elapsed(TimeUnit.MILLISECONDS)); + }) + .exceptionally(ex -> { + Throwable cause = futureCause(ex); + if (cause instanceof S3Exception && ((S3Exception) cause).statusCode() == HttpStatusCode.NOT_FOUND) { + // not found + cf.complete(null); + } + else { + cf.completeExceptionally(ex); + log.error("read object header, cost: {}", stopwatch.elapsed(TimeUnit.NANOSECONDS), cause); + } + return null; + }); + return cf; + } + + @Override + public void close() { + readCallbackExecutor.shutdown(); + writeCallbackExecutor.shutdown(); + doClose(); + } + + abstract CompletableFuture doRangeRead(String path, long start, long end); + + abstract CompletableFuture doWrite(String path, ByteBuf data); + + abstract CompletableFuture doCreateMultipartUpload(String path); + + abstract CompletableFuture doUploadPart(String path, + String uploadId, int partNumber, ByteBuf part); + + abstract CompletableFuture doUploadPartCopy(String sourcePath, + String path, long start, long end, String uploadId, int partNumber); + + abstract CompletableFuture doCompleteMultipartUpload(String path, String uploadId, + List parts); + + abstract CompletableFuture doAbortMultipartUpload(String key, String uploadId); + + abstract 
CompletableFuture doDeleteObjects(List objectKeys); + + abstract void doClose(); + + abstract CompletableFuture> doList(String prefix); + + abstract CompletableFuture doReadHeader(String prefix); + + /** + * Acquire read permit, permit will auto release when cf complete. + * + * @return retCf the retCf should be used as method return value to ensure release before following operations. + */ + CompletableFuture acquireReadPermit(CompletableFuture cf) { + try { + inflightReadLimiter.acquire(); + CompletableFuture newCf = new CompletableFuture<>(); + cf.whenComplete((rst, ex) -> { + inflightReadLimiter.release(); + readCallbackExecutor.execute(() -> { + if (ex != null) { + newCf.completeExceptionally(ex); + } + else { + newCf.complete(rst); + } + }); + }); + return newCf; + } + catch (InterruptedException e) { + cf.completeExceptionally(e); + return cf; + } + } + + /** + * Acquire write permit, permit will auto release when cf complete. + * + * @return retCf the retCf should be used as method return value to ensure release before following operations. + */ + CompletableFuture acquireWritePermit(CompletableFuture cf) { + // this future will be return by the caller + CompletableFuture newCf = new CompletableFuture<>(); + + try { + + inflightWriteLimiter.acquire(); + cf.whenComplete((rst, ex) -> { + inflightWriteLimiter.release(); + writeCallbackExecutor.execute(() -> { + if (ex != null) { + newCf.completeExceptionally(ex); + } + else { + newCf.complete(rst); + } + }); + }); + return newCf; + } + catch (InterruptedException e) { + newCf.completeExceptionally(e); + return newCf; + } + } + + public static Throwable futureCause(Throwable ex) { + if (ex instanceof ExecutionException) { + if (ex.getCause() != null) { + return futureCause(ex.getCause()); + } + else { + return ex; + } + } + else if (ex instanceof CompletionException) { + if (ex.getCause() != null) { + return futureCause(ex.getCause()); + } + else { + return ex; + } + } + return ex; + } + +} diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/AwsS3Storage.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/AwsS3Storage.java new file mode 100644 index 000000000..d6095f157 --- /dev/null +++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/AwsS3Storage.java @@ -0,0 +1,466 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.rocketmq.tieredstore.s3.object; + +import com.google.common.collect.Lists; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.CompositeByteBuf; +import io.netty.buffer.Unpooled; +import io.netty.handler.ssl.OpenSsl; +import java.net.URI; +import java.nio.charset.StandardCharsets; +import java.time.Duration; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Date; +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.function.Supplier; +import java.util.stream.Collectors; +import org.apache.commons.lang3.StringUtils; +import org.apache.rocketmq.tieredstore.s3.object.bytebuf.ByteBufAlloc; +import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.auth.credentials.AwsCredentialsProviderChain; +import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; +import software.amazon.awssdk.auth.credentials.StaticCredentialsProvider; +import software.amazon.awssdk.core.async.AsyncRequestBody; +import software.amazon.awssdk.core.async.AsyncResponseTransformer; +import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.http.HttpStatusCode; +import software.amazon.awssdk.http.async.SdkAsyncHttpClient; +import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +import software.amazon.awssdk.regions.Region; +import software.amazon.awssdk.services.s3.S3AsyncClient; +import software.amazon.awssdk.services.s3.S3AsyncClientBuilder; +import software.amazon.awssdk.services.s3.model.AbortMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.ChecksumAlgorithm; +import software.amazon.awssdk.services.s3.model.ChecksumMode; +import software.amazon.awssdk.services.s3.model.CompleteMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CompletedMultipartUpload; +import software.amazon.awssdk.services.s3.model.CompletedPart; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadRequest; +import software.amazon.awssdk.services.s3.model.CreateMultipartUploadResponse; +import software.amazon.awssdk.services.s3.model.Delete; +import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest; +import software.amazon.awssdk.services.s3.model.GetObjectRequest; +import software.amazon.awssdk.services.s3.model.HeadObjectRequest; +import software.amazon.awssdk.services.s3.model.ObjectIdentifier; +import software.amazon.awssdk.services.s3.model.PutObjectRequest; +import software.amazon.awssdk.services.s3.model.S3Exception; +import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; +import software.amazon.awssdk.services.s3.model.UploadPartRequest; + +@SuppressWarnings({"this-escape", "NPathComplexity"}) +public class AwsS3Storage extends AbstractS3Storage { + public static final String PATH_STYLE_KEY = "pathStyle"; + public static final String AUTH_TYPE_KEY = "authType"; + public static final String STATIC_AUTH_TYPE = "static"; + public static final String INSTANCE_AUTH_TYPE = "instance"; + public static final String CHECKSUM_ALGORITHM_KEY = "checksumAlgorithm"; + + private final String bucket; + private final S3AsyncClient readS3Client; + private final S3AsyncClient writeS3Client; + + private final ChecksumAlgorithm checksumAlgorithm; + + private volatile static 
+
+    public AwsS3Storage(S3URI objectURI, int maxObjectStorageConcurrency,
+        boolean readWriteIsolate) {
+        super(objectURI, maxObjectStorageConcurrency, readWriteIsolate);
+        this.bucket = objectURI.bucket();
+        List<AwsCredentialsProvider> credentialsProviders = credentialsProviders();
+
+        ChecksumAlgorithm checksumAlgorithm = ChecksumAlgorithm.fromValue(objectURI.extensionString(CHECKSUM_ALGORITHM_KEY));
+        if (checksumAlgorithm == null) {
+            checksumAlgorithm = ChecksumAlgorithm.UNKNOWN_TO_SDK_VERSION;
+        }
+        this.checksumAlgorithm = checksumAlgorithm;
+
+        Supplier<S3AsyncClient> clientSupplier =
+            () -> newS3Client(objectURI.endpoint(), objectURI.region(), objectURI.extensionBool(PATH_STYLE_KEY, false), credentialsProviders,
+                maxObjectStorageConcurrency);
+        this.writeS3Client = clientSupplier.get();
+        this.readS3Client = readWriteIsolate ? clientSupplier.get() : writeS3Client;
+    }
+
+    public S3AsyncClient getReadS3Client() {
+        return readS3Client;
+    }
+
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    @Override
+    CompletableFuture<ByteBuf> doRangeRead(String path, long start, long end) {
+        GetObjectRequest.Builder builder = GetObjectRequest.builder().bucket(bucket).key(path).range(range(start, end));
+
+        if (checksumAlgorithm != ChecksumAlgorithm.UNKNOWN_TO_SDK_VERSION) {
+            builder.checksumMode(ChecksumMode.ENABLED);
+        }
+
+        CompletableFuture<ByteBuf> cf = new CompletableFuture<>();
+        readS3Client.getObject(builder.build(), AsyncResponseTransformer.toPublisher())
+            .thenAccept(responsePublisher -> {
+                CompositeByteBuf buf = ByteBufAlloc.compositeByteBuffer();
+                responsePublisher.subscribe(bytes -> {
+                    // the aws client copies each DefaultHttpContent into a heap ByteBuffer
+                    buf.addComponent(true, Unpooled.wrappedBuffer(bytes));
+                })
+                    .whenComplete((rst, ex) -> {
+                        if (ex != null) {
+                            buf.release();
+                            cf.completeExceptionally(ex);
+                        }
+                        else {
+                            cf.complete(buf);
+                        }
+                    });
+            })
+            .exceptionally(ex -> {
+                cf.completeExceptionally(ex);
+                return null;
+            });
+        return cf;
+    }
+
+    @Override
+    CompletableFuture<Void> doWrite(String path, ByteBuf data) {
+        PutObjectRequest.Builder builder = PutObjectRequest.builder().bucket(bucket).key(path);
+        if (checksumAlgorithm != ChecksumAlgorithm.UNKNOWN_TO_SDK_VERSION) {
+            builder.checksumAlgorithm(checksumAlgorithm);
+        }
+
+        PutObjectRequest request = builder.build();
+        AsyncRequestBody body = AsyncRequestBody.fromByteBuffersUnsafe(data.nioBuffers());
+        return writeS3Client.putObject(request, body).thenApply(rst -> null);
+    }
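+    // The multipart methods below follow the standard S3 flow. A minimal sketch
+    // (illustrative, error handling omitted; mirrors the readiness check further down):
+    //
+    //   String uploadId = doCreateMultipartUpload(path).join();
+    //   ObjectStorageCompletedPart p1 = doUploadPart(path, uploadId, 1, partData).join();
+    //   doCompleteMultipartUpload(path, uploadId, Lists.newArrayList(p1)).join();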
+    @Override
+    CompletableFuture<String> doCreateMultipartUpload(String path) {
+        CreateMultipartUploadRequest.Builder builder = CreateMultipartUploadRequest.builder().bucket(bucket).key(path);
+
+        if (checksumAlgorithm != ChecksumAlgorithm.UNKNOWN_TO_SDK_VERSION) {
+            builder.checksumAlgorithm(checksumAlgorithm);
+        }
+
+        CreateMultipartUploadRequest request = builder.build();
+        return writeS3Client.createMultipartUpload(request).thenApply(CreateMultipartUploadResponse::uploadId);
+    }
+
+    @Override
+    CompletableFuture<ObjectStorageCompletedPart> doUploadPart(String path, String uploadId,
+        int partNumber, ByteBuf part) {
+        AsyncRequestBody body = AsyncRequestBody.fromByteBuffersUnsafe(part.nioBuffers());
+        UploadPartRequest.Builder builder = UploadPartRequest.builder()
+            .bucket(bucket)
+            .key(path)
+            .uploadId(uploadId)
+            .partNumber(partNumber);
+
+        if (checksumAlgorithm != ChecksumAlgorithm.UNKNOWN_TO_SDK_VERSION) {
+            builder.checksumAlgorithm(checksumAlgorithm);
+        }
+
+        return writeS3Client.uploadPart(builder.build(), body)
+            .thenApply(resp -> {
+                String checksum;
+                switch (checksumAlgorithm) {
+                    case CRC32_C:
+                        checksum = resp.checksumCRC32C();
+                        break;
+                    case CRC32:
+                        checksum = resp.checksumCRC32();
+                        break;
+                    case SHA1:
+                        checksum = resp.checksumSHA1();
+                        break;
+                    case SHA256:
+                        checksum = resp.checksumSHA256();
+                        break;
+                    default:
+                        checksum = null;
+                }
+                return new ObjectStorageCompletedPart(partNumber, resp.eTag(), checksum);
+            });
+    }
+
+    @Override
+    CompletableFuture<ObjectStorageCompletedPart> doUploadPartCopy(String sourcePath, String path,
+        long start, long end, String uploadId, int partNumber) {
+        UploadPartCopyRequest request = UploadPartCopyRequest.builder().sourceBucket(bucket).sourceKey(sourcePath)
+            .destinationBucket(bucket).destinationKey(path).copySourceRange(range(start, end)).uploadId(uploadId).partNumber(partNumber)
+            .build();
+        return writeS3Client.uploadPartCopy(request)
+            .thenApply(resp -> new ObjectStorageCompletedPart(partNumber, resp.copyPartResult().eTag(), resp.copyPartResult().checksumCRC32C()));
+    }
+
+    @Override
+    public CompletableFuture<Void> doCompleteMultipartUpload(String path, String uploadId,
+        List<ObjectStorageCompletedPart> parts) {
+        List<CompletedPart> completedParts = parts.stream()
+            .map(part -> CompletedPart.builder().partNumber(part.getPartNumber()).eTag(part.getPartId()).checksumCRC32C(part.getCheckSum()).build())
+            .collect(Collectors.toList());
+        CompletedMultipartUpload multipartUpload = CompletedMultipartUpload.builder().parts(completedParts).build();
+        CompleteMultipartUploadRequest request =
+            CompleteMultipartUploadRequest.builder().bucket(bucket).key(path).uploadId(uploadId).multipartUpload(multipartUpload).build();
+        return writeS3Client.completeMultipartUpload(request).thenApply(resp -> null);
+    }
+
+    @Override
+    CompletableFuture<Boolean> doAbortMultipartUpload(String key, String uploadId) {
+        AbortMultipartUploadRequest request = AbortMultipartUploadRequest.builder()
+            .bucket(bucket)
+            .key(key)
+            .uploadId(uploadId)
+            .build();
+        return writeS3Client.abortMultipartUpload(request).thenApply(v -> true).exceptionally(e -> false);
+    }
+
+    @Override
+    public CompletableFuture<Void> doDeleteObjects(List<String> objectKeys) {
+        ObjectIdentifier[] toDeleteKeys = objectKeys.stream().map(key ->
+            ObjectIdentifier.builder()
+                .key(key)
+                .build()
+        ).toArray(ObjectIdentifier[]::new);
+
+        DeleteObjectsRequest request = DeleteObjectsRequest.builder()
+            .bucket(bucket)
+            .delete(Delete.builder().objects(toDeleteKeys).build())
+            .build();
+        CompletableFuture<Void> cf = new CompletableFuture<>();
+        this.writeS3Client.deleteObjects(request)
+            .thenAccept(resp -> cf.complete(null))
+            .exceptionally(ex -> {
+                cf.completeExceptionally(ex);
+                return null;
+            });
+        return cf;
+    }
+
+    @Override
+    void doClose() {
+        writeS3Client.close();
+        if (readS3Client != writeS3Client) {
+            readS3Client.close();
+        }
+    }
+
+    @Override
+    CompletableFuture<List<ObjectInfo>> doList(String prefix) {
+        return readS3Client.listObjectsV2(builder -> builder.bucket(bucket).prefix(prefix).maxKeys(Integer.MAX_VALUE))
+            .thenApply(resp ->
+                resp.contents()
+                    .stream()
+                    .map(object -> new ObjectInfo(object.key(), object.lastModified().toEpochMilli(), object.size()))
+                    .collect(Collectors.toList()));
+    }
+
+    @Override
+    CompletableFuture<ObjectInfo> doReadHeader(String objectPath) {
+        return readS3Client.headObject(builder -> builder.bucket(bucket).key(objectPath))
+            .thenApply(resp ->
+                new ObjectInfo(objectPath, resp.lastModified() == null ? -1 : resp.lastModified().toEpochMilli(), resp.contentLength()));
+    }
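+    // Auth configuration sketch (illustrative; the key values are assumptions). With
+    // authType=static the access/secret keys come from the URI extension or from the
+    // TIERED_STORE_S3_ACCESS_KEY / TIERED_STORE_S3_SECRET_KEY environment variables;
+    // with authType=instance the EC2/ECS instance profile is used instead:
+    //
+    //   uri.addExtension(AwsS3Storage.AUTH_TYPE_KEY, AwsS3Storage.STATIC_AUTH_TYPE);
+    //   uri.addExtension(S3URI.ACCESS_KEY_KEY, "AKIA...");
+    //   uri.addExtension(S3URI.SECRET_KEY_KEY, "secret");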
+    protected List<AwsCredentialsProvider> credentialsProviders() {
+        String authType = objectURI.extensionString(AUTH_TYPE_KEY, STATIC_AUTH_TYPE);
+        switch (authType) {
+            case STATIC_AUTH_TYPE: {
+                String accessKey = objectURI.extensionString(S3URI.ACCESS_KEY_KEY, System.getenv("TIERED_STORE_S3_ACCESS_KEY"));
+                String secretKey = objectURI.extensionString(S3URI.SECRET_KEY_KEY, System.getenv("TIERED_STORE_S3_SECRET_KEY"));
+                if (StringUtils.isBlank(accessKey) || StringUtils.isBlank(secretKey)) {
+                    return Collections.emptyList();
+                }
+                return Lists.newArrayList(StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKey, secretKey)));
+            }
+            case INSTANCE_AUTH_TYPE: {
+                return Lists.newArrayList(instanceProfileCredentialsProvider());
+            }
+            default:
+                throw new UnsupportedOperationException("Unsupported auth type: " + authType);
+        }
+    }
+
+    protected AwsCredentialsProvider instanceProfileCredentialsProvider() {
+        if (instanceProfileCredentialsProvider == null) {
+            synchronized (AwsS3Storage.class) {
+                if (instanceProfileCredentialsProvider == null) {
+                    instanceProfileCredentialsProvider = InstanceProfileCredentialsProvider.builder().build();
+                }
+            }
+        }
+        return instanceProfileCredentialsProvider;
+    }
+
+    private String range(long start, long end) {
+        if (end == -1L) {
+            return "bytes=" + start + "-";
+        }
+        // the HTTP Range header uses an inclusive end, so convert the exclusive end here
+        return "bytes=" + start + "-" + (end - 1);
+    }
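+    // range() examples, given the exclusive-end convention used by the callers:
+    //   range(0, 1024)  -> "bytes=0-1023"  (the first KiB)
+    //   range(128, -1L) -> "bytes=128-"    (from offset 128 to the end of the object)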
+    protected S3AsyncClient newS3Client(String endpoint, String region, boolean forcePathStyle,
+        List<AwsCredentialsProvider> credentialsProviders, int maxConcurrency) {
+        S3AsyncClientBuilder builder = S3AsyncClient.builder().region(Region.of(region));
+        if (StringUtils.isNotBlank(endpoint)) {
+            builder.endpointOverride(URI.create(endpoint));
+        }
+        if (!OpenSsl.isAvailable()) {
+            log.warn("OpenSSL is not available, using the JDK SSL provider, which may have performance issues.", OpenSsl.unavailabilityCause());
+        }
+        SdkAsyncHttpClient httpClient = NettyNioAsyncHttpClient.builder()
+            .maxConcurrency(maxConcurrency)
+            .build();
+        builder.httpClient(httpClient);
+        builder.serviceConfiguration(c -> c.pathStyleAccessEnabled(forcePathStyle));
+        builder.credentialsProvider(newCredentialsProviderChain(credentialsProviders));
+        builder.overrideConfiguration(b -> b.apiCallTimeout(Duration.ofMinutes(2))
+            .apiCallAttemptTimeout(Duration.ofSeconds(60)));
+        return builder.build();
+    }
+
+    private AwsCredentialsProvider newCredentialsProviderChain(List<AwsCredentialsProvider> credentialsProviders) {
+        List<AwsCredentialsProvider> providers = new ArrayList<>(credentialsProviders);
+        // Add default providers to the end of the chain
+        providers.add(InstanceProfileCredentialsProvider.create());
+        providers.add(AnonymousCredentialsProvider.create());
+        return AwsCredentialsProviderChain.builder()
+            .reuseLastProviderEnabled(true)
+            .credentialsProviders(providers)
+            .build();
+    }
+
+    public boolean readinessCheck() {
+        return new ReadinessCheck().readinessCheck();
+    }
+
+    class ReadinessCheck {
+        public boolean readinessCheck() {
+            log.info("Start readiness check for {}", objectURI);
+            String normalPath = String.format("__rocketmq/readiness_check/normal_obj/%d", System.nanoTime());
+            try {
+                writeS3Client.headObject(HeadObjectRequest.builder().bucket(bucket).key(normalPath).build()).get();
+            }
+            catch (Throwable e) {
+                Throwable cause = futureCause(e);
+                if (cause instanceof SdkClientException) {
+                    log.error("Cannot connect to s3, please check the s3 endpoint config", cause);
+                }
+                else if (cause instanceof S3Exception) {
+                    int code = ((S3Exception) cause).statusCode();
+                    switch (code) {
+                        case HttpStatusCode.NOT_FOUND:
+                            break;
+                        case HttpStatusCode.FORBIDDEN:
+                            log.error("Please check whether the credentials and permissions are correct", cause);
+                            return false;
+                        default:
+                            log.error("Please check whether the s3 config is correct", cause);
+                    }
+                }
+            }
+
+            try {
+                byte[] content = new Date().toString().getBytes(StandardCharsets.UTF_8);
+                doWrite(normalPath, Unpooled.wrappedBuffer(content)).get();
+            }
+            catch (Throwable e) {
+                Throwable cause = futureCause(e);
+                if (cause instanceof S3Exception && ((S3Exception) cause).statusCode() == HttpStatusCode.NOT_FOUND) {
+                    log.error("Cannot find the bucket={}", bucket, cause);
+                }
+                else {
+                    log.error("Please check whether the identity has permission to do the Write Object operation", cause);
+                }
+                return false;
+            }
+
+            try {
+                doDeleteObjects(Lists.newArrayList(normalPath)).get();
+            }
+            catch (Throwable e) {
+                log.error("Please check whether the identity has permission to do the Delete Object operation", futureCause(e));
+                return false;
+            }
+
+            String multiPartPath = String.format("__rocketmq/readiness_check/multi_obj/%d", System.nanoTime());
+            try {
+                String uploadId = doCreateMultipartUpload(multiPartPath).get();
+                byte[] content = new Date().toString().getBytes(StandardCharsets.UTF_8);
+                ObjectStorageCompletedPart part = doUploadPart(multiPartPath, uploadId, 1, Unpooled.wrappedBuffer(content)).get();
+                doCompleteMultipartUpload(multiPartPath, uploadId, Lists.newArrayList(part)).get();
+
+                ByteBuf buf = doRangeRead(multiPartPath, 0, -1L).get();
+                byte[] readContent = new byte[buf.readableBytes()];
+                buf.readBytes(readContent);
+                buf.release();
+                if (!Arrays.equals(content, readContent)) {
+                    log.error("Read mismatched content from the multi-part uploaded object, expect {}, but got {}", content, readContent);
+                }
+                doDeleteObjects(Lists.newArrayList(multiPartPath)).get();
+            }
+            catch (Throwable e) {
+                log.error("Please check whether the identity has permission to do MultiPart Object operations", futureCause(e));
+                return false;
+            }
+
+            log.info("Readiness check pass!");
+            return true;
+        }
+    }
+
+    public static class Builder {
+        private S3URI bucketURI;
+        private boolean readWriteIsolate;
+        private int maxObjectStorageConcurrency;
+
+        public Builder bucket(S3URI bucketURI) {
+            this.bucketURI = bucketURI;
+            return this;
+        }
+
+        public Builder readWriteIsolate(boolean readWriteIsolate) {
+            this.readWriteIsolate = readWriteIsolate;
+            return this;
+        }
+
+        public Builder maxObjectStorageConcurrency(int maxObjectStorageConcurrency) {
+            this.maxObjectStorageConcurrency = maxObjectStorageConcurrency;
+            return this;
+        }
+
+        public AwsS3Storage build() {
+            return new AwsS3Storage(bucketURI, maxObjectStorageConcurrency, readWriteIsolate);
+        }
+    }
+}
diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/S3Storage.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/S3Storage.java
new file mode 100644
index 000000000..80aa99a6b
--- /dev/null
+++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/S3Storage.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.rocketmq.tieredstore.s3.object;
+
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+
+import io.netty.buffer.ByteBuf;
+
+public interface S3Storage {
+    long RANGE_READ_TO_END = -1L;
+
+    /**
+     * Check whether the object storage is available.
+     *
+     * @return available or not
+     */
+    boolean readinessCheck();
+
+    void close();
+
+    /**
+     * Read an object from the object storage. It will throw {@link ObjectNotFoundException} if the object is not found.
+     */
+    default CompletableFuture<ByteBuf> read(String objectPath) {
+        return rangeRead(objectPath, 0, RANGE_READ_TO_END);
+    }
+
+    /**
+     * Range-read an object from the object storage. It will throw {@link ObjectNotFoundException} if the object is not
+     * found.
+     */
+    CompletableFuture<ByteBuf> rangeRead(String objectPath, long start, long end);
+
+    // Low level API
+    CompletableFuture<Void> write(String objectPath, ByteBuf buf);
+
+    CompletableFuture<List<ObjectInfo>> list(String prefix);
+
+    // read the object header (metadata)
+    CompletableFuture<ObjectInfo> readHeader(String objectPath);
+
+    CompletableFuture<Void> delete(List<String> objectPaths);
+
+    CompletableFuture<String> createMultipartUpload(String path);
+
+    CompletableFuture<ObjectStorageCompletedPart> uploadPart(String path, String uploadId,
+        int partNumber,
+        ByteBuf data);
+
+    CompletableFuture<ObjectStorageCompletedPart> uploadPartCopy(String sourcePath, String path,
+        long start, long end, String uploadId, int partNumber);
+
+    CompletableFuture<Void> completeMultipartUpload(String path, String uploadId,
+        List<ObjectStorageCompletedPart> parts);
+
+    CompletableFuture<Boolean> abortMultipartUpload(String key, String uploadId);
+
+    class ObjectInfo {
+        private final String key;
+        private final long timestamp;
+        private final long size;
+
+        public ObjectInfo(String key, long timestamp, long size) {
+            this.key = key;
+            this.timestamp = timestamp;
+            this.size = size;
+        }
+
+        public long timestamp() {
+            return timestamp;
+        }
+
+        public long size() {
+            return size;
+        }
+
+        public String key() {
+            return key;
+        }
+    }
+
+    class ObjectStorageCompletedPart {
+        private final int partNumber;
+        private final String partId;
+        private final String checkSum;
+
+        public ObjectStorageCompletedPart(int partNumber, String partId, String checkSum) {
+            this.partNumber = partNumber;
+            this.partId = partId;
+            this.checkSum = checkSum;
+        }
+
+        public int getPartNumber() {
+            return partNumber;
+        }
+
+        public String getPartId() {
+            return partId;
+        }
+
+        public String getCheckSum() {
+            return checkSum;
+        }
+
+    }
+
+    class ObjectNotFoundException extends Exception {
+        public ObjectNotFoundException(Throwable cause) {
+            super(cause);
+        }
+    }
+}
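+// Usage sketch (illustrative; the object path is an assumption): read the first
+// 4 KiB of an object and release the reference-counted buffer afterwards.
+//
+//   S3Storage storage = ...; // e.g. an AwsS3Storage instance
+//   ByteBuf buf = storage.rangeRead("cluster/broker/segment/0", 0, 4096).join();
+//   try {
+//       byte[] bytes = new byte[buf.readableBytes()];
+//       buf.readBytes(bytes);
+//   } finally {
+//       buf.release(); // rangeRead hands ownership of a Netty buffer to the caller
+//   }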
diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/S3URI.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/S3URI.java
new file mode 100644
index 000000000..94934633e
--- /dev/null
+++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/S3URI.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.rocketmq.tieredstore.s3.object;
+
+import com.google.common.collect.Lists;
+import java.util.List;
+import java.util.Map;
+import org.apache.commons.lang3.StringUtils;
+
+public class S3URI {
+    private static final String ENDPOINT_KEY = "endpoint";
+    private static final String REGION_KEY = "region";
+    public static final String ACCESS_KEY_KEY = "accessKey";
+    public static final String SECRET_KEY_KEY = "secretKey";
+    private static final String EMPTY_STRING = "";
+    private final String bucket;
+    private final String region;
+    private String endpoint;
+    private final Map<String, List<String>> extension;
+
+    private S3URI(String bucket, String region, String endpoint,
+        Map<String, List<String>> extension) {
+        this.bucket = bucket;
+        this.region = region;
+        this.endpoint = endpoint;
+        this.extension = extension;
+    }
+
+    public String endpoint() {
+        return endpoint;
+    }
+
+    public void endpoint(String endpoint) {
+        this.endpoint = endpoint;
+    }
+
+    public String bucket() {
+        return bucket;
+    }
+
+    public String region() {
+        return region;
+    }
+
+    public String extensionString(String key) {
+        return getString(extension, key, null);
+    }
+
+    public String extensionString(String key, String defaultVal) {
+        return getString(extension, key, defaultVal);
+    }
+
+    public boolean extensionBool(String key, boolean defaultVal) {
+        String value = getString(extension, key, null);
+        if (StringUtils.isBlank(value)) {
+            return defaultVal;
+        }
+        return Boolean.parseBoolean(value);
+    }
+
+    public void addExtension(String key, String value) {
+        extension.put(key, Lists.newArrayList(value));
+    }
+
+    private String getString(Map<String, List<String>> queries, String key, String defaultValue) {
+        List<String> value = queries.get(key);
+        if (value == null) {
+            return defaultValue;
+        }
+        if (value.size() > 1) {
+            throw new IllegalArgumentException("expect only one value for key: " + key + " but found " + value);
+        }
+        return value.get(0);
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append("BucketURL{"
+            + "bucket='" + bucket + '\''
+            + ", region='" + region + '\''
+            + ", endpoint='" + endpoint + '\'');
+        sb.append(", extension={");
+        extension.forEach((k, v) -> {
+            sb.append(k).append("=");
+            if (k.equals(SECRET_KEY_KEY)) {
+                sb.append("*******");
+            }
+            else {
+                sb.append(v);
+            }
+            sb.append(", ");
+        });
+        sb.append("}}");
+        return sb.toString();
+    }
+
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    public static class Builder {
+        private String bucket;
+        private String region;
+        private String endpoint;
+        private Map<String, List<String>> extension;
+
+        public Builder bucket(String bucket) {
+            this.bucket = bucket;
+            return this;
+        }
+
+        public Builder region(String region) {
+            this.region = region;
+            return this;
+        }
+
+        public Builder endpoint(String endpoint) {
+            this.endpoint = endpoint;
+            return this;
+        }
+
+        public Builder extension(Map<String, List<String>> extension) {
+            this.extension = extension;
+            return this;
+        }
+
+        public S3URI build() {
+            return new S3URI(bucket, region, endpoint, extension);
+        }
+
+    }
+}
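+// Extension lookup sketch (illustrative values):
+//
+//   uri.addExtension("pathStyle", "true");
+//   uri.extensionBool("pathStyle", false);      // -> true
+//   uri.extensionString("authType", "static");  // -> "static" (the default when unset)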
diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/bytebuf/ByteBufAlloc.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/bytebuf/ByteBufAlloc.java
new file mode 100644
index 000000000..cfdd30c0d
--- /dev/null
+++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/bytebuf/ByteBufAlloc.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.rocketmq.tieredstore.s3.object.bytebuf;
+
+import io.netty.buffer.AbstractByteBufAllocator;
+import io.netty.buffer.ByteBuf;
+import io.netty.buffer.CompositeByteBuf;
+import io.netty.buffer.PooledByteBufAllocator;
+import io.netty.buffer.UnpooledByteBufAllocator;
+
+public class ByteBufAlloc {
+
+    /**
+     * The policy used to allocate memory.
+     */
+    private static ByteBufAllocPolicy policy = ByteBufAllocPolicy.POOLED_HEAP;
+    /**
+     * The allocator used to allocate memory. It should be updated whenever {@link #policy} is updated.
+     */
+    private static AbstractByteBufAllocator allocator = getAllocatorByPolicy(policy);
+
+    /**
+     * Set the policy used to allocate memory.
+     */
+    public static void setPolicy(ByteBufAllocPolicy policy) {
+        ByteBufAlloc.policy = policy;
+        ByteBufAlloc.allocator = getAllocatorByPolicy(policy);
+    }
+
+    public static ByteBufAllocPolicy getPolicy() {
+        return policy;
+    }
+
+    public static CompositeByteBuf compositeByteBuffer() {
+        return allocator.compositeDirectBuffer(Integer.MAX_VALUE);
+    }
+
+    public static ByteBuf byteBuffer(int initCapacity) {
+        try {
+            return policy.isDirect() ? allocator.directBuffer(initCapacity) : allocator.heapBuffer(initCapacity);
+        }
+        catch (OutOfMemoryError e) {
+            System.err.println("alloc buffer OOM");
+            Runtime.getRuntime().halt(1);
+            throw e;
+        }
+    }
+
+    private static AbstractByteBufAllocator getAllocatorByPolicy(ByteBufAllocPolicy policy) {
+        if (policy.isPooled()) {
+            return PooledByteBufAllocator.DEFAULT;
+        }
+        return UnpooledByteBufAllocator.DEFAULT;
+    }
+
+}
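+// Policy selection sketch (illustrative): set the policy once at startup, before any
+// buffer is allocated, since buffers already handed out keep their original allocator.
+// Note that compositeByteBuffer() always builds a direct composite; the composite only
+// holds references to its components, which still follow the configured policy.
+//
+//   ByteBufAlloc.setPolicy(ByteBufAllocPolicy.POOLED_DIRECT);
+//   ByteBuf buf = ByteBufAlloc.byteBuffer(1024); // now a pooled direct buffer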
diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/bytebuf/ByteBufAllocPolicy.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/bytebuf/ByteBufAllocPolicy.java
new file mode 100644
index 000000000..61442e551
--- /dev/null
+++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/object/bytebuf/ByteBufAllocPolicy.java
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.rocketmq.tieredstore.s3.object.bytebuf;
+
+public enum ByteBufAllocPolicy {
+
+    /**
+     * Allocate memory from the heap with pooling.
+     */
+    POOLED_HEAP(true, false),
+
+    /**
+     * Use pooled direct memory.
+     */
+    POOLED_DIRECT(true, true);
+
+    /**
+     * Whether the buffer should be pooled or not.
+     */
+    private final boolean pooled;
+
+    /**
+     * Whether the buffer should be direct or not.
+     */
+    private final boolean direct;
+
+    ByteBufAllocPolicy(boolean pooled, boolean direct) {
+        this.pooled = pooled;
+        this.direct = direct;
+    }
+
+    public boolean isPooled() {
+        return pooled;
+    }
+
+    public boolean isDirect() {
+        return direct;
+    }
+}
diff --git a/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/util/S3PathUtils.java b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/util/S3PathUtils.java
new file mode 100644
index 000000000..33b61cbe7
--- /dev/null
+++ b/rocketmq-tieredstore-s3/src/main/java/org/apache/rocketmq/tieredstore/s3/util/S3PathUtils.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.rocketmq.tieredstore.s3.util;
+
+public class S3PathUtils {
+
+    public static final String FILE_SEPARATOR = "/";
+
+    public static String getBaseChunkPath(String basePath) {
+        return basePath + FILE_SEPARATOR + "chunk";
+    }
+
+    public static String getBaseSegmentPath(String basePath) {
+        return basePath + FILE_SEPARATOR + "segment";
+    }
+
+    public static String getChunkPathByPosition(String basePath, long position) {
+        return basePath + FILE_SEPARATOR + position;
+    }
+
+    public static String getSegmentPath(String basePath) {
+        return basePath + FILE_SEPARATOR + 0;
+    }
+}
diff --git a/rocketmq-tieredstore-s3/style/copyright/Apache.xml b/rocketmq-tieredstore-s3/style/copyright/Apache.xml
new file mode 100644
index 000000000..8b918e85d
--- /dev/null
+++ b/rocketmq-tieredstore-s3/style/copyright/Apache.xml
@@ -0,0 +1,24 @@
\ No newline at end of file
diff --git a/rocketmq-tieredstore-s3/style/copyright/profiles_settings.xml b/rocketmq-tieredstore-s3/style/copyright/profiles_settings.xml
new file mode 100644
index 000000000..b5e0819c6
--- /dev/null
+++ b/rocketmq-tieredstore-s3/style/copyright/profiles_settings.xml
@@ -0,0 +1,64 @@
\ No newline at end of file
diff --git a/rocketmq-tieredstore-s3/style/rmq_checkstyle.xml b/rocketmq-tieredstore-s3/style/rmq_checkstyle.xml
new file mode 100644
index 000000000..82079e511
--- /dev/null
+++ b/rocketmq-tieredstore-s3/style/rmq_checkstyle.xml
@@ -0,0 +1,135 @@
diff --git a/rocketmq-tieredstore-s3/style/rmq_codeStyle.xml b/rocketmq-tieredstore-s3/style/rmq_codeStyle.xml
new file mode 100644
index 000000000..840572b5e
--- /dev/null
+++ b/rocketmq-tieredstore-s3/style/rmq_codeStyle.xml
@@ -0,0 +1,157 @@
\ No newline at end of file