From 88c7bfa8fb21f5f54ba6f5d1e7b97ec8652e4720 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 23 Oct 2024 00:31:17 +0300 Subject: [PATCH 1/4] refactor: rename to configs Reorganize sections and move rst into docs module --- docs/build.gradle | 4 +-- config.rst => docs/configs.rst | 25 +++++++++++-------- .../{ConfigDocs.java => ConfigsDocs.java} | 16 ++++++------ 3 files changed, 25 insertions(+), 20 deletions(-) rename config.rst => docs/configs.rst (98%) rename docs/src/main/java/io/aiven/kafka/tieredstorage/misc/{ConfigDocs.java => ConfigsDocs.java} (91%) diff --git a/docs/build.gradle b/docs/build.gradle index 7ae589ca..a63d823a 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -28,6 +28,6 @@ dependencies { tasks.register('genConfigDocs', JavaExec) { classpath = sourceSets.main.runtimeClasspath - mainClass = 'io.aiven.kafka.tieredstorage.misc.ConfigDocs' - standardOutput = new File("config.rst").newOutputStream() + mainClass = 'io.aiven.kafka.tieredstorage.misc.ConfigsDocs' + standardOutput = new File("docs/configs.rst").newOutputStream() } diff --git a/config.rst b/docs/configs.rst similarity index 98% rename from config.rst rename to docs/configs.rst index 36d0c6c8..6b997532 100644 --- a/config.rst +++ b/docs/configs.rst @@ -1,6 +1,9 @@ ================= -RemoteStorageManagerConfig +Core components ================= +----------------- +RemoteStorageManagerConfig +----------------- ``chunk.size`` Segment files are chunked into smaller parts to allow for faster processing (e.g. encryption, compression) and for range-fetching. It is recommended to benchmark this value, starting with 4MiB. 
@@ -91,9 +94,9 @@ RemoteStorageManagerConfig * Importance: low -================= +----------------- SegmentManifestCacheConfig -================= +----------------- Under ``fetch.manifest.cache.`` ``retention.ms`` @@ -129,9 +132,9 @@ Under ``fetch.manifest.cache.`` * Importance: low -================= +----------------- SegmentIndexesCacheConfig -================= +----------------- Under ``fetch.indexes.cache.`` ``retention.ms`` @@ -167,9 +170,9 @@ Under ``fetch.indexes.cache.`` * Importance: low -================= +----------------- ChunkManagerFactoryConfig -================= +----------------- ``fetch.chunk.cache.class`` Chunk cache implementation. There are 2 implementations included: io.aiven.kafka.tieredstorage.fetch.cache.MemoryChunkCache and io.aiven.kafka.tieredstorage.fetch.cache.DiskChunkCache @@ -179,9 +182,9 @@ ChunkManagerFactoryConfig * Importance: medium -================= +----------------- MemoryChunkCacheConfig -================= +----------------- Under ``fetch.chunk.cache.`` ``size`` @@ -224,9 +227,9 @@ Under ``fetch.chunk.cache.`` * Importance: low -================= +----------------- DiskChunkCacheConfig -================= +----------------- Under ``fetch.chunk.cache.`` ``path`` diff --git a/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigsDocs.java similarity index 91% rename from docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java rename to docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigsDocs.java index d0dcfc5d..ef7f588e 100644 --- a/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigDocs.java +++ b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/ConfigsDocs.java @@ -37,32 +37,34 @@ /** * Gather all config definitions across the project and generate a documentation page **/ -public class ConfigDocs { +public class ConfigsDocs { public static void main(final String[] args) { - 
printSectionTitle("RemoteStorageManagerConfig"); + printSectionTitle("Core components"); + + printSubsectionTitle("RemoteStorageManagerConfig"); final var rsmConfigDef = RemoteStorageManagerConfig.configDef(); System.out.println(rsmConfigDef.toEnrichedRst()); - printSectionTitle("SegmentManifestCacheConfig"); + printSubsectionTitle("SegmentManifestCacheConfig"); System.out.println("Under ``" + SEGMENT_MANIFEST_CACHE_PREFIX + "``\n"); final var segmentManifestCacheDef = MemorySegmentManifestCache.configDef(); System.out.println(segmentManifestCacheDef.toEnrichedRst()); - printSectionTitle("SegmentIndexesCacheConfig"); + printSubsectionTitle("SegmentIndexesCacheConfig"); System.out.println("Under ``" + FETCH_INDEXES_CACHE_PREFIX + "``\n"); final var segmentIndexesCacheDef = MemorySegmentIndexesCache.configDef(); System.out.println(segmentIndexesCacheDef.toEnrichedRst()); - printSectionTitle("ChunkManagerFactoryConfig"); + printSubsectionTitle("ChunkManagerFactoryConfig"); final var chunkCacheFactoryDef = ChunkManagerFactoryConfig.configDef(); System.out.println(chunkCacheFactoryDef.toEnrichedRst()); - printSectionTitle("MemoryChunkCacheConfig"); + printSubsectionTitle("MemoryChunkCacheConfig"); System.out.println("Under ``" + FETCH_CHUNK_CACHE_PREFIX + "``\n"); final var memChunkCacheDef = ChunkCacheConfig.configDef(new ConfigDef()); System.out.println(memChunkCacheDef.toEnrichedRst()); - printSectionTitle("DiskChunkCacheConfig"); + printSubsectionTitle("DiskChunkCacheConfig"); System.out.println("Under ``" + FETCH_CHUNK_CACHE_PREFIX + "``\n"); final var diskChunkCacheDef = DiskChunkCacheConfig.configDef(); System.out.println(diskChunkCacheDef.toEnrichedRst()); From 936d6d7fff089e749bbd0022a7eb7209ef07d9ed Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 23 Oct 2024 16:46:22 +0300 Subject: [PATCH 2/4] fix: add details about s3 retries to timeout configs --- docs/configs.rst | 4 ++-- .../kafka/tieredstorage/storage/s3/S3StorageConfig.java | 6 
++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/docs/configs.rst b/docs/configs.rst index 6b997532..e74206fd 100644 --- a/docs/configs.rst +++ b/docs/configs.rst @@ -455,7 +455,7 @@ S3StorageConfig * Importance: low ``s3.api.call.attempt.timeout`` - AWS S3 API call attempt timeout in milliseconds + AWS S3 API call attempt (single retry) timeout in milliseconds * Type: long * Default: null @@ -463,7 +463,7 @@ S3StorageConfig * Importance: low ``s3.api.call.timeout`` - AWS S3 API call timeout in milliseconds + AWS S3 API call timeout in milliseconds, including all retries * Type: long * Default: null diff --git a/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java b/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java index 0df700c9..32cfd1fb 100644 --- a/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java +++ b/storage/s3/src/main/java/io/aiven/kafka/tieredstorage/storage/s3/S3StorageConfig.java @@ -63,9 +63,11 @@ public class S3StorageConfig extends AbstractConfig { static final int S3_MULTIPART_UPLOAD_PART_SIZE_DEFAULT = S3_MULTIPART_UPLOAD_PART_SIZE_MIN; private static final String S3_API_CALL_TIMEOUT_CONFIG = "s3.api.call.timeout"; - private static final String S3_API_CALL_TIMEOUT_DOC = "AWS S3 API call timeout in milliseconds"; + private static final String S3_API_CALL_TIMEOUT_DOC = "AWS S3 API call timeout in milliseconds, " + + "including all retries"; private static final String S3_API_CALL_ATTEMPT_TIMEOUT_CONFIG = "s3.api.call.attempt.timeout"; - private static final String S3_API_CALL_ATTEMPT_TIMEOUT_DOC = "AWS S3 API call attempt timeout in milliseconds"; + private static final String S3_API_CALL_ATTEMPT_TIMEOUT_DOC = "AWS S3 API call attempt " + + "(single retry) timeout in milliseconds"; public static final String AWS_CREDENTIALS_PROVIDER_CLASS_CONFIG = "aws.credentials.provider.class"; private static final String 
AWS_CREDENTIALS_PROVIDER_CLASS_DOC = "AWS credentials provider. " + "If not set, AWS SDK uses the default " From 138ca5e87ee4055212663d30af07489072a4a0f3 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 23 Oct 2024 00:32:54 +0300 Subject: [PATCH 3/4] feat: add metrics document generation --- docs/build.gradle | 6 + docs/metrics.rst | 367 ++++++++++++++++++ .../kafka/tieredstorage/misc/MetricsDocs.java | 194 +++++++++ 3 files changed, 567 insertions(+) create mode 100644 docs/metrics.rst create mode 100644 docs/src/main/java/io/aiven/kafka/tieredstorage/misc/MetricsDocs.java diff --git a/docs/build.gradle b/docs/build.gradle index a63d823a..548e384e 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -31,3 +31,9 @@ tasks.register('genConfigDocs', JavaExec) { mainClass = 'io.aiven.kafka.tieredstorage.misc.ConfigsDocs' standardOutput = new File("docs/configs.rst").newOutputStream() } + +tasks.register('genMetricsDocs', JavaExec) { + classpath = sourceSets.main.runtimeClasspath + mainClass = 'io.aiven.kafka.tieredstorage.misc.MetricsDocs' + standardOutput = new File("docs/metrics.rst").newOutputStream() +} \ No newline at end of file diff --git a/docs/metrics.rst b/docs/metrics.rst new file mode 100644 index 00000000..5110439c --- /dev/null +++ b/docs/metrics.rst @@ -0,0 +1,367 @@ +================= +Core components metrics +================= + +----------------- +RemoteStorageManager metrics +----------------- + +aiven.kafka.server.tieredstorage:type=remote-storage-manager-metrics +==================================================================== + +==================================== =============================================================================================== +Attribute name Description +==================================== =============================================================================================== +object-upload-bytes-rate Rate of bytes uploaded to a storage backend +object-upload-bytes-total 
Total number of bytes uploaded to a storage backend +object-upload-rate Rate of upload to a storage backend operations +object-upload-total Total number of upload to a storage backend operations +segment-copy-time-avg Average time spent processing and uploading a log segment and indexes +segment-copy-time-max Maximum time spent processing and uploading a log segment and indexes +segment-delete-bytes-total Total number of deleted number of bytes estimated from segment size +segment-delete-errors-rate Rate of errors during remote log segment deletion +segment-delete-errors-total Total number of errors during remote log segment deletion +segment-delete-rate Rate of delete remote segment operations, including all its objects +segment-delete-time-avg Average time spent deleting log segment and indexes +segment-delete-time-max Maximum time spent deleting log segment and indexes +segment-delete-total Total number of delete remote segment operations, including all its objects +segment-fetch-requested-bytes-rate Rate of bytes requested by broker, not necessarily the amount to be consumed by fetcher +segment-fetch-requested-bytes-total Total number of bytes requested by broker, not necessarily the amount to be consumed by fetcher +==================================== =============================================================================================== + +aiven.kafka.server.tieredstorage:type=remote-storage-manager-metrics,object-type="{object-type}" +================================================================================================ + +========================== ============================================================================ +Attribute name Description +========================== ============================================================================ +object-upload-bytes-rate Rate of bytes uploaded to a storage backend tagged by object type +object-upload-bytes-total Total number of bytes uploaded to a storage backend tagged by object 
type +object-upload-rate Rate of upload to a storage backend operations tagged by object type +object-upload-total Total number of upload to a storage backend operations tagged by object type +========================== ============================================================================ + +aiven.kafka.server.tieredstorage:type=remote-storage-manager-metrics,topic="{topic}" +==================================================================================== + +==================================== =============================================================================================================== +Attribute name Description +==================================== =============================================================================================================== +object-upload-bytes-rate Rate of bytes uploaded to a storage backend tagged by topic +object-upload-bytes-total Total number of bytes uploaded to a storage backend tagged by topic +object-upload-rate Rate of upload to a storage backend operations tagged by topic +object-upload-total Total number of upload to a storage backend operations tagged by topic +segment-copy-time-avg Average time spent processing and uploading a log segment and indexes tagged by topic +segment-copy-time-max Maximum time spent processing and uploading a log segment and indexes tagged by topic +segment-delete-bytes-total Total number of deleted number of bytes estimated from segment size tagged by topic +segment-delete-errors-rate Rate of errors during remote log segment deletion tagged by topic +segment-delete-errors-total Total number of errors during remote log segment deletion tagged by topic +segment-delete-rate Rate of delete remote segment operations, including all its objects tagged by topic +segment-delete-time-avg Average time spent deleting log segment and indexes tagged by topic +segment-delete-time-max Maximum time spent deleting log segment and indexes tagged by topic +segment-delete-total 
Total number of delete remote segment operations, including all its objects tagged by topic +segment-fetch-requested-bytes-rate Rate of bytes requested by broker, not necessarily the amount to be consumed by fetcher tagged by topic +segment-fetch-requested-bytes-total Total number of bytes requested by broker, not necessarily the amount to be consumed by fetcher tagged by topic +==================================== =============================================================================================================== + +aiven.kafka.server.tieredstorage:type=remote-storage-manager-metrics,topic="{topic}",object-type="{object-type}" +================================================================================================================ + +========================== ====================================================================================== +Attribute name Description +========================== ====================================================================================== +object-upload-bytes-rate Rate of bytes uploaded to a storage backend tagged by topic and object type +object-upload-bytes-total Total number of bytes uploaded to a storage backend tagged by topic and object type +object-upload-rate Rate of upload to a storage backend operations tagged by topic and object type +object-upload-total Total number of upload to a storage backend operations tagged by topic and object type +========================== ====================================================================================== + +aiven.kafka.server.tieredstorage:type=remote-storage-manager-metrics,topic="{topic}",partition="{partition}" +============================================================================================================ + +==================================== ============================================================================================================================= +Attribute name Description 
+==================================== ============================================================================================================================= +object-upload-bytes-rate Rate of bytes uploaded to a storage backend tagged by topic and partition +object-upload-bytes-total Total number of bytes uploaded to a storage backend tagged by topic and partition +object-upload-rate Rate of upload to a storage backend operations tagged by topic and partition +object-upload-total Total number of upload to a storage backend operations tagged by topic and partition +segment-copy-time-avg Average time spent processing and uploading a log segment and indexes tagged by topic and partition +segment-copy-time-max Maximum time spent processing and uploading a log segment and indexes tagged by topic and partition +segment-delete-bytes-total Total number of deleted number of bytes estimated from segment size tagged by topic and partition +segment-delete-errors-rate Rate of errors during remote log segment deletion tagged by topic and partition +segment-delete-errors-total Total number of errors during remote log segment deletion tagged by topic and partition +segment-delete-rate Rate of delete remote segment operations, including all its objects tagged by topic and partition +segment-delete-time-avg Average time spent deleting log segment and indexes tagged by topic and partition +segment-delete-time-max Maximum time spent deleting log segment and indexes tagged by topic and partition +segment-delete-total Total number of delete remote segment operations, including all its objects tagged by topic and partition +segment-fetch-requested-bytes-rate Rate of bytes requested by broker, not necessarily the amount to be consumed by fetcher tagged by topic and partition +segment-fetch-requested-bytes-total Total number of bytes requested by broker, not necessarily the amount to be consumed by fetcher tagged by topic and partition +==================================== 
============================================================================================================================= + +aiven.kafka.server.tieredstorage:type=remote-storage-manager-metrics,topic="{topic}",partition="{partition}",object-type="{object-type}" +======================================================================================================================================== + +========================== ================================================================================================= +Attribute name Description +========================== ================================================================================================= +object-upload-bytes-rate Rate of bytes uploaded to a storage backend tagged by topic, partition and object type +object-upload-bytes-total Total number of bytes uploaded to a storage backend tagged by topic, partition and object type +object-upload-rate Rate of upload to a storage backend operations tagged by topic, partition and object type +object-upload-total Total number of upload to a storage backend operations tagged by topic, partition and object type +========================== ================================================================================================= + + + +----------------- +SegmentManifestCache metrics +----------------- + +aiven.kafka.server.tieredstorage.cache:type=segment-manifest-cache-metrics +========================================================================== + +============================== ======================================== +Attribute name Description +============================== ======================================== +cache-eviction-total Eviction of an entry from the cache +cache-eviction-weight-total Weight of evicted entry +cache-hits-total Cache hits +cache-load-failure-time-total Time when failing to load a new entry +cache-load-failure-total Failures to load a new entry +cache-load-success-time-total Time to load a new entry 
+cache-load-success-total Successful load of a new entry +cache-misses-total Cache misses +cache-size-total Estimated number of entries in the cache +============================== ======================================== + +aiven.kafka.server.tieredstorage.cache:type=segment-manifest-cache-metrics,cause="{cause}" +========================================================================================== + +============================ =================================================== +Attribute name Description +============================ =================================================== +cache-eviction-total Eviction of an entry from the cache tagged by cause +cache-eviction-weight-total Weight of evicted entry tagged by cause +============================ =================================================== + + + +aiven.kafka.server.tieredstorage.thread-pool:type=segment-manifest-cache-thread-pool-metrics +============================================================================================ + +=========================== ======================================================================================================== +Attribute name Description +=========================== ======================================================================================================== +active-thread-count-total Number of threads currently executing tasks +parallelism-total Targeted parallelism level of the pool +pool-size-total Current number of threads in the pool +queued-task-count-total Tasks submitted to the pool that have not yet begun executing. 
+running-thread-count-total Number of worker threads that are not blocked waiting to join tasks or for other managed synchronization +steal-task-count-total Number of tasks stolen from one thread's work queue by another +=========================== ======================================================================================================== + + + +----------------- +SegmentIndexesCache metrics +----------------- +aiven.kafka.server.tieredstorage.cache:type=segment-indexes-cache-metrics +========================================================================= + +============================== ======================================== +Attribute name Description +============================== ======================================== +cache-eviction-total Eviction of an entry from the cache +cache-eviction-weight-total Weight of evicted entry +cache-hits-total Cache hits +cache-load-failure-time-total Time when failing to load a new entry +cache-load-failure-total Failures to load a new entry +cache-load-success-time-total Time to load a new entry +cache-load-success-total Successful load of a new entry +cache-misses-total Cache misses +cache-size-total Estimated number of entries in the cache +============================== ======================================== + +aiven.kafka.server.tieredstorage.cache:type=segment-indexes-cache-metrics,cause="{cause}" +========================================================================================= + +============================ =================================================== +Attribute name Description +============================ =================================================== +cache-eviction-total Eviction of an entry from the cache tagged by cause +cache-eviction-weight-total Weight of evicted entry tagged by cause +============================ =================================================== + + +aiven.kafka.server.tieredstorage.thread-pool:type=segment-indexes-cache-thread-pool-metrics 
+=========================================================================================== + +=========================== ======================================================================================================== +Attribute name Description +=========================== ======================================================================================================== +active-thread-count-total Number of threads currently executing tasks +parallelism-total Targeted parallelism level of the pool +pool-size-total Current number of threads in the pool +queued-task-count-total Tasks submitted to the pool that have not yet begun executing. +running-thread-count-total Number of worker threads that are not blocked waiting to join tasks or for other managed synchronization +steal-task-count-total Number of tasks stolen from one thread's work queue by another +=========================== ======================================================================================================== + + + +----------------- +ChunkCache metrics +----------------- + +aiven.kafka.server.tieredstorage.cache:type=chunk-cache-metrics +=============================================================== + +============================== ======================================== +Attribute name Description +============================== ======================================== +cache-eviction-total Eviction of an entry from the cache +cache-eviction-weight-total Weight of evicted entry +cache-hits-total Cache hits +cache-load-failure-time-total Time when failing to load a new entry +cache-load-failure-total Failures to load a new entry +cache-load-success-time-total Time to load a new entry +cache-load-success-total Successful load of a new entry +cache-misses-total Cache misses +cache-size-total Estimated number of entries in the cache +============================== ======================================== + 
+aiven.kafka.server.tieredstorage.cache:type=chunk-cache-metrics,cause="{cause}" +=============================================================================== + +============================ =================================================== +Attribute name Description +============================ =================================================== +cache-eviction-total Eviction of an entry from the cache tagged by cause +cache-eviction-weight-total Weight of evicted entry tagged by cause +============================ =================================================== + + + +aiven.kafka.server.tieredstorage.thread-pool:type=chunk-cache-thread-pool-metrics +================================================================================= + +=========================== ======================================================================================================== +Attribute name Description +=========================== ======================================================================================================== +active-thread-count-total Number of threads currently executing tasks +parallelism-total Targeted parallelism level of the pool +pool-size-total Current number of threads in the pool +queued-task-count-total Tasks submitted to the pool that have not yet begun executing. 
+running-thread-count-total Number of worker threads that are not blocked waiting to join tasks or for other managed synchronization +steal-task-count-total Number of tasks stolen from one thread's work queue by another +=========================== ======================================================================================================== + + + +================= +Storage Backend metrics +================= + +----------------- +AzureBlobStorage metrics +----------------- + +aiven.kafka.server.tieredstorage.azure:type=azure-blob-storage-client-metrics +============================================================================= + +======================== ============================================================ +Attribute name Description +======================== ============================================================ +blob-delete-rate Rate of object delete operations +blob-delete-total Total number of object delete operations +blob-get-rate Rate of get object operations +blob-get-total Total number of get object operations +blob-upload-rate Rate of object upload operations +blob-upload-total Total number of object upload operations +block-list-upload-rate Rate of block list (making a blob) upload operations +block-list-upload-total Total number of block list (making a blob) upload operations +block-upload-rate Rate of block (blob part) upload operations +block-upload-total Total number of block (blob part) upload operations +======================== ============================================================ + + + +----------------- +GcsStorage metrics +----------------- + +aiven.kafka.server.tieredstorage.gcs:type=gcs-client-metrics +============================================================ + +================================ =================================================================== +Attribute name Description +================================ =================================================================== 
+object-delete-rate Rate of delete object operations +object-delete-total Total number of delete object operations +object-get-rate Rate of get object operations +object-get-total Total number of get object operations +object-metadata-get-rate Rate of get object metadata operations +object-metadata-get-total Total number of get object metadata operations +resumable-chunk-upload-rate Rate of upload chunk operations as part of resumable upload +resumable-chunk-upload-total Total number of upload chunk operations as part of resumable upload +resumable-upload-initiate-rate Rate of initiate resumable upload operations +resumable-upload-initiate-total Total number of initiate resumable upload operations +================================ =================================================================== + + + +----------------- +S3Storage metrics +----------------- + +aiven.kafka.server.tieredstorage.s3:type=s3-client-metrics +========================================================== + +========================================= ============================================================================= +Attribute name Description +========================================= ============================================================================= +abort-multipart-upload-requests-rate Rate of abort multi-part upload operations +abort-multipart-upload-requests-total Total number of abort multi-part upload operations +abort-multipart-upload-time-avg Average time spent aborting a new multi-part upload operation +abort-multipart-upload-time-max Maximum time spent aborting a new multi-part upload operation +complete-multipart-upload-requests-rate Rate of complete multi-part upload operations +complete-multipart-upload-requests-total Total number of complete multi-part upload operations +complete-multipart-upload-time-avg Average time spent completing a new multi-part upload operation +complete-multipart-upload-time-max Maximum time spent completing a new multi-part upload 
operation +configured-timeout-errors-rate Rate of configured timeout errors +configured-timeout-errors-total Total number of configured timeout errors +create-multipart-upload-requests-rate Rate of create multi-part upload operations +create-multipart-upload-requests-total Total number of create multi-part upload operations +create-multipart-upload-time-avg Average time spent creating a new multi-part upload operation +create-multipart-upload-time-max Maximum time spent creating a new multi-part upload operation +delete-object-requests-rate Rate of delete object request operations +delete-object-requests-total Total number of delete object request operations +delete-object-time-avg Average time spent deleting an object +delete-object-time-max Maximum time spent deleting an object +delete-objects-requests-rate Rate of delete a set of objects request operations +delete-objects-requests-total Total number of delete a set of objects request operations +delete-objects-time-avg Average time spent deleting a set of objects +delete-objects-time-max Maximum time spent deleting a set of objects +get-object-requests-rate Rate of get object request operations +get-object-requests-total Total number of get object request operations +get-object-time-avg Average time spent getting a response from a get object request +get-object-time-max Maximum time spent getting a response from a get object request +io-errors-rate Rate of IO errors +io-errors-total Total number of IO errors +other-errors-rate Rate of other errors +other-errors-total Total number of other errors +put-object-requests-rate Rate of put object request operations +put-object-requests-total Total number of put object request operations +put-object-time-avg Average time spent uploading an object +put-object-time-max Maximum time spent uploading an object +server-errors-rate Rate of server errors +server-errors-total Total number of server errors +throttling-errors-rate Rate of throttling errors +throttling-errors-total 
Total number of throttling errors +upload-part-requests-rate Rate of upload part request operations (as part of multi-part upload) +upload-part-requests-total Total number of upload part request operations (as part of multi-part upload) +upload-part-time-avg Average time spent uploading a single part +upload-part-time-max Maximum time spent uploading a single part +========================================= ============================================================================= + + diff --git a/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/MetricsDocs.java b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/MetricsDocs.java new file mode 100644 index 00000000..c6549733 --- /dev/null +++ b/docs/src/main/java/io/aiven/kafka/tieredstorage/misc/MetricsDocs.java @@ -0,0 +1,194 @@ +/* + * Copyright 2024 Aiven Oy + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package io.aiven.kafka.tieredstorage.misc;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.TreeMap;
+
+import org.apache.kafka.common.MetricName;
+import org.apache.kafka.common.MetricNameTemplate;
+import org.apache.kafka.common.metrics.Metrics;
+import org.apache.kafka.common.utils.Sanitizer;
+
+import io.aiven.kafka.tieredstorage.fetch.cache.ChunkCache;
+import io.aiven.kafka.tieredstorage.fetch.index.MemorySegmentIndexesCache;
+import io.aiven.kafka.tieredstorage.fetch.manifest.MemorySegmentManifestCache;
+import io.aiven.kafka.tieredstorage.metrics.CaffeineMetricsRegistry;
+import io.aiven.kafka.tieredstorage.metrics.MetricsRegistry;
+import io.aiven.kafka.tieredstorage.metrics.ThreadPoolMonitorMetricsRegistry;
+
+/**
+ * Gathers the metric name templates declared by the metric registries of the project
+ * and prints them to standard output as a reStructuredText (RST) page.
+ **/
+public class MetricsDocs {
+    /** Shortest adornment line printed around a title; longer titles get a longer adornment. */
+    private static final int MIN_TITLE_ADORNMENT_LENGTH = 17;
+    private static final String ATTRIBUTE_HEADER = "Attribute name";
+    private static final String DESCRIPTION_HEADER = "Description";
+
+    /**
+     * Prints the whole metrics page: one section per component family,
+     * one subsection per component, one table per MBean.
+     *
+     * @param args ignored
+     */
+    public static void main(final String[] args) {
+        printSectionTitle("Core components metrics");
+        System.out.println();
+        printSubsectionTitle("RemoteStorageManager metrics");
+        System.out.println();
+        System.out.println(toRstTable(MetricsRegistry.METRIC_CONTEXT, new MetricsRegistry().all()));
+
+        System.out.println();
+        printSubsectionTitle("SegmentManifestCache metrics");
+        System.out.println();
+        System.out.println(toRstTable(
+            CaffeineMetricsRegistry.METRIC_CONTEXT,
+            new CaffeineMetricsRegistry(MemorySegmentManifestCache.METRIC_GROUP).all()));
+        System.out.println();
+        System.out.println(toRstTable(
+            ThreadPoolMonitorMetricsRegistry.METRIC_CONFIG,
+            new ThreadPoolMonitorMetricsRegistry(MemorySegmentManifestCache.THREAD_POOL_METRIC_GROUP).all()));
+
+        // Same blank-line layout as the other subsections: after the title and between the tables.
+        System.out.println();
+        printSubsectionTitle("SegmentIndexesCache metrics");
+        System.out.println();
+        System.out.println(toRstTable(
+            CaffeineMetricsRegistry.METRIC_CONTEXT,
+            new CaffeineMetricsRegistry(MemorySegmentIndexesCache.METRIC_GROUP).all()));
+        System.out.println();
+        System.out.println(toRstTable(
+            ThreadPoolMonitorMetricsRegistry.METRIC_CONFIG,
+            new ThreadPoolMonitorMetricsRegistry(MemorySegmentIndexesCache.THREAD_POOL_METRIC_GROUP).all()));
+
+        System.out.println();
+        printSubsectionTitle("ChunkCache metrics");
+        System.out.println();
+        System.out.println(toRstTable(
+            CaffeineMetricsRegistry.METRIC_CONTEXT,
+            new CaffeineMetricsRegistry(ChunkCache.METRIC_GROUP).all()));
+        System.out.println();
+        System.out.println(toRstTable(
+            ThreadPoolMonitorMetricsRegistry.METRIC_CONFIG,
+            new ThreadPoolMonitorMetricsRegistry(ChunkCache.THREAD_POOL_METRIC_GROUP).all()));
+
+        System.out.println();
+        printSectionTitle("Storage Backend metrics");
+        System.out.println();
+        printSubsectionTitle("AzureBlobStorage metrics");
+        System.out.println();
+        System.out.println(toRstTable(
+            io.aiven.kafka.tieredstorage.storage.azure.MetricRegistry.METRIC_CONTEXT,
+            new io.aiven.kafka.tieredstorage.storage.azure.MetricRegistry().all()));
+        System.out.println();
+        printSubsectionTitle("GcsStorage metrics");
+        System.out.println();
+        System.out.println(toRstTable(
+            io.aiven.kafka.tieredstorage.storage.gcs.MetricRegistry.METRIC_CONTEXT,
+            new io.aiven.kafka.tieredstorage.storage.gcs.MetricRegistry().all()));
+        System.out.println();
+        printSubsectionTitle("S3Storage metrics");
+        System.out.println();
+        System.out.println(toRstTable(
+            io.aiven.kafka.tieredstorage.storage.s3.MetricRegistry.METRIC_CONTEXT,
+            new io.aiven.kafka.tieredstorage.storage.s3.MetricRegistry().all()));
+    }
+
+    /**
+     * Renders the given metric templates as RST: one titled simple table per MBean,
+     * each listing the attribute names and descriptions of that MBean.
+     *
+     * <p>{@code o.a.k.common.metrics.Metrics} can only generate HTML documentation.
+     * As there are no plans to publish HTML docs, this utility generates RST instead.
+     * It may be upstreamed.
+     *
+     * <p>Every tag of a template is rendered with the placeholder value <code>{tag}</code>.
+     * MBeans, and the attributes within each MBean, are emitted in sorted order.
+     *
+     * @param domain     prefix of the MBean names
+     * @param allMetrics metric templates to document
+     * @return the RST text; empty when there are no templates
+     * @throws IllegalArgumentException if the same attribute is defined twice for one MBean
+     */
+    static String toRstTable(final String domain, final Iterable<MetricNameTemplate> allMetrics) {
+        final Map<String, Map<String, String>> beansAndAttributes = new TreeMap<>();
+
+        try (final Metrics metrics = new Metrics()) {
+            for (final MetricNameTemplate template : allMetrics) {
+                final Map<String, String> tags = new LinkedHashMap<>();
+                for (final String tag : template.tags()) {
+                    tags.put(tag, "{" + tag + "}");
+                }
+
+                final MetricName metricName = metrics.metricName(
+                    template.name(),
+                    template.group(),
+                    template.description(),
+                    tags
+                );
+                final String beanName = getMBeanName(domain, metricName);
+                final Map<String, String> attrAndDesc =
+                    beansAndAttributes.computeIfAbsent(beanName, k -> new TreeMap<>());
+                if (attrAndDesc.containsKey(template.name())) {
+                    throw new IllegalArgumentException(
+                        "mBean '" + beanName
+                            + "' attribute '"
+                            + template.name()
+                            + "' is defined twice."
+                    );
+                }
+                attrAndDesc.put(template.name(), template.description());
+            }
+        }
+
+        final StringBuilder b = new StringBuilder();
+
+        for (final Map.Entry<String, Map<String, String>> bean : beansAndAttributes.entrySet()) {
+            final String beanName = bean.getKey();
+            final Map<String, String> attributes = bean.getValue();
+
+            // Add mBean name as a section title
+            b.append(beanName).append("\n");
+            b.append("=".repeat(beanName.length())).append("\n\n");
+
+            // Column widths. The first column is one character wider than its longest cell.
+            // Rows and borders are both derived from these widths, so cell text always stays
+            // inside the column borders, as RST simple tables require.
+            final int attrColumnWidth = 1 + Math.max(ATTRIBUTE_HEADER.length(),
+                attributes.keySet().stream().mapToInt(String::length).max().orElse(0));
+            final int descColumnWidth = Math.max(DESCRIPTION_HEADER.length(),
+                attributes.values().stream().mapToInt(String::length).max().orElse(0));
+
+            final String rowFormat = "%-" + attrColumnWidth + "s %-" + descColumnWidth + "s\n";
+            final String separatorLine = "=".repeat(attrColumnWidth) + " " + "=".repeat(descColumnWidth) + "\n";
+
+            // Table header
+            b.append(separatorLine);
+            b.append(String.format(rowFormat, ATTRIBUTE_HEADER, DESCRIPTION_HEADER));
+            b.append(separatorLine);
+
+            // Table rows
+            for (final Map.Entry<String, String> attribute : attributes.entrySet()) {
+                b.append(String.format(rowFormat, attribute.getKey(), attribute.getValue()));
+            }
+
+            // Close the table
+            b.append(separatorLine);
+            b.append("\n"); // Add an empty line between tables
+        }
+
+        return b.toString();
+    }
+
+    /**
+     * Builds the MBean name {@code prefix:type=group,tag1=value1,...} for a metric.
+     * Tags with an empty key or an empty value are skipped; tag values are JMX-sanitized.
+     *
+     * <p>Same as {@code o.a.k.common.metrics.JmxReporter#getMBeanName} but copy/pasted
+     * to avoid adding another dependency to this module.
+     *
+     * @param prefix     domain part of the MBean name
+     * @param metricName metric whose group and tags form the rest of the name
+     * @return the MBean name
+     */
+    static String getMBeanName(final String prefix, final MetricName metricName) {
+        final StringBuilder beanName = new StringBuilder();
+        beanName.append(prefix);
+        beanName.append(":type=");
+        beanName.append(metricName.group());
+        for (final Map.Entry<String, String> entry : metricName.tags().entrySet()) {
+            if (entry.getKey().isEmpty() || entry.getValue().isEmpty()) {
+                continue;
+            }
+            beanName.append(",");
+            beanName.append(entry.getKey());
+            beanName.append("=");
+            beanName.append(Sanitizer.jmxSanitize(entry.getValue()));
+        }
+        return beanName.toString();
+    }
+
+    /**
+     * Prints a top-level RST section title, over- and underlined with {@code =}.
+     *
+     * @param title the title text
+     */
+    static void printSectionTitle(final String title) {
+        printTitle(title, "=");
+    }
+
+    /**
+     * Prints an RST subsection title, over- and underlined with {@code -}.
+     *
+     * @param title the title text
+     */
+    static void printSubsectionTitle(final String title) {
+        printTitle(title, "-");
+    }
+
+    /**
+     * Prints a title between two adornment lines. RST requires the adornment to be
+     * at least as long as the title text, so it grows with the title.
+     */
+    private static void printTitle(final String title, final String adornmentChar) {
+        final String adornment = adornmentChar.repeat(Math.max(MIN_TITLE_ADORNMENT_LENGTH, title.length()));
+        System.out.println(adornment + "\n"
+            + title + "\n"
+            + adornment);
+    }
+}
From 280ed90165614217892463e4a0cc461732d10ae7 Mon Sep 17 00:00:00 2001 From: Jorge Esteban Quilcate Otoya Date: Wed, 23 Oct 2024 00:33:13 +0300 Subject: [PATCH 4/4] chore: add task to generate docs --- Makefile | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 202d7930..2ef484ea 100644 --- a/Makefile +++ b/Makefile @@ -24,6 +24,7 @@ all: clean build test clean: ./gradlew clean + rm -f docs/*.rst checkstyle: ./gradlew checkstyleMain checkstyleTest checkstyleIntegrationTest @@ -43,9 +44,14 @@ storage/azure/build/distributions/azure-$(VERSION).tgz: ./gradlew build :storage:azure:distTar -x test -x integrationTest -x e2e:test .PHONY: docs -docs: +docs: config.rst metrics.rst + +config.rst: ./gradlew :docs:genConfigDocs +metrics.rst: + ./gradlew :docs:genMetricsDocs + test: build ./gradlew
test -x e2e:test