-
Notifications
You must be signed in to change notification settings - Fork 0
/
blog.html
90 lines (90 loc) · 36.1 KB
/
blog.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
<!doctype html>
<html lang="en" dir="ltr" class="blog-wrapper blog-list-page plugin-blog plugin-id-default" data-has-hydrated="false">
<head>
<meta charset="UTF-8">
<meta name="generator" content="Docusaurus v3.1.0">
<title data-rh="true">Blog | LakeSoul - An Opensource Cloud Native Realtime Lakehouse Framework</title><meta data-rh="true" name="viewport" content="width=device-width,initial-scale=1"><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://lakesoul-io.github.io/img/LakeSoul_Horizontal_White.png"><meta data-rh="true" name="twitter:image" content="https://lakesoul-io.github.io/img/LakeSoul_Horizontal_White.png"><meta data-rh="true" property="og:url" content="https://lakesoul-io.github.io/blog"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" property="og:locale:alternate" content="zh_Hans"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" property="og:title" content="Blog | LakeSoul - An Opensource Cloud Native Realtime Lakehouse Framework"><meta data-rh="true" name="description" content="Blog"><meta data-rh="true" property="og:description" content="Blog"><meta data-rh="true" name="docusaurus_tag" content="blog_posts_list"><meta data-rh="true" name="docsearch:docusaurus_tag" content="blog_posts_list"><link data-rh="true" rel="icon" href="/img/favicon.ico"><link data-rh="true" rel="canonical" href="https://lakesoul-io.github.io/blog"><link data-rh="true" rel="alternate" href="https://lakesoul-io.github.io/blog" hreflang="en"><link data-rh="true" rel="alternate" href="https://lakesoul-io.github.io/zh-Hans/blog" hreflang="zh-Hans"><link data-rh="true" rel="alternate" href="https://lakesoul-io.github.io/blog" hreflang="x-default"><link rel="alternate" type="application/rss+xml" href="/blog/rss.xml" title="LakeSoul - An Opensource Cloud Native Realtime Lakehouse Framework RSS Feed">
<link rel="alternate" type="application/atom+xml" href="/blog/atom.xml" title="LakeSoul - An Opensource Cloud Native Realtime Lakehouse Framework Atom Feed"><link rel="stylesheet" href="/assets/css/styles.4ec8676e.css">
<script src="/assets/js/runtime~main.07fd9d8b.js" defer="defer"></script>
<script src="/assets/js/main.d23814d6.js" defer="defer"></script>
</head>
<body class="navigation-with-keyboard">
<script>!function(){function t(t){document.documentElement.setAttribute("data-theme",t)}var e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return localStorage.getItem("theme")}catch(t){}}();t(null!==e?e:"light")}(),function(){try{const c=new URLSearchParams(window.location.search).entries();for(var[t,e]of c)if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/logo.svg" alt="LakeSoul Logo" class="themedComponent_mlkZ themedComponent--light_NVdE"><img src="/img/logo.svg" alt="LakeSoul Logo" class="themedComponent_mlkZ themedComponent--dark_xIcU"></div><b class="navbar__title text--truncate">LakeSoul</b></a><a class="navbar__item navbar__link" href="/docs/intro">Docs</a><a aria-current="page" class="navbar__item navbar__link navbar__link--active" href="/blog">Blog</a></div><div class="navbar__items navbar__items--right"><div class="navbar__item dropdown dropdown--hoverable dropdown--right"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link"><svg viewBox="0 0 24 24" width="20" height="20" aria-hidden="true" class="iconLanguage_nlXk"><path fill="currentColor" d="M12.87 
15.07l-2.54-2.51.03-.03c1.74-1.94 2.98-4.17 3.71-6.53H17V4h-7V2H8v2H1v1.99h11.17C11.5 7.92 10.44 9.75 9 11.35 8.07 10.32 7.3 9.19 6.69 8h-2c.73 1.63 1.73 3.17 2.98 4.56l-5.09 5.02L4 19l5-5 3.11 3.11.76-2.04zM18.5 10h-2L12 22h2l1.12-3h4.75L21 22h2l-4.5-12zm-2.62 7l1.62-4.33L19.12 17h-3.24z"></path></svg>English</a><ul class="dropdown__menu"><li><a href="/blog" target="_self" rel="noopener noreferrer" class="dropdown__link dropdown__link--active" lang="en">English</a></li><li><a href="/zh-Hans/blog" target="_self" rel="noopener noreferrer" class="dropdown__link" lang="zh-Hans">简体中文</a></li></ul></div><a href="https://github.com/lakesoul-io/LakeSoul" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a><div class="navbarSearchContainer_Bca1"></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0"><div class="container margin-vert--lg"><div class="row"><aside class="col col--3"><nav class="sidebar_re4s thin-scrollbar" aria-label="Blog recent posts navigation"><div class="sidebarItemTitle_pO2u margin-bottom--md">Recent posts</div><ul class="sidebarItemList_Yudw clean-list"><li class="sidebarItem__DBe"><a class="sidebarItemLink_mo7H" href="/blog/2024/01/10/lakesoul-native-io">LakeSoul NativeIO Introduction</a></li><li class="sidebarItem__DBe"><a class="sidebarItemLink_mo7H" href="/blog/2023/12/01/lakesoul-introduction">LakeSoul Opensource Project Introduction</a></li><li class="sidebarItem__DBe"><a class="sidebarItemLink_mo7H" href="/blog/2023/07/17/lakesoul-2.3.0-release">LakeSoul releases version 2.3.0, with Fully Support of CDC Incremental Computing and Other Important 
Features</a></li><li class="sidebarItem__DBe"><a class="sidebarItemLink_mo7H" href="/blog/2023/04/21/lakesoul-2.2.0-release">What's new in version 2.2.0</a></li></ul></nav></aside><main class="col col--7" itemscope="" itemtype="https://schema.org/Blog"><article class="margin-bottom--xl" itemprop="blogPost" itemscope="" itemtype="https://schema.org/BlogPosting"><header><h2 class="title_f1Hy" itemprop="headline"><a itemprop="url" href="/blog/2024/01/10/lakesoul-native-io">LakeSoul NativeIO Introduction</a></h2><div class="container_mt6G margin-vert--md"><time datetime="2024-01-10T00:00:00.000Z" itemprop="datePublished">January 10, 2024</time> · <!-- -->One min read</div></header><div class="markdown" itemprop="articleBody"><div style="height:750px;width:900px;margin-left:auto;margin-right:auto"><div class="rpv-core__viewer rpv-core__viewer--light" data-testid="core__viewer" style="height:100%;width:100%"></div></div></div><footer class="row docusaurus-mt-lg"></footer></article><article class="margin-bottom--xl" itemprop="blogPost" itemscope="" itemtype="https://schema.org/BlogPosting"><header><h2 class="title_f1Hy" itemprop="headline"><a itemprop="url" href="/blog/2023/12/01/lakesoul-introduction">LakeSoul Opensource Project Introduction</a></h2><div class="container_mt6G margin-vert--md"><time datetime="2023-12-01T00:00:00.000Z" itemprop="datePublished">December 1, 2023</time> · <!-- -->One min read</div></header><div class="markdown" itemprop="articleBody"><div style="height:750px;width:900px;margin-left:auto;margin-right:auto"><div class="rpv-core__viewer rpv-core__viewer--light" data-testid="core__viewer" style="height:100%;width:100%"></div></div></div><footer class="row docusaurus-mt-lg"></footer></article><article class="margin-bottom--xl" itemprop="blogPost" itemscope="" itemtype="https://schema.org/BlogPosting"><meta itemprop="description" content="<!--"><header><h2 class="title_f1Hy" itemprop="headline"><a itemprop="url" 
href="/blog/2023/07/17/lakesoul-2.3.0-release">LakeSoul releases version 2.3.0, with Fully Support of CDC Incremental Computing and Other Important Features</a></h2><div class="container_mt6G margin-vert--md"><time datetime="2023-07-17T00:00:00.000Z" itemprop="datePublished">July 17, 2023</time> · <!-- -->4 min read</div></header><div class="markdown" itemprop="articleBody"><p>Recently, LakeSoul, the lakehouse framework, released version 2.3.0. This new release is the first release of LakeSoul after it entered the incubation of the Linux Foundation AI &amp; Data as a sandbox project. This new version adds Flink SQL/Table API, which supports stream and batch read and write. The Flink DataStream API for multi-table real-time CDC stream ingestion has been refactored to better support data ingestion from multiple data sources to the lakehouse. A new global automatic small file compaction service has been added.</p>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="flink-sqltable-api">Flink SQL/Table API<a href="#flink-sqltable-api" class="hash-link" aria-label="Direct link to Flink SQL/Table API" title="Direct link to Flink SQL/Table API"></a></h2>
<p>In version 2.3.0, LakeSoul fully supports the Flink SQL/Table API, and supports both streaming and batch methods to read or write LakeSoul tables. When reading or writing streams, LakeSoul fully supports the semantics of Flink Changelog Stream.</p>
<p>When writing in stream mode, it can be connected to a variety of stream sources and also CDC collecting tools, including Debezium and Flink CDC Connector. LakeSoul supports row-level upsert and delete. LakeSoul supports stream read for tables into Changelog Stream format to facilitate incremental streaming SQL calculation in Flink. At the same time, LakeSoul also supports Flink batch mode, which can support batch upsert, full read, snapshot read and other functions.</p>
<p>Using LakeSoul + Flink SQL, you can easily build a large-scale, low-cost, high-performance real-time data warehouse on the data lake. For specific usage methods, please refer to <a href="https://lakesoul-io.github.io/docs/Usage%20Docs/flink-lakesoul-connector" target="_blank" rel="noopener noreferrer">Flink SQL Documentation</a>.</p>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="flink-multi-source-ingestion-stream-api">Flink Multi-Source Ingestion Stream API<a href="#flink-multi-source-ingestion-stream-api" class="hash-link" aria-label="Direct link to Flink Multi-Source Ingestion Stream API" title="Direct link to Flink Multi-Source Ingestion Stream API"></a></h2>
<p>LakeSoul can support the synchronization of the entire database from version 2.1, and provides a <a href="https://lakesoul-io.github.io/docs/Usage%20Docs/flink-cdc-sync" target="_blank" rel="noopener noreferrer">MySQL entire database automatic synchronization tool</a>.</p>
<p>In this version 2.3 update, we refactored the DDL parsing logic when the entire database containing multiple tables is synchronized in one Flink job. Specifically, LakeSoul no longer needs to parse DDL events from the upstream datasources, or go to the source database to obtain information such as the schema of the table when synchronizing the entire database, but directly parses from the DML events to determine whether there is a new table or the schema of an existing table has changed. When a new table or schema change is encountered, the table creation or schema change will be automatically executed in the LakeSoul side.</p>
<p>This change allows LakeSoul to support any type of data source ingestion, such as MySQL, Oracle CDC collection, or consumption of CDC events from Kafka. Developers only need to parse the CDC message into a <a href="https://github.com/lakesoul-io/LakeSoul/blob/main/lakesoul-flink/src/main/java/org/apache/flink/lakesoul/types/BinarySourceRecord.java" target="_blank" rel="noopener noreferrer">BinarySourceRecord</a> object, and create a <code>DataStream&lt;BinarySourceRecord&gt;</code>, then the whole datasource can be synchronized into LakeSoul. LakeSoul has implemented the conversion from Debezium DML message format to the <code>BinarySourceRecord</code> object. To accommodate other CDC formats, developers can refer to that implementation.</p>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="global-automatic-small-file-compaction-service">Global Automatic Small File Compaction Service<a href="#global-automatic-small-file-compaction-service" class="hash-link" aria-label="Direct link to Global Automatic Small File Compaction Service" title="Direct link to Global Automatic Small File Compaction Service"></a></h2>
<p>LakeSoul supports streaming and concurrent Upsert or Append operations. Each Upsert/Append operation will write several files, which are automatically merged when read (Merge on Read).</p>
<p>LakeSoul's MOR performance is already relatively efficient (refer to <a href="https://lakesoul-io.github.io/blog/2023/04/21/lakesoul-2.2.0-release" target="_blank" rel="noopener noreferrer">Previous Performance Comparison</a>); it is measured that the MOR performance drops by about 15% after 100 upserts. However, in order to have higher read performance, LakeSoul also provides the function of small file compaction. The compaction functionality is a Spark API that needs to be called independently for each table, which is inconvenient to use.</p>
<p>In this version 2.3 update, LakeSoul provides <a href="https://lakesoul-io.github.io/docs/Usage%20Docs/auto-compaction-task" target="_blank" rel="noopener noreferrer">Global Automatic Small File Consolidation Service</a>. This service is actually a Spark job, which automatically triggers the merge operation of eligible tables by listening to the write events of the LakeSoul PG metadata database. This compaction service has several advantages:</p>
<ol>
<li>Global Compaction Service. The compaction service only needs to be started once in the cluster, and it will automatically compact all the tables (it also supports dividing into multiple databases), and it does not need to be configured in the write job of each table, which is easy to use.</li>
<li>Separate Compaction Service. Since LakeSoul can support concurrent writing, the writing of the compaction service does not affect other writing jobs and can be executed concurrently.</li>
<li>Elastic Resource Scaling. The global compaction service is implemented using Spark, and automatic scaling can be achieved by enabling Spark's <a href="https://spark.apache.org/docs/3.3.1/job-scheduling.html#dynamic-resource-allocation" target="_blank" rel="noopener noreferrer">Dynamic Allocation</a>.</li>
</ol>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="summary">Summary<a href="#summary" class="hash-link" aria-label="Direct link to Summary" title="Direct link to Summary"></a></h2>
<p>The LakeSoul 2.3 version update can better support the construction of large-scale real-time lakehouses, and provides core functionalities such as high-performance IO, incremental streaming computing, and convenient and fast multi-source data ingestion. It is easy to use and reduces the maintenance cost of the data lake.</p>
<p>In the next version, LakeSoul will provide more functions such as built-in RBAC and native Python reader. LakeSoul is currently a sandbox incubation project of the Linux Foundation AI &amp; Data, and developers and users are welcome to participate in the community to build a faster and more usable lakehouse framework.</p></div><footer class="row docusaurus-mt-lg"></footer></article><article class="margin-bottom--xl" itemprop="blogPost" itemscope="" itemtype="https://schema.org/BlogPosting"><meta itemprop="description" content="<!--"><header><h2 class="title_f1Hy" itemprop="headline"><a itemprop="url" href="/blog/2023/04/21/lakesoul-2.2.0-release">What's new in version 2.2.0</a></h2><div class="container_mt6G margin-vert--md"><time datetime="2023-04-21T00:00:00.000Z" itemprop="datePublished">April 21, 2023</time> · <!-- -->10 min read</div></header><div class="markdown" itemprop="articleBody"><p>Recently, after months of research and development, LakeSoul released version 2.2.0 (<a href="https://github.com/lakesoul-io/LakeSoul/releases/tag/v2.2.0" target="_blank" rel="noopener noreferrer">Github Release Notes</a>). The most important upgrade in this version is that the new Native IO is enabled by default in both Spark and Flink, and LakeSoul's performance has once again been significantly improved, expanding its performance leadership advantage in the field of Cloud Native Data Lakehouse. This article provides you with a detailed explanation of the updates to LakeSoul version 2.2.0 and the technical details of Native IO.</p>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="lakesoul-version-220-update-content">LakeSoul Version 2.2.0 Update Content<a href="#lakesoul-version-220-update-content" class="hash-link" aria-label="Direct link to LakeSoul Version 2.2.0 Update Content" title="Direct link to LakeSoul Version 2.2.0 Update Content"></a></h2>
<p>In version 2.2.0, LakeSoul implemented a brand-new Native IO, migrating full and incremental read-write logic to the new IO layer, and conducting extensive performance and correctness testing. The new IO layer was enabled by default in Spark and Flink.</p>
<p>Version 2.2.0 also released several new interfaces in Spark, such as <a href="https://lakesoul-io.github.io/docs/Tutorials/snapshot-manage" target="_blank" rel="noopener noreferrer">snapshot reading, rollback and cleaning</a>, <a href="https://lakesoul-io.github.io/docs/Tutorials/incremental-query" target="_blank" rel="noopener noreferrer">incremental batch reading, and incremental streaming reading</a>, which more perfectly supports the high-performance streaming incremental ETL data modeling process. Below, we will provide a detailed explanation of these new improvements and feature points.</p>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="1-native-io-detailed-explanation">1. Native IO Detailed Explanation<a href="#1-native-io-detailed-explanation" class="hash-link" aria-label="Direct link to 1. Native IO Detailed Explanation" title="Direct link to 1. Native IO Detailed Explanation"></a></h3>
<p>As a Cloud Native Data Lakehouse Framework, the read-write performance of HDFS and Cloud storage is the top priority. LakeSoul supports the feature of streaming and batch integration, and supports Upsert update writing and Merge on Read reading for the primary key table in storage. The implementation of the IO layer has complexity. Previously, LakeSoul's Upsert writing mainly relied on computational frameworks for implementation. For example, in Spark, the primary key needs to be shuffled and sorted by Spark before being written to storage. When reading, a set of orderly file merging Reader is implemented in Spark. This implementation brings several issues:</p>
<ol>
<li>Performance has room for optimization. LakeSoul primary key model adopts a hash bucket to store in an ordered manner, which does not need to merge with history during Upsert writing, resulting in high write throughput. However, MOR reading requires the orderly merging of multiple files, which has a greater impact on read performance in the case of more files, especially in the scenario of high latency object storage, and needs to be optimized.</li>
<li>The original solution is tightly coupled with Spark, making it inconvenient to reuse read-write logic in other computing engines such as Flink, especially the more complex MOR read logic.</li>
<li>The original solution was implemented in Java, which does not facilitate interfacing with computational engines implemented in other languages such as C++.</li>
</ol>
<p>Considering the above issues, LakeSoul chose to use Native Code (Rust language) to re-implement the read-write logic of IO layer and provide CFFI based interface to the upper layer to facilitate further encapsulation in other languages such as Java, Python, etc., so as to interface with the computing engine. The specific design and implementation includes the following points:</p>
<ol>
<li>
<p>Using Apache Arrow and Rust</p>
<p>LakeSoul uses Parquet as the physical storage format, so we choose to use Arrow as the in-memory intermediate representation layer for IO. On the one hand, Arrow is a mature in-memory column format with rich library support, such as Rust's implementation of Arrow-rs, and on the other hand, the column format is chosen to gain vectorization acceleration capability and can be easily interfaced with vectorization computation engines.</p>
<p>LakeSoul relies on Arrow's Rust implementation, namely <a href="https://github.com/apache/arrow-rs" target="_blank" rel="noopener noreferrer">arrow-rs</a>, which already includes Parquet Reader, Writer, and object_store abstraction layer, and can well support cloud-native object stores such as S3. The main reason for choosing Rust implementation is that Rust has better support for asynchronous, and the operations on object stores can be implemented as async interfaces, which can be further optimized for IO in an asynchronous way.</p>
</li>
<li>
<p>LakeSoul Native Writer</p>
<p>LakeSoul's Native Writer implements single-file concurrent writes through the <a href="https://docs.amazonaws.cn/en_us/AmazonS3/latest/userguide/mpuoverview.html" target="_blank" rel="noopener noreferrer">MultipartUpload</a> function encapsulated in Arrow-rs' object_store library. Specifically, a Parquet file can be organized into multiple RowGroups, each of which is serialized in memory as a contiguous buffer in advance. LakeSoul submits a RowGroup buffer as a part of object storage and asynchronously uploads it to the backend thread. At the same time, the computing thread can continue to organize the content of the next RowGroup, thus achieving concurrent writing of a single file on object storage.</p>
<p>LakeSoul Native Writer supports sorting primary keys on write, using the <a href="https://github.com/apache/arrow-datafusion" target="_blank" rel="noopener noreferrer">Arrow DataFusion</a> library's Sort implementation, which supports spilling the intermediate results of the sort to disk so that Writer can support large amounts of data writes, which is important in Flink Sink. The reason is that primary key sorting only sorts the same batch of written files, while LakeSoul's Flink Sink only writes files when Flink performs a checkpoint, which can be a long interval between checkpoints. With disk overflow writes, the problem of OOM due to excessive memory consumption is avoided.</p>
<p>MultipartUpload with object stores also has the advantage of eliminating the overhead of staging intermediate files and copying files at commit time. Staging files were originally designed to avoid incomplete writes to the target table or partition directory, which could cause incomplete data to be read downstream. Both Flink and Spark need to write the staging file in a temporary directory first, and then rename the temporary directory to the final target directory when committing, which brings additional overhead for overwriting, especially when the object store does not support atomic renaming, and requires a complete copy of the data. The staging file also makes maintaining Flink Sink state more complex. LakeSoul supports the abort operation of MultipartUpload, which aborts the unwritten file if the write is cancelled in the middle, and LakeSoul's metadata layer implements a two-stage commit protocol in Spark and Flink to ensure consistency.</p>
</li>
<li>
<p>LakeSoul Native Reader</p>
<p>In the Reader section, we have optimized the object store for accessing Parquet files and MOR for merging multiple ordered files.</p>
<p>Object storage typically has high latency, the object_store library in arrow-rs has been optimized for small request merges (coalesce) and concurrent requests, but the results are still not good enough in our actual tests. Therefore, we made further optimizations in two areas:</p>
<p>One is to split read requests into multiple concurrent requests. The underlying object storage is actually the HTTP protocol, and you can specify the range of object files to be accessed in the HTTP request body. We limit the range size of a single request to 8M, and requests larger than 8M are automatically split into multiple concurrent requests to avoid long blocking waits caused by accessing too large a range of files in one request.</p>
<p>The second optimization is to do background asynchronous prefetching of Parquet's RowGroup, again to allow the compute and IO threads to work simultaneously, masking latency with prefetching. By default, a RowGroup is prefetched to avoid taking up too much memory.</p>
<p>For MOR ordered merging, we implement multiple Arrow RecordBatch asynchronous streams merging based on arrow-rs. The implementation uses the Row Format provided by arrow-rs to optimize the performance of comparing primary key order relationships and interleave to optimize the performance of merging multiple streams into one RecordBatch.</p>
</li>
<li>
<p>Spark, Flink Engine Interfacing with Native IO Layer</p>
<p>As mentioned earlier, the Native IO layer is implemented in Rust. On top of it, we encapsulate a C interface to provide read and write functionality. Both synchronous blocking and asynchronous callbacks are supported for reads, so that the upper layer can choose according to its needs (for writes only the blocking interface is provided, because the underlying layer has already done the concurrency of Multipart uploads). On top of the C interface, the Java interface is implemented using <a href="https://github.com/jnr/jnr-ffi" target="_blank" rel="noopener noreferrer">jnr-ffi</a>. Finally, the Reader and Writer interfaces are implemented on the Spark and Flink sides respectively.</p>
</li>
<li>
<p>LakeSoul IO Performance Review</p>
<p>LakeSoul's Native IO layer implements vectorized read and write to Parquet file format, and does asynchronous concurrency, prefetching and other performance optimizations for object storage. The direct read and write to Parquet files has significant performance improvement over the native parquet-mr and Hadoop S3A implementations in Spark and Flink:</p>
<p><img loading="lazy" alt="nativeio-vs-parquet-mr" src="/assets/images/nativeio-vs-parquet-mr-Eng-b751b5c44bc204e592a4e3855ed1ed94.PNG" width="1042" height="724" class="img_ev3q"></p>
<p>We also did a read/write performance comparison of LakeSoul with Hudi and Iceberg in COW and MOR scenarios. The evaluation scenario is to first write 10 million lines of data and then Upsert 10 times, 2 million lines each time. The data and code for this performance evaluation are publicly available in the <a href="https://github.com/meta-soul/ccf-bdci2022-datalake-contest-examples/tree/mor" target="_blank" rel="noopener noreferrer">performance evaluation code base</a>. The performance comparison is as follows:</p>
<p><img loading="lazy" alt="write-perf" src="/assets/images/writer-perf-Eng-40e54f273b4abc06117dbf1ea89a4e42.PNG" width="903" height="722" class="img_ev3q"></p>
<p>In terms of write performance, for COW mode, all three frameworks require full rewrite data, and the computational overhead is basically the same, LakeSoul is the best with the Native IO optimization. For MOR mode, LakeSoul uses the primary key bucketing mode, which does not need to maintain index updates, so it achieves high write throughput, significantly better than the other two frameworks.</p>
<p><img loading="lazy" alt="read-perf" src="/assets/images/read-perf-Eng-f61e173bf32c3295e8dc9578b554984c.PNG" width="883" height="700" class="img_ev3q"></p>
<p>In terms of read performance, for COW mode, both read the merged file without merge overhead, and LakeSoul also benefits from IO layer optimization to get ahead. For MOR mode, although LakeSoul's ordered merge has a theoretical performance disadvantage compared to Hudi's bloom filter filtering and Iceberg's pos-delete filter, LakeSoul still has a 1X read performance lead over Iceberg with several optimizations in the IO layer.</p>
<p>It is worth mentioning that LakeSoul has the smallest CPU and memory consumption among the three frameworks in the read and write process. In addition, LakeSoul's MOR read, after splitting incremental data into 100 Upsert writes, the read performance only decreases by about 15%, which means LakeSoul can guarantee better read performance even with high frequency writes without compaction.</p>
</li>
</ol>
<h3 class="anchor anchorWithStickyNavbar_LWe7" id="2-introduction-to-snapshot-and-incremental-read-functions">2. Introduction to Snapshot and Incremental Read Functions<a href="#2-introduction-to-snapshot-and-incremental-read-functions" class="hash-link" aria-label="Direct link to 2. Introduction to Snapshot and Incremental Read Functions" title="Direct link to 2. Introduction to Snapshot and Incremental Read Functions"></a></h3>
<p>LakeSoul 2.2.0 provides a series of new interfaces for snapshot and incremental scenarios. LakeSoul uses an MVCC multi-version control mechanism, which records the files corresponding to each version in the metadata. Therefore, it is easy to support snapshot reading (also known as Time Travel), which means reading the snapshot version of data at a previous point in time. LakeSoul can also support rollback to a point in time, so that if there is a problem with the latest data, it can be restored to the previous correct version.</p>
<p>For snapshot read, snapshot rollback and snapshot cleanup, users only need to provide the snapshot timestamp, expressed as a timestamp string, e.g. <code>"2022-01-01 15:15:15"</code>, the timestamp does not need to strictly correspond to the actual write time, this time will be used as the upper bound of the timestamp of the write version, and LakeSoul will automatically find the snapshot corresponding to a timestamp less than or equal to this version.</p>
<p>Snapshot-related function points can be found in the <a href="https://lakesoul-io.github.io/docs/Tutorials/snapshot-manage" target="_blank" rel="noopener noreferrer">snapshot usage tutorial</a>.</p>
<p>LakeSoul also provides incremental reads. In streaming ETL, the incremental read function is of great significance. Incremental read can convert the entire ETL link to incremental computing, improve real-time performance, and save computing resources. LakeSoul 2.2.0 supports incremental batch reads and incremental streaming reads in Spark. When reading incrementally in streaming mode, LakeSoul will act as Spark's Streaming data source and automatically discover and read incremental data of the table. For details, please refer to the <a href="https://lakesoul-io.github.io/docs/Tutorials/incremental-query" target="_blank" rel="noopener noreferrer">incremental query tutorial</a>.</p>
<p>It is worth mentioning that, unlike Hudi and Iceberg, LakeSoul can support incremental reads for both primary and non-primary key tables, and for LakeSoul CDC tables (<a href="https://lakesoul-io.github.io/docs/Tutorials/flink-cdc-sink" target="_blank" rel="noopener noreferrer">refer to LakeSoul CDC table format</a>), it can also read incremental CDC streams, which represent incremental changes to the LakeSoul table itself, including insert, update and delete operations, enabling flexible downstream incremental computation. In the next release, LakeSoul will support incremental reading of LakeSoul table CDC as Flink ChangeLog Stream, which can be used for efficient incremental ETL development with Flink SQL.</p>
<h2 class="anchor anchorWithStickyNavbar_LWe7" id="summary">Summary<a href="#summary" class="hash-link" aria-label="Direct link to Summary" title="Direct link to Summary"></a></h2>
<p>LakeSoul 2.2.0 releases a new Native IO layer that further extends the performance advantages of the cloud-native LakeSoul framework. Snapshot and incremental interfaces are provided to better support streaming data modeling and development.</p>
<p>In the next release, LakeSoul will release global automatic Compaction, Flink Stream SQL Source and other important features, so stay tuned.</p></div><footer class="row docusaurus-mt-lg"></footer></article><nav class="pagination-nav" aria-label="Blog list page navigation"></nav></main></div></div></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Docs</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/docs/Getting Started/setup-local-env">Getting Started</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/Usage Docs/setup-meta-env">Docs</a></li><li class="footer__item"><a class="footer__link-item" href="/docs/Tutorials/consume-cdc-via-spark-streaming">Tutorials</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://discord.gg/WJrHKq4BPf" target="_blank" rel="noopener noreferrer" class="footer__link-item">Discord<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://twitter.com/lakesoul" target="_blank" rel="noopener noreferrer" class="footer__link-item">Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://lists.lfaidata.foundation/g/lakesoul-announce" target="_blank" rel="noopener noreferrer" class="footer__link-item">LakeSoul Announce<svg width="13.5" 
height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://lists.lfaidata.foundation/g/lakesoul-technical-discuss" target="_blank" rel="noopener noreferrer" class="footer__link-item">LakeSoul Technical-Discuss<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://lists.lfaidata.foundation/g/lakesoul-tsc" target="_blank" rel="noopener noreferrer" class="footer__link-item">LakeSoul TSC<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">More</div><ul class="footer__items clean-list"><li class="footer__item"><a class="footer__link-item" href="/blog">Blog</a></li><li class="footer__item"><a href="https://github.com/lakesoul-io/lakesoul" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright"><div class="customCopyright">Copyright © 2024 LakeSoul The Linux Foundation®. All rights reserved. The Linux Foundation has registered trademarks and uses trademarks. 
<br> For a list of trademarks of The Linux Foundation, please see our <a href="https://www.linuxfoundation.org/legal/trademark-usage" target="_blank" rel="noopener noreferrer">Trademark Usage</a> page. Linux is a registered trademark of Linus Torvalds. <a href="https://www.linuxfoundation.org/legal/privacy-policy" target="_blank" rel="noopener noreferrer">Privacy Policy</a> and <a href="https://www.linuxfoundation.org/legal/terms" target="_blank" rel="noopener noreferrer">Terms of Use</a></div></div></div></div></footer></div>
</body>
</html>