diff --git a/engine/e2e-test/test_api_engine.py b/engine/e2e-test/test_api_engine.py index e652e4495..f68138ae4 100644 --- a/engine/e2e-test/test_api_engine.py +++ b/engine/e2e-test/test_api_engine.py @@ -44,7 +44,8 @@ def test_engines_install_llamacpp_specific_version_and_null_variant(self): # engines uninstall @pytest.mark.asyncio async def test_engines_install_uninstall_llamacpp_should_be_successful(self): - response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + data = {"version": "v0.1.43"} + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install", json=data) assert response.status_code == 200 await wait_for_websocket_download_success_event(timeout=None) time.sleep(30) diff --git a/engine/e2e-test/test_api_engine_install_nightly.py b/engine/e2e-test/test_api_engine_install_nightly.py index de4914c28..3084e4633 100644 --- a/engine/e2e-test/test_api_engine_install_nightly.py +++ b/engine/e2e-test/test_api_engine_install_nightly.py @@ -19,7 +19,8 @@ def setup_and_teardown(self): stop_server() def test_engines_install_llamacpp_should_be_successful(self): - response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + data = {"version": "v0.1.43"} + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install", json=data) assert response.status_code == 200 def test_engines_install_llamacpp_specific_version_and_variant(self): diff --git a/engine/e2e-test/test_api_model.py b/engine/e2e-test/test_api_model.py index 8f2e4b07a..b23aa2947 100644 --- a/engine/e2e-test/test_api_model.py +++ b/engine/e2e-test/test_api_model.py @@ -85,9 +85,12 @@ async def test_model_pull_with_direct_url_should_have_desired_name(self): ], ) + @pytest.mark.asyncio async def test_models_start_stop_should_be_successful(self): print("Install engine") - response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install") + # TODO(sang) Remove version after marking 0.1.43 as stable + data = {"version": 
"v0.1.43"} + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install", json=data) assert response.status_code == 200 await wait_for_websocket_download_success_event(timeout=None) # TODO(sang) need to fix for cuda download diff --git a/engine/e2e-test/test_cli_engine_install_nightly.py b/engine/e2e-test/test_cli_engine_install_nightly.py index 8c66c284c..e657af6b3 100644 --- a/engine/e2e-test/test_cli_engine_install_nightly.py +++ b/engine/e2e-test/test_cli_engine_install_nightly.py @@ -47,6 +47,7 @@ def test_engines_install_onnx_on_tensorrt_should_be_failed(self): assert "is not supported on" in output, "Should display error message" assert exit_code == 0, f"Install engine failed with error: {error}" + @pytest.mark.skipif(platform.system() != "Linux", reason="Wait for linux arm ready") def test_engines_should_fallback_to_download_llamacpp_engine_if_not_exists(self): exit_code, output, error = run( "Install Engine", diff --git a/engine/e2e-test/test_cli_engine_uninstall.py b/engine/e2e-test/test_cli_engine_uninstall.py index fcc5f5c73..0f63f8d09 100644 --- a/engine/e2e-test/test_cli_engine_uninstall.py +++ b/engine/e2e-test/test_cli_engine_uninstall.py @@ -24,7 +24,8 @@ def setup_and_teardown(self): @pytest.mark.asyncio async def test_engines_uninstall_llamacpp_should_be_successfully(self): - requests.post("http://127.0.0.1:3928/v1/engines/llama-cpp/install") + data = {"version": "v0.1.43"} + response = requests.post("http://localhost:3928/v1/engines/llama-cpp/install", json=data) await wait_for_websocket_download_success_event(timeout=None) exit_code, output, error = run( "Uninstall engine", ["engines", "uninstall", "llama-cpp"] diff --git a/engine/test/components/test_engine_matcher_utils.cc b/engine/test/components/test_engine_matcher_utils.cc index 7da4e3cd1..1d1ed47a8 100644 --- a/engine/test/components/test_engine_matcher_utils.cc +++ b/engine/test/components/test_engine_matcher_utils.cc @@ -19,6 +19,7 @@ class EngineMatcherUtilsTestSuite 
: public ::testing::Test { "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx-cuda-12-0.tar.gz", "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-noavx.tar.gz", "cortex.llamacpp-0.1.25-25.08.24-linux-amd64-vulkan.tar.gz", + "cortex.llamacpp-0.1.43-linux-arm64.tar.gz", "cortex.llamacpp-0.1.25-25.08.24-mac-amd64.tar.gz", "cortex.llamacpp-0.1.25-25.08.24-mac-arm64.tar.gz", "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx-cuda-11-7.tar.gz", @@ -134,6 +135,18 @@ TEST_F(EngineMatcherUtilsTestSuite, TestValidate) { EXPECT_EQ(variant, "cortex.llamacpp-0.1.25-25.08.24-windows-amd64-avx2.tar.gz"); } + + { + auto os{"linux"}; + auto cpu_arch{"arm64"}; + auto suitable_avx{""}; + auto cuda_version{""}; + + auto variant = engine_matcher_utils::Validate( + cortex_llamacpp_variants, os, cpu_arch, suitable_avx, cuda_version); + + EXPECT_EQ(variant, "cortex.llamacpp-0.1.43-linux-arm64.tar.gz"); + } } TEST_F(EngineMatcherUtilsTestSuite, TestGetVersionAndArch) { diff --git a/engine/utils/engine_matcher_utils.h b/engine/utils/engine_matcher_utils.h index a6135e532..28c0f0c2a 100644 --- a/engine/utils/engine_matcher_utils.h +++ b/engine/utils/engine_matcher_utils.h @@ -156,6 +156,11 @@ inline std::string Validate(const std::vector<std::string>& variants, if (os == "mac" && !os_and_arch_compatible_list.empty()) return os_and_arch_compatible_list[0]; + if (os == "linux" && cpu_arch == "arm64" && + !os_and_arch_compatible_list.empty()) { + return os_and_arch_compatible_list[0]; + } + std::vector<std::string> avx_compatible_list; std::copy_if(os_and_arch_compatible_list.begin(),