diff --git a/src/hotspot/cpu/aarch64/aarch64_vector.ad b/src/hotspot/cpu/aarch64/aarch64_vector.ad index b14295ca15c..1ebc6408a60 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector.ad +++ b/src/hotspot/cpu/aarch64/aarch64_vector.ad @@ -135,9 +135,9 @@ source %{ (opcode == Op_VectorCastL2X && bt == T_FLOAT) || (opcode == Op_CountLeadingZerosV && bt == T_LONG) || (opcode == Op_CountTrailingZerosV && bt == T_LONG) || - // The vector implementation of Op_AddReductionVD/F is for the Vector API only. - // It is not suitable for auto-vectorization because it does not add the elements - // in the same order as sequential code, and FP addition is non-associative. + // The implementations of Op_AddReductionVD/F in Neon are for the Vector API only. + // They are not suitable for auto-vectorization because the result would not conform + // to the JLS, Section Evaluation Order. opcode == Op_AddReductionVD || opcode == Op_AddReductionVF || opcode == Op_MulReductionVD || opcode == Op_MulReductionVF || opcode == Op_MulVL) { @@ -2858,14 +2858,14 @@ instruct reduce_addL_sve(iRegLNoSp dst, iRegL isrc, vReg vsrc, vRegD tmp) %{ %} // reduction addF -// Floating-point addition is not associative, so the rules for AddReductionVF -// on NEON can't be used to auto-vectorize floating-point reduce-add. -// Currently, on NEON, AddReductionVF is only generated by Vector API. -instruct reduce_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ - predicate(UseSVE == 0 && Matcher::vector_length(n->in(2)) == 2); + +instruct reduce_non_strict_order_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ + // Non-strictly ordered floating-point add reduction for a 64-bits-long vector. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). + predicate(Matcher::vector_length(n->in(2)) == 2 && !n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF fsrc vsrc)); effect(TEMP_DEF dst); - format %{ "reduce_add2F_neon $dst, $fsrc, $vsrc" %} + format %{ "reduce_non_strict_order_add2F_neon $dst, $fsrc, $vsrc" %} ins_encode %{ __ faddp($dst$$FloatRegister, $vsrc$$FloatRegister, __ S); __ fadds($dst$$FloatRegister, $dst$$FloatRegister, $fsrc$$FloatRegister); @@ -2873,11 +2873,13 @@ instruct reduce_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ ins_pipe(pipe_slow); %} -instruct reduce_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ - predicate(UseSVE == 0 && Matcher::vector_length(n->in(2)) == 4); +instruct reduce_non_strict_order_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ + // Non-strictly ordered floating-point add reduction for 128-bits-long vector. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). + predicate(Matcher::vector_length(n->in(2)) == 4 && !n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF fsrc vsrc)); effect(TEMP_DEF dst, TEMP tmp); - format %{ "reduce_add4F_neon $dst, $fsrc, $vsrc\t# KILL $tmp" %} + format %{ "reduce_non_strict_order_add4F_neon $dst, $fsrc, $vsrc\t# KILL $tmp" %} ins_encode %{ __ faddp($tmp$$FloatRegister, __ T4S, $vsrc$$FloatRegister, $vsrc$$FloatRegister); __ faddp($dst$$FloatRegister, $tmp$$FloatRegister, __ S); @@ -2886,11 +2888,21 @@ instruct reduce_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ ins_pipe(pipe_slow); %} +// This rule calculates the reduction result in strict order. Two cases will +// reach here: +// 1. Non strictly-ordered AddReductionVF when vector size > 128-bits. 
For example - +// AddReductionVF generated by Vector API. For vector size > 128-bits, it is more +// beneficial performance-wise to generate direct SVE instruction even if it is +// strictly ordered. +// 2. Strictly-ordered AddReductionVF. For example - AddReductionVF generated by +// auto-vectorization on SVE machine. instruct reduce_addF_sve(vRegF dst_src1, vReg src2) %{ - predicate(UseSVE > 0); + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) || + n->as_Reduction()->requires_strict_order()); match(Set dst_src1 (AddReductionVF dst_src1 src2)); format %{ "reduce_addF_sve $dst_src1, $dst_src1, $src2" %} ins_encode %{ + assert(UseSVE > 0, "must be sve"); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src2); assert(length_in_bytes == MaxVectorSize, "invalid vector length"); __ sve_fadda($dst_src1$$FloatRegister, __ S, ptrue, $src2$$FloatRegister); @@ -2899,14 +2911,14 @@ instruct reduce_addF_sve(vRegF dst_src1, vReg src2) %{ %} // reduction addD -// Floating-point addition is not associative, so the rule for AddReductionVD -// on NEON can't be used to auto-vectorize floating-point reduce-add. -// Currently, on NEON, AddReductionVD is only generated by Vector API. -instruct reduce_addD_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ - predicate(UseSVE == 0); + +instruct reduce_non_strict_order_add2D_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ + // Non-strictly ordered floating-point add reduction for doubles. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). + predicate(!n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVD dsrc vsrc)); effect(TEMP_DEF dst); - format %{ "reduce_addD_neon $dst, $dsrc, $vsrc\t# 2D" %} + format %{ "reduce_non_strict_order_add2D_neon $dst, $dsrc, $vsrc\t# 2D" %} ins_encode %{ __ faddp($dst$$FloatRegister, $vsrc$$FloatRegister, __ D); __ faddd($dst$$FloatRegister, $dst$$FloatRegister, $dsrc$$FloatRegister); @@ -2914,11 +2926,21 @@ instruct reduce_addD_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ ins_pipe(pipe_slow); %} +// This rule calculates the reduction result in strict order. Two cases will +// reach here: +// 1. Non strictly-ordered AddReductionVD when vector size > 128-bits. For example - +// AddReductionVD generated by Vector API. For vector size > 128-bits, it is more +// beneficial performance-wise to generate direct SVE instruction even if it is +// strictly ordered. +// 2. Strictly-ordered AddReductionVD. For example - AddReductionVD generated by +// auto-vectorization on SVE machine. 
instruct reduce_addD_sve(vRegD dst_src1, vReg src2) %{ - predicate(UseSVE > 0); + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) || + n->as_Reduction()->requires_strict_order()); match(Set dst_src1 (AddReductionVD dst_src1 src2)); format %{ "reduce_addD_sve $dst_src1, $dst_src1, $src2" %} ins_encode %{ + assert(UseSVE > 0, "must be sve"); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src2); assert(length_in_bytes == MaxVectorSize, "invalid vector length"); __ sve_fadda($dst_src1$$FloatRegister, __ D, ptrue, $src2$$FloatRegister); diff --git a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 index 060bb4a11d4..29f92772368 100644 --- a/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 +++ b/src/hotspot/cpu/aarch64/aarch64_vector_ad.m4 @@ -125,9 +125,9 @@ source %{ (opcode == Op_VectorCastL2X && bt == T_FLOAT) || (opcode == Op_CountLeadingZerosV && bt == T_LONG) || (opcode == Op_CountTrailingZerosV && bt == T_LONG) || - // The vector implementation of Op_AddReductionVD/F is for the Vector API only. - // It is not suitable for auto-vectorization because it does not add the elements - // in the same order as sequential code, and FP addition is non-associative. + // The implementations of Op_AddReductionVD/F in Neon are for the Vector API only. + // They are not suitable for auto-vectorization because the result would not conform + // to the JLS, Section Evaluation Order. opcode == Op_AddReductionVD || opcode == Op_AddReductionVF || opcode == Op_MulReductionVD || opcode == Op_MulReductionVF || opcode == Op_MulVL) { @@ -1752,14 +1752,14 @@ REDUCE_ADD_INT_NEON_SVE_PAIRWISE(I, iRegIorL2I) REDUCE_ADD_INT_NEON_SVE_PAIRWISE(L, iRegL) // reduction addF -// Floating-point addition is not associative, so the rules for AddReductionVF -// on NEON can't be used to auto-vectorize floating-point reduce-add. -// Currently, on NEON, AddReductionVF is only generated by Vector API. -instruct reduce_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ - predicate(UseSVE == 0 && Matcher::vector_length(n->in(2)) == 2); + +instruct reduce_non_strict_order_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ + // Non-strictly ordered floating-point add reduction for a 64-bits-long vector. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). + predicate(Matcher::vector_length(n->in(2)) == 2 && !n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF fsrc vsrc)); effect(TEMP_DEF dst); - format %{ "reduce_add2F_neon $dst, $fsrc, $vsrc" %} + format %{ "reduce_non_strict_order_add2F_neon $dst, $fsrc, $vsrc" %} ins_encode %{ __ faddp($dst$$FloatRegister, $vsrc$$FloatRegister, __ S); __ fadds($dst$$FloatRegister, $dst$$FloatRegister, $fsrc$$FloatRegister); @@ -1767,11 +1767,13 @@ instruct reduce_add2F_neon(vRegF dst, vRegF fsrc, vReg vsrc) %{ ins_pipe(pipe_slow); %} -instruct reduce_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ - predicate(UseSVE == 0 && Matcher::vector_length(n->in(2)) == 4); +instruct reduce_non_strict_order_add4F_neon(vRegF dst, vRegF fsrc, vReg vsrc, vReg tmp) %{ + // Non-strictly ordered floating-point add reduction for 128-bits-long vector. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). 
+ predicate(Matcher::vector_length(n->in(2)) == 4 && !n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVF fsrc vsrc)); effect(TEMP_DEF dst, TEMP tmp); - format %{ "reduce_add4F_neon $dst, $fsrc, $vsrc\t# KILL $tmp" %} + format %{ "reduce_non_strict_order_add4F_neon $dst, $fsrc, $vsrc\t# KILL $tmp" %} ins_encode %{ __ faddp($tmp$$FloatRegister, __ T4S, $vsrc$$FloatRegister, $vsrc$$FloatRegister); __ faddp($dst$$FloatRegister, $tmp$$FloatRegister, __ S); @@ -1783,11 +1785,21 @@ dnl dnl REDUCE_ADD_FP_SVE($1, $2 ) dnl REDUCE_ADD_FP_SVE(type, size) define(`REDUCE_ADD_FP_SVE', ` +// This rule calculates the reduction result in strict order. Two cases will +// reach here: +// 1. Non strictly-ordered AddReductionV$1 when vector size > 128-bits. For example - +// AddReductionV$1 generated by Vector API. For vector size > 128-bits, it is more +// beneficial performance-wise to generate direct SVE instruction even if it is +// strictly ordered. +// 2. Strictly-ordered AddReductionV$1. For example - AddReductionV$1 generated by +// auto-vectorization on SVE machine. instruct reduce_add$1_sve(vReg$1 dst_src1, vReg src2) %{ - predicate(UseSVE > 0); + predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(2))) || + n->as_Reduction()->requires_strict_order()); match(Set dst_src1 (AddReductionV$1 dst_src1 src2)); format %{ "reduce_add$1_sve $dst_src1, $dst_src1, $src2" %} ins_encode %{ + assert(UseSVE > 0, "must be sve"); uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src2); assert(length_in_bytes == MaxVectorSize, "invalid vector length"); __ sve_fadda($dst_src1$$FloatRegister, __ $2, ptrue, $src2$$FloatRegister); @@ -1798,14 +1810,14 @@ dnl REDUCE_ADD_FP_SVE(F, S) // reduction addD -// Floating-point addition is not associative, so the rule for AddReductionVD -// on NEON can't be used to auto-vectorize floating-point reduce-add. -// Currently, on NEON, AddReductionVD is only generated by Vector API. -instruct reduce_addD_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ - predicate(UseSVE == 0); + +instruct reduce_non_strict_order_add2D_neon(vRegD dst, vRegD dsrc, vReg vsrc) %{ + // Non-strictly ordered floating-point add reduction for doubles. This rule is + // intended for the VectorAPI (which allows for non-strictly ordered add reduction). 
+ predicate(!n->as_Reduction()->requires_strict_order()); match(Set dst (AddReductionVD dsrc vsrc)); effect(TEMP_DEF dst); - format %{ "reduce_addD_neon $dst, $dsrc, $vsrc\t# 2D" %} + format %{ "reduce_non_strict_order_add2D_neon $dst, $dsrc, $vsrc\t# 2D" %} ins_encode %{ __ faddp($dst$$FloatRegister, $vsrc$$FloatRegister, __ D); __ faddd($dst$$FloatRegister, $dst$$FloatRegister, $dsrc$$FloatRegister); diff --git a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp index 84caef57f87..974214d985b 100644 --- a/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/nativeInst_aarch64.hpp @@ -168,6 +168,7 @@ class NativeCall: public NativeInstruction { return_address_offset = 4 }; + static int byte_size() { return instruction_size; } address instruction_address() const { return addr_at(instruction_offset); } address next_instruction_address() const { return addr_at(return_address_offset); } int displacement() const { return (int_at(displacement_offset) << 6) >> 4; } diff --git a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp index 15b57188730..e26c23cd983 100644 --- a/src/hotspot/cpu/arm/nativeInst_arm_32.hpp +++ b/src/hotspot/cpu/arm/nativeInst_arm_32.hpp @@ -415,6 +415,7 @@ inline NativeJump* nativeJump_at(address address) { class NativeCall: public RawNativeCall { public: + static int byte_size() { return instruction_size; } // NativeCall::next_instruction_address() is used only to define the // range where to look for the relocation information. We need not // walk over composed instructions (as long as the relocation information diff --git a/src/hotspot/cpu/ppc/nativeInst_ppc.hpp b/src/hotspot/cpu/ppc/nativeInst_ppc.hpp index 113cedfee7c..f21d76f8a67 100644 --- a/src/hotspot/cpu/ppc/nativeInst_ppc.hpp +++ b/src/hotspot/cpu/ppc/nativeInst_ppc.hpp @@ -137,6 +137,8 @@ class NativeCall: public NativeInstruction { instruction_size = 16 // Used in shared code for calls with reloc_info. }; + static int byte_size() { return instruction_size; } + static bool is_call_at(address a) { return Assembler::is_bl(*(int*)(a)); } diff --git a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp index e9b3624d9d2..f925f8950aa 100644 --- a/src/hotspot/cpu/riscv/nativeInst_riscv.hpp +++ b/src/hotspot/cpu/riscv/nativeInst_riscv.hpp @@ -126,6 +126,7 @@ class NativeCall: public NativeInstruction { return_address_offset = 4 }; + static int byte_size() { return instruction_size; } address instruction_address() const { return addr_at(instruction_offset); } address next_instruction_address() const { return addr_at(return_address_offset); } address return_address() const { return addr_at(return_address_offset); } diff --git a/src/hotspot/cpu/s390/nativeInst_s390.hpp b/src/hotspot/cpu/s390/nativeInst_s390.hpp index 13f15224f8b..8003e1d42f2 100644 --- a/src/hotspot/cpu/s390/nativeInst_s390.hpp +++ b/src/hotspot/cpu/s390/nativeInst_s390.hpp @@ -212,6 +212,7 @@ class NativeCall: public NativeInstruction { call_far_pcrelative_displacement_alignment = 4 }; + static int byte_size() { return instruction_size; } // Maximum size (in bytes) of a call to an absolute address. 
// Used when emitting call to deopt handler blob, which is a diff --git a/src/hotspot/cpu/x86/nativeInst_x86.hpp b/src/hotspot/cpu/x86/nativeInst_x86.hpp index f8cbf70f189..70cb6179366 100644 --- a/src/hotspot/cpu/x86/nativeInst_x86.hpp +++ b/src/hotspot/cpu/x86/nativeInst_x86.hpp @@ -160,6 +160,7 @@ class NativeCall: public NativeInstruction { return_address_offset = 5 }; + static int byte_size() { return instruction_size; } address instruction_address() const { return addr_at(instruction_offset); } address next_instruction_address() const { return addr_at(return_address_offset); } int displacement() const { return (jint) int_at(displacement_offset); } diff --git a/src/hotspot/cpu/zero/nativeInst_zero.hpp b/src/hotspot/cpu/zero/nativeInst_zero.hpp index 77a7d511ac5..2f3d9b80617 100644 --- a/src/hotspot/cpu/zero/nativeInst_zero.hpp +++ b/src/hotspot/cpu/zero/nativeInst_zero.hpp @@ -70,6 +70,8 @@ class NativeCall : public NativeInstruction { instruction_size = 0 // not used within the interpreter }; + static int byte_size() { return instruction_size; } + address instruction_address() const { ShouldNotCallThis(); return nullptr; diff --git a/src/hotspot/share/code/nmethod.inline.hpp b/src/hotspot/share/code/nmethod.inline.hpp index 4af4d3ffaed..49af1e0b95f 100644 --- a/src/hotspot/share/code/nmethod.inline.hpp +++ b/src/hotspot/share/code/nmethod.inline.hpp @@ -37,7 +37,7 @@ inline bool nmethod::is_deopt_pc(address pc) { return is_deopt_entry(pc) || is_d inline bool nmethod::is_deopt_entry(address pc) { return pc == deopt_handler_begin() #if INCLUDE_JVMCI - || (is_compiled_by_jvmci() && pc == (deopt_handler_begin() + NativeCall::instruction_size)) + || (is_compiled_by_jvmci() && pc == (deopt_handler_begin() + NativeCall::byte_size())) #endif ; } @@ -45,7 +45,7 @@ inline bool nmethod::is_deopt_entry(address pc) { inline bool nmethod::is_deopt_mh_entry(address pc) { return pc == deopt_mh_handler_begin() #if INCLUDE_JVMCI - || (is_compiled_by_jvmci() && pc == (deopt_mh_handler_begin() + NativeCall::instruction_size)) + || (is_compiled_by_jvmci() && pc == (deopt_mh_handler_begin() + NativeCall::byte_size())) #endif ; } diff --git a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp index ed594d72d06..49f1c82a98a 100644 --- a/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp +++ b/src/hotspot/share/gc/g1/g1CollectedHeap.inline.hpp @@ -227,7 +227,7 @@ void G1CollectedHeap::register_region_with_region_attr(G1HeapRegion* r) { void G1CollectedHeap::register_old_region_with_region_attr(G1HeapRegion* r) { assert(!r->has_pinned_objects(), "must be"); assert(r->rem_set()->is_complete(), "must be"); - _region_attr.set_in_old(r->hrm_index(), r->rem_set()->is_tracked()); + _region_attr.set_in_old(r->hrm_index(), true); _rem_set->exclude_region_from_scan(r->hrm_index()); } diff --git a/src/hotspot/share/opto/loopnode.hpp b/src/hotspot/share/opto/loopnode.hpp index ad603439e59..90ef4da4f1e 100644 --- a/src/hotspot/share/opto/loopnode.hpp +++ b/src/hotspot/share/opto/loopnode.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
* * This code is free software; you can redistribute it and/or modify it @@ -1460,7 +1460,7 @@ class PhaseIdealLoop : public PhaseTransform { }; AutoVectorizeStatus auto_vectorize(IdealLoopTree* lpt, VSharedData &vshared); - // Move UnorderedReduction out of loop if possible + // Move an unordered Reduction out of loop if possible void move_unordered_reduction_out_of_loop(IdealLoopTree* loop); // Create a scheduled list of nodes control dependent on ctrl set. diff --git a/src/hotspot/share/opto/loopopts.cpp b/src/hotspot/share/opto/loopopts.cpp index a3227d47832..b0effb6d4f5 100644 --- a/src/hotspot/share/opto/loopopts.cpp +++ b/src/hotspot/share/opto/loopopts.cpp @@ -4310,11 +4310,19 @@ PhaseIdealLoop::auto_vectorize(IdealLoopTree* lpt, VSharedData &vshared) { return AutoVectorizeStatus::Success; } +// Returns true if the Reduction node is unordered. +static bool is_unordered_reduction(Node* n) { + return n->is_Reduction() && !n->as_Reduction()->requires_strict_order(); +} + // Having ReductionNodes in the loop is expensive. They need to recursively // fold together the vector values, for every vectorized loop iteration. If // we encounter the following pattern, we can vector accumulate the values // inside the loop, and only have a single UnorderedReduction after the loop. // +// Note: UnorderedReduction represents a ReductionNode which does not require +// calculating in strict order. +// // CountedLoop init // | | // +------+ | +-----------------------+ @@ -4354,21 +4362,24 @@ PhaseIdealLoop::auto_vectorize(IdealLoopTree* lpt, VSharedData &vshared) { // wise. This is a single operation per vector_accumulator, rather than many // for a UnorderedReduction. We can then reduce the last vector_accumulator // after the loop, and also reduce the init value into it. +// // We can not do this with all reductions. Some reductions do not allow the -// reordering of operations (for example float addition). +// reordering of operations (for example float addition/multiplication require +// strict order). void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { assert(!C->major_progress() && loop->is_counted() && loop->is_innermost(), "sanity"); - // Find all Phi nodes with UnorderedReduction on backedge. + // Find all Phi nodes with an unordered Reduction on backedge. CountedLoopNode* cl = loop->_head->as_CountedLoop(); for (DUIterator_Fast jmax, j = cl->fast_outs(jmax); j < jmax; j++) { Node* phi = cl->fast_out(j); - // We have a phi with a single use, and a UnorderedReduction on the backedge. - if (!phi->is_Phi() || phi->outcnt() != 1 || !phi->in(2)->is_UnorderedReduction()) { + // We have a phi with a single use, and an unordered Reduction on the backedge. + if (!phi->is_Phi() || phi->outcnt() != 1 || !is_unordered_reduction(phi->in(2))) { continue; } - UnorderedReductionNode* last_ur = phi->in(2)->as_UnorderedReduction(); + ReductionNode* last_ur = phi->in(2)->as_Reduction(); + assert(!last_ur->requires_strict_order(), "must be"); // Determine types const TypeVect* vec_t = last_ur->vect_type(); @@ -4385,14 +4396,14 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { continue; // not implemented -> fails } - // Traverse up the chain of UnorderedReductions, checking that it loops back to - // the phi. Check that all UnorderedReductions only have a single use, except for + // Traverse up the chain of unordered Reductions, checking that it loops back to + // the phi. 
Check that all unordered Reductions only have a single use, except for // the last (last_ur), which only has phi as a use in the loop, and all other uses // are outside the loop. - UnorderedReductionNode* current = last_ur; - UnorderedReductionNode* first_ur = nullptr; + ReductionNode* current = last_ur; + ReductionNode* first_ur = nullptr; while (true) { - assert(current->is_UnorderedReduction(), "sanity"); + assert(!current->requires_strict_order(), "sanity"); // Expect no ctrl and a vector_input from within the loop. Node* ctrl = current->in(0); @@ -4409,7 +4420,7 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { break; // Chain traversal fails. } - // Expect single use of UnorderedReduction, except for last_ur. + // Expect single use of an unordered Reduction, except for last_ur. if (current == last_ur) { // Expect all uses to be outside the loop, except phi. for (DUIterator_Fast kmax, k = current->fast_outs(kmax); k < kmax; k++) { @@ -4427,12 +4438,13 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { } } - // Expect another UnorderedReduction or phi as the scalar input. + // Expect another unordered Reduction or phi as the scalar input. Node* scalar_input = current->in(1); - if (scalar_input->is_UnorderedReduction() && + if (is_unordered_reduction(scalar_input) && scalar_input->Opcode() == current->Opcode()) { - // Move up the UnorderedReduction chain. - current = scalar_input->as_UnorderedReduction(); + // Move up the unordered Reduction chain. + current = scalar_input->as_Reduction(); + assert(!current->requires_strict_order(), "must be"); } else if (scalar_input == phi) { // Chain terminates at phi. first_ur = current; @@ -4456,7 +4468,7 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { VectorNode* identity_vector = VectorNode::scalar2vector(identity_scalar, vector_length, bt_t); register_new_node(identity_vector, C->root()); assert(vec_t == identity_vector->vect_type(), "matching vector type"); - VectorNode::trace_new_vector(identity_vector, "UnorderedReduction"); + VectorNode::trace_new_vector(identity_vector, "Unordered Reduction"); // Turn the scalar phi into a vector phi. _igvn.rehash_node_delayed(phi); @@ -4465,7 +4477,7 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { phi->as_Type()->set_type(vec_t); _igvn.set_type(phi, vec_t); - // Traverse down the chain of UnorderedReductions, and replace them with vector_accumulators. + // Traverse down the chain of unordered Reductions, and replace them with vector_accumulators. current = first_ur; while (true) { // Create vector_accumulator to replace current. @@ -4474,11 +4486,12 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { VectorNode* vector_accumulator = VectorNode::make(vopc, last_vector_accumulator, vector_input, vec_t); register_new_node(vector_accumulator, cl); _igvn.replace_node(current, vector_accumulator); - VectorNode::trace_new_vector(vector_accumulator, "UnorderedReduction"); + VectorNode::trace_new_vector(vector_accumulator, "Unordered Reduction"); if (current == last_ur) { break; } - current = vector_accumulator->unique_out()->as_UnorderedReduction(); + current = vector_accumulator->unique_out()->as_Reduction(); + assert(!current->requires_strict_order(), "must be"); } // Create post-loop reduction. 
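To make the shape of this transformation concrete, here is a minimal Java sketch of the before/after loop structure (illustrative only, not part of this patch; the class and method names are hypothetical, and it uses the incubating Vector API, so it needs --add-modules jdk.incubator.vector):

    import jdk.incubator.vector.IntVector;
    import jdk.incubator.vector.VectorOperators;
    import jdk.incubator.vector.VectorSpecies;

    public class UnorderedReductionShape {
        private static final VectorSpecies<Integer> SPECIES = IntVector.SPECIES_PREFERRED;

        // Shape before the optimization: a full cross-lane reduction on every
        // vectorized iteration (the in-loop ReductionNode).
        static int reducePerIteration(int[] a) {
            int acc = 0;
            for (int i = 0; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                acc += IntVector.fromArray(SPECIES, a, i).reduceLanes(VectorOperators.ADD);
            }
            return acc; // scalar tail elided for brevity
        }

        // Shape after move_unordered_reduction_out_of_loop: a lane-wise
        // vector_accumulator in the loop and a single post_loop_reduction.
        static int accumulateThenReduce(int[] a) {
            IntVector acc = IntVector.zero(SPECIES); // identity vector
            for (int i = 0; i < SPECIES.loopBound(a.length); i += SPECIES.length()) {
                acc = acc.add(IntVector.fromArray(SPECIES, a, i));
            }
            return acc.reduceLanes(VectorOperators.ADD); // scalar tail elided
        }
    }

The two methods agree only because integer addition may be freely reassociated; after this change that legality test is expressed as !requires_strict_order() instead of the old is_UnorderedReduction() class query, which is why strictly ordered AddReductionVF/VD nodes are never moved out of the loop.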
@@ -4495,7 +4508,7 @@ void PhaseIdealLoop::move_unordered_reduction_out_of_loop(IdealLoopTree* loop) { } } register_new_node(post_loop_reduction, get_late_ctrl(post_loop_reduction, cl)); - VectorNode::trace_new_vector(post_loop_reduction, "UnorderedReduction"); + VectorNode::trace_new_vector(post_loop_reduction, "Unordered Reduction"); assert(last_accumulator->outcnt() == 2, "last_accumulator has 2 uses: phi and post_loop_reduction"); assert(post_loop_reduction->outcnt() > 0, "should have taken over all non loop uses of last_accumulator"); diff --git a/src/hotspot/share/opto/node.hpp b/src/hotspot/share/opto/node.hpp index dc9dc6654b5..ae379c4833a 100644 --- a/src/hotspot/share/opto/node.hpp +++ b/src/hotspot/share/opto/node.hpp @@ -175,7 +175,6 @@ class SubTypeCheckNode; class Type; class TypeNode; class UnlockNode; -class UnorderedReductionNode; class VectorNode; class LoadVectorNode; class LoadVectorMaskedNode; @@ -739,7 +738,6 @@ class Node { DEFINE_CLASS_ID(ExpandV, Vector, 5) DEFINE_CLASS_ID(CompressM, Vector, 6) DEFINE_CLASS_ID(Reduction, Vector, 7) - DEFINE_CLASS_ID(UnorderedReduction, Reduction, 0) DEFINE_CLASS_ID(NegV, Vector, 8) DEFINE_CLASS_ID(Con, Type, 8) DEFINE_CLASS_ID(ConI, Con, 0) @@ -991,7 +989,6 @@ class Node { DEFINE_CLASS_QUERY(Sub) DEFINE_CLASS_QUERY(SubTypeCheck) DEFINE_CLASS_QUERY(Type) - DEFINE_CLASS_QUERY(UnorderedReduction) DEFINE_CLASS_QUERY(Vector) DEFINE_CLASS_QUERY(VectorMaskCmp) DEFINE_CLASS_QUERY(VectorUnbox) diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp index d8a9b14c4ad..8ea456d3417 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -1321,7 +1321,7 @@ CodeBuffer* PhaseOutput::init_buffer() { int code_req = _buf_sizes._code; int const_req = _buf_sizes._const; - int pad_req = NativeCall::instruction_size; + int pad_req = NativeCall::byte_size(); BarrierSetC2* bs = BarrierSet::barrier_set()->barrier_set_c2(); stub_req += bs->estimate_stub_size(); diff --git a/src/hotspot/share/opto/vectorIntrinsics.cpp b/src/hotspot/share/opto/vectorIntrinsics.cpp index 3ef6ae02534..b31f6ace5a6 100644 --- a/src/hotspot/share/opto/vectorIntrinsics.cpp +++ b/src/hotspot/share/opto/vectorIntrinsics.cpp @@ -1616,21 +1616,23 @@ bool LibraryCallKit::inline_vector_reduction() { } Node* init = ReductionNode::make_identity_con_scalar(gvn(), opc, elem_bt); - Node* value = nullptr; - if (mask == nullptr) { - assert(!is_masked_op, "Masked op needs the mask value never null"); - value = ReductionNode::make(opc, nullptr, init, opd, elem_bt); - } else { - if (use_predicate) { - value = ReductionNode::make(opc, nullptr, init, opd, elem_bt); - value->add_req(mask); - value->add_flag(Node::Flag_is_predicated_vector); - } else { - Node* reduce_identity = gvn().transform(VectorNode::scalar2vector(init, num_elem, Type::get_const_basic_type(elem_bt))); - value = gvn().transform(new VectorBlendNode(reduce_identity, opd, mask)); - value = ReductionNode::make(opc, nullptr, init, value, elem_bt); - } + Node* value = opd; + + assert(mask != nullptr || !is_masked_op, "Masked op needs the mask value never null"); + if (mask != nullptr && !use_predicate) { + Node* reduce_identity = gvn().transform(VectorNode::scalar2vector(init, num_elem, Type::get_const_basic_type(elem_bt))); + value = gvn().transform(new VectorBlendNode(reduce_identity, value, mask)); } + + // Make an unordered Reduction node. 
This affects only AddReductionVF/VD and MulReductionVF/VD, + // as these operations are allowed to be associative (not requiring strict order) in VectorAPI. + value = ReductionNode::make(opc, nullptr, init, value, elem_bt, /* requires_strict_order */ false); + + if (mask != nullptr && use_predicate) { + value->add_req(mask); + value->add_flag(Node::Flag_is_predicated_vector); + } + value = gvn().transform(value); Node* bits = nullptr; diff --git a/src/hotspot/share/opto/vectornode.cpp b/src/hotspot/share/opto/vectornode.cpp index b14a7f7b165..d560f112039 100644 --- a/src/hotspot/share/opto/vectornode.cpp +++ b/src/hotspot/share/opto/vectornode.cpp @@ -1296,7 +1296,8 @@ int ReductionNode::opcode(int opc, BasicType bt) { } // Return the appropriate reduction node. -ReductionNode* ReductionNode::make(int opc, Node *ctrl, Node* n1, Node* n2, BasicType bt) { +ReductionNode* ReductionNode::make(int opc, Node* ctrl, Node* n1, Node* n2, BasicType bt, + bool requires_strict_order) { int vopc = opcode(opc, bt); @@ -1306,17 +1307,17 @@ ReductionNode* ReductionNode::make(int opc, Node *ctrl, Node* n1, Node* n2, Basi switch (vopc) { case Op_AddReductionVI: return new AddReductionVINode(ctrl, n1, n2); case Op_AddReductionVL: return new AddReductionVLNode(ctrl, n1, n2); - case Op_AddReductionVF: return new AddReductionVFNode(ctrl, n1, n2); - case Op_AddReductionVD: return new AddReductionVDNode(ctrl, n1, n2); + case Op_AddReductionVF: return new AddReductionVFNode(ctrl, n1, n2, requires_strict_order); + case Op_AddReductionVD: return new AddReductionVDNode(ctrl, n1, n2, requires_strict_order); case Op_MulReductionVI: return new MulReductionVINode(ctrl, n1, n2); case Op_MulReductionVL: return new MulReductionVLNode(ctrl, n1, n2); - case Op_MulReductionVF: return new MulReductionVFNode(ctrl, n1, n2); - case Op_MulReductionVD: return new MulReductionVDNode(ctrl, n1, n2); - case Op_MinReductionV: return new MinReductionVNode(ctrl, n1, n2); - case Op_MaxReductionV: return new MaxReductionVNode(ctrl, n1, n2); - case Op_AndReductionV: return new AndReductionVNode(ctrl, n1, n2); - case Op_OrReductionV: return new OrReductionVNode(ctrl, n1, n2); - case Op_XorReductionV: return new XorReductionVNode(ctrl, n1, n2); + case Op_MulReductionVF: return new MulReductionVFNode(ctrl, n1, n2, requires_strict_order); + case Op_MulReductionVD: return new MulReductionVDNode(ctrl, n1, n2, requires_strict_order); + case Op_MinReductionV: return new MinReductionVNode (ctrl, n1, n2); + case Op_MaxReductionV: return new MaxReductionVNode (ctrl, n1, n2); + case Op_AndReductionV: return new AndReductionVNode (ctrl, n1, n2); + case Op_OrReductionV: return new OrReductionVNode (ctrl, n1, n2); + case Op_XorReductionV: return new XorReductionVNode (ctrl, n1, n2); default: assert(false, "unknown node: %s", NodeClassNames[vopc]); return nullptr; diff --git a/src/hotspot/share/opto/vectornode.hpp b/src/hotspot/share/opto/vectornode.hpp index 17c7482d88c..6c5402eb511 100644 --- a/src/hotspot/share/opto/vectornode.hpp +++ b/src/hotspot/share/opto/vectornode.hpp @@ -203,7 +203,9 @@ class ReductionNode : public Node { init_class_id(Class_Reduction); } - static ReductionNode* make(int opc, Node* ctrl, Node* in1, Node* in2, BasicType bt); + static ReductionNode* make(int opc, Node* ctrl, Node* in1, Node* in2, BasicType bt, + // This only affects floating-point add and mul reductions. 
+ bool requires_strict_order = true); static int opcode(int opc, BasicType bt); static bool implemented(int opc, uint vlen, BasicType bt); // Make an identity scalar (zero for add, one for mul, etc) for scalar opc. @@ -225,47 +227,97 @@ class ReductionNode : public Node { // Needed for proper cloning. virtual uint size_of() const { return sizeof(*this); } -}; -//---------------------------UnorderedReductionNode------------------------------------- -// Order of reduction does not matter. Example int add. Not true for float add. -class UnorderedReductionNode : public ReductionNode { -public: - UnorderedReductionNode(Node * ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) { - init_class_id(Class_UnorderedReduction); + // Floating-point addition and multiplication are non-associative, so + // AddReductionVF/D and MulReductionVF/D require strict ordering + // in auto-vectorization. Vector API can generate AddReductionVF/D + // and MulReductionVF/VD without strict ordering, which can benefit + // some platforms. + // + // Other reductions don't need strict ordering. + virtual bool requires_strict_order() const { + return false; + } + +#ifndef PRODUCT + void dump_spec(outputStream* st) const { + if (requires_strict_order()) { + st->print("requires_strict_order"); + } else { + st->print("no_strict_order"); + } } +#endif }; //------------------------------AddReductionVINode-------------------------------------- // Vector add byte, short and int as a reduction -class AddReductionVINode : public UnorderedReductionNode { +class AddReductionVINode : public ReductionNode { public: - AddReductionVINode(Node * ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + AddReductionVINode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------AddReductionVLNode-------------------------------------- // Vector add long as a reduction -class AddReductionVLNode : public UnorderedReductionNode { +class AddReductionVLNode : public ReductionNode { public: - AddReductionVLNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + AddReductionVLNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------AddReductionVFNode-------------------------------------- // Vector add float as a reduction class AddReductionVFNode : public ReductionNode { +private: + // True if add reduction operation for floats requires strict ordering. + // As an example - The value is true when add reduction for floats is auto-vectorized + // as auto-vectorization mandates strict ordering but the value is false when this node + // is generated through VectorAPI as VectorAPI does not impose any such rules on ordering. 
+ const bool _requires_strict_order; public: - AddReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + // _requires_strict_order is set to true by default as mandated by auto-vectorization + AddReductionVFNode(Node* ctrl, Node* in1, Node* in2, bool requires_strict_order = true) : + ReductionNode(ctrl, in1, in2), _requires_strict_order(requires_strict_order) {} + virtual int Opcode() const; + + virtual bool requires_strict_order() const { return _requires_strict_order; } + + virtual uint hash() const { return Node::hash() + _requires_strict_order; } + + virtual bool cmp(const Node& n) const { + return Node::cmp(n) && _requires_strict_order == ((ReductionNode&)n).requires_strict_order(); + } + + virtual uint size_of() const { return sizeof(*this); } }; //------------------------------AddReductionVDNode-------------------------------------- // Vector add double as a reduction class AddReductionVDNode : public ReductionNode { +private: + // True if add reduction operation for doubles requires strict ordering. + // As an example - The value is true when add reduction for doubles is auto-vectorized + // as auto-vectorization mandates strict ordering but the value is false when this node + // is generated through VectorAPI as VectorAPI does not impose any such rules on ordering. + const bool _requires_strict_order; public: - AddReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + // _requires_strict_order is set to true by default as mandated by auto-vectorization + AddReductionVDNode(Node* ctrl, Node* in1, Node* in2, bool requires_strict_order = true) : + ReductionNode(ctrl, in1, in2), _requires_strict_order(requires_strict_order) {} + virtual int Opcode() const; + + virtual bool requires_strict_order() const { return _requires_strict_order; } + + virtual uint hash() const { return Node::hash() + _requires_strict_order; } + + virtual bool cmp(const Node& n) const { + return Node::cmp(n) && _requires_strict_order == ((ReductionNode&)n).requires_strict_order(); + } + + virtual uint size_of() const { return sizeof(*this); } }; //------------------------------SubVBNode-------------------------------------- @@ -400,34 +452,70 @@ class FmaVFNode : public FmaVNode { //------------------------------MulReductionVINode-------------------------------------- // Vector multiply byte, short and int as a reduction -class MulReductionVINode : public UnorderedReductionNode { +class MulReductionVINode : public ReductionNode { public: - MulReductionVINode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + MulReductionVINode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------MulReductionVLNode-------------------------------------- // Vector multiply long as a reduction -class MulReductionVLNode : public UnorderedReductionNode { +class MulReductionVLNode : public ReductionNode { public: - MulReductionVLNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + MulReductionVLNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------MulReductionVFNode-------------------------------------- // Vector multiply float as a reduction class MulReductionVFNode : public ReductionNode { + // True if mul reduction operation for floats requires strict ordering. 
+ // As an example - The value is true when mul reduction for floats is auto-vectorized + // as auto-vectorization mandates strict ordering but the value is false when this node + // is generated through VectorAPI as VectorAPI does not impose any such rules on ordering. + const bool _requires_strict_order; public: - MulReductionVFNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + // _requires_strict_order is set to true by default as mandated by auto-vectorization + MulReductionVFNode(Node* ctrl, Node* in1, Node* in2, bool requires_strict_order = true) : + ReductionNode(ctrl, in1, in2), _requires_strict_order(requires_strict_order) {} + virtual int Opcode() const; + + virtual bool requires_strict_order() const { return _requires_strict_order; } + + virtual uint hash() const { return Node::hash() + _requires_strict_order; } + + virtual bool cmp(const Node& n) const { + return Node::cmp(n) && _requires_strict_order == ((ReductionNode&)n).requires_strict_order(); + } + + virtual uint size_of() const { return sizeof(*this); } }; //------------------------------MulReductionVDNode-------------------------------------- // Vector multiply double as a reduction class MulReductionVDNode : public ReductionNode { + // True if mul reduction operation for doubles requires strict ordering. + // As an example - The value is true when mul reduction for doubles is auto-vectorized + // as auto-vectorization mandates strict ordering but the value is false when this node + // is generated through VectorAPI as VectorAPI does not impose any such rules on ordering. + const bool _requires_strict_order; public: - MulReductionVDNode(Node *ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} + // _requires_strict_order is set to true by default as mandated by auto-vectorization + MulReductionVDNode(Node* ctrl, Node* in1, Node* in2, bool requires_strict_order = true) : + ReductionNode(ctrl, in1, in2), _requires_strict_order(requires_strict_order) {} + virtual int Opcode() const; + + virtual bool requires_strict_order() const { return _requires_strict_order; } + + virtual uint hash() const { return Node::hash() + _requires_strict_order; } + + virtual bool cmp(const Node& n) const { + return Node::cmp(n) && _requires_strict_order == ((ReductionNode&)n).requires_strict_order(); + } + + virtual uint size_of() const { return sizeof(*this); } }; //------------------------------DivVFNode-------------------------------------- @@ -753,9 +841,9 @@ class AndVNode : public VectorNode { //------------------------------AndReductionVNode-------------------------------------- // Vector and byte, short, int, long as a reduction -class AndReductionVNode : public UnorderedReductionNode { +class AndReductionVNode : public ReductionNode { public: - AndReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + AndReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; @@ -770,9 +858,9 @@ class OrVNode : public VectorNode { //------------------------------OrReductionVNode-------------------------------------- // Vector or byte, short, int, long as a reduction -class OrReductionVNode : public UnorderedReductionNode { +class OrReductionVNode : public ReductionNode { public: - OrReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + OrReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; @@ -787,25 +875,25 @@ class 
XorVNode : public VectorNode { //------------------------------XorReductionVNode-------------------------------------- // Vector xor int, long as a reduction -class XorReductionVNode : public UnorderedReductionNode { +class XorReductionVNode : public ReductionNode { public: - XorReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + XorReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------MinReductionVNode-------------------------------------- // Vector min byte, short, int, long, float, double as a reduction -class MinReductionVNode : public UnorderedReductionNode { +class MinReductionVNode : public ReductionNode { public: - MinReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + MinReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; //------------------------------MaxReductionVNode-------------------------------------- // Vector max byte, short, int, long, float, double as a reduction -class MaxReductionVNode : public UnorderedReductionNode { +class MaxReductionVNode : public ReductionNode { public: - MaxReductionVNode(Node *ctrl, Node* in1, Node* in2) : UnorderedReductionNode(ctrl, in1, in2) {} + MaxReductionVNode(Node* ctrl, Node* in1, Node* in2) : ReductionNode(ctrl, in1, in2) {} virtual int Opcode() const; }; diff --git a/src/java.base/share/man/java.1 b/src/java.base/share/man/java.1 index 05b1ec6cdd8..4bd0306b2b8 100644 --- a/src/java.base/share/man/java.1 +++ b/src/java.base/share/man/java.1 @@ -36,7 +36,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JAVA" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAVA" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/java.base/share/man/keytool.1 b/src/java.base/share/man/keytool.1 index a61095d4504..63a134eb932 100644 --- a/src/java.base/share/man/keytool.1 +++ b/src/java.base/share/man/keytool.1 @@ -1,4 +1,4 @@ -.\" Copyright (c) 1998, 2023, Oracle and/or its affiliates. All rights reserved. +.\" Copyright (c) 1998, 2024, Oracle and/or its affiliates. All rights reserved. .\" DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. .\" .\" This code is free software; you can redistribute it and/or modify it @@ -36,7 +36,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "KEYTOOL" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "KEYTOOL" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP @@ -1747,7 +1747,7 @@ risk. The \f[V]keytool\f[R] command supports these named extensions. The names aren\[aq]t case-sensitive. .TP -\f[V]BC\f[R] or \f[V]BasicContraints\f[R] +\f[V]BC\f[R] or \f[V]BasicConstraints\f[R] Values: .RS .PP diff --git a/src/java.rmi/share/man/rmiregistry.1 b/src/java.rmi/share/man/rmiregistry.1 index 29a2e2c965a..c168e1482a6 100644 --- a/src/java.rmi/share/man/rmiregistry.1 +++ b/src/java.rmi/share/man/rmiregistry.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "RMIREGISTRY" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "RMIREGISTRY" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/java.scripting/share/man/jrunscript.1 b/src/java.scripting/share/man/jrunscript.1 index 43029582b3e..59389c274d1 100644 --- a/src/java.scripting/share/man/jrunscript.1 +++ b/src/java.scripting/share/man/jrunscript.1 @@ -35,7 +35,7 @@ . ftr VB CB . 
ftr VBI CBI .\} -.TH "JRUNSCRIPT" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JRUNSCRIPT" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.compiler/share/man/javac.1 b/src/jdk.compiler/share/man/javac.1 index de374e7d0c7..ebee0369238 100644 --- a/src/jdk.compiler/share/man/javac.1 +++ b/src/jdk.compiler/share/man/javac.1 @@ -1,4 +1,4 @@ -.\" Copyright (c) 1994, 2023, Oracle and/or its affiliates. All rights reserved. +.\" Copyright (c) 1994, 2024, Oracle and/or its affiliates. All rights reserved. .\" DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. .\" .\" This code is free software; you can redistribute it and/or modify it @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JAVAC" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAVAC" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.compiler/share/man/serialver.1 b/src/jdk.compiler/share/man/serialver.1 index cd437b7e9bf..bad14872ee6 100644 --- a/src/jdk.compiler/share/man/serialver.1 +++ b/src/jdk.compiler/share/man/serialver.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "SERIALVER" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "SERIALVER" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.hotspot.agent/share/man/jhsdb.1 b/src/jdk.hotspot.agent/share/man/jhsdb.1 index 30b2d3fe95c..5b65f7eafb4 100644 --- a/src/jdk.hotspot.agent/share/man/jhsdb.1 +++ b/src/jdk.hotspot.agent/share/man/jhsdb.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JHSDB" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JHSDB" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.httpserver/share/man/jwebserver.1 b/src/jdk.httpserver/share/man/jwebserver.1 index 3c48d81fc9c..4fbaf9dd09d 100644 --- a/src/jdk.httpserver/share/man/jwebserver.1 +++ b/src/jdk.httpserver/share/man/jwebserver.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JWEBSERVER" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JWEBSERVER" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jartool/share/man/jar.1 b/src/jdk.jartool/share/man/jar.1 index 49a028e0f29..2d983eb561e 100644 --- a/src/jdk.jartool/share/man/jar.1 +++ b/src/jdk.jartool/share/man/jar.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JAR" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAR" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jartool/share/man/jarsigner.1 b/src/jdk.jartool/share/man/jarsigner.1 index ea8f686a4c8..d085efcfcd0 100644 --- a/src/jdk.jartool/share/man/jarsigner.1 +++ b/src/jdk.jartool/share/man/jarsigner.1 @@ -36,7 +36,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JARSIGNER" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JARSIGNER" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.javadoc/share/man/javadoc.1 b/src/jdk.javadoc/share/man/javadoc.1 index 73d12efbf30..4e256a7ce38 100644 --- a/src/jdk.javadoc/share/man/javadoc.1 +++ b/src/jdk.javadoc/share/man/javadoc.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JAVADOC" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAVADOC" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP @@ -218,7 +218,7 @@ summary is required. For more explicit control in any individual documentation comment, enclose the contents of the first sentence in a \f[V]{\[at]summary ...}\f[R] tag, or when applicable, in a -\[ga]{\[at]return ...} tag. +\f[V]{\[at]return ...}\f[R] tag. 
.RE .TP \f[V]-doclet\f[R] \f[I]class\f[R] @@ -522,7 +522,8 @@ Allow JavaScript in documentation comments, and options whose value is \f[I]html-code\f[R]. .TP \f[V]-author\f[R] -Includes the \f[V]\[at]author\f[R] text in the generated docs. +Includes the text of any \f[V]author\f[R] tags in the generated +documentation. .TP \f[V]-bottom\f[R] \f[I]html-code\f[R] Specifies the text to be placed at the bottom of each generated page. @@ -986,8 +987,8 @@ is used. .RE .TP \f[V]-nosince\f[R] -Omits from the generated documents the \f[V]Since\f[R] sections -associated with the \f[V]\[at]since\f[R] tags. +Omits from the generated documentation the \f[V]Since\f[R] sections +derived from any \f[V]since\f[R] tags. .TP \f[V]-notimestamp\f[R] Suppresses the time stamp, which is hidden in an HTML comment in the @@ -1020,9 +1021,6 @@ to the current working directory. .PP The file may be an HTML file, with a filename ending in \f[V].html\f[R], or a Markdown file, with a filename ending in \f[V].md\f[R]. -.PD 0 -.P -.PD If the file is an HTML file, the content for the overview documentation is taken from the \f[V]
<main>\f[R] element in the file, if one is present, or from the \f[V]<body>\f[R] element if there is no @@ -1213,10 +1211,11 @@ To access the generated Use page, go to the class or package and click the \f[B]USE\f[R] link in the navigation bar. .TP \f[V]-version\f[R] -Includes the version text in the generated docs. +Includes the text of any \f[V]version\f[R] tags in the generated +documentation. This text is omitted by default. -To find out what version of the \f[V]javadoc\f[R] tool you are using, -use the \f[V]--version\f[R] option (with two hyphens). +Note: To find out what version of the \f[V]javadoc\f[R] tool you are +using, use the \f[V]--version\f[R] option (with two hyphens). .TP \f[V]-windowtitle\f[R] \f[I]title\f[R] Specifies the title to be placed in the HTML \f[V]<title>\f[R] tag. diff --git a/src/jdk.jcmd/share/man/jcmd.1 b/src/jdk.jcmd/share/man/jcmd.1 index c88d2a22a41..2befaf5a949 100644 --- a/src/jdk.jcmd/share/man/jcmd.1 +++ b/src/jdk.jcmd/share/man/jcmd.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JCMD" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JCMD" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jinfo.1 b/src/jdk.jcmd/share/man/jinfo.1 index fc87807c6fe..49d8a852633 100644 --- a/src/jdk.jcmd/share/man/jinfo.1 +++ b/src/jdk.jcmd/share/man/jinfo.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JINFO" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JINFO" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jmap.1 b/src/jdk.jcmd/share/man/jmap.1 index 4865f43d1c4..42831aa68ac 100644 --- a/src/jdk.jcmd/share/man/jmap.1 +++ b/src/jdk.jcmd/share/man/jmap.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JMAP" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JMAP" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jps.1 b/src/jdk.jcmd/share/man/jps.1 index be506be2e82..cbc69872b60 100644 --- a/src/jdk.jcmd/share/man/jps.1 +++ b/src/jdk.jcmd/share/man/jps.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JPS" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JPS" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jstack.1 b/src/jdk.jcmd/share/man/jstack.1 index 9effcb0902f..933db5fc80d 100644 --- a/src/jdk.jcmd/share/man/jstack.1 +++ b/src/jdk.jcmd/share/man/jstack.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JSTACK" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JSTACK" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jcmd/share/man/jstat.1 b/src/jdk.jcmd/share/man/jstat.1 index dd7d3832908..22e111a812b 100644 --- a/src/jdk.jcmd/share/man/jstat.1 +++ b/src/jdk.jcmd/share/man/jstat.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JSTAT" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JSTAT" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jconsole/share/man/jconsole.1 b/src/jdk.jconsole/share/man/jconsole.1 index ce1f948e6f1..ec70040acf4 100644 --- a/src/jdk.jconsole/share/man/jconsole.1 +++ b/src/jdk.jconsole/share/man/jconsole.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JCONSOLE" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JCONSOLE" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jdeps/share/man/javap.1 b/src/jdk.jdeps/share/man/javap.1 index f9ec998e0af..27b0a29d0ba 100644 --- a/src/jdk.jdeps/share/man/javap.1 +++ b/src/jdk.jdeps/share/man/javap.1 @@ -35,7 +35,7 @@ . ftr VB CB . 
ftr VBI CBI .\} -.TH "JAVAP" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JAVAP" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jdeps/share/man/jdeprscan.1 b/src/jdk.jdeps/share/man/jdeprscan.1 index ac850cf78de..fc13f05c449 100644 --- a/src/jdk.jdeps/share/man/jdeprscan.1 +++ b/src/jdk.jdeps/share/man/jdeprscan.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JDEPRSCAN" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JDEPRSCAN" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jdeps/share/man/jdeps.1 b/src/jdk.jdeps/share/man/jdeps.1 index 53891441665..d3dde37e3b9 100644 --- a/src/jdk.jdeps/share/man/jdeps.1 +++ b/src/jdk.jdeps/share/man/jdeps.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JDEPS" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JDEPS" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jdi/share/man/jdb.1 b/src/jdk.jdi/share/man/jdb.1 index 530c9bed6a1..88097ffeae4 100644 --- a/src/jdk.jdi/share/man/jdb.1 +++ b/src/jdk.jdi/share/man/jdb.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JDB" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JDB" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jfr/share/man/jfr.1 b/src/jdk.jfr/share/man/jfr.1 index c6a568582c0..71a487f558e 100644 --- a/src/jdk.jfr/share/man/jfr.1 +++ b/src/jdk.jfr/share/man/jfr.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JFR" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JFR" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jlink/share/man/jlink.1 b/src/jdk.jlink/share/man/jlink.1 index 1a0b79a39c1..9f4bf38ffa5 100644 --- a/src/jdk.jlink/share/man/jlink.1 +++ b/src/jdk.jlink/share/man/jlink.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JLINK" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JLINK" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jlink/share/man/jmod.1 b/src/jdk.jlink/share/man/jmod.1 index b26685615ed..4475505e524 100644 --- a/src/jdk.jlink/share/man/jmod.1 +++ b/src/jdk.jlink/share/man/jmod.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JMOD" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JMOD" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jpackage/share/man/jpackage.1 b/src/jdk.jpackage/share/man/jpackage.1 index f9848200059..13d9c41c31d 100644 --- a/src/jdk.jpackage/share/man/jpackage.1 +++ b/src/jdk.jpackage/share/man/jpackage.1 @@ -35,7 +35,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JPACKAGE" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JPACKAGE" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jshell/share/man/jshell.1 b/src/jdk.jshell/share/man/jshell.1 index 28160bb49fd..6f478e57442 100644 --- a/src/jdk.jshell/share/man/jshell.1 +++ b/src/jdk.jshell/share/man/jshell.1 @@ -36,7 +36,7 @@ . ftr VB CB . ftr VBI CBI .\} -.TH "JSHELL" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JSHELL" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/src/jdk.jstatd/share/man/jstatd.1 b/src/jdk.jstatd/share/man/jstatd.1 index cbbaeaf49f2..4bd90104624 100644 --- a/src/jdk.jstatd/share/man/jstatd.1 +++ b/src/jdk.jstatd/share/man/jstatd.1 @@ -35,7 +35,7 @@ . ftr VB CB . 
ftr VBI CBI .\} -.TH "JSTATD" "1" "2024" "JDK 23-ea" "JDK Commands" +.TH "JSTATD" "1" "2025" "JDK 24-ea" "JDK Commands" .hy .SH NAME .PP diff --git a/test/hotspot/gtest/gc/shared/test_collectorPolicy.cpp b/test/hotspot/gtest/gc/shared/test_collectorPolicy.cpp index 40df20ba964..102ede42de6 100644 --- a/test/hotspot/gtest/gc/shared/test_collectorPolicy.cpp +++ b/test/hotspot/gtest/gc/shared/test_collectorPolicy.cpp @@ -212,18 +212,6 @@ class TestGenCollectorPolicy { // depends on so many other configurable variables. These tests only try to // verify that there are some basic rules for NewSize honored by the policies. -// If NewSize has been ergonomically set, the collector policy -// should use it for min -// This test doesn't work with 64k pages. See JDK-8331675. -#if !defined(PPC) -TEST_VM(CollectorPolicy, young_min_ergo) { - TestGenCollectorPolicy::SetNewSizeErgo setter(20 * M); - TestGenCollectorPolicy::CheckYoungMin checker(20 * M); - - TestGenCollectorPolicy::TestWrapper::test(&setter, &checker); -} -#endif - // If NewSize has been ergonomically set, the collector policy // should use it for min but calculate the initial young size // using NewRatio. diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt index 5da6ebaa6d5..78e09c0627a 100644 --- a/test/hotspot/jtreg/ProblemList.txt +++ b/test/hotspot/jtreg/ProblemList.txt @@ -122,6 +122,7 @@ serviceability/sa/TestRevPtrsForInvokeDynamic.java 8241235 generic-all serviceability/jvmti/ModuleAwareAgents/ThreadStart/MAAThreadStart.java 8225354 windows-all serviceability/jvmti/vthread/GetThreadStateMountedTest/GetThreadStateMountedTest.java 8318090,8318729 generic-all serviceability/jvmti/vthread/GetSetLocalTest/GetSetLocalTest.java 8286836 generic-all +serviceability/jvmti/vthread/CarrierThreadEventNotification/CarrierThreadEventNotification.java 8333681 generic-all serviceability/dcmd/gc/RunFinalizationTest.java 8227120 generic-all serviceability/sa/ClhsdbCDSCore.java 8267433 macosx-x64 diff --git a/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java b/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java new file mode 100644 index 00000000000..327e6e5e12d --- /dev/null +++ b/test/hotspot/jtreg/compiler/loopopts/superword/TestVectorFPReduction.java @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2024, Arm Limited. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */
+
+package compiler.loopopts.superword;
+
+import compiler.lib.ir_framework.*;
+
+/*
+ * @test
+ * @bug 8320725
+ * @summary Ensure strictly ordered AddReductionVF/VD and MulReductionVF/VD nodes
+ *          are generated when these operations are auto-vectorized
+ * @library /test/lib /
+ * @run driver compiler.loopopts.superword.TestVectorFPReduction
+ */
+
+public class TestVectorFPReduction {
+
+    final private static int SIZE = 1024;
+
+    private static double[] da = new double[SIZE];
+    private static double[] db = new double[SIZE];
+    private static float[] fa = new float[SIZE];
+    private static float[] fb = new float[SIZE];
+    private static float fresult;
+    private static double dresult;
+
+    public static void main(String[] args) {
+        TestFramework.run();
+    }
+
+    @Test
+    @IR(failOn = {IRNode.ADD_REDUCTION_VF},
+        applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
+    @IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VF, ">=1"},
+        failOn = {"no_strict_order"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        phase = CompilePhase.PRINT_IDEAL)
+    private static void testAddReductionVF() {
+        float result = 1;
+        for (int i = 0; i < SIZE; i++) {
+            result += (fa[i] + fb[i]);
+        }
+        fresult += result;
+    }
+
+    @Test
+    @IR(failOn = {IRNode.ADD_REDUCTION_VD},
+        applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
+    @IR(counts = {"requires_strict_order", ">=1", IRNode.ADD_REDUCTION_VD, ">=1"},
+        failOn = {"no_strict_order"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        phase = CompilePhase.PRINT_IDEAL)
+    private static void testAddReductionVD() {
+        double result = 1;
+        for (int i = 0; i < SIZE; i++) {
+            result += (da[i] + db[i]);
+        }
+        dresult += result;
+    }
+
+    @Test
+    @IR(failOn = {IRNode.MUL_REDUCTION_VF},
+        applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
+    @IR(counts = {"requires_strict_order", ">=1", IRNode.MUL_REDUCTION_VF, ">=1"},
+        failOn = {"no_strict_order"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        phase = CompilePhase.PRINT_IDEAL)
+    private static void testMulReductionVF() {
+        float result = 1;
+        for (int i = 0; i < SIZE; i++) {
+            result *= (fa[i] + fb[i]);
+        }
+        fresult += result;
+    }
+
+    @Test
+    @IR(failOn = {IRNode.MUL_REDUCTION_VD},
+        applyIfCPUFeatureAnd = {"asimd", "true", "sve", "false"})
+    @IR(counts = {"requires_strict_order", ">=1", IRNode.MUL_REDUCTION_VD, ">=1"},
+        failOn = {"no_strict_order"},
+        applyIfCPUFeatureOr = {"sve", "true", "sse2", "true"},
+        phase = CompilePhase.PRINT_IDEAL)
+    private static void testMulReductionVD() {
+        double result = 1;
+        for (int i = 0; i < SIZE; i++) {
+            result *= (da[i] + db[i]);
+        }
+        dresult += result;
+    }
+}
diff --git a/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java b/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java
new file mode 100644
index 00000000000..549d9aa5d49
--- /dev/null
+++ b/test/hotspot/jtreg/compiler/vectorapi/TestVectorAddMulReduction.java
@@ -0,0 +1,211 @@
+/*
+ * Copyright (c) 2024, Arm Limited. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package compiler.vectorapi;
+
+import compiler.lib.ir_framework.*;
+
+import jdk.incubator.vector.DoubleVector;
+import jdk.incubator.vector.FloatVector;
+import jdk.incubator.vector.VectorOperators;
+import jdk.incubator.vector.VectorSpecies;
+
+import java.util.Random;
+
+import jdk.test.lib.Asserts;
+import jdk.test.lib.Utils;
+
+/**
+ * @test
+ * @bug 8320725
+ * @library /test/lib /
+ * @summary Verify non-strictly ordered AddReductionVF/VD and MulReductionVF/VD
+ *          nodes are generated in VectorAPI
+ * @modules jdk.incubator.vector
+ * @run driver compiler.vectorapi.TestVectorAddMulReduction
+ */
+
+public class TestVectorAddMulReduction {
+
+    private static final int SIZE = 1024;
+    private static final Random RD = Utils.getRandomInstance();
+
+    private static float[] fa;
+    private static float fres;
+    private static double[] da;
+    private static double dres;
+
+    static {
+        fa = new float[SIZE];
+        da = new double[SIZE];
+        fres = 1;
+        dres = 1;
+        for (int i = 0; i < SIZE; i++) {
+            fa[i] = RD.nextFloat();
+            da[i] = RD.nextDouble();
+        }
+    }
+
+    // Test add reduction operation for floats
+    @ForceInline
+    public static void testFloatAddKernel(VectorSpecies SPECIES, float[] f) {
+        for (int i = 0; i < SPECIES.loopBound(f.length); i += SPECIES.length()) {
+            var av = FloatVector.fromArray(SPECIES, f, i);
+            fres += av.reduceLanes(VectorOperators.ADD);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=8"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatAdd_64() {
+        testFloatAddKernel(FloatVector.SPECIES_64, fa);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatAdd_128() {
+        testFloatAddKernel(FloatVector.SPECIES_128, fa);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=32"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatAdd_256() {
+        testFloatAddKernel(FloatVector.SPECIES_256, fa);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=64"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatAdd_512() {
+        testFloatAddKernel(FloatVector.SPECIES_512, fa);
+    }
+
+    // Test add reduction operation for doubles
+    @ForceInline
+    public static void testDoubleAddKernel(VectorSpecies SPECIES, double[] d) {
+        for (int i = 0; i < SPECIES.loopBound(d.length); i += SPECIES.length()) {
+            var av = DoubleVector.fromArray(SPECIES, d, i);
+            dres += av.reduceLanes(VectorOperators.ADD);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testDoubleAdd_128() {
+        testDoubleAddKernel(DoubleVector.SPECIES_128, da);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=32"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testDoubleAdd_256() {
+        testDoubleAddKernel(DoubleVector.SPECIES_256, da);
+    }
+
+    @Test
+    @IR(counts = {IRNode.ADD_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=64"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testDoubleAdd_512() {
+        testDoubleAddKernel(DoubleVector.SPECIES_512, da);
+    }
+
+    // Test mul reduction operation for floats.
+    // On aarch64, there are no direct vector instructions for float/double mul
+    // reduction; scalar instructions are emitted for 64-bit/128-bit vectors.
+    // Thus MulReductionVF/VD nodes are generated only for vector lengths of
+    // 8B/16B in the Vector API.
+    @ForceInline
+    public static void testFloatMulKernel(VectorSpecies SPECIES, float[] f) {
+        for (int i = 0; i < SPECIES.loopBound(f.length); i += SPECIES.length()) {
+            var av = FloatVector.fromArray(SPECIES, f, i);
+            fres += av.reduceLanes(VectorOperators.MUL);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MUL_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=8"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatMul_64() {
+        testFloatMulKernel(FloatVector.SPECIES_64, fa);
+    }
+
+    @Test
+    @IR(counts = {IRNode.MUL_REDUCTION_VF, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testFloatMul_128() {
+        testFloatMulKernel(FloatVector.SPECIES_128, fa);
+    }
+
+    // Test mul reduction operation for doubles
+    @ForceInline
+    public static void testDoubleMulKernel(VectorSpecies SPECIES, double[] d) {
+        for (int i = 0; i < SPECIES.loopBound(d.length); i += SPECIES.length()) {
+            var av = DoubleVector.fromArray(SPECIES, d, i);
+            dres += av.reduceLanes(VectorOperators.MUL);
+        }
+    }
+
+    @Test
+    @IR(counts = {IRNode.MUL_REDUCTION_VD, ">=1", "no_strict_order", ">=1"},
+        failOn = {"requires_strict_order"},
+        applyIfCPUFeatureOr = {"asimd", "true", "sse2", "true"},
+        applyIf = {"MaxVectorSize", ">=16"},
+        phase = CompilePhase.PRINT_IDEAL)
+    public static void testDoubleMul_128() {
+        testDoubleMulKernel(DoubleVector.SPECIES_128, da);
+    }
+
+    public static void main(String[] args) {
+        TestFramework.runWithFlags("--add-modules=jdk.incubator.vector");
+    }
+}
diff --git a/test/jdk/java/lang/instrument/NativeMethodPrefixApp.java b/test/jdk/java/lang/instrument/NativeMethodPrefixApp.java
index 5060e67ca87..65cc54793cb 100644
--- a/test/jdk/java/lang/instrument/NativeMethodPrefixApp.java
+++ b/test/jdk/java/lang/instrument/NativeMethodPrefixApp.java
@@ -25,6 +25,7 @@
 import java.io.File;
 import java.nio.file.Path;
 import java.lang.management.*;
+import java.util.zip.CRC32;
 
 import bootreporter.*;
 import jdk.test.lib.helpers.ClassFileInstaller;
@@ -96,6 +97,10 @@ private static void launchApp(final Path agentJar) throws Exception {
         final OutputAnalyzer oa = ProcessTools.executeTestJava(
                 "--enable-preview", // due to usage of ClassFile API PreviewFeature in the agent
                 "-javaagent:" + agentJar.toString(),
+                // We disable CheckIntrinsics because the NativeMethodPrefixAgent modifies
+                // the native method names, which then causes a failure in the VM check
+                // for the presence of an intrinsic on a @IntrinsicCandidate native method.
+                "-XX:+UnlockDiagnosticVMOptions", "-XX:-CheckIntrinsics",
                 NativeMethodPrefixApp.class.getName());
         oa.shouldHaveExitValue(0);
         // make available stdout/stderr in the logs, even in case of successful completion
@@ -109,6 +114,10 @@ private void run() throws Exception {
         java.lang.reflect.Array.getLength(new short[5]);
         RuntimeMXBean mxbean = ManagementFactory.getRuntimeMXBean();
         System.err.println(mxbean.getVmVendor());
+        // Simply load a class containing an @IntrinsicCandidate on a native method
+        // to exercise the VM code which verifies the presence of the intrinsic
+        // implementation for that method.
+        System.err.println(new CRC32());
 
         NativeMethodPrefixAgent.checkErrors();